@@ -3616,165 +3616,4 @@ bool OpenclDevice::selectedDeviceIsNativeCPU() {
3616
3616
return (device.type == DS_DEVICE_NATIVE_CPU);
3617
3617
}
3618
3618
3619
/*!
 * pixConvertRGBToGray() from leptonica, converted to an OpenCL kernel.
 *
 *   Input:  pix (32 bpp RGB)
 *           rwt, gwt, bwt (non-negative; these should add up to 1.0,
 *                          or pass 0.0 for all three to use the defaults)
 *   Return: 8 bpp pix, or null on error
 *
 * Notes:
 *   (1) Uses a weighted average of the RGB values.
 */
3630
// Stores `val` as a byte at byte offset `n` from `pdata`, with the low two
// bits of the target address flipped (address XOR 3). Within each aligned
// 32-bit word this reverses the byte order: offsets 0,1,2,3 land on bytes
// 3,2,1,0 of the word.
// NOTE(review): this looks like the little-endian variant of leptonica's
// SET_DATA_BYTE -- confirm before building for a big-endian target.
// The '(' must follow the macro name with no whitespace in between; otherwise
// the preprocessor defines an object-like macro and the parameter list becomes
// part of the replacement text.
#define SET_DATA_BYTE(pdata, n, val) \
  (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val))
3632
-
3633
- Pix *OpenclDevice::pixConvertRGBToGrayOCL (Pix *srcPix, // 32-bit source
3634
- float rwt, float gwt, float bwt) {
3635
- PERF_COUNT_START (" pixConvertRGBToGrayOCL" )
3636
- Pix *dstPix; // 8-bit destination
3637
-
3638
- if (rwt < 0.0 || gwt < 0.0 || bwt < 0.0 ) return nullptr ;
3639
-
3640
- if (rwt == 0.0 && gwt == 0.0 && bwt == 0.0 ) {
3641
- // magic numbers from leptonica
3642
- rwt = 0.3 ;
3643
- gwt = 0.5 ;
3644
- bwt = 0.2 ;
3645
- }
3646
- // normalize
3647
- float sum = rwt + gwt + bwt;
3648
- rwt /= sum;
3649
- gwt /= sum;
3650
- bwt /= sum;
3651
-
3652
- // source pix
3653
- int w, h;
3654
- pixGetDimensions (srcPix, &w, &h, nullptr );
3655
- // printf("Image is %i x %i\n", w, h);
3656
- unsigned int *srcData = pixGetData (srcPix);
3657
- int srcWPL = pixGetWpl (srcPix);
3658
- int srcSize = srcWPL * h * sizeof (unsigned int );
3659
-
3660
- // destination pix
3661
- if ((dstPix = pixCreate (w, h, 8 )) == nullptr ) return nullptr ;
3662
- pixCopyResolution (dstPix, srcPix);
3663
- unsigned int *dstData = pixGetData (dstPix);
3664
- int dstWPL = pixGetWpl (dstPix);
3665
- int dstWords = dstWPL * h;
3666
- int dstSize = dstWords * sizeof (unsigned int );
3667
- // printf("dstSize = %i\n", dstSize);
3668
- PERF_COUNT_SUB (" pix setup" )
3669
-
3670
- // opencl objects
3671
- cl_int clStatus;
3672
- KernelEnv kEnv ;
3673
- SetKernelEnv (&kEnv );
3674
-
3675
- // source buffer
3676
- cl_mem srcBuffer =
3677
- clCreateBuffer (kEnv .mpkContext , CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
3678
- srcSize, srcData, &clStatus);
3679
- CHECK_OPENCL (clStatus, " clCreateBuffer srcBuffer" );
3680
-
3681
- // destination buffer
3682
- cl_mem dstBuffer =
3683
- clCreateBuffer (kEnv .mpkContext , CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
3684
- dstSize, dstData, &clStatus);
3685
- CHECK_OPENCL (clStatus, " clCreateBuffer dstBuffer" );
3686
-
3687
- // setup work group size parameters
3688
- int block_size = 256 ;
3689
- int numWorkGroups = ((h * w + block_size - 1 ) / block_size);
3690
- int numThreads = block_size * numWorkGroups;
3691
- size_t local_work_size[] = {static_cast <size_t >(block_size)};
3692
- size_t global_work_size[] = {static_cast <size_t >(numThreads)};
3693
- // printf("Enqueueing %i threads for %i output pixels\n", numThreads, w*h);
3694
-
3695
- /* compile kernel */
3696
- kEnv .mpkKernel =
3697
- clCreateKernel (kEnv .mpkProgram , " kernel_RGBToGray" , &clStatus);
3698
- CHECK_OPENCL (clStatus, " clCreateKernel kernel_RGBToGray" );
3699
-
3700
- /* set kernel arguments */
3701
- clStatus = clSetKernelArg (kEnv .mpkKernel , 0 , sizeof (cl_mem), &srcBuffer);
3702
- CHECK_OPENCL (clStatus, " clSetKernelArg srcBuffer" );
3703
- clStatus = clSetKernelArg (kEnv .mpkKernel , 1 , sizeof (cl_mem), &dstBuffer);
3704
- CHECK_OPENCL (clStatus, " clSetKernelArg dstBuffer" );
3705
- clStatus = clSetKernelArg (kEnv .mpkKernel , 2 , sizeof (int ), &srcWPL);
3706
- CHECK_OPENCL (clStatus, " clSetKernelArg srcWPL" );
3707
- clStatus = clSetKernelArg (kEnv .mpkKernel , 3 , sizeof (int ), &dstWPL);
3708
- CHECK_OPENCL (clStatus, " clSetKernelArg dstWPL" );
3709
- clStatus = clSetKernelArg (kEnv .mpkKernel , 4 , sizeof (int ), &h);
3710
- CHECK_OPENCL (clStatus, " clSetKernelArg height" );
3711
- clStatus = clSetKernelArg (kEnv .mpkKernel , 5 , sizeof (int ), &w);
3712
- CHECK_OPENCL (clStatus, " clSetKernelArg width" );
3713
- clStatus = clSetKernelArg (kEnv .mpkKernel , 6 , sizeof (float ), &rwt);
3714
- CHECK_OPENCL (clStatus, " clSetKernelArg rwt" );
3715
- clStatus = clSetKernelArg (kEnv .mpkKernel , 7 , sizeof (float ), &gwt);
3716
- CHECK_OPENCL (clStatus, " clSetKernelArg gwt" );
3717
- clStatus = clSetKernelArg (kEnv .mpkKernel , 8 , sizeof (float ), &bwt);
3718
- CHECK_OPENCL (clStatus, " clSetKernelArg bwt" );
3719
-
3720
- /* launch kernel & wait */
3721
- PERF_COUNT_SUB (" before" )
3722
- clStatus = clEnqueueNDRangeKernel (kEnv .mpkCmdQueue , kEnv .mpkKernel , 1 ,
3723
- nullptr , global_work_size, local_work_size,
3724
- 0 , nullptr , nullptr );
3725
- CHECK_OPENCL (clStatus, " clEnqueueNDRangeKernel kernel_RGBToGray" );
3726
- clFinish (kEnv .mpkCmdQueue );
3727
- PERF_COUNT_SUB (" kernel" )
3728
-
3729
- /* map results back from gpu */
3730
- void *ptr =
3731
- clEnqueueMapBuffer (kEnv .mpkCmdQueue , dstBuffer, CL_TRUE, CL_MAP_READ, 0 ,
3732
- dstSize, 0 , nullptr , nullptr , &clStatus);
3733
- CHECK_OPENCL (clStatus, " clEnqueueMapBuffer dstBuffer" );
3734
- clEnqueueUnmapMemObject (rEnv.mpkCmdQueue , dstBuffer, ptr, 0 , nullptr ,
3735
- nullptr );
3736
-
3737
- #if 0
3738
- // validate: compute on cpu
3739
- Pix *cpuPix = pixCreate(w, h, 8);
3740
- pixCopyResolution(cpuPix, srcPix);
3741
- unsigned int *cpuData = pixGetData(cpuPix);
3742
- int cpuWPL = pixGetWpl(cpuPix);
3743
- unsigned int *cpuLine, *srcLine;
3744
- int i, j;
3745
- for (i = 0, srcLine = srcData, cpuLine = cpuData; i < h; i++) {
3746
- for (j = 0; j < w; j++) {
3747
- unsigned int word = *(srcLine + j);
3748
- int val = (l_int32)(rwt * ((word >> L_RED_SHIFT) & 0xff) +
3749
- gwt * ((word >> L_GREEN_SHIFT) & 0xff) +
3750
- bwt * ((word >> L_BLUE_SHIFT) & 0xff) + 0.5);
3751
- SET_DATA_BYTE(cpuLine, j, val);
3752
- }
3753
- srcLine += srcWPL;
3754
- cpuLine += cpuWPL;
3755
- }
3756
-
3757
- // validate: compare
3758
- printf("converted 32-bit -> 8-bit image\n");
3759
- for (int row = 0; row < h; row++) {
3760
- for (int col = 0; col < w; col++) {
3761
- int idx = row*w + col;
3762
- unsigned int srcVal = srcData[idx];
3763
- unsigned char cpuVal = ((unsigned char *)cpuData)[idx];
3764
- unsigned char oclVal = ((unsigned char *)dstData)[idx];
3765
- if (srcVal > 0) {
3766
- printf("%4i,%4i: %u, %u, %u\n", row, col, srcVal, cpuVal, oclVal);
3767
- }
3768
- }
3769
- //printf("\n");
3770
- }
3771
- #endif
3772
- // release opencl objects
3773
- clReleaseMemObject (srcBuffer);
3774
- clReleaseMemObject (dstBuffer);
3775
-
3776
- PERF_COUNT_END
3777
- // success
3778
- return dstPix;
3779
- }
3780
3619
#endif
0 commit comments