Fix ocl::filter2D.

In the current implementation, this function only works when the anchor point is at the kernel center and the kernel size is either 3x3 or 5x5.

Fix ocl::filter2D.
In the current implementation, this function only works when the anchor point is at the kernel center and the kernel size is either 3x3 or 5x5.
b1c248fc · peng xiao · 37091b08 · b1c248fc · b1c248fc · b1c248fc
Commit b1c248fc authored May 31, 2013 by peng xiao
Showing with 15 additions and 9 deletions

ocl.hpp modules/ocl/include/opencv2/ocl/ocl.hpp +2 -0

filtering.cpp modules/ocl/src/filtering.cpp +7 -3

filtering_laplacian.cl modules/ocl/src/opencl/filtering_laplacian.cl +6 -6

No files found.
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -689,6 +689,8 @@ namespace cv
        }
        //! applies non-separable 2D linear filter to the image
+        //  Note: at the moment this function only works when the anchor point is at the kernel center
+        //  and the kernel size is either 3x3 or 5x5; otherwise the function will fail to produce a valid result.
        CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
                                 Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);

--- a/modules/ocl/src/filtering.cpp
+++ b/modules/ocl/src/filtering.cpp
@@ -645,7 +645,11 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
    args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols));
    args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows));
-    openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth);
+    const int buffer_size = 100;
+    char opt_buffer [buffer_size] = "";
+    sprintf(opt_buffer, "-DANCHOR=%d -DANX=%d -DANY=%d", ksize.width, anchor.x, anchor.y);
+    openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth, opt_buffer);
 }
 Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
        Point anchor, int borderType)
@@ -656,7 +660,7 @@ Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const
    oclMat gpu_krnl;
    int nDivisor;
-    normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);
+    normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, false);
    normalizeAnchor(anchor, ksize);
    return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)],
@@ -1172,7 +1176,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
    args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy));
    args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
-    openCLExecuteKernel2(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option, CLFLUSH);
+    openCLExecuteKernel(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option);
 }
 Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype)

--- a/modules/ocl/src/opencl/filtering_laplacian.cl
+++ b/modules/ocl/src/opencl/filtering_laplacian.cl
@@ -82,9 +82,9 @@
 //////////////////////////////////////////////////////////////////////////////////////////////////////
 /////////////////////////////Macro for define elements number per thread/////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////
-#define ANCHOR                  3
+//#define ANCHOR                  3
-#define ANX                     1
+//#define ANX                     1
-#define ANY                     1
+//#define ANY                     1
 #define ROWS_PER_GROUP          4
 #define ROWS_PER_GROUP_BITS     2
@@ -185,7 +185,7 @@ __kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x
        for(int i = 0; i < ANCHOR; i++)
        {
-#pragma unroll 3
+#pragma unroll
            for(int j = 0; j < ANCHOR; j++)
            {
                if(dst_rows_index < dst_rows_end)
@@ -295,7 +295,7 @@ __kernel void filter2D_C1_D5(__global float *src, int src_step, int src_offset_x
        for(int i = 0; i < ANCHOR; i++)
        {
-#pragma unroll 3
+#pragma unroll
            for(int j = 0; j < ANCHOR; j++)
            {
                if(dst_rows_index < dst_rows_end)
@@ -410,7 +410,7 @@ __kernel void filter2D_C4_D0(__global uchar4 *src, int src_step, int src_offset_
        for(int i = 0; i < ANCHOR; i++)
        {
-#pragma unroll 3
+#pragma unroll
            for(int j = 0; j < ANCHOR; j++)
            {
                if(dst_rows_index < dst_rows_end)