Commit b1c248fc authored by peng xiao

Fix ocl::filter2D.

In the current implementation, this function only works when the anchor point is
at the kernel center and the kernel size is either 3x3 or 5x5.
parent 37091b08
...@@ -689,6 +689,8 @@ namespace cv ...@@ -689,6 +689,8 @@ namespace cv
} }
//! applies non-separable 2D linear filter to the image //! applies non-separable 2D linear filter to the image
// Note: at the moment this function only works when the anchor point is at the kernel center
// and the kernel size is either 3x3 or 5x5; otherwise the function will fail to produce a valid result.
CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
......
...@@ -645,7 +645,11 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, ...@@ -645,7 +645,11 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols)); args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols));
args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows)); args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows));
openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth); const int buffer_size = 100;
char opt_buffer [buffer_size] = "";
sprintf(opt_buffer, "-DANCHOR=%d -DANX=%d -DANY=%d", ksize.width, anchor.x, anchor.y);
openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth, opt_buffer);
} }
Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize, Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
Point anchor, int borderType) Point anchor, int borderType)
...@@ -656,7 +660,7 @@ Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const ...@@ -656,7 +660,7 @@ Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const
oclMat gpu_krnl; oclMat gpu_krnl;
int nDivisor; int nDivisor;
normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true); normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, false);
normalizeAnchor(anchor, ksize); normalizeAnchor(anchor, ksize);
return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)], return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)],
...@@ -1172,7 +1176,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel ...@@ -1172,7 +1176,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy)); args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy));
args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data)); args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
openCLExecuteKernel2(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option, CLFLUSH); openCLExecuteKernel(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option);
} }
Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype) Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype)
......
...@@ -82,9 +82,9 @@ ...@@ -82,9 +82,9 @@
////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////Macro for define elements number per thread///////////////////////////// /////////////////////////////Macro for define elements number per thread/////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
#define ANCHOR 3 //#define ANCHOR 3
#define ANX 1 //#define ANX 1
#define ANY 1 //#define ANY 1
#define ROWS_PER_GROUP 4 #define ROWS_PER_GROUP 4
#define ROWS_PER_GROUP_BITS 2 #define ROWS_PER_GROUP_BITS 2
...@@ -185,7 +185,7 @@ __kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x ...@@ -185,7 +185,7 @@ __kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x
for(int i = 0; i < ANCHOR; i++) for(int i = 0; i < ANCHOR; i++)
{ {
#pragma unroll 3 #pragma unroll
for(int j = 0; j < ANCHOR; j++) for(int j = 0; j < ANCHOR; j++)
{ {
if(dst_rows_index < dst_rows_end) if(dst_rows_index < dst_rows_end)
...@@ -295,7 +295,7 @@ __kernel void filter2D_C1_D5(__global float *src, int src_step, int src_offset_x ...@@ -295,7 +295,7 @@ __kernel void filter2D_C1_D5(__global float *src, int src_step, int src_offset_x
for(int i = 0; i < ANCHOR; i++) for(int i = 0; i < ANCHOR; i++)
{ {
#pragma unroll 3 #pragma unroll
for(int j = 0; j < ANCHOR; j++) for(int j = 0; j < ANCHOR; j++)
{ {
if(dst_rows_index < dst_rows_end) if(dst_rows_index < dst_rows_end)
...@@ -410,7 +410,7 @@ __kernel void filter2D_C4_D0(__global uchar4 *src, int src_step, int src_offset_ ...@@ -410,7 +410,7 @@ __kernel void filter2D_C4_D0(__global uchar4 *src, int src_step, int src_offset_
for(int i = 0; i < ANCHOR; i++) for(int i = 0; i < ANCHOR; i++)
{ {
#pragma unroll 3 #pragma unroll
for(int j = 0; j < ANCHOR; j++) for(int j = 0; j < ANCHOR; j++)
{ {
if(dst_rows_index < dst_rows_end) if(dst_rows_index < dst_rows_end)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment