Commit 6ae9870d, authored by Roman Donchenko and committed by OpenCV Buildbot

Merge pull request #1254 from pengx17:2.4_filter2d_

parents bcba3fc6 124ede61
......@@ -691,7 +691,7 @@ namespace cv
//! returns 2D filter with the specified kernel
// accepts CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3 and CV_32FC4 source types (dstType must equal srcType)
CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
//! returns the non-separable linear filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
......
......@@ -572,7 +572,7 @@ void cv::ocl::morphologyEx(const oclMat &src, oclMat &dst, int op, const Mat &ke
namespace
{
typedef void (*GPUFilter2D_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point, const int);
typedef void (*GPUFilter2D_t)(const oclMat & , oclMat & , const oclMat & , const Size &, const Point&, const int);
class LinearFilter_GPU : public BaseFilter_GPU
{
......@@ -591,21 +591,22 @@ public:
};
}
// NOTE(review): this span shows the removed and the added lines of the change
// back to back without +/- markers (two signatures, two declarations each of
// cn, kernelName, localThreads, globalThreads and vector_lengths, two kernel
// launches), so it is not compilable as written. Comments below describe only
// what the individual lines do.
//
// GPUFilter2D collects src, dst and mat_kernel buffers plus geometry values
// into an argument list and runs the "filter2D" / "filter2D_3x3" kernel from
// filtering_laplacian through openCLExecuteKernel.
static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
Size &ksize, const Point anchor, const int borderType)
static void GPUFilter2D(const oclMat &src, oclMat &dst, const oclMat &mat_kernel,
const Size &ksize, const Point& anchor, const int borderType)
{
// Preconditions: same OpenCL context, same size and channel count for src and
// dst; kernel size positive, odd and square; anchor either unset (-1, -1) or
// at the kernel centre.
CV_Assert(src.clCxt == dst.clCxt);
CV_Assert((src.cols == dst.cols) &&
(src.rows == dst.rows));
CV_Assert((src.oclchannels() == dst.oclchannels()));
// NOTE(review): presumably a removed line — the switch further down handles
// borderType 0 explicitly, which this assertion would reject.
CV_Assert((borderType != 0));
CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1));
CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1));
CV_Assert(ksize.width == ksize.height);
Context *clCxt = src.clCxt;
int cn = src.oclchannels();
int depth = src.depth();
string kernelName = "filter2D";
int filterWidth = ksize.width;
bool ksize_3x3 = filterWidth == 3 && src.type() != CV_32FC4; // CV_32FC4 is not tuned up with filter2d_3x3 kernel
string kernelName = ksize_3x3 ? "filter2D_3x3" : "filter2D";
// Split the byte offset of each matrix into an element column and a row index.
size_t src_offset_x = (src.offset % src.step) / src.elemSize();
size_t src_offset_y = src.offset / src.step;
......@@ -613,55 +614,81 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
size_t dst_offset_x = (dst.offset % dst.step) / dst.elemSize();
size_t dst_offset_y = dst.offset / dst.step;
int vector_lengths[4][7] = {{4, 4, 4, 4, 4, 4, 4},
{4, 4, 1, 1, 1, 1, 1},
{1, 1, 1, 1, 1, 1, 1},
{4, 4, 4, 4, 1, 1, 4}
};
// filterWidth with its lowest bit cleared; filterWidth is asserted odd above,
// so this equals filterWidth - 1.
int paddingPixels = filterWidth & (-2);
// Work-group is 256x1 on the 3x3 path and 16x16 otherwise; the global size is
// src.wholecols x src.wholerows.
size_t localThreads[3] = {ksize_3x3 ? 256 : 16, ksize_3x3 ? 1 : 16, 1};
size_t globalThreads[3] = {src.wholecols, src.wholerows, 1};
int vector_length = vector_lengths[cn - 1][depth];
int offset_cols = (dst_offset_x) & (vector_length - 1);
int cols = dst.cols + offset_cols;
int rows = divUp(dst.rows, vector_length);
int cn = src.oclchannels();
// Steps converted from bytes to elements.
// NOTE(review): dst.step is divided by src.elemSize(), not dst.elemSize().
int src_step = (int)(src.step/src.elemSize());
int dst_step = (int)(dst.step/src.elemSize());
// Local tile dimensions: work-group size plus paddingPixels in each direction.
int localWidth = localThreads[0] + paddingPixels;
int localHeight = localThreads[1] + paddingPixels;
size_t localThreads[3] = {256, 1, 1};
size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
divUp(rows, localThreads[1]) *localThreads[1], 1
};
// 260 = divup((localThreads[0] + filterWidth * 2), 4) * 4
// 6 = (ROWS_PER_GROUP_WHICH_IS_4 + filterWidth * 2)
size_t localMemSize = ksize_3x3 ? 260 * 6 * src.elemSize() : (localWidth * localHeight) * src.elemSize();
int vector_lengths[4][7] = {{4, 4, 4, 4, 4, 4, 4},
{4, 4, 1, 1, 1, 1, 1},
{1, 1, 1, 1, 1, 1, 1},
{4, 4, 4, 4, 1, 1, 4}
};
// dst.cols widened by dst_offset_x modulo the table entry for (cn, depth);
// every entry is 1 or 4, so "& (len - 1)" acts as a modulo.
int cols = dst.cols + ((dst_offset_x) & (vector_lengths[cn - 1][src.depth()] - 1));
// Argument list as (size, pointer) pairs, in the order they are pushed.
vector< pair<size_t, const void *> > args;
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back(make_pair(sizeof(cl_int), (void *)&src_step));
args.push_back(make_pair(sizeof(cl_int), (void *)&dst_step));
args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
// Size-only entry with a NULL pointer; presumably reserves localMemSize bytes
// of kernel local memory — TODO confirm against openCLExecuteKernel.
args.push_back(make_pair(localMemSize, (void *)NULL));
args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows));
args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols));
// NOTE(review): the offsets are size_t variables but are pushed with
// sizeof(cl_int) — confirm this is intended on all target platforms.
args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset_x));
args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset_y));
args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back(make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back(make_pair(sizeof(cl_int), (void *)&dst_offset_x));
args.push_back(make_pair(sizeof(cl_int), (void *)&dst_offset_y));
args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
args.push_back(make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back(make_pair(sizeof(cl_int), (void *)&cols));
args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols));
args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows));
const int buffer_size = 100;
char opt_buffer [buffer_size] = "";
sprintf(opt_buffer, "-DANCHOR=%d -DANX=%d -DANY=%d", ksize.width, anchor.x, anchor.y);
openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth, opt_buffer);
// Translate borderType into the macro name placed in the build options.
// NOTE(review): there is no default case, so btype is left uninitialized for
// any borderType outside 0..4 and is then read by the sprintf below.
char btype[30];
switch (borderType)
{
case 0:
sprintf(btype, "BORDER_CONSTANT");
break;
case 1:
sprintf(btype, "BORDER_REPLICATE");
break;
case 2:
sprintf(btype, "BORDER_REFLECT");
break;
case 3:
CV_Error(CV_StsUnsupportedFormat, "BORDER_WRAP is not supported!");
return;
case 4:
sprintf(btype, "BORDER_REFLECT_101");
break;
}
int type = src.depth();
// Build options carry the border macro, an IMG_C_<cn>_<depth> macro, the
// channel count and the filter size; channels/depth are passed as -1 here
// because they are already encoded in the options.
char build_options[150];
sprintf(build_options, "-D %s -D IMG_C_%d_%d -D CN=%d -D FILTER_SIZE=%d", btype, cn, type, cn, ksize.width);
openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
Point anchor, int borderType)
const Point &anchor, int borderType)
{
static const GPUFilter2D_t GPUFilter2D_callers[] = {0, GPUFilter2D, 0, GPUFilter2D, GPUFilter2D};
CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType);
oclMat gpu_krnl;
int nDivisor;
normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, false);
normalizeAnchor(anchor, ksize);
Point norm_archor = anchor;
normalizeKernel(kernel, gpu_krnl, CV_32FC1);
normalizeAnchor(norm_archor, ksize);
return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)],
borderType));
......
......@@ -324,6 +324,35 @@ TEST_P(GaussianBlur, Mat)
////////////////////////////////////////////////////////////////////////////////////////////////////
// Filter2D
// Fixture for the Filter2D tests. It reads the matrix type, kernel size and
// border type from the test parameters and lets FilterTestBase::Init prepare
// the data for that type.
struct Filter2D : FilterTestBase
{
    int      type;        // matrix type under test (parameter 0)
    cv::Size ksize;       // kernel size (parameter 1)
    int      bordertype;  // border mode (parameter 3)
    Point    anchor;      // fixed at (-1, -1) by SetUp

    virtual void SetUp()
    {
        // Parameter slot 2 is deliberately never read by this fixture.
        type       = GET_PARAM(0);
        ksize      = GET_PARAM(1);
        bordertype = GET_PARAM(3);

        Init(type);

        anchor = Point(-1, -1);
    }
};
// Runs cv::filter2D and cv::ocl::filter2D with the same random CV_32FC1 kernel
// on freshly randomized ROIs, LOOP_TIMES times, and checks the results with
// Near(1) after every iteration.
TEST_P(Filter2D, Mat)
{
    // One kernel is generated up front and reused for every iteration.
    const cv::Size kernelSize(ksize.width, ksize.height);
    cv::Mat kernel = randomMat(kernelSize, CV_32FC1, 0.0, 1.0);

    int iteration = 0;
    while (iteration < LOOP_TIMES)
    {
        random_roi();

        // Reference result first, then the OpenCL implementation.
        cv::filter2D(mat1_roi, dst_roi, -1, kernel, anchor, 0.0, bordertype);
        cv::ocl::filter2D(gmat1, gdst, -1, kernel, anchor, bordertype);

        Near(1);
        iteration++;
    }
}
INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
......@@ -331,7 +360,7 @@ INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine(
INSTANTIATE_TEST_CASE_P(Filter, Laplacian, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(Size(3, 3)),
Values(Size(0, 0)), //not use
......@@ -365,4 +394,10 @@ INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
// Parameter tuple for the Filter2D fixture: (0) matrix type, (1) kernel size,
// (2) placeholder that Filter2D::SetUp never reads, (3) border type.
INSTANTIATE_TEST_CASE_P(Filter, Filter2D, testing::Combine(
Values(CV_8UC1, CV_32FC1, CV_32FC4),
Values(Size(3, 3), Size(15, 15), Size(25, 25)),
Values(Size(0, 0)), // not used
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REFLECT101, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT)));
#endif // HAVE_OPENCL
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment