Commit e2c6ab01 authored by Ilya Lavrenov

Refactored filter2D; eliminated the restrictions sdepth == ddepth and delta == 0.

parent b6833fdd
...@@ -3154,74 +3154,50 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, ...@@ -3154,74 +3154,50 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
InputArray _kernel, Point anchor, InputArray _kernel, Point anchor,
double delta, int borderType ) double delta, int borderType )
{ {
if (abs(delta) > FLT_MIN) int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
return false; ddepth = ddepth < 0 ? sdepth : ddepth;
int dtype = CV_MAKE_TYPE(ddepth, cn), wdepth = std::max(std::max(sdepth, ddepth), CV_32F),
int type = _src.type(), cn = CV_MAT_CN(type); wtype = CV_MAKE_TYPE(wdepth, cn);
if (cn > 4) if (cn > 4)
return false; return false;
int sdepth = CV_MAT_DEPTH(type);
Size ksize = _kernel.size(); Size ksize = _kernel.size();
if( anchor.x < 0 ) if (anchor.x < 0)
anchor.x = ksize.width / 2; anchor.x = ksize.width / 2;
if( anchor.y < 0 ) if (anchor.y < 0)
anchor.y = ksize.height / 2; anchor.y = ksize.height / 2;
if( ddepth < 0 )
ddepth = sdepth;
else if (ddepth != sdepth)
return false;
bool isIsolatedBorder = (borderType & BORDER_ISOLATED) != 0; bool isolated = (borderType & BORDER_ISOLATED) != 0;
bool useDouble = (CV_64F == sdepth); borderType &= ~BORDER_ISOLATED;
const cv::ocl::Device &device = cv::ocl::Device::getDefault(); const cv::ocl::Device &device = cv::ocl::Device::getDefault();
int doubleFPConfig = device.doubleFPConfig(); bool doubleSupport = device.doubleFPConfig() > 0;
if (useDouble && (0 == doubleFPConfig)) if (wdepth == CV_64F && !doubleSupport)
return false; return false;
const char* btype = NULL; const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT",
switch (borderType & ~BORDER_ISOLATED) "BORDER_WRAP", "BORDER_REFLECT_101" };
{
case BORDER_CONSTANT:
btype = "BORDER_CONSTANT";
break;
case BORDER_REPLICATE:
btype = "BORDER_REPLICATE";
break;
case BORDER_REFLECT:
btype = "BORDER_REFLECT";
break;
case BORDER_WRAP:
return false;
case BORDER_REFLECT101:
btype = "BORDER_REFLECT_101";
break;
}
cv::Mat kernelMat = _kernel.getMat(); cv::Mat kernelMat = _kernel.getMat();
std::vector<float> kernelMatDataFloat; std::vector<float> kernelMatDataFloat;
std::vector<double> kernelMatDataDouble; int kernel_size_y2_aligned = _prepareKernelFilter2D<float>(kernelMatDataFloat, kernelMat);
int kernel_size_y2_aligned = useDouble ?
_prepareKernelFilter2D<double>(kernelMatDataDouble, kernelMat)
: _prepareKernelFilter2D<float>(kernelMatDataFloat, kernelMat);
cv::Size sz = _src.size(), wholeSize;
size_t globalsize[2] = { sz.width, sz.height }, localsize[2] = { 0, 1 };
cv::Size sz = _src.size(); ocl::Kernel k;
size_t globalsize[2] = {sz.width, sz.height}; UMat src = _src.getUMat();
size_t localsize[2] = {0, 1}; if (!isolated)
ocl::Kernel kernel;
UMat src; Size wholeSize;
if (!isIsolatedBorder)
{ {
src = _src.getUMat();
Point ofs; Point ofs;
src.locateROI(wholeSize, ofs); src.locateROI(wholeSize, ofs);
} }
size_t maxWorkItemSizes[32]; device.maxWorkItemSizes(maxWorkItemSizes); size_t maxWorkItemSizes[32];
device.maxWorkItemSizes(maxWorkItemSizes);
size_t tryWorkItems = maxWorkItemSizes[0]; size_t tryWorkItems = maxWorkItemSizes[0];
for (;;) char cvt[2][40];
for ( ; ; )
{ {
size_t BLOCK_SIZE = tryWorkItems; size_t BLOCK_SIZE = tryWorkItems;
while (BLOCK_SIZE > 32 && BLOCK_SIZE >= (size_t)ksize.width * 2 && BLOCK_SIZE > (size_t)sz.width * 2) while (BLOCK_SIZE > 32 && BLOCK_SIZE >= (size_t)ksize.width * 2 && BLOCK_SIZE > (size_t)sz.width * 2)
...@@ -3241,32 +3217,36 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, ...@@ -3241,32 +3217,36 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
int requiredLeft = (int)BLOCK_SIZE; // not this: anchor.x; int requiredLeft = (int)BLOCK_SIZE; // not this: anchor.x;
int requiredBottom = ksize.height - 1 - anchor.y; int requiredBottom = ksize.height - 1 - anchor.y;
int requiredRight = (int)BLOCK_SIZE; // not this: ksize.width - 1 - anchor.x; int requiredRight = (int)BLOCK_SIZE; // not this: ksize.width - 1 - anchor.x;
int h = isIsolatedBorder ? sz.height : wholeSize.height; int h = isolated ? sz.height : wholeSize.height;
int w = isIsolatedBorder ? sz.width : wholeSize.width; int w = isolated ? sz.width : wholeSize.width;
bool extra_extrapolation = h < requiredTop || h < requiredBottom || w < requiredLeft || w < requiredRight; bool extra_extrapolation = h < requiredTop || h < requiredBottom || w < requiredLeft || w < requiredRight;
if ((w < ksize.width) || (h < ksize.height)) if ((w < ksize.width) || (h < ksize.height))
return false; return false;
char build_options[1024]; String opts = format("-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D cn=%d "
sprintf(build_options, "-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d -D USE_DOUBLE=%d " "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D KERNEL_SIZE_Y2_ALIGNED=%d " "-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s "
"-D %s -D %s -D %s", "-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
(int)BLOCK_SIZE, (int)BLOCK_SIZE_Y, "-D convertToWT=%s -D convertToDstT=%s",
sdepth, cn, useDouble ? 1 : 0, (int)BLOCK_SIZE, (int)BLOCK_SIZE_Y, cn, anchor.x, anchor.y,
anchor.x, anchor.y, ksize.width, ksize.height, kernel_size_y2_aligned, ksize.width, ksize.height, kernel_size_y2_aligned, borderMap[borderType],
btype, extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED",
isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED"); doubleSupport ? " -D DOUBLE_SUPPORT" : "",
ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype),
ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth),
ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]),
ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]));
localsize[0] = BLOCK_SIZE; localsize[0] = BLOCK_SIZE;
globalsize[0] = DIVUP(sz.width, BLOCK_SIZE - (ksize.width - 1)) * BLOCK_SIZE; globalsize[0] = DIVUP(sz.width, BLOCK_SIZE - (ksize.width - 1)) * BLOCK_SIZE;
globalsize[1] = DIVUP(sz.height, BLOCK_SIZE_Y); globalsize[1] = DIVUP(sz.height, BLOCK_SIZE_Y);
cv::String errmsg; if (!k.create("filter2D", cv::ocl::imgproc::filter2D_oclsrc, opts))
if (!kernel.create("filter2D", cv::ocl::imgproc::filter2D_oclsrc, build_options))
return false; return false;
size_t kernelWorkGroupSize = kernel.workGroupSize();
size_t kernelWorkGroupSize = k.workGroupSize();
if (localsize[0] <= kernelWorkGroupSize) if (localsize[0] <= kernelWorkGroupSize)
break; break;
if (BLOCK_SIZE < kernelWorkGroupSize) if (BLOCK_SIZE < kernelWorkGroupSize)
...@@ -3274,46 +3254,19 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, ...@@ -3274,46 +3254,19 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
tryWorkItems = kernelWorkGroupSize; tryWorkItems = kernelWorkGroupSize;
} }
_dst.create(sz, CV_MAKETYPE(ddepth, cn)); _dst.create(sz, dtype);
UMat dst = _dst.getUMat(); UMat dst = _dst.getUMat(), kernalDataUMat(kernelMatDataFloat, true);
if (src.empty())
src = _src.getUMat();
int idxArg = 0;
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));
idxArg = kernel.set(idxArg, (int)src.step);
int srcOffsetX = (int)((src.offset % src.step) / src.elemSize()); int srcOffsetX = (int)((src.offset % src.step) / src.elemSize());
int srcOffsetY = (int)(src.offset / src.step); int srcOffsetY = (int)(src.offset / src.step);
int srcEndX = (isIsolatedBorder ? (srcOffsetX + sz.width) : wholeSize.width); int srcEndX = (isolated ? (srcOffsetX + sz.width) : wholeSize.width);
int srcEndY = (isIsolatedBorder ? (srcOffsetY + sz.height) : wholeSize.height); int srcEndY = (isolated ? (srcOffsetY + sz.height) : wholeSize.height);
idxArg = kernel.set(idxArg, srcOffsetX);
idxArg = kernel.set(idxArg, srcOffsetY); k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffsetX, srcOffsetY,
idxArg = kernel.set(idxArg, srcEndX); srcEndX, srcEndY, ocl::KernelArg::WriteOnly(dst),
idxArg = kernel.set(idxArg, srcEndY); ocl::KernelArg::PtrReadOnly(kernalDataUMat), (float)delta);
idxArg = kernel.set(idxArg, ocl::KernelArg::WriteOnly(dst)); return k.run(2, globalsize, localsize, false);
float borderValue[4] = {0, 0, 0, 0};
double borderValueDouble[4] = {0, 0, 0, 0};
if ((borderType & ~BORDER_ISOLATED) == BORDER_CONSTANT)
{
int cnocl = 3 == cn ? 4 : cn;
if (useDouble)
idxArg = kernel.set(idxArg, (void *)&borderValueDouble[0], sizeof(double) * cnocl);
else
idxArg = kernel.set(idxArg, (void *)&borderValue[0], sizeof(float) * cnocl);
}
if (useDouble)
{
UMat kernalDataUMat(kernelMatDataDouble, true);
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(kernalDataUMat));
}
else
{
UMat kernalDataUMat(kernelMatDataFloat, true);
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(kernalDataUMat));
}
return kernel.run(2, globalsize, localsize, true);
} }
static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX, int anchor, static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX, int anchor,
......
This diff is collapsed.
...@@ -62,6 +62,7 @@ PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool, bool) ...@@ -62,6 +62,7 @@ PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool, bool)
int borderType; int borderType;
bool useRoi; bool useRoi;
Mat kernel; Mat kernel;
double delta;
TEST_DECLARE_INPUT_PARAMETER(src); TEST_DECLARE_INPUT_PARAMETER(src);
TEST_DECLARE_OUTPUT_PARAMETER(dst); TEST_DECLARE_OUTPUT_PARAMETER(dst);
...@@ -91,6 +92,8 @@ PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool, bool) ...@@ -91,6 +92,8 @@ PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool, bool)
anchor.x = randomInt(-1, ksize.width); anchor.x = randomInt(-1, ksize.width);
anchor.y = randomInt(-1, ksize.height); anchor.y = randomInt(-1, ksize.height);
delta = randomDouble(-100, 100);
UMAT_UPLOAD_INPUT_PARAMETER(src); UMAT_UPLOAD_INPUT_PARAMETER(src);
UMAT_UPLOAD_OUTPUT_PARAMETER(dst); UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
} }
...@@ -108,8 +111,8 @@ OCL_TEST_P(Filter2D, Mat) ...@@ -108,8 +111,8 @@ OCL_TEST_P(Filter2D, Mat)
{ {
random_roi(); random_roi();
OCL_OFF(cv::filter2D(src_roi, dst_roi, -1, kernel, anchor, 0.0, borderType)); OCL_OFF(cv::filter2D(src_roi, dst_roi, -1, kernel, anchor, delta, borderType));
OCL_ON(cv::filter2D(usrc_roi, udst_roi, -1, kernel, anchor, 0.0, borderType)); OCL_ON(cv::filter2D(usrc_roi, udst_roi, -1, kernel, anchor, delta, borderType));
Near(1.0); Near(1.0);
} }
......
...@@ -152,8 +152,8 @@ OCL_TEST_P(LaplacianTest, Accuracy) ...@@ -152,8 +152,8 @@ OCL_TEST_P(LaplacianTest, Accuracy)
{ {
random_roi(); random_roi();
OCL_OFF(cv::Laplacian(src_roi, dst_roi, -1, ksize, scale, 0, borderType)); OCL_OFF(cv::Laplacian(src_roi, dst_roi, -1, ksize, scale, 10, borderType));
OCL_ON(cv::Laplacian(usrc_roi, udst_roi, -1, ksize, scale, 0, borderType)); OCL_ON(cv::Laplacian(usrc_roi, udst_roi, -1, ksize, scale, 10, borderType));
Near(); Near();
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment