Commit 10a52220 authored by Alexander Karsakov

Added integer arithmetic to sepFilter2D

parent d17142b8
...@@ -3275,6 +3275,7 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX ...@@ -3275,6 +3275,7 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX
int type = src.type(), cn = CV_MAT_CN(type), sdepth = CV_MAT_DEPTH(type); int type = src.type(), cn = CV_MAT_CN(type), sdepth = CV_MAT_DEPTH(type);
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
Size bufSize = buf.size(); Size bufSize = buf.size();
int buf_type = buf.type(), bdepth = CV_MAT_DEPTH(buf_type);
if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
return false; return false;
...@@ -3306,11 +3307,11 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX ...@@ -3306,11 +3307,11 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX
radiusX, (int)localsize[0], (int)localsize[1], cn, btype, radiusX, (int)localsize[0], (int)localsize[1], cn, btype,
extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED",
ocl::typeToStr(type), ocl::typeToStr(CV_32FC(cn)), ocl::typeToStr(type), ocl::typeToStr(buf_type),
ocl::convertTypeStr(sdepth, CV_32F, cn, cvt), ocl::convertTypeStr(sdepth, bdepth, cn, cvt),
ocl::typeToStr(sdepth), ocl::typeToStr(CV_32F), ocl::typeToStr(sdepth), ocl::typeToStr(bdepth),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""); doubleSupport ? " -D DOUBLE_SUPPORT" : "");
build_options += ocl::kernelToStr(kernelX, CV_32F); build_options += ocl::kernelToStr(kernelX, bdepth);
Size srcWholeSize; Point srcOffset; Size srcWholeSize; Point srcOffset;
src.locateROI(srcWholeSize, srcOffset); src.locateROI(srcWholeSize, srcOffset);
...@@ -3337,7 +3338,7 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX ...@@ -3337,7 +3338,7 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX
return k.run(2, globalsize, localsize, false); return k.run(2, globalsize, localsize, false);
} }
static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, double delta, int anchor) static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, double delta, int anchor, int bits)
{ {
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if (dst.depth() == CV_64F && !doubleSupport) if (dst.depth() == CV_64F && !doubleSupport)
...@@ -3352,6 +3353,7 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY ...@@ -3352,6 +3353,7 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY
int dtype = dst.type(), cn = CV_MAT_CN(dtype), ddepth = CV_MAT_DEPTH(dtype); int dtype = dst.type(), cn = CV_MAT_CN(dtype), ddepth = CV_MAT_DEPTH(dtype);
Size sz = dst.size(); Size sz = dst.size();
int buf_type = buf.type(), bdepth = CV_MAT_DEPTH(buf_type);
globalsize[1] = DIVUP(sz.height, localsize[1]) * localsize[1]; globalsize[1] = DIVUP(sz.height, localsize[1]) * localsize[1];
globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0]; globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
...@@ -3359,13 +3361,13 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY ...@@ -3359,13 +3361,13 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY
char cvt[40]; char cvt[40];
cv::String build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d" cv::String build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d"
" -D srcT=%s -D dstT=%s -D convertToDstT=%s" " -D srcT=%s -D dstT=%s -D convertToDstT=%s"
" -D srcT1=%s -D dstT1=%s%s", " -D srcT1=%s -D dstT1=%s -D BITS=%d%s",
anchor, (int)localsize[0], (int)localsize[1], cn, anchor, (int)localsize[0], (int)localsize[1], cn,
ocl::typeToStr(buf.type()), ocl::typeToStr(dtype), ocl::typeToStr(buf_type), ocl::typeToStr(dtype),
ocl::convertTypeStr(CV_32F, ddepth, cn, cvt), ocl::convertTypeStr(bdepth, ddepth, cn, cvt),
ocl::typeToStr(CV_32F), ocl::typeToStr(ddepth), ocl::typeToStr(bdepth), ocl::typeToStr(ddepth),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""); bits, doubleSupport ? " -D DOUBLE_SUPPORT" : "");
build_options += ocl::kernelToStr(kernelY, CV_32F); build_options += ocl::kernelToStr(kernelY, bdepth);
ocl::Kernel k("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, ocl::Kernel k("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc,
build_options); build_options);
...@@ -3457,13 +3459,13 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, ...@@ -3457,13 +3459,13 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
if (ddepth < 0) if (ddepth < 0)
ddepth = sdepth; ddepth = sdepth;
CV_OCL_RUN_(kernelY.cols <= 21 && kernelX.cols <= 21 && //CV_OCL_RUN_(kernelY.cols <= 21 && kernelX.cols <= 21 &&
imgSize.width > optimizedSepFilterLocalSize + (kernelX.cols >> 1) && // imgSize.width > optimizedSepFilterLocalSize + (kernelX.cols >> 1) &&
imgSize.height > optimizedSepFilterLocalSize + (kernelY.cols >> 1) && // imgSize.height > optimizedSepFilterLocalSize + (kernelY.cols >> 1) &&
(!(borderType & BORDER_ISOLATED) || _src.offset() == 0) && anchor == Point(-1, -1) && // (!(borderType & BORDER_ISOLATED) || _src.offset() == 0) && anchor == Point(-1, -1) &&
(d.isIntel() || (d.isAMD() && !d.hostUnifiedMemory())), // (d.isIntel() || (d.isAMD() && !d.hostUnifiedMemory())),
ocl_sepFilter2D_SinglePass(_src, _dst, kernelX, kernelY, delta, // ocl_sepFilter2D_SinglePass(_src, _dst, kernelX, kernelY, delta,
borderType & ~BORDER_ISOLATED, ddepth), true) // borderType & ~BORDER_ISOLATED, ddepth), true)
if (anchor.x < 0) if (anchor.x < 0)
anchor.x = kernelX.cols >> 1; anchor.x = kernelX.cols >> 1;
...@@ -3474,19 +3476,45 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, ...@@ -3474,19 +3476,45 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
Size srcWholeSize; Point srcOffset; Size srcWholeSize; Point srcOffset;
src.locateROI(srcWholeSize, srcOffset); src.locateROI(srcWholeSize, srcOffset);
bool fast8uc1 = type == CV_8UC1 && srcOffset.x % 4 == 0 && //bool fast8uc1 = type == CV_8UC1 && srcOffset.x % 4 == 0 &&
src.cols % 4 == 0 && src.step % 4 == 0; // src.cols % 4 == 0 && src.step % 4 == 0;
bool fast8uc1 = false;
int rtype = getKernelType(kernelX,
kernelX.rows == 1 ? Point(anchor.x, 0) : Point(0, anchor.x));
int ctype = getKernelType(kernelY,
kernelY.rows == 1 ? Point(anchor.y, 0) : Point(0, anchor.y));
int bdepth = CV_32F;
int bits = 0;
if( sdepth == CV_8U &&
((rtype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL &&
ctype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL &&
ddepth == CV_8U)))
{
bdepth = CV_32S;
bits = 8;
_kernelX.getMat().convertTo( kernelX, CV_32S, 1 << bits );
_kernelY.getMat().convertTo( kernelY, CV_32S, 1 << bits );
kernelX = kernelX.reshape(1,1);
kernelY = kernelY.reshape(1,1);
bits *= 2;
delta *= (1 << bits);
}
Size srcSize = src.size(); Size srcSize = src.size();
Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1); Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1);
UMat buf(bufSize, CV_32FC(cn)); UMat buf(bufSize, CV_MAKETYPE(bdepth, cn));
if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, ddepth, fast8uc1)) if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, ddepth, fast8uc1))
return false; return false;
Mat buffer = buf.getMat(ACCESS_READ);
_dst.create(srcSize, CV_MAKETYPE(ddepth, cn)); _dst.create(srcSize, CV_MAKETYPE(ddepth, cn));
UMat dst = _dst.getUMat(); UMat dst = _dst.getUMat();
return ocl_sepColFilter2D(buf, dst, kernelY, delta, anchor.y); return ocl_sepColFilter2D(buf, dst, kernelY, delta, anchor.y, bits);
} }
#endif #endif
......
...@@ -60,7 +60,7 @@ ...@@ -60,7 +60,7 @@
#endif #endif
#define DIG(a) a, #define DIG(a) a,
__constant float mat_kernel[] = { COEFF }; __constant srcT1 mat_kernel[] = { COEFF };
__kernel void col_filter(__global const uchar * src, int src_step, int src_offset, int src_whole_rows, int src_whole_cols, __kernel void col_filter(__global const uchar * src, int src_step, int src_offset, int src_whole_rows, int src_whole_cols,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta) __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta)
...@@ -97,8 +97,13 @@ __kernel void col_filter(__global const uchar * src, int src_step, int src_offse ...@@ -97,8 +97,13 @@ __kernel void col_filter(__global const uchar * src, int src_step, int src_offse
temp[0] = LDS_DAT[l_y + RADIUSY - i][l_x]; temp[0] = LDS_DAT[l_y + RADIUSY - i][l_x];
temp[1] = LDS_DAT[l_y + RADIUSY + i][l_x]; temp[1] = LDS_DAT[l_y + RADIUSY + i][l_x];
sum += mad(temp[0], mat_kernel[RADIUSY - i], temp[1] * mat_kernel[RADIUSY + i]); sum += mad(temp[0], mat_kernel[RADIUSY - i], temp[1] * mat_kernel[RADIUSY + i]);
//sum += temp[0]*mat_kernel[RADIUSY - i] + temp[1] * mat_kernel[RADIUSY + i];
} }
#if BITS > 0
sum = sum >> BITS;
#endif
// write the result to dst // write the result to dst
if (x < dst_cols && y < dst_rows) if (x < dst_cols && y < dst_rows)
{ {
......
...@@ -138,7 +138,7 @@ ...@@ -138,7 +138,7 @@
#endif #endif
#define DIG(a) a, #define DIG(a) a,
__constant float mat_kernel[] = { COEFF }; __constant dstT1 mat_kernel[] = { COEFF };
__kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel, int src_offset_x, int src_offset_y, __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel, int src_offset_x, int src_offset_y,
int src_cols, int src_rows, int src_whole_cols, int src_whole_rows, int src_cols, int src_rows, int src_whole_cols, int src_whole_rows,
...@@ -356,6 +356,7 @@ __kernel void row_filter(__global const uchar * src, int src_step, int src_offse ...@@ -356,6 +356,7 @@ __kernel void row_filter(__global const uchar * src, int src_step, int src_offse
temp[0] = LDS_DAT[l_y][l_x + RADIUSX - i]; temp[0] = LDS_DAT[l_y][l_x + RADIUSX - i];
temp[1] = LDS_DAT[l_y][l_x + RADIUSX + i]; temp[1] = LDS_DAT[l_y][l_x + RADIUSX + i];
sum += mad(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]); sum += mad(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]);
//sum += convertToDstT(temp[0])*mat_kernel[RADIUSX - i] + convertToDstT(temp[1]) * mat_kernel[RADIUSX + i];
} }
// write the result to dst // write the result to dst
......
...@@ -1196,12 +1196,12 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize, ...@@ -1196,12 +1196,12 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
} }
#endif #endif
if (type == CV_8U) //if (type == CV_8U)
{ //{
CV_OCL_RUN_(_dst.isUMat() && _src.dims() <= 2 && // CV_OCL_RUN_(_dst.isUMat() && _src.dims() <= 2 &&
(!(borderType & BORDER_ISOLATED) || _src.offset() == 0), // (!(borderType & BORDER_ISOLATED) || _src.offset() == 0),
GaussianBlur_8u(_src, _dst, ksize, sigma1, sigma2, borderType)) // GaussianBlur_8u(_src, _dst, ksize, sigma1, sigma2, borderType))
} //}
Mat kx, ky; Mat kx, ky;
createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2); createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2);
......
...@@ -209,7 +209,7 @@ typedef FilterTestBase GaussianBlurTest; ...@@ -209,7 +209,7 @@ typedef FilterTestBase GaussianBlurTest;
OCL_TEST_P(GaussianBlurTest, Mat) OCL_TEST_P(GaussianBlurTest, Mat)
{ {
for (int j = 0; j < test_loop_times; j++) for (int j = 0; j < test_loop_times + 100; j++)
{ {
random_roi(); random_roi();
...@@ -222,7 +222,8 @@ OCL_TEST_P(GaussianBlurTest, Mat) ...@@ -222,7 +222,8 @@ OCL_TEST_P(GaussianBlurTest, Mat)
if (checkNorm2(dst_roi, udst_roi) > 2 && CV_MAT_DEPTH(type) == CV_8U) if (checkNorm2(dst_roi, udst_roi) > 2 && CV_MAT_DEPTH(type) == CV_8U)
{ {
Mat udst = udst_roi.getMat(ACCESS_READ); std::cout << "i = " << j << std::endl;
Mat uudst = udst_roi.getMat(ACCESS_READ);
Mat diff; Mat diff;
absdiff(dst_roi, udst, diff); absdiff(dst_roi, udst, diff);
int nonZero = countNonZero(diff); int nonZero = countNonZero(diff);
...@@ -231,11 +232,15 @@ OCL_TEST_P(GaussianBlurTest, Mat) ...@@ -231,11 +232,15 @@ OCL_TEST_P(GaussianBlurTest, Mat)
minMaxLoc(diff, (double*)0, &max, (Point*) 0, &maxn); minMaxLoc(diff, (double*)0, &max, (Point*) 0, &maxn);
uchar a = dst_roi.at<uchar>(maxn); uchar a = dst_roi.at<uchar>(maxn);
uchar b = udst.at<uchar>(maxn); uchar b = uudst.at<uchar>(maxn);
std::cout << "dst_roi" << dst_roi << std::endl;
std::cout << "udst_roi" << uudst << std::endl;
} }
Near(CV_MAT_DEPTH(type) == CV_8U ? 2 : 5e-5, false);
Near(CV_MAT_DEPTH(type) == CV_8U ? 1 : 5e-5, false);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment