Commit 4a37ac30 authored by mletavin

Added new Intel-optimized 3x3 and 5x5 kernels to medianFilter.cl file and…

Added new Intel-optimized 3x3 and 5x5 kernels to the medianFilter.cl file and the corresponding host-side code in smooth.cpp.

The optimized kernels are used only on Intel devices and only for single-channel images; all other cases continue to use the existing kernels.
parent ada2879f
This diff is collapsed.
...@@ -2014,14 +2014,21 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) ...@@ -2014,14 +2014,21 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m )
static bool ocl_medianFilter(InputArray _src, OutputArray _dst, int m) static bool ocl_medianFilter(InputArray _src, OutputArray _dst, int m)
{ {
size_t localsize[2] = { 16, 16 };
size_t globalsize[2];
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
if ( !((depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F) && cn <= 4 && (m == 3 || m == 5)) ) if ( !((depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F) && cn <= 4 && (m == 3 || m == 5)) )
return false; return false;
ocl::Kernel k(format("medianFilter%d", m).c_str(), ocl::imgproc::medianFilter_oclsrc, bool useOptimized = (1 == cn) && (ocl::Device::getDefault().isIntel());
format("-D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type),
ocl::typeToStr(depth), cn)); cv::String kname = format( useOptimized ? "medianFilter%d_u" : "medianFilter%d", m) ;
ocl::Kernel k(kname.c_str(), ocl::imgproc::medianFilter_oclsrc,
format("-D T=%s -D T1=%s -D T4=%s%d -D cn=%d", ocl::typeToStr(type),
ocl::typeToStr(depth), ocl::typeToStr(depth), cn*4, cn));
if (k.empty()) if (k.empty())
return false; return false;
...@@ -2031,7 +2038,17 @@ static bool ocl_medianFilter(InputArray _src, OutputArray _dst, int m) ...@@ -2031,7 +2038,17 @@ static bool ocl_medianFilter(InputArray _src, OutputArray _dst, int m)
k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst)); k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst));
size_t globalsize[2] = { (src.cols + 18) / 16 * 16, (src.rows + 15) / 16 * 16}, localsize[2] = { 16, 16 }; if( useOptimized )
{
globalsize[0] = DIVUP(src.cols / 4, localsize[0]) * localsize[0];
globalsize[1] = DIVUP(src.rows / 4, localsize[1]) * localsize[1];
}
else
{
globalsize[0] = (src.cols + localsize[0] + 2) / localsize[0] * localsize[0];
globalsize[1] = (src.rows + localsize[1] - 1) / localsize[1] * localsize[1];
}
return k.run(2, globalsize, localsize, false); return k.run(2, globalsize, localsize, false);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment