Commit 381986d0 authored by Alexander Alekhin's avatar Alexander Alekhin

Merge pull request #2882 from akarsakov:ocl_pyrDown_opt

parents 3490a5ad d5c99a07
This diff is collapsed.
......@@ -405,10 +405,10 @@ typedef void (*PyrFunc)(const Mat&, Mat&, int);
static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if (channels > 4 || (depth == CV_64F && !doubleSupport))
if (cn > 4 || (depth == CV_64F && !doubleSupport))
return false;
Size ssize = _src.size();
......@@ -423,17 +423,20 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
const int local_size = 256;
int kercn = 1;
if (depth == CV_8U && float_depth == CV_32F && cn == 1 && ocl::Device::getDefault().isIntel())
kercn = 4;
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
"BORDER_REFLECT_101" };
char cvt[2][50];
String buildOptions = format(
"-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
"-D T1=%s -D cn=%d -D %s -D LOCAL_SIZE=%d",
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
ocl::typeToStr(depth), channels, borderMap[borderType], local_size
"-D T1=%s -D cn=%d -D kercn=%d -D fdepth=%d -D %s -D LOCAL_SIZE=%d",
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, cn)),
ocl::convertTypeStr(float_depth, depth, cn, cvt[0]),
ocl::convertTypeStr(depth, float_depth, cn, cvt[1]),
doubleSupport ? " -D DOUBLE_SUPPORT" : "", ocl::typeToStr(depth),
cn, kercn, float_depth, borderMap[borderType], local_size
);
ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
if (k.empty())
......@@ -441,8 +444,8 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
size_t localThreads[2] = { local_size, 1 };
size_t globalThreads[2] = { src.cols, dst.rows };
size_t localThreads[2] = { local_size/kercn, 1 };
size_t globalThreads[2] = { (src.cols + (kercn-1))/kercn, dst.rows };
return k.run(2, globalThreads, localThreads, false);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment