Commit de431609 authored by krodyush's avatar krodyush

optimize Dx and Dy calcualtion to make it as single opencl kernel

parent 5d5527d0
...@@ -1033,67 +1033,117 @@ namespace cv ...@@ -1033,67 +1033,117 @@ namespace cv
else else
scale = 1. / scale; scale = 1. / scale;
if (ksize > 0) const int sobel_lsz = 16;
if((src.type() == CV_8UC1 || src.type() == CV_32FC1) &&
(ksize==3 || ksize==5 || ksize==7 || ksize==-1) &&
src.wholerows > sobel_lsz + (ksize>>1) &&
src.wholecols > sobel_lsz + (ksize>>1))
{ {
Context* clCxt = Context::getContext(); Dx.create(src.size(), CV_32FC1);
if(clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) && src.type() == CV_8UC1 && Dy.create(src.size(), CV_32FC1);
src.cols % 8 == 0 && src.rows % 8 == 0 &&
ksize==3 && CV_Assert(Dx.rows == Dy.rows && Dx.cols == Dy.cols);
(borderType ==cv::BORDER_REFLECT ||
borderType == cv::BORDER_REPLICATE || size_t lt2[3] = {sobel_lsz, sobel_lsz, 1};
borderType ==cv::BORDER_REFLECT101 || size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
borderType ==cv::BORDER_WRAP))
unsigned int src_pitch = src.step;
unsigned int Dx_pitch = Dx.step;
unsigned int Dy_pitch = Dy.step;
int src_offset_x = (src.offset % src.step) / src.elemSize();
int src_offset_y = src.offset / src.step;
float _scale = scale;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.offset ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dx_pitch ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dy.offset ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dy_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholecols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholerows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.rows ));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&_scale ));
string option = cv::format("-D BLK_X=%d -D BLK_Y=%d",(int)lt2[0],(int)lt2[1]);
switch(src.type())
{ {
Dx.create(src.size(), CV_32FC1); case CV_8UC1:
Dy.create(src.size(), CV_32FC1); option += " -D SRCTYPE=uchar";
break;
const unsigned int block_x = 8; case CV_32FC1:
const unsigned int block_y = 8; option += " -D SRCTYPE=float";
break;
unsigned int src_pitch = src.step;
unsigned int dst_pitch = Dx.cols;
float _scale = scale;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
args.push_back( std::make_pair( sizeof(cl_float) , (void *)&_scale ));
size_t gt2[3] = {src.cols, src.rows, 1}, lt2[3] = {block_x, block_y, 1};
string option = "-D BLK_X=8 -D BLK_Y=8";
switch(borderType)
{
case cv::BORDER_REPLICATE:
option += " -D BORDER_REPLICATE";
break;
case cv::BORDER_REFLECT:
option += " -D BORDER_REFLECT";
break;
case cv::BORDER_REFLECT101:
option += " -D BORDER_REFLECT101";
break;
case cv::BORDER_WRAP:
option += " -D BORDER_WRAP";
break;
}
openCLExecuteKernel(src.clCxt, &imgproc_sobel3, "sobel3", gt2, lt2, args, -1, -1, option.c_str() );
} }
else switch(borderType)
{ {
Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType); case cv::BORDER_CONSTANT:
Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType); option += " -D BORDER_CONSTANT";
break;
case cv::BORDER_REPLICATE:
option += " -D BORDER_REPLICATE";
break;
case cv::BORDER_REFLECT:
option += " -D BORDER_REFLECT";
break;
case cv::BORDER_REFLECT101:
option += " -D BORDER_REFLECT_101";
break;
case cv::BORDER_WRAP:
option += " -D BORDER_WRAP";
break;
default:
CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
break;
} }
string kernel_name;
switch(ksize)
{
case -1:
option += " -D SCHARR";
kernel_name = "sobel3";
break;
case 3:
kernel_name = "sobel3";
break;
case 5:
kernel_name = "sobel5";
break;
case 7:
kernel_name = "sobel7";
break;
default:
CV_Error(CV_StsBadFlag, "Kernel size is not supported!");
break;
}
openCLExecuteKernel(src.clCxt, &imgproc_sobel3, kernel_name, gt2, lt2, args, -1, -1, option.c_str() );
} }
else else
{ {
Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType); if (ksize > 0)
Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType); {
Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType);
Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType);
}
else
{
Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType);
Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType);
}
} }
CV_Assert(Dx.offset == 0 && Dy.offset == 0); CV_Assert(Dx.offset == 0 && Dy.offset == 0);
} }
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment