Commit a8aa6381 authored by vbystricky's avatar vbystricky

Optimize OpenCL version of conversScaleAbs function

parent 54b59c3c
...@@ -618,7 +618,7 @@ CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noAr ...@@ -618,7 +618,7 @@ CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noAr
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
OclVectorStrategy strat = OCL_VECTOR_DEFAULT); OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
CV_EXPORTS int checkOptimalVectorWidth(int *vectorWidths, CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths,
InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
......
...@@ -3275,13 +3275,26 @@ static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth) ...@@ -3275,13 +3275,26 @@ static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta ) static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta )
{ {
const ocl::Device & d = ocl::Device::getDefault(); const ocl::Device & d = ocl::Device::getDefault();
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
kercn = ocl::predictOptimalVectorWidth(_src, _dst), rowsPerWI = d.isIntel() ? 4 : 1;
bool doubleSupport = d.doubleFPConfig() > 0;
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
bool doubleSupport = d.doubleFPConfig() > 0;
if (!doubleSupport && depth == CV_64F) if (!doubleSupport && depth == CV_64F)
return false; return false;
_dst.create(_src.size(), CV_8UC(cn));
int kercn = 1;
if (d.isIntel())
{
static const int vectorWidths[] = {4, 4, 4, 4, 4, 4, 4, -1};
kercn = ocl::checkOptimalVectorWidth( vectorWidths, _src, _dst,
noArray(), noArray(), noArray(),
noArray(), noArray(), noArray(),
noArray(), ocl::OCL_VECTOR_MAX);
}
else
kercn = ocl::predictOptimalVectorWidthMax(_src, _dst);
int rowsPerWI = d.isIntel() ? 4 : 1;
char cvt[2][50]; char cvt[2][50];
int wdepth = std::max(depth, CV_32F); int wdepth = std::max(depth, CV_32F);
String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s" String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s"
...@@ -3299,7 +3312,6 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha ...@@ -3299,7 +3312,6 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
return false; return false;
UMat src = _src.getUMat(); UMat src = _src.getUMat();
_dst.create(src.size(), CV_8UC(cn));
UMat dst = _dst.getUMat(); UMat dst = _dst.getUMat();
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
......
...@@ -4531,12 +4531,14 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, ...@@ -4531,12 +4531,14 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat); return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
} }
int checkOptimalVectorWidth(int *vectorWidths, int checkOptimalVectorWidth(const int *vectorWidths,
InputArray src1, InputArray src2, InputArray src3, InputArray src1, InputArray src2, InputArray src3,
InputArray src4, InputArray src5, InputArray src6, InputArray src4, InputArray src5, InputArray src6,
InputArray src7, InputArray src8, InputArray src9, InputArray src7, InputArray src8, InputArray src9,
OclVectorStrategy strat) OclVectorStrategy strat)
{ {
CV_Assert(vectorWidths);
int ref_type = src1.type(); int ref_type = src1.type();
std::vector<size_t> offsets, steps, cols; std::vector<size_t> offsets, steps, cols;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment