Commit fd59551f authored by Alexander Alekhin

Merge pull request #3354 from vbystricky:oclopt_convertScaleAbs

parents efebd83b a8aa6381
...@@ -618,7 +618,7 @@ CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noAr ...@@ -618,7 +618,7 @@ CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noAr
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
OclVectorStrategy strat = OCL_VECTOR_DEFAULT); OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
CV_EXPORTS int checkOptimalVectorWidth(int *vectorWidths, CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths,
InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
......
...@@ -3275,13 +3275,26 @@ static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth) ...@@ -3275,13 +3275,26 @@ static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta ) static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta )
{ {
const ocl::Device & d = ocl::Device::getDefault(); const ocl::Device & d = ocl::Device::getDefault();
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
kercn = ocl::predictOptimalVectorWidth(_src, _dst), rowsPerWI = d.isIntel() ? 4 : 1;
bool doubleSupport = d.doubleFPConfig() > 0;
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
bool doubleSupport = d.doubleFPConfig() > 0;
if (!doubleSupport && depth == CV_64F) if (!doubleSupport && depth == CV_64F)
return false; return false;
_dst.create(_src.size(), CV_8UC(cn));
int kercn = 1;
if (d.isIntel())
{
static const int vectorWidths[] = {4, 4, 4, 4, 4, 4, 4, -1};
kercn = ocl::checkOptimalVectorWidth( vectorWidths, _src, _dst,
noArray(), noArray(), noArray(),
noArray(), noArray(), noArray(),
noArray(), ocl::OCL_VECTOR_MAX);
}
else
kercn = ocl::predictOptimalVectorWidthMax(_src, _dst);
int rowsPerWI = d.isIntel() ? 4 : 1;
char cvt[2][50]; char cvt[2][50];
int wdepth = std::max(depth, CV_32F); int wdepth = std::max(depth, CV_32F);
String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s" String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s"
...@@ -3299,7 +3312,6 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha ...@@ -3299,7 +3312,6 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
return false; return false;
UMat src = _src.getUMat(); UMat src = _src.getUMat();
_dst.create(src.size(), CV_8UC(cn));
UMat dst = _dst.getUMat(); UMat dst = _dst.getUMat();
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
......
...@@ -4536,12 +4536,14 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, ...@@ -4536,12 +4536,14 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat); return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
} }
int checkOptimalVectorWidth(int *vectorWidths, int checkOptimalVectorWidth(const int *vectorWidths,
InputArray src1, InputArray src2, InputArray src3, InputArray src1, InputArray src2, InputArray src3,
InputArray src4, InputArray src5, InputArray src6, InputArray src4, InputArray src5, InputArray src6,
InputArray src7, InputArray src8, InputArray src9, InputArray src7, InputArray src8, InputArray src9,
OclVectorStrategy strat) OclVectorStrategy strat)
{ {
CV_Assert(vectorWidths);
int ref_type = src1.type(); int ref_type = src1.type();
std::vector<size_t> offsets, steps, cols; std::vector<size_t> offsets, steps, cols;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment