Commit bd1a1cc0 authored by Andrey Pavlenko's avatar Andrey Pavlenko Committed by OpenCV Buildbot

Merge pull request #1633 from ilya-lavrenov:ocl_imgproc

parents 599d51ec 4f68f35a
...@@ -52,25 +52,24 @@ using namespace cv::ocl; ...@@ -52,25 +52,24 @@ using namespace cv::ocl;
void cv::ocl::columnSum(const oclMat &src, oclMat &dst) void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
{ {
CV_Assert(src.type() == CV_32FC1); CV_Assert(src.type() == CV_32FC1);
dst.create(src.size(), src.type()); dst.create(src.size(), src.type());
Context *clCxt = src.clCxt; int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
const std::string kernelName = "columnSum";
std::vector< pair<size_t, const void *> > args; std::vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&src_step));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset));
size_t globalThreads[3] = {dst.cols, 1, 1}; size_t globalThreads[3] = {dst.cols, 1, 1};
size_t localThreads[3] = {256, 1, 1}; size_t localThreads[3] = {256, 1, 1};
openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); openCLExecuteKernel(src.clCxt, &imgproc_columnsum, "columnSum", globalThreads, localThreads, args, src.oclchannels(), src.depth());
} }
...@@ -183,111 +183,89 @@ namespace cv ...@@ -183,111 +183,89 @@ namespace cv
void remap( const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int borderType, const Scalar &borderValue ) void remap( const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int borderType, const Scalar &borderValue )
{ {
Context *clCxt = src.clCxt; Context *clCxt = src.clCxt;
bool supportsDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
if (!supportsDouble && src.depth() == CV_64F)
{
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device does not support double");
return;
}
CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST
|| interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4); || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4);
CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)); CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) ||
(map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
CV_Assert(!map2.data || map2.size() == map1.size()); CV_Assert(!map2.data || map2.size() == map1.size());
CV_Assert(dst.size() == map1.size()); CV_Assert(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE || borderType == BORDER_WRAP
|| borderType == BORDER_REFLECT_101 || borderType == BORDER_REFLECT);
dst.create(map1.size(), src.type()); dst.create(map1.size(), src.type());
string kernelName; const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
const char * const channelMap[] = { "", "", "2", "4", "4" };
const char * const interMap[] = { "INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_LINEAR", "INTER_LANCZOS" };
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
"BORDER_REFLECT_101", "BORDER_TRANSPARENT" };
string kernelName = "remap";
if ( map1.type() == CV_32FC2 && !map2.data ) if ( map1.type() == CV_32FC2 && !map2.data )
{ kernelName += "_32FC2";
if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT)
kernelName = "remapLNFConstant";
else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT)
kernelName = "remapNNFConstant";
}
else if (map1.type() == CV_16SC2 && !map2.data) else if (map1.type() == CV_16SC2 && !map2.data)
{ kernelName += "_16SC2";
if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT)
kernelName = "remapLNSConstant";
else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT)
kernelName = "remapNNSConstant";
}
else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
{ kernelName += "_2_32FC1";
if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT) else
kernelName = "remapLNF1Constant"; CV_Error(CV_StsBadArg, "Unsupported map types");
else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT)
kernelName = "remapNNF1Constant";
}
size_t blkSizeX = 16, blkSizeY = 16; int ocn = dst.oclchannels();
size_t glbSizeX; size_t localThreads[3] = { 16, 16, 1};
int cols = dst.cols; size_t globalThreads[3] = { dst.cols, dst.rows, 1};
if (src.type() == CV_8UC1)
{
cols = (dst.cols + dst.offset % 4 + 3) / 4;
glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
} Mat scalar(1, 1, CV_MAKE_TYPE(dst.depth(), ocn), borderValue);
else if (src.type() == CV_32FC1 && interpolation == INTER_LINEAR) std::string buildOptions = format("-D %s -D %s -D T=%s%s", interMap[interpolation],
borderMap[borderType], typeMap[src.depth()], channelMap[ocn]);
if (interpolation != INTER_NEAREST)
{ {
cols = (dst.cols + (dst.offset >> 2) % 4 + 3) / 4; int wdepth = std::max(CV_32F, dst.depth());
glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX; if (!supportsDouble)
wdepth = std::min(CV_32F, wdepth);
buildOptions += format(" -D WT=%s%s -D convertToT=convert_%s%s%s -D convertToWT=convert_%s%s"
" -D convertToWT2=convert_%s2 -D WT2=%s2",
typeMap[wdepth], channelMap[ocn],
typeMap[src.depth()], channelMap[ocn], src.depth() < CV_32F ? "_sat_rte" : "",
typeMap[wdepth], channelMap[ocn],
typeMap[wdepth], typeMap[wdepth]);
} }
else
glbSizeX = dst.cols % blkSizeX == 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX;
size_t glbSizeY = dst.rows % blkSizeY == 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY; int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; int map1_step = map1.step / map1.elemSize(), map1_offset = map1.offset / map1.elemSize();
size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; int map2_step = map2.step / map2.elemSize(), map2_offset = map2.offset / map2.elemSize();
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
if (map1.channels() == 2) args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
{ args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data)); if (!map2.empty())
args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
else
args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat));
}
if (map1.channels() == 1)
{
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&map2.data)); args.push_back( make_pair(sizeof(cl_mem), (void *)&map2.data));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset)); args.push_back( make_pair(sizeof(cl_int), (void *)&src_offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset)); args.push_back( make_pair(sizeof(cl_int), (void *)&map1_offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step)); if (!map2.empty())
args.push_back( make_pair(sizeof(cl_int), (void *)&src.step)); args.push_back( make_pair(sizeof(cl_int), (void *)&map2_offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step)); args.push_back( make_pair(sizeof(cl_int), (void *)&src_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&map1_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); if (!map2.empty())
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&map2_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); args.push_back( make_pair(scalar.elemSize(), (void *)scalar.data));
else
args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat)); openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
}
openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
} }
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
...@@ -448,31 +426,47 @@ namespace cv ...@@ -448,31 +426,47 @@ namespace cv
void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int bordertype, const Scalar &scalar) void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int bordertype, const Scalar &scalar)
{ {
CV_Assert(top >= 0 && bottom >= 0 && left >= 0 && right >= 0); if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
if ((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
{ {
if (((bordertype & cv::BORDER_ISOLATED) == 0) && CV_Error(CV_OpenCLDoubleNotSupported, "Selected device does not support double");
(bordertype != cv::BORDER_CONSTANT) && return;
(bordertype != cv::BORDER_REPLICATE))
{
CV_Error(CV_StsBadArg, "Unsupported border type");
}
} }
oclMat _src = src;
CV_Assert(top >= 0 && bottom >= 0 && left >= 0 && right >= 0);
if( _src.offset != 0 && (bordertype & BORDER_ISOLATED) == 0 )
{
Size wholeSize;
Point ofs;
_src.locateROI(wholeSize, ofs);
int dtop = std::min(ofs.y, top);
int dbottom = std::min(wholeSize.height - _src.rows - ofs.y, bottom);
int dleft = std::min(ofs.x, left);
int dright = std::min(wholeSize.width - _src.cols - ofs.x, right);
_src.adjustROI(dtop, dbottom, dleft, dright);
top -= dtop;
left -= dleft;
bottom -= dbottom;
right -= dright;
}
bordertype &= ~cv::BORDER_ISOLATED; bordertype &= ~cv::BORDER_ISOLATED;
// TODO need to remove this conditions and fix the code
if (bordertype == cv::BORDER_REFLECT || bordertype == cv::BORDER_WRAP) if (bordertype == cv::BORDER_REFLECT || bordertype == cv::BORDER_WRAP)
{ {
CV_Assert((src.cols >= left) && (src.cols >= right) && (src.rows >= top) && (src.rows >= bottom)); CV_Assert((_src.cols >= left) && (_src.cols >= right) && (_src.rows >= top) && (_src.rows >= bottom));
} }
else if (bordertype == cv::BORDER_REFLECT_101) else if (bordertype == cv::BORDER_REFLECT_101)
{ {
CV_Assert((src.cols > left) && (src.cols > right) && (src.rows > top) && (src.rows > bottom)); CV_Assert((_src.cols > left) && (_src.cols > right) && (_src.rows > top) && (_src.rows > bottom));
} }
dst.create(src.rows + top + bottom, src.cols + left + right, src.type()); dst.create(_src.rows + top + bottom, _src.cols + left + right, _src.type());
int srcStep = src.step1() / src.oclchannels(), dstStep = dst.step1() / dst.oclchannels(); int srcStep = _src.step1() / _src.oclchannels(), dstStep = dst.step1() / dst.oclchannels();
int srcOffset = src.offset / src.elemSize(), dstOffset = dst.offset / dst.elemSize(); int srcOffset = _src.offset / _src.elemSize(), dstOffset = dst.offset / dst.elemSize();
int depth = src.depth(), ochannels = src.oclchannels(); int depth = _src.depth(), ochannels = _src.oclchannels();
int __bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101}; int __bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101};
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"}; const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
...@@ -483,19 +477,19 @@ namespace cv ...@@ -483,19 +477,19 @@ namespace cv
break; break;
if (bordertype_index == sizeof(__bordertype) / sizeof(int)) if (bordertype_index == sizeof(__bordertype) / sizeof(int))
CV_Error(CV_StsBadArg, "unsupported border type"); CV_Error(CV_StsBadArg, "Unsupported border type");
string kernelName = "copymakeborder"; string kernelName = "copymakeborder";
size_t localThreads[3] = {16, 16, 1}; size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&_src.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); args.push_back( make_pair( sizeof(cl_int), (void *)&_src.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&_src.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep)); args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep));
args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset)); args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset));
args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep)); args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));
...@@ -1314,6 +1308,8 @@ namespace cv ...@@ -1314,6 +1308,8 @@ namespace cv
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&clipLimit )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&clipLimit ));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&lutScale )); args.push_back( std::make_pair( sizeof(cl_float), (void *)&lutScale ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
String kernelName = "calcLut"; String kernelName = "calcLut";
size_t localThreads[3] = { 32, 8, 1 }; size_t localThreads[3] = { 32, 8, 1 };
...@@ -1333,7 +1329,7 @@ namespace cv ...@@ -1333,7 +1329,7 @@ namespace cv
} }
static void transform(const oclMat &src, oclMat &dst, const oclMat &lut, static void transform(const oclMat &src, oclMat &dst, const oclMat &lut,
const int tilesX, const int tilesY, const cv::Size tileSize) const int tilesX, const int tilesY, const Size & tileSize)
{ {
cl_int2 tile_size; cl_int2 tile_size;
tile_size.s[0] = tileSize.width; tile_size.s[0] = tileSize.width;
...@@ -1351,6 +1347,9 @@ namespace cv ...@@ -1351,6 +1347,9 @@ namespace cv
args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size )); args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesY )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesY ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&lut.offset ));
size_t localThreads[3] = { 32, 8, 1 }; size_t localThreads[3] = { 32, 8, 1 };
size_t globalThreads[3] = { src.cols, src.rows, 1 }; size_t globalThreads[3] = { src.cols, src.rows, 1 };
...@@ -1419,9 +1418,10 @@ namespace cv ...@@ -1419,9 +1418,10 @@ namespace cv
} }
else else
{ {
cv::ocl::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101, cv::Scalar()); ocl::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0,
tilesX_ - (src.cols % tilesX_), BORDER_REFLECT_101, Scalar::all(0));
tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_); tileSize = Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
srcForLut = srcExt_; srcForLut = srcExt_;
} }
...@@ -1579,30 +1579,31 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, st ...@@ -1579,30 +1579,31 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, st
{ {
dst.create(src.size(), src.type()); dst.create(src.size(), src.type());
int channels = dst.oclchannels(), depth = dst.depth();
size_t vector_length = 1;
int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
int rows = dst.rows;
size_t localThreads[3] = { 16, 16, 1 }; size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { cols, rows, 1 }; size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
int temp1_step = temp1.step / temp1.elemSize(), temp1_offset = temp1.offset / temp1.elemSize();
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&temp1.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&temp1.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&src_step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&temp1_step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1_offset ));
openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, dst.depth());
} }
void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y) void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y)
{ {
CV_Assert(x.depth() == CV_32F && t.depth() == CV_32F); CV_Assert(x.depth() == CV_32F && t.depth() == CV_32F);
......
...@@ -53,12 +53,8 @@ int calc_lut(__local int* smem, int val, int tid) ...@@ -53,12 +53,8 @@ int calc_lut(__local int* smem, int val, int tid)
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid == 0) if (tid == 0)
{
for (int i = 1; i < 256; ++i) for (int i = 1; i < 256; ++i)
{
smem[i] += smem[i - 1]; smem[i] += smem[i - 1];
}
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
return smem[tid]; return smem[tid];
...@@ -71,69 +67,51 @@ void reduce(volatile __local int* smem, int val, int tid) ...@@ -71,69 +67,51 @@ void reduce(volatile __local int* smem, int val, int tid)
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128) if (tid < 128)
{
smem[tid] = val += smem[tid + 128]; smem[tid] = val += smem[tid + 128];
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64) if (tid < 64)
{
smem[tid] = val += smem[tid + 64]; smem[tid] = val += smem[tid + 64];
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32) if (tid < 32)
{
smem[tid] += smem[tid + 32]; smem[tid] += smem[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16) if (tid < 16)
{
smem[tid] += smem[tid + 16]; smem[tid] += smem[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8) if (tid < 8)
{
smem[tid] += smem[tid + 8]; smem[tid] += smem[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4) if (tid < 4)
{
smem[tid] += smem[tid + 4]; smem[tid] += smem[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2) if (tid < 2)
{
smem[tid] += smem[tid + 2]; smem[tid] += smem[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1) if (tid < 1)
{
smem[256] = smem[tid] + smem[tid + 1]; smem[256] = smem[tid] + smem[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
#else #else
void reduce(__local volatile int* smem, int val, int tid) void reduce(__local volatile int* smem, int val, int tid)
{ {
smem[tid] = val; smem[tid] = val;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128) if (tid < 128)
{
smem[tid] = val += smem[tid + 128]; smem[tid] = val += smem[tid + 128];
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64) if (tid < 64)
{
smem[tid] = val += smem[tid + 64]; smem[tid] = val += smem[tid + 64];
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32) if (tid < 32)
...@@ -141,12 +119,17 @@ void reduce(__local volatile int* smem, int val, int tid) ...@@ -141,12 +119,17 @@ void reduce(__local volatile int* smem, int val, int tid)
smem[tid] += smem[tid + 32]; smem[tid] += smem[tid + 32];
#if WAVE_SIZE < 32 #if WAVE_SIZE < 32
} barrier(CLK_LOCAL_MEM_FENCE); } barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16) {
if (tid < 16)
{
#endif #endif
smem[tid] += smem[tid + 16]; smem[tid] += smem[tid + 16];
#if WAVE_SIZE < 16 #if WAVE_SIZE < 16
} barrier(CLK_LOCAL_MEM_FENCE); }
if (tid < 8) { barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
#endif #endif
smem[tid] += smem[tid + 8]; smem[tid] += smem[tid + 8];
smem[tid] += smem[tid + 4]; smem[tid] += smem[tid + 4];
...@@ -159,7 +142,8 @@ void reduce(__local volatile int* smem, int val, int tid) ...@@ -159,7 +142,8 @@ void reduce(__local volatile int* smem, int val, int tid)
__kernel void calcLut(__global __const uchar * src, __global uchar * lut, __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
const int srcStep, const int dstStep, const int srcStep, const int dstStep,
const int2 tileSize, const int tilesX, const int2 tileSize, const int tilesX,
const int clipLimit, const float lutScale) const int clipLimit, const float lutScale,
const int src_offset, const int dst_offset)
{ {
__local int smem[512]; __local int smem[512];
...@@ -173,25 +157,21 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut, ...@@ -173,25 +157,21 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
for (int i = get_local_id(1); i < tileSize.y; i += get_local_size(1)) for (int i = get_local_id(1); i < tileSize.y; i += get_local_size(1))
{ {
__global const uchar* srcPtr = src + mad24( ty * tileSize.y + i, __global const uchar* srcPtr = src + mad24(ty * tileSize.y + i, srcStep, tx * tileSize.x + src_offset);
srcStep, tx * tileSize.x );
for (int j = get_local_id(0); j < tileSize.x; j += get_local_size(0)) for (int j = get_local_id(0); j < tileSize.x; j += get_local_size(0))
{ {
const int data = srcPtr[j]; const int data = srcPtr[j];
atomic_inc(&smem[data]); atomic_inc(&smem[data]);
} }
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
int tHistVal = smem[tid]; int tHistVal = smem[tid];
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (clipLimit > 0) if (clipLimit > 0)
{ {
// clip histogram bar // clip histogram bar
int clipped = 0; int clipped = 0;
if (tHistVal > clipLimit) if (tHistVal > clipLimit)
{ {
...@@ -200,7 +180,6 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut, ...@@ -200,7 +180,6 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
} }
// find number of overall clipped samples // find number of overall clipped samples
reduce(smem, clipped, tid); reduce(smem, clipped, tid);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU #ifdef CPU
...@@ -229,7 +208,7 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut, ...@@ -229,7 +208,7 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
const int lutVal = calc_lut(smem, tHistVal, tid); const int lutVal = calc_lut(smem, tHistVal, tid);
uint ires = (uint)convert_int_rte(lutScale * lutVal); uint ires = (uint)convert_int_rte(lutScale * lutVal);
lut[(ty * tilesX + tx) * dstStep + tid] = lut[(ty * tilesX + tx) * dstStep + tid + dst_offset] =
convert_uchar(clamp(ires, (uint)0, (uint)255)); convert_uchar(clamp(ires, (uint)0, (uint)255));
} }
...@@ -239,7 +218,8 @@ __kernel void transform(__global __const uchar * src, ...@@ -239,7 +218,8 @@ __kernel void transform(__global __const uchar * src,
const int srcStep, const int dstStep, const int lutStep, const int srcStep, const int dstStep, const int lutStep,
const int cols, const int rows, const int cols, const int rows,
const int2 tileSize, const int2 tileSize,
const int tilesX, const int tilesY) const int tilesX, const int tilesY,
const int src_offset, const int dst_offset, int lut_offset)
{ {
const int x = get_global_id(0); const int x = get_global_id(0);
const int y = get_global_id(1); const int y = get_global_id(1);
...@@ -261,15 +241,15 @@ __kernel void transform(__global __const uchar * src, ...@@ -261,15 +241,15 @@ __kernel void transform(__global __const uchar * src,
tx1 = max(tx1, 0); tx1 = max(tx1, 0);
tx2 = min(tx2, tilesX - 1); tx2 = min(tx2, tilesX - 1);
const int srcVal = src[mad24(y, srcStep, x)]; const int srcVal = src[mad24(y, srcStep, x + src_offset)];
float res = 0; float res = 0;
res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (1.0f - ya)); res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (1.0f - ya));
res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (1.0f - ya)); res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (1.0f - ya));
res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (ya)); res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (ya));
res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (ya)); res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (ya));
uint ires = (uint)convert_int_rte(res); uint ires = (uint)convert_int_rte(res);
dst[mad24(y, dstStep, x)] = convert_uchar(clamp(ires, (uint)0, (uint)255)); dst[mad24(y, dstStep, x + dst_offset)] = convert_uchar(clamp(ires, (uint)0, (uint)255));
} }
...@@ -43,38 +43,28 @@ ...@@ -43,38 +43,28 @@
// //
//M*/ //M*/
#pragma OPENCL EXTENSION cl_amd_printf : enable
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
///////////////////////// columnSum //////////////////////////////// ///////////////////////// columnSum ////////////////////////////////
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
/// CV_32FC1
__kernel void columnSum_C1_D5(__global float* src,__global float* dst,int srcCols,int srcRows,int srcStep,int dstStep) __kernel void columnSum_C1_D5(__global float * src, __global float * dst,
int cols, int rows, int src_step, int dst_step, int src_offset, int dst_offset)
{ {
const int x = get_global_id(0); const int x = get_global_id(0);
srcStep >>= 2; if (x < cols)
dstStep >>= 2;
if (x < srcCols)
{ {
int srcIdx = x ; int srcIdx = x + src_offset;
int dstIdx = x ; int dstIdx = x + dst_offset;
float sum = 0; float sum = 0;
for (int y = 0; y < srcRows; ++y) for (int y = 0; y < rows; ++y)
{ {
sum += src[srcIdx]; sum += src[srcIdx];
dst[dstIdx] = sum; dst[dstIdx] = sum;
srcIdx += srcStep; srcIdx += src_step;
dstIdx += dstStep; dstIdx += dst_step;
} }
} }
} }
...@@ -48,9 +48,12 @@ ...@@ -48,9 +48,12 @@
#elif defined (__NVIDIA__) #elif defined (__NVIDIA__)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
/************************************** convolve **************************************/ /************************************** convolve **************************************/
__kernel void convolve_D5 (__global float *src, __global float *temp1, __global float *dst,
int rows, int cols, int src_step, int dst_step,int k_step, int kWidth, int kHeight) __kernel void convolve_D5(__global float *src, __global float *temp1, __global float *dst,
int rows, int cols, int src_step, int dst_step,int k_step, int kWidth, int kHeight,
int src_offset, int dst_offset, int koffset)
{ {
__local float smem[16 + 2 * 8][16 + 2 * 8]; __local float smem[16 + 2 * 8][16 + 2 * 8];
...@@ -65,7 +68,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global ...@@ -65,7 +68,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
// 0 | 0 0 | 0 // 0 | 0 0 | 0
// ----------- // -----------
// 0 | 0 0 | 0 // 0 | 0 0 | 0
smem[y][x] = src[min(max(gy - 8, 0), rows - 1)*(src_step >> 2) + min(max(gx - 8, 0), cols - 1)]; smem[y][x] = src[min(max(gy - 8, 0), rows - 1) * src_step + min(max(gx - 8, 0), cols - 1) + src_offset];
// 0 | 0 x | x // 0 | 0 x | x
// ----------- // -----------
...@@ -73,7 +76,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global ...@@ -73,7 +76,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
// 0 | 0 0 | 0 // 0 | 0 0 | 0
// ----------- // -----------
// 0 | 0 0 | 0 // 0 | 0 0 | 0
smem[y][x + 16] = src[min(max(gy - 8, 0), rows - 1)*(src_step >> 2) + min(gx + 8, cols - 1)]; smem[y][x + 16] = src[min(max(gy - 8, 0), rows - 1) * src_step + min(gx + 8, cols - 1) + src_offset];
// 0 | 0 0 | 0 // 0 | 0 0 | 0
// ----------- // -----------
...@@ -81,7 +84,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global ...@@ -81,7 +84,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
// x | x 0 | 0 // x | x 0 | 0
// ----------- // -----------
// x | x 0 | 0 // x | x 0 | 0
smem[y + 16][x] = src[min(gy + 8, rows - 1)*(src_step >> 2) + min(max(gx - 8, 0), cols - 1)]; smem[y + 16][x] = src[min(gy + 8, rows - 1) * src_step + min(max(gx - 8, 0), cols - 1) + src_offset];
// 0 | 0 0 | 0 // 0 | 0 0 | 0
// ----------- // -----------
...@@ -89,21 +92,18 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global ...@@ -89,21 +92,18 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
// 0 | 0 x | x // 0 | 0 x | x
// ----------- // -----------
// 0 | 0 x | x // 0 | 0 x | x
smem[y + 16][x + 16] = src[min(gy + 8, rows - 1)*(src_step >> 2) + min(gx + 8, cols - 1)]; smem[y + 16][x + 16] = src[min(gy + 8, rows - 1) * src_step + min(gx + 8, cols - 1) + src_offset];
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (gx < cols && gy < rows) if (gx < cols && gy < rows)
{ {
float res = 0; float res = 0;
for (int i = 0; i < kHeight; ++i) for (int i = 0; i < kHeight; ++i)
{
for (int j = 0; j < kWidth; ++j) for (int j = 0; j < kWidth; ++j)
{ res += smem[y + 8 - kHeight / 2 + i][x + 8 - kWidth / 2 + j] * temp1[i * k_step + j + koffset];
res += smem[y + 8 - kHeight / 2 + i][x + 8 - kWidth / 2 + j] * temp1[i * (k_step>>2) + j];
} dst[gy * dst_step + gx + dst_offset] = res;
} }
dst[gy*(dst_step >> 2)+gx] = res;
}
} }
...@@ -34,6 +34,13 @@ ...@@ -34,6 +34,13 @@
// //
// //
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
#ifdef BORDER_CONSTANT #ifdef BORDER_CONSTANT
//BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii //BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii
......
/*M/////////////////////////////////////////////////////////////////////////////////////// /*M///////////////////////////////////////////////////////////////////////////////////////
// //
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
...@@ -43,940 +42,282 @@ ...@@ -43,940 +42,282 @@
// the use of this software, even if advised of the possibility of such damage. // the use of this software, even if advised of the possibility of such damage.
// //
//M*/ //M*/
//#pragma OPENCL EXTENSION cl_amd_printf : enable
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif

// In nearest-neighbour mode convertToWT is defined empty, so
// convertToWT(expr) expands to just (expr) and the pixel is used as read.
#ifdef INTER_NEAREST
#define convertToWT
#endif

// EXTRAPOLATE(v2, v)
//   v2  int2 source coordinate (x, y) that lies outside the source image;
//       every mode except BORDER_CONSTANT rewrites it in place
//   v   lvalue that receives the resulting pixel value
//
// The expansion uses names that must exist at the point of use:
//   BORDER_CONSTANT   : scalar
//   BORDER_REPLICATE  : src, src_step, src_offset, src_cols, src_rows, zero
//   all other modes   : src, src_step, src_offset, src_cols, src_rows
// Exactly one border mode has to be defined, otherwise compilation stops
// with "No extrapolation method".

#ifdef BORDER_CONSTANT
// Outside pixels take the constant `scalar`.
#define EXTRAPOLATE(v2, v) v = scalar;
#elif defined BORDER_REPLICATE
// Clamp the coordinate to [0, src_cols - 1] x [0, src_rows - 1].
#define EXTRAPOLATE(v2, v) \
    { \
        v2 = max(min(v2, (int2)(src_cols - 1, src_rows - 1)), zero); \
        v = convertToWT(src[mad24(v2.y, src_step, v2.x + src_offset)]); \
    }
#elif defined BORDER_WRAP
// Wrap the coordinate around the image: negative values are shifted up by
// a multiple of the size, values past the end are reduced modulo the size.
#define EXTRAPOLATE(v2, v) \
    { \
        if (v2.x < 0) \
            v2.x -= ((v2.x - src_cols + 1) / src_cols) * src_cols; \
        if (v2.x >= src_cols) \
            v2.x %= src_cols; \
        \
        if (v2.y < 0) \
            v2.y -= ((v2.y - src_rows + 1) / src_rows) * src_rows; \
        if( v2.y >= src_rows ) \
            v2.y %= src_rows; \
        v = convertToWT(src[mad24(v2.y, src_step, v2.x + src_offset)]); \
    }
#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
// delta selects the mirror variant: 0 for BORDER_REFLECT (the edge pixel is
// repeated), 1 for BORDER_REFLECT_101 (the edge pixel is not repeated).
#ifdef BORDER_REFLECT
#define DELTA int delta = 0
#else
#define DELTA int delta = 1
#endif
// Mirror the coordinate back into the image, repeating until it lands
// inside. A one-pixel-wide (or one-pixel-high) image maps straight to 0.
// The do/while bodies run at least once, so this macro is meant to be used
// only after NEED_EXTRAPOLATION reported an outside coordinate.
#define EXTRAPOLATE(v2, v) \
    { \
        DELTA; \
        if (src_cols == 1) \
            v2.x = 0; \
        else \
            do \
            { \
                if( v2.x < 0 ) \
                    v2.x = -v2.x - 1 + delta; \
                else \
                    v2.x = src_cols - 1 - (v2.x - src_cols) - delta; \
            } \
            while (v2.x >= src_cols || v2.x < 0); \
        \
        if (src_rows == 1) \
            v2.y = 0; \
        else \
            do \
            { \
                if( v2.y < 0 ) \
                    v2.y = -v2.y - 1 + delta; \
                else \
                    v2.y = src_rows - 1 - (v2.y - src_rows) - delta; \
            } \
            while (v2.y >= src_rows || v2.y < 0); \
        v = convertToWT(src[mad24(v2.y, src_step, v2.x + src_offset)]); \
    }
#else
#error No extrapolation method
#endif

// True when (gx, gy) lies outside [0, src_cols) x [0, src_rows).
// Arguments are parenthesized so that expressions can be passed safely.
#define NEED_EXTRAPOLATION(gx, gy) ((gx) >= src_cols || (gy) >= src_rows || (gx) < 0 || (gy) < 0)

#ifdef INTER_NEAREST
// Nearest-neighbour remap with the coordinates split across two float maps:
// map1 holds the source x for every destination pixel, map2 the source y.
//
// One work-item handles one destination pixel (x, y). Both coordinates are
// rounded with convert_int_sat_rte. If the rounded position is inside the
// source image the pixel is copied; otherwise EXTRAPOLATE fills dst according
// to the border mode selected at build time.
//
//   src, dst      source / destination pixels of type T
//   map1, map2    x and y coordinate maps
//   *_offset      index of the first element inside each buffer
//   *_step        distance between consecutive rows
//   src_cols/rows source size, referenced by NEED_EXTRAPOLATION / EXTRAPOLATE
//   dst_cols/rows destination size; work-items outside it do nothing
//   scalar        border value, referenced by the BORDER_CONSTANT expansion
//
// Every offset and step is added directly to a typed-pointer index, so all
// of them are counted in elements of the corresponding buffer, not bytes.
// NOTE(review): T, NEED_EXTRAPOLATION and EXTRAPOLATE are not defined in this
// block - T is presumably supplied as a build option; confirm on the host side.
__kernel void remap_2_32FC1(__global const T * restrict src, __global T * dst,
                            __global float * map1, __global float * map2,
                            int src_offset, int dst_offset, int map1_offset, int map2_offset,
                            int src_step, int dst_step, int map1_step, int map2_step,
                            int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (x < dst_cols && y < dst_rows)
    {
        int dstIdx = mad24(y, dst_step, x + dst_offset);
        int map1Idx = mad24(y, map1_step, x + map1_offset);
        int map2Idx = mad24(y, map2_step, x + map2_offset);

        int gx = convert_int_sat_rte(map1[map1Idx]);
        int gy = convert_int_sat_rte(map2[map2Idx]);

        if (NEED_EXTRAPOLATION(gx, gy))
        {
            // `zero` is referenced by name inside the BORDER_REPLICATE expansion
            int2 gxy = (int2)(gx, gy), zero = (int2)(0);
            EXTRAPOLATE(gxy, dst[dstIdx]);
        }
        else
        {
            int srcIdx = mad24(gy, src_step, gx + src_offset);
            dst[dstIdx] = src[srcIdx];
        }
    }
}
// Nearest-neighbour remap with a single interleaved map: each float2 in map1
// is the (x, y) source coordinate for one destination pixel.
//
// One work-item handles one destination pixel (x, y). The coordinate pair is
// rounded with convert_int2_sat_rte. If the rounded position is inside the
// source image the pixel is copied; otherwise EXTRAPOLATE fills dst according
// to the border mode selected at build time.
//
//   src, dst      source / destination pixels of type T
//   map1          coordinate map, one float2 per destination pixel
//   *_offset      index of the first element inside each buffer
//   *_step        distance between consecutive rows
//   src_cols/rows source size, referenced by NEED_EXTRAPOLATION / EXTRAPOLATE
//   dst_cols/rows destination size; work-items outside it do nothing
//   scalar        border value, referenced by the BORDER_CONSTANT expansion
//
// Offsets and steps index typed pointers directly, so they are counted in
// elements of the corresponding buffer (float2 elements for map1), not bytes.
// NOTE(review): T, NEED_EXTRAPOLATION and EXTRAPOLATE are not defined in this
// block - T is presumably supplied as a build option; confirm on the host side.
__kernel void remap_32FC2(__global const T * restrict src, __global T * dst, __global float2 * map1,
                          int src_offset, int dst_offset, int map1_offset,
                          int src_step, int dst_step, int map1_step,
                          int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (x < dst_cols && y < dst_rows)
    {
        int dstIdx = mad24(y, dst_step, x + dst_offset);
        int map1Idx = mad24(y, map1_step, x + map1_offset);

        int2 gxy = convert_int2_sat_rte(map1[map1Idx]);
        int gx = gxy.x, gy = gxy.y;

        if (NEED_EXTRAPOLATION(gx, gy))
        {
            // `zero` is referenced by name inside the BORDER_REPLICATE expansion
            int2 zero = (int2)(0);
            EXTRAPOLATE(gxy, dst[dstIdx]);
        }
        else
        {
            int srcIdx = mad24(gy, src_step, gx + src_offset);
            dst[dstIdx] = src[srcIdx];
        }
    }
}
// Nearest-neighbour remap with an integer map: each short2 in map1 is the
// (x, y) source coordinate for one destination pixel.
//
// One work-item handles one destination pixel (x, y). The coordinate pair is
// widened to int2 with convert_int2 (the values are already integers, so no
// rounding mode is involved). If the position is inside the source image the
// pixel is copied; otherwise EXTRAPOLATE fills dst according to the border
// mode selected at build time.
//
//   src, dst      source / destination pixels of type T
//   map1          coordinate map, one short2 per destination pixel
//   *_offset      index of the first element inside each buffer
//   *_step        distance between consecutive rows
//   src_cols/rows source size, referenced by NEED_EXTRAPOLATION / EXTRAPOLATE
//   dst_cols/rows destination size; work-items outside it do nothing
//   scalar        border value, referenced by the BORDER_CONSTANT expansion
//
// Offsets and steps index typed pointers directly, so they are counted in
// elements of the corresponding buffer (short2 elements for map1), not bytes.
// NOTE(review): T, NEED_EXTRAPOLATION and EXTRAPOLATE are not defined in this
// block - T is presumably supplied as a build option; confirm on the host side.
__kernel void remap_16SC2(__global const T * restrict src, __global T * dst, __global short2 * map1,
                          int src_offset, int dst_offset, int map1_offset,
                          int src_step, int dst_step, int map1_step,
                          int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (x < dst_cols && y < dst_rows)
    {
        int dstIdx = mad24(y, dst_step, x + dst_offset);
        int map1Idx = mad24(y, map1_step, x + map1_offset);

        int2 gxy = convert_int2(map1[map1Idx]);
        int gx = gxy.x, gy = gxy.y;

        if (NEED_EXTRAPOLATION(gx, gy))
        {
            // `zero` is referenced by name inside the BORDER_REPLICATE expansion
            int2 zero = (int2)(0);
            EXTRAPOLATE(gxy, dst[dstIdx]);
        }
        else
        {
            int srcIdx = mad24(gy, src_step, gx + src_offset);
            dst[dstIdx] = src[srcIdx];
        }
    }
}
#elif INTER_LINEAR
__kernel void remap_2_32FC1(__global T const * restrict src, __global T * dst,
__kernel void remapLNFConstant_C1_D0(__global unsigned char* dst, __global unsigned char const * restrict src, __global float * map1, __global float * map2,
__global float * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, int src_offset, int dst_offset, int map1_offset, int map2_offset,
int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) int src_step, int dst_step, int map1_step, int map2_step,
int src_cols, int src_rows, int dst_cols, int dst_rows, T nVal)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < threadCols && y < dst_rows)
{
x = x << 2;
int gx = x - (dst_offset&3);
int4 Gx = (int4)(gx, gx+1, gx+2, gx+3);
uchar4 nval =convert_uchar4(nVal);
uchar4 val = (uchar4)(nval.s0);
int dstStart = (y * dst_step + x + dst_offset) - (dst_offset&3);
int map1Start = y * map1_step + (x << 3) + map1_offset - ((dst_offset & 3) << 3);
float8 map1_data;
map1_data = *((__global float8 *)((__global char*)map1 + map1Start));
int8 map1_dataD = convert_int8(map1_data);
float8 temp = map1_data - convert_float8(map1_dataD);
float4 u = temp.even;
float4 v = temp.odd;
float4 ud = (float4)(1.0) - u;
float4 vd = (float4)(1.0) - v;
//float8 map1_dataU = map1_dataD + 1;
int4 map1_dataDx = map1_dataD.even;
int4 map1_dataDy = map1_dataD.odd;
int4 map1_dataDx1 = map1_dataDx + (int4)(1);
int4 map1_dataDy1 = map1_dataDy + (int4)(1);
uchar4 a = val, b = val, c = val, d =val;
if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0)
a.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s0 * src_step + map1_dataDx.s0 + src_offset));
if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0)
a.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s1 * src_step + map1_dataDx.s1 + src_offset));
if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0)
a.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s2 * src_step + map1_dataDx.s2 + src_offset));
if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0)
a.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s3 * src_step + map1_dataDx.s3 + src_offset));
if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0)
b.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s0 * src_step + map1_dataDx1.s0 + src_offset));
if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0)
b.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s1 * src_step + map1_dataDx1.s1 + src_offset));
if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0)
b.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s2 * src_step + map1_dataDx1.s2 + src_offset));
if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0)
b.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s3 * src_step + map1_dataDx1.s3 + src_offset));
if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0)
c.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s0 * src_step + map1_dataDx.s0 + src_offset));
if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0)
c.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s1 * src_step + map1_dataDx.s1 + src_offset));
if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0)
c.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s2 * src_step + map1_dataDx.s2 + src_offset));
if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0)
c.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s3 * src_step + map1_dataDx.s3 + src_offset));
if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0)
d.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s0 * src_step + map1_dataDx1.s0 + src_offset));
if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0)
d.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s1 * src_step + map1_dataDx1.s1 + src_offset));
if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0)
d.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s2 * src_step + map1_dataDx1.s2 + src_offset));
if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0)
d.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s3 * src_step + map1_dataDx1.s3 + src_offset));
uchar4 dst_data = convert_uchar4_sat_rte((convert_float4(a))* ud * vd +(convert_float4(b))* u * vd + (convert_float4(c))* ud * v + (convert_float4(d)) * u * v );
__global uchar4* D = (__global uchar4 *)(dst + dstStart);
uchar4 dVal = *D;
int4 con = (Gx >= 0 && Gx < dst_cols && y >= 0 && y < dst_rows);
dst_data = (convert_uchar4(con) != (uchar4)(0)) ? dst_data : dVal;
*D = dst_data;
}
}
// remapLNF1Constant_C1_D0: bilinear remap of a single-channel 8-bit image with a
// constant fill value, using two separate float maps (map1 supplies the x
// coordinate, map2 the y coordinate of the source sample).
//
// Each work-item with x < threadCols and y < dst_rows produces four consecutive
// destination bytes. The 4-byte group is shifted back by (dst_offset & 3) and the
// final store is lane-masked, so bytes outside [0, dst_cols) keep their old value.
//
// dst_offset / src_offset / map1_offset and the *_step arguments are used as byte
// quantities (they are added to char/uchar pointers). map2 is addressed with the
// same byte offset as map1 (map1_step / map1_offset).
// nVal is converted with convert_uchar4 and only its .s0 lane is used as the fill
// value (F4 is declared outside this span -- presumably a float4/double4 typedef).
//
// NOTE(review): several lines below end with extra statements (mad24-based index
// computations, a second bounds check, a doubled brace) that appear to come from
// another revision of this kernel file; they are left exactly as found.
__kernel void remapLNF1Constant_C1_D0(__global unsigned char* dst, __global unsigned char const * restrict src,
__global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step,
int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal)
{
int x = get_global_id(0); if (x < dst_cols && y < dst_rows)
int y = get_global_id(1);
if(x < threadCols && y < dst_rows)
{ {
// Four pixels per work-item: x becomes the byte column of the first pixel.
x = x << 2; int dstIdx = mad24(y, dst_step, x + dst_offset);
int gx = x - (dst_offset&3); int map1Idx = mad24(y, map1_step, x + map1_offset);
int4 Gx = (int4)(gx, gx+1, gx+2, gx+3); int map2Idx = mad24(y, map2_step, x + map2_offset);
uchar4 nval =convert_uchar4(nVal);
uchar4 val = (uchar4)(nval.s0);
int dstStart = (y * dst_step + x + dst_offset) - (dst_offset&3);
// Four floats (16 bytes) of map data per work-item, shifted by the same pixel count as dst.
int map1Start = y * map1_step + (x << 2) + map1_offset - ((dst_offset & 3) << 2);
float4 map1_data;
float4 map2_data;
map1_data = *((__global float4 *)((__global char*)map1 + map1Start));
map2_data = *((__global float4 *)((__global char*)map2 + map1Start));
// Interleave as (x0, y0, x1, y1, ...) so .even holds x values and .odd holds y values.
float8 map_data = (float8)(map1_data.s0, map2_data.s0, map1_data.s1, map2_data.s1, map1_data.s2, map2_data.s2, map1_data.s3, map2_data.s3);
int8 map1_dataD = convert_int8(map_data);
// Fractional parts are the bilinear weights (u along x, v along y).
float8 temp = map_data - convert_float8(map1_dataD);
float4 u = temp.even;
float4 v = temp.odd;
float4 ud = (float4)(1.0) - u;
float4 vd = (float4)(1.0) - v;
//float8 map1_dataU = map1_dataD + 1;
int4 map1_dataDx = map1_dataD.even;
int4 map1_dataDy = map1_dataD.odd;
int4 map1_dataDx1 = map1_dataDx + (int4)(1);
int4 map1_dataDy1 = map1_dataDy + (int4)(1);
// a=(x,y), b=(x+1,y), c=(x,y+1), d=(x+1,y+1); each lane keeps the fill value
// unless its sample lies inside the source image.
uchar4 a = val, b = val, c = val, d =val;
if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0)
a.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s0 * src_step + map1_dataDx.s0 + src_offset));
if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0)
a.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s1 * src_step + map1_dataDx.s1 + src_offset));
if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0)
a.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s2 * src_step + map1_dataDx.s2 + src_offset));
if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0)
a.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s3 * src_step + map1_dataDx.s3 + src_offset));
if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0)
b.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s0 * src_step + map1_dataDx1.s0 + src_offset));
if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0)
b.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s1 * src_step + map1_dataDx1.s1 + src_offset));
if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0)
b.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s2 * src_step + map1_dataDx1.s2 + src_offset));
if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0)
b.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy.s3 * src_step + map1_dataDx1.s3 + src_offset));
if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0)
c.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s0 * src_step + map1_dataDx.s0 + src_offset));
if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0)
c.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s1 * src_step + map1_dataDx.s1 + src_offset));
if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0)
c.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s2 * src_step + map1_dataDx.s2 + src_offset));
if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0)
c.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s3 * src_step + map1_dataDx.s3 + src_offset));
if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0)
d.s0 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s0 * src_step + map1_dataDx1.s0 + src_offset));
if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0)
d.s1 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s1 * src_step + map1_dataDx1.s1 + src_offset));
if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0)
d.s2 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s2 * src_step + map1_dataDx1.s2 + src_offset));
if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0)
d.s3 = *((__global uchar*)((__global uchar *)src + map1_dataDy1.s3 * src_step + map1_dataDx1.s3 + src_offset));
// Bilinear blend in float, then saturating round-to-nearest-even conversion back to uchar.
uchar4 dst_data = convert_uchar4_sat_rte((convert_float4(a))* ud * vd +(convert_float4(b))* u * vd + (convert_float4(c))* ud * v + (convert_float4(d)) * u * v );
// Read-modify-write: lanes whose column falls outside the destination keep the old byte.
__global uchar4* D = (__global uchar4 *)(dst + dstStart);
uchar4 dVal = *D;
int4 con = (Gx >= 0 && Gx < dst_cols && y >= 0 && y < dst_rows);
dst_data = (convert_uchar4(con) != (uchar4)(0)) ? dst_data : dVal;
*D = dst_data;
}
}
// remapLNFConstant_C4_D0: bilinear remap of a 4-channel 8-bit image with a constant
// fill value, using one interleaved map (map1 holds an (x, y) float pair per pixel).
//
// Each work-item with x < threadCols and y < dst_rows produces one uchar4 pixel.
// Offsets and steps are used as byte quantities. Any of the four neighbours that
// falls outside the source image is replaced by convert_uchar4(nVal).
//
// NOTE(review): several lines below end with extra statements (WT-typed variables,
// NEED_EXTRAPOLATION / EXTRAPOLATE calls) that appear to come from another revision
// of this kernel file; they are left exactly as found.
__kernel void remapLNFConstant_C4_D0(__global unsigned char* dst, __global unsigned char const * restrict src, float2 map_data = (float2)(map1[map1Idx], map2[map2Idx]);
__global float * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step,
int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal)
{
int x = get_global_id(0); int2 map_dataA = convert_int2_sat_rtn(map_data);
int y = get_global_id(1);
if(x < threadCols && y < dst_rows)
{
// 4 bytes per destination pixel, 8 bytes (two floats) per map entry.
int dstIdx = y * dst_step + (x << 2) + dst_offset;
int mapIdx = y * map1_step + (x << 3) + map1_offset;
float2 map_data = *((__global float2 *)((__global char*)map1 + mapIdx));
int2 map_dataA = convert_int2(map_data);
// u.x / u.y are the fractional parts used as bilinear weights.
float2 u = map_data - convert_float2(map_dataA);
// A=(x,y), B=(x+1,y), C=(x,y+1), D=(x+1,y+1).
int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y); int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1); int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1); int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1);
uchar4 nval = convert_uchar4(nVal); int2 zero = (int2)(0);
uchar4 a, b, c , d;
if(map_dataA.x < 0 || map_dataA.x >= src_cols || map_dataA.y >= src_rows || map_dataA.y < 0)
a = nval;
else
a = *((__global uchar4 *)((__global uchar *)src + map_dataA.y * src_step + (map_dataA.x<<2) + src_offset ));
if(map_dataB.x < 0 || map_dataB.x >= src_cols || map_dataB.y >= src_rows || map_dataB.y < 0)
b = nval;
else
b = *((__global uchar4 *)((__global uchar *)src + map_dataB.y * src_step + (map_dataB.x<<2) + src_offset ));
if(map_dataC.x < 0 || map_dataC.x >= src_cols || map_dataC.y >= src_rows || map_dataC.y < 0) float2 _u = map_data - convert_float2(map_dataA);
c = nval; WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32;
else WT scalar = convertToWT(nVal);
c = *((__global uchar4 *)((__global uchar *)src + map_dataC.y * src_step + (map_dataC.x<<2) + src_offset )); WT a = scalar, b = scalar, c = scalar, d = scalar;
if(map_dataD.x < 0 || map_dataD.x >= src_cols || map_dataD.y >= src_rows || map_dataD.y < 0) if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y))
d = nval; a = convertToWT(src[mad24(map_dataA.y, src_step, map_dataA.x + src_offset)]);
else else
d = *((__global uchar4 *)((__global uchar *)src + map_dataD.y * src_step + (map_dataD.x<<2) + src_offset )); EXTRAPOLATE(map_dataA, a);
// Weighted sum of the four neighbours, stored with saturation and round-to-nearest-even.
float4 dst_data = convert_float4(a)*((float4)(1.0-u.x)*((float4)(1.0-u.y))) + convert_float4(b)*((float4)(u.x))*((float4)(1.0-u.y)) + convert_float4(c)*((float4)(1.0-u.x))*((float4)(u.y)) + convert_float4(d)*((float4)(u.x))*((float4)(u.y));
*((__global uchar4 *)((__global uchar*)dst + dstIdx)) = convert_uchar4_sat_rte(dst_data);
}
}
// remapLNF1Constant_C4_D0: bilinear remap of a 4-channel 8-bit image with a constant
// fill value, using two separate float maps (map1 = x coordinate, map2 = y coordinate).
//
// Each work-item with x < threadCols and y < dst_rows produces one uchar4 pixel.
// Offsets and steps are used as byte quantities; map2 is read at the same byte
// offset as map1 (computed from map1_step / map1_offset). Neighbours outside the
// source image are replaced by convert_uchar4(nVal).
//
// NOTE(review): several lines below end with, or consist solely of, statements
// (WT-typed blend, NEED_EXTRAPOLATION / EXTRAPOLATE calls, doubled braces) that
// appear to come from another revision of this kernel file; they are left as found.
__kernel void remapLNF1Constant_C4_D0(__global unsigned char* dst, __global unsigned char const * restrict src,
__global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step,
int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal)
{
int x = get_global_id(0); if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y))
int y = get_global_id(1); b = convertToWT(src[mad24(map_dataB.y, src_step, map_dataB.x + src_offset)]);
if(x < threadCols && y < dst_rows)
{
// 4 bytes per destination pixel, 4 bytes (one float) per entry in each map.
int dstIdx = y * dst_step + (x << 2) + dst_offset;
int mapIdx = y * map1_step + (x << 2) + map1_offset;
float map1_data = *((__global float *)((__global char*)map1 + mapIdx));
float map2_data = *((__global float *)((__global char*)map2 + mapIdx));
float2 map_data = (float2)(map1_data, map2_data);
int2 map_dataA = convert_int2(map_data);
// u.x / u.y are the fractional parts used as bilinear weights.
float2 u = map_data - convert_float2(map_dataA);
// A=(x,y), B=(x+1,y), C=(x,y+1), D=(x+1,y+1).
int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1);
uchar4 nval = convert_uchar4(nVal);
uchar4 a, b, c , d;
if(map_dataA.x < 0 || map_dataA.x >= src_cols || map_dataA.y >= src_rows || map_dataA.y < 0)
a = nval;
else
a = *((__global uchar4 *)((__global uchar *)src + map_dataA.y * src_step + (map_dataA.x<<2) + src_offset ));
if(map_dataB.x < 0 || map_dataB.x >= src_cols || map_dataB.y >= src_rows || map_dataB.y < 0)
b = nval;
else else
b = *((__global uchar4 *)((__global uchar *)src + map_dataB.y * src_step + (map_dataB.x<<2) + src_offset )); EXTRAPOLATE(map_dataB, b);
if(map_dataC.x < 0 || map_dataC.x >= src_cols || map_dataC.y >= src_rows || map_dataC.y < 0) if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y))
c = nval; c = convertToWT(src[mad24(map_dataC.y, src_step, map_dataC.x + src_offset)]);
else else
c = *((__global uchar4 *)((__global uchar *)src + map_dataC.y * src_step + (map_dataC.x<<2) + src_offset )); EXTRAPOLATE(map_dataC, c);
if(map_dataD.x < 0 || map_dataD.x >= src_cols || map_dataD.y >= src_rows || map_dataD.y < 0) if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y))
d = nval; d = convertToWT(src[mad24(map_dataD.y, src_step, map_dataD.x + src_offset)]);
else else
d = *((__global uchar4 *)((__global uchar *)src + map_dataD.y * src_step + (map_dataD.x<<2) + src_offset )); EXTRAPOLATE(map_dataD, d);
// Weighted sum of the four neighbours, stored with saturation and round-to-nearest-even.
float4 dst_data = convert_float4(a)*((float4)(1.0-u.x)*((float4)(1.0-u.y))) + convert_float4(b)*((float4)(u.x))*((float4)(1.0-u.y)) + convert_float4(c)*((float4)(1.0-u.x))*((float4)(u.y)) + convert_float4(d)*((float4)(u.x))*((float4)(u.y));
*((__global uchar4 *)((__global uchar*)dst + dstIdx)) = convert_uchar4_sat_rte(dst_data);
WT dst_data = a * (WT)(1 - u.x) * (WT)(1 - u.y) +
b * (WT)(u.x) * (WT)(1 - u.y) +
c * (WT)(1 - u.x) * (WT)(u.y) +
d * (WT)(u.x) * (WT)(u.y);
dst[dstIdx] = convertToT(dst_data);
} }
} }
// remapLNFConstant_C1_D5: bilinear remap of a single-channel 32-bit float image with
// a constant fill value, using one interleaved map (map1 holds an (x, y) float pair
// per pixel).
//
// Each work-item with x < threadCols and y < dst_rows produces four consecutive
// floats (16 bytes). The group is shifted back by (dst_offset & 15) bytes and the
// final store is lane-masked: Gx holds the byte column of each lane and is compared
// against dst_cols << 2, so lanes outside the destination row keep their old value.
// Offsets and steps are used as byte quantities. Only the .s0 lane of
// convert_float4(nVal) is used as the fill value.
//
// NOTE(review): the first two lines and the tails of the following lines carry the
// header of a different kernel (remap_32FC2 with templated type T) that appears to
// come from another revision of this file; they are left exactly as found.
__kernel void remap_32FC2(__global T const * restrict src, __global T * dst,
__global float2 * map1,
__kernel void remapLNFConstant_C1_D5(__global float* dst, __global float const * restrict src, int src_offset, int dst_offset, int map1_offset,
__global float * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, int src_step, int dst_step, int map1_step,
int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) int src_cols, int src_rows, int dst_cols, int dst_rows, T nVal)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < threadCols && y < dst_rows)
{
// Four floats per work-item: x becomes the byte column of the first one.
x = x << 4;
int gx = x - (dst_offset&15);
int4 Gx = (int4)(gx, gx+4, gx+8, gx+12);
float4 nval =convert_float4(nVal);
float4 val = (float4)(nval.s0);
int dstStart = (y * dst_step + x + dst_offset) - (dst_offset&15);
// Eight floats (32 bytes) of interleaved map data per work-item.
int map1Start = y * map1_step + (x << 1) + map1_offset - ((dst_offset & 15) << 1);
float8 map1_data;
map1_data = *((__global float8 *)((__global char*)map1 + map1Start));
int8 map1_dataD = convert_int8(map1_data);
// Fractional parts are the bilinear weights (u along x in .even, v along y in .odd).
float8 temp = map1_data - convert_float8(map1_dataD);
float4 u = temp.even;
float4 v = temp.odd;
float4 ud = (float4)(1.0) - u;
float4 vd = (float4)(1.0) - v;
//float8 map1_dataU = map1_dataD + 1;
int4 map1_dataDx = map1_dataD.even;
int4 map1_dataDy = map1_dataD.odd;
int4 map1_dataDx1 = map1_dataDx + (int4)(1);
int4 map1_dataDy1 = map1_dataDy + (int4)(1);
// a=(x,y), b=(x+1,y), c=(x,y+1), d=(x+1,y+1); each lane keeps the fill value
// unless its sample lies inside the source image. Columns are scaled by 4 bytes.
float4 a = val, b = val, c = val, d = val;
if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0)
a.s0 = *((__global float*)((__global uchar *)src + map1_dataDy.s0 * src_step + (map1_dataDx.s0 << 2) + src_offset));
if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0)
a.s1 = *((__global float*)((__global uchar *)src + map1_dataDy.s1 * src_step + (map1_dataDx.s1 << 2) + src_offset));
if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0)
a.s2 = *((__global float*)((__global uchar *)src + map1_dataDy.s2 * src_step + (map1_dataDx.s2 << 2) + src_offset));
if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0)
a.s3 = *((__global float*)((__global uchar *)src + map1_dataDy.s3 * src_step + (map1_dataDx.s3 << 2) + src_offset));
if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0)
b.s0 = *((__global float*)((__global uchar *)src + map1_dataDy.s0 * src_step + (map1_dataDx1.s0 << 2) + src_offset));
if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0)
b.s1 = *((__global float*)((__global uchar *)src + map1_dataDy.s1 * src_step + (map1_dataDx1.s1 << 2) + src_offset));
if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0)
b.s2 = *((__global float*)((__global uchar *)src + map1_dataDy.s2 * src_step + (map1_dataDx1.s2 << 2) + src_offset));
if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0)
b.s3 = *((__global float*)((__global uchar *)src + map1_dataDy.s3 * src_step + (map1_dataDx1.s3 << 2) + src_offset));
if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0)
c.s0 = *((__global float*)((__global uchar *)src + map1_dataDy1.s0 * src_step + (map1_dataDx.s0 << 2) + src_offset));
if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0)
c.s1 = *((__global float*)((__global uchar *)src + map1_dataDy1.s1 * src_step + (map1_dataDx.s1 << 2) + src_offset));
if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0)
c.s2 = *((__global float*)((__global uchar *)src + map1_dataDy1.s2 * src_step + (map1_dataDx.s2 << 2) + src_offset));
if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0)
c.s3 = *((__global float*)((__global uchar *)src + map1_dataDy1.s3 * src_step + (map1_dataDx.s3 << 2) + src_offset));
if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0)
d.s0 = *((__global float*)((__global uchar *)src + map1_dataDy1.s0 * src_step + (map1_dataDx1.s0 << 2) + src_offset));
if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0)
d.s1 = *((__global float*)((__global uchar *)src + map1_dataDy1.s1 * src_step + (map1_dataDx1.s1 << 2) + src_offset));
if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0)
d.s2 = *((__global float*)((__global uchar *)src + map1_dataDy1.s2 * src_step + (map1_dataDx1.s2 << 2) + src_offset));
if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0)
d.s3 = *((__global float*)((__global uchar *)src + map1_dataDy1.s3 * src_step + (map1_dataDx1.s3 << 2) + src_offset));
float4 dst_data = a * ud * vd + b * u * vd + c * ud * v + d * u * v ;
// Read-modify-write: lanes outside the destination row keep the previously stored float.
__global float4* D = (__global float4 *)((__global char*)dst + dstStart);
float4 dVal = *D;
int4 con = (Gx >= 0 && Gx < (dst_cols << 2) && y >= 0 && y < dst_rows);
dst_data = (convert_float4(con) != (float4)(0)) ? dst_data : dVal;
*D = dst_data;
}
}
// remapLNF1Constant_C1_D5: bilinear remap of a single-channel 32-bit float image with
// a constant fill value, using two separate float maps (map1 = x coordinate,
// map2 = y coordinate).
//
// Each work-item with x < threadCols and y < dst_rows produces four consecutive
// floats (16 bytes). The group is shifted back by (dst_offset & 15) bytes and the
// final store is lane-masked: Gx holds the byte column of each lane and is compared
// against dst_cols << 2. Offsets and steps are used as byte quantities; map2 is read
// at the same byte offset as map1. Only the .s0 lane of convert_float4(nVal) is
// used as the fill value.
//
// NOTE(review): several lines below end with extra statements (mad24-based index
// computations, a second bounds check, a doubled brace) that appear to come from
// another revision of this kernel file; they are left exactly as found.
__kernel void remapLNF1Constant_C1_D5(__global float* dst, __global float const * restrict src,
__global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step,
int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal)
{
int x = get_global_id(0); if (x < dst_cols && y < dst_rows)
int y = get_global_id(1);
if(x < threadCols && y < dst_rows)
{ {
// Four floats per work-item: x becomes the byte column of the first one.
x = x << 4; int dstIdx = mad24(y, dst_step, x + dst_offset);
int gx = x - (dst_offset&15); int map1Idx = mad24(y, map1_step, x + map1_offset);
int4 Gx = (int4)(gx, gx+4, gx+8, gx+12);
float4 nval =convert_float4(nVal);
float4 val = (float4)(nval.s0);
int dstStart = y * dst_step + x + dst_offset - (dst_offset & 15);
// Each map supplies four floats (16 bytes) per work-item, same byte layout as dst.
int map1Start = y * map1_step + x + map1_offset - (dst_offset & 15);
float4 map1_data;
float4 map2_data;
map1_data = *((__global float4 *)((__global char*)map1 + map1Start));
map2_data = *((__global float4 *)((__global char*)map2 + map1Start));
// Interleave as (x0, y0, x1, y1, ...) so .even holds x values and .odd holds y values.
float8 map_data = (float8)(map1_data.s0, map2_data.s0, map1_data.s1, map2_data.s1, map1_data.s2, map2_data.s2, map1_data.s3, map2_data.s3);
int8 map1_dataD = convert_int8(map_data);
// Fractional parts are the bilinear weights (u along x, v along y).
float8 temp = map_data - convert_float8(map1_dataD);
float4 u = temp.even;
float4 v = temp.odd;
float4 ud = (float4)(1.0) - u;
float4 vd = (float4)(1.0) - v;
//float8 map1_dataU = map1_dataD + 1;
int4 map1_dataDx = map1_dataD.even;
int4 map1_dataDy = map1_dataD.odd;
int4 map1_dataDx1 = map1_dataDx + (int4)(1);
int4 map1_dataDy1 = map1_dataDy + (int4)(1);
// a=(x,y), b=(x+1,y), c=(x,y+1), d=(x+1,y+1); each lane keeps the fill value
// unless its sample lies inside the source image. Columns are scaled by 4 bytes.
float4 a = val, b = val, c = val, d = val;
if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0)
a.s0 = *((__global float*)((__global uchar *)src + map1_dataDy.s0 * src_step + (map1_dataDx.s0 << 2) + src_offset));
if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0)
a.s1 = *((__global float*)((__global uchar *)src + map1_dataDy.s1 * src_step + (map1_dataDx.s1 << 2) + src_offset));
if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0)
a.s2 = *((__global float*)((__global uchar *)src + map1_dataDy.s2 * src_step + (map1_dataDx.s2 << 2) + src_offset));
if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0)
a.s3 = *((__global float*)((__global uchar *)src + map1_dataDy.s3 * src_step + (map1_dataDx.s3 << 2) + src_offset));
if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy.s0 < src_rows && map1_dataDy.s0 >= 0)
b.s0 = *((__global float*)((__global uchar *)src + map1_dataDy.s0 * src_step + (map1_dataDx1.s0 << 2) + src_offset));
if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy.s1 < src_rows && map1_dataDy.s1 >= 0)
b.s1 = *((__global float*)((__global uchar *)src + map1_dataDy.s1 * src_step + (map1_dataDx1.s1 << 2) + src_offset));
if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy.s2 < src_rows && map1_dataDy.s2 >= 0)
b.s2 = *((__global float*)((__global uchar *)src + map1_dataDy.s2 * src_step + (map1_dataDx1.s2 << 2) + src_offset));
if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy.s3 < src_rows && map1_dataDy.s3 >= 0)
b.s3 = *((__global float*)((__global uchar *)src + map1_dataDy.s3 * src_step + (map1_dataDx1.s3 << 2) + src_offset));
if (map1_dataDx.s0 < src_cols && map1_dataDx.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0)
c.s0 = *((__global float*)((__global uchar *)src + map1_dataDy1.s0 * src_step + (map1_dataDx.s0 << 2) + src_offset));
if (map1_dataDx.s1 < src_cols && map1_dataDx.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0)
c.s1 = *((__global float*)((__global uchar *)src + map1_dataDy1.s1 * src_step + (map1_dataDx.s1 << 2) + src_offset));
if (map1_dataDx.s2 < src_cols && map1_dataDx.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0)
c.s2 = *((__global float*)((__global uchar *)src + map1_dataDy1.s2 * src_step + (map1_dataDx.s2 << 2) + src_offset));
if (map1_dataDx.s3 < src_cols && map1_dataDx.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0)
c.s3 = *((__global float*)((__global uchar *)src + map1_dataDy1.s3 * src_step + (map1_dataDx.s3 << 2) + src_offset));
if (map1_dataDx1.s0 < src_cols && map1_dataDx1.s0 >= 0 && map1_dataDy1.s0 < src_rows && map1_dataDy1.s0 >= 0)
d.s0 = *((__global float*)((__global uchar *)src + map1_dataDy1.s0 * src_step + (map1_dataDx1.s0 << 2) + src_offset));
if (map1_dataDx1.s1 < src_cols && map1_dataDx1.s1 >= 0 && map1_dataDy1.s1 < src_rows && map1_dataDy1.s1 >= 0)
d.s1 = *((__global float*)((__global uchar *)src + map1_dataDy1.s1 * src_step + (map1_dataDx1.s1 << 2) + src_offset));
if (map1_dataDx1.s2 < src_cols && map1_dataDx1.s2 >= 0 && map1_dataDy1.s2 < src_rows && map1_dataDy1.s2 >= 0)
d.s2 = *((__global float*)((__global uchar *)src + map1_dataDy1.s2 * src_step + (map1_dataDx1.s2 << 2) + src_offset));
if (map1_dataDx1.s3 < src_cols && map1_dataDx1.s3 >= 0 && map1_dataDy1.s3 < src_rows && map1_dataDy1.s3 >= 0)
d.s3 = *((__global float*)((__global uchar *)src + map1_dataDy1.s3 * src_step + (map1_dataDx1.s3 << 2) + src_offset));
float4 dst_data = a * ud * vd + b * u * vd + c * ud * v + d * u * v ;
// Read-modify-write: lanes outside the destination row keep the previously stored float.
__global float4* D = (__global float4 *)((__global char*)dst + dstStart);
float4 dVal = *D;
int4 con = (Gx >= 0 && Gx < (dst_cols << 2) && y >= 0 && y < dst_rows);
dst_data = (convert_float4(con) != (float4)(0)) ? dst_data : dVal;
*D = dst_data;
}
}
// remapLNFConstant_C4_D5: bilinear remap of a 4-channel 32-bit float image with a
// constant fill value, using one interleaved map (map1 holds an (x, y) float pair
// per pixel).
//
// Each work-item with x < threadCols and y < dst_rows produces one float4 pixel.
// Offsets and steps are used as byte quantities. Any of the four neighbours that
// falls outside the source image is replaced by convert_float4(nVal).
//
// NOTE(review): many lines below end with, or consist solely of, statements
// (WT-typed variables, NEED_EXTRAPOLATION / EXTRAPOLATE calls) that appear to come
// from another revision of this kernel file; they are left exactly as found.
__kernel void remapLNFConstant_C4_D5(__global float * dst, __global float const * restrict src, float2 map_data = map1[map1Idx];
__global float * map1, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, int2 map_dataA = convert_int2_sat_rtn(map_data);
int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
{ int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
int x = get_global_id(0); int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
int y = get_global_id(1); int2 zero = (int2)(0);
if(x < threadCols && y < dst_rows) float2 _u = map_data - convert_float2(map_dataA);
{ WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32;
// 16 bytes per destination pixel, 8 bytes (two floats) per map entry.
int dstIdx = y * dst_step + (x << 4) + dst_offset ; WT scalar = convertToWT(nVal);
int mapIdx = y * map1_step + (x << 3) + map1_offset ; WT a = scalar, b = scalar, c = scalar, d = scalar;
float2 map_data = *((__global float2 *)((__global char*)map1 + mapIdx));
int2 map_dataA = convert_int2(map_data);
// u.x / u.y are the fractional parts used as bilinear weights.
float2 u = map_data - convert_float2(map_dataA);
// A=(x,y), B=(x+1,y), C=(x,y+1), D=(x+1,y+1).
int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1);
float4 nval = convert_float4(nVal);
float4 a, b, c , d;
if(map_dataA.x < 0 || map_dataA.x >= src_cols || map_dataA.y >= src_rows || map_dataA.y < 0)
a = nval;
else
a = *((__global float4 *)((__global uchar *)src + map_dataA.y * src_step + (map_dataA.x<<4) + src_offset ));
if(map_dataB.x < 0 || map_dataB.x >= src_cols || map_dataB.y >= src_rows || map_dataB.y < 0)
b = nval;
else
b = *((__global float4 *)((__global uchar *)src + map_dataB.y * src_step + (map_dataB.x<<4) + src_offset ));
if(map_dataC.x < 0 || map_dataC.x >= src_cols || map_dataC.y >= src_rows || map_dataC.y < 0)
c = nval;
else
c = *((__global float4 *)((__global uchar *)src + map_dataC.y * src_step + (map_dataC.x<<4) + src_offset ));
if(map_dataD.x < 0 || map_dataD.x >= src_cols || map_dataD.y >= src_rows || map_dataD.y < 0)
d = nval;
else
d = *((__global float4 *)((__global uchar *)src + map_dataD.y * src_step + (map_dataD.x<<4) + src_offset ));
// Weighted sum of the four neighbours, stored without any conversion.
float4 dst_data = a * ((float4)(1.0-u.x)) * ((float4)(1.0-u.y)) + b *((float4)(u.x)) * ((float4)(1.0-u.y)) + c * ((float4)(1.0-u.x)) *((float4)(u.y)) + d *((float4)(u.x)) *((float4)(u.y));
*((__global float4 *)((__global uchar*)dst + dstIdx)) = dst_data ;
} if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y))
} a = convertToWT(src[mad24(map_dataA.y, src_step, map_dataA.x + src_offset)]);
else
EXTRAPOLATE(map_dataA, a);
// remapLNF1Constant_C4_D5: bilinear remap of a 4-channel 32-bit float image with a
// constant fill value, using two separate float maps (map1 = x coordinate,
// map2 = y coordinate).
//
// Each work-item with x < threadCols and y < dst_rows produces one float4 pixel.
// Offsets and steps are used as byte quantities; map2 is read at the same byte
// offset as map1 (computed from map1_step / map1_offset). Neighbours outside the
// source image are replaced by convert_float4(nVal).
//
// NOTE(review): many lines below end with, or consist solely of, statements
// (WT-typed blend, NEED_EXTRAPOLATION / EXTRAPOLATE calls, doubled braces) that
// appear to come from another revision of this kernel file; they are left as found.
__kernel void remapLNF1Constant_C4_D5(__global float * dst, __global float const * restrict src, if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y))
__global float * map1, __global float * map2, int dst_offset, int src_offset, int map1_offset, int dst_step, int src_step, b = convertToWT(src[mad24(map_dataB.y, src_step, map_dataB.x + src_offset)]);
int map1_step, int src_cols, int src_rows, int dst_cols, int dst_rows, int map1_cols, int map1_rows , int threadCols, F4 nVal) else
{ EXTRAPOLATE(map_dataB, b);
int x = get_global_id(0);
int y = get_global_id(1);
if(x < threadCols && y < dst_rows) if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y))
{ c = convertToWT(src[mad24(map_dataC.y, src_step, map_dataC.x + src_offset)]);
// 16 bytes per destination pixel, 4 bytes (one float) per entry in each map.
int dstIdx = y * dst_step + (x << 4) + dst_offset ; else
int mapIdx = y * map1_step + (x << 2) + map1_offset ; EXTRAPOLATE(map_dataC, c);
float map1_data = *((__global float *)((__global char*)map1 + mapIdx));
float map2_data = *((__global float *)((__global char*)map2 + mapIdx));
float2 map_data = (float2)(map1_data, map2_data);
int2 map_dataA = convert_int2(map_data);
// u.x / u.y are the fractional parts used as bilinear weights.
float2 u = map_data - convert_float2(map_dataA);
// A=(x,y), B=(x+1,y), C=(x,y+1), D=(x+1,y+1).
int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1);
float4 nval = convert_float4(nVal);
float4 a, b, c , d;
if(map_dataA.x < 0 || map_dataA.x >= src_cols || map_dataA.y >= src_rows || map_dataA.y < 0)
a = nval;
else
a = *((__global float4 *)((__global uchar *)src + map_dataA.y * src_step + (map_dataA.x<<4) + src_offset ));
if(map_dataB.x < 0 || map_dataB.x >= src_cols || map_dataB.y >= src_rows || map_dataB.y < 0)
b = nval;
else
b = *((__global float4 *)((__global uchar *)src + map_dataB.y * src_step + (map_dataB.x<<4) + src_offset ));
if(map_dataC.x < 0 || map_dataC.x >= src_cols || map_dataC.y >= src_rows || map_dataC.y < 0)
c = nval;
else
c = *((__global float4 *)((__global uchar *)src + map_dataC.y * src_step + (map_dataC.x<<4) + src_offset ));
if(map_dataD.x < 0 || map_dataD.x >= src_cols || map_dataD.y >= src_rows || map_dataD.y < 0)
d = nval;
else
d = *((__global float4 *)((__global uchar *)src + map_dataD.y * src_step + (map_dataD.x<<4) + src_offset ));
// Weighted sum of the four neighbours, stored without any conversion.
float4 dst_data = a * ((float4)(1.0-u.x)) * ((float4)(1.0-u.y)) + b *((float4)(u.x)) * ((float4)(1.0-u.y)) + c * ((float4)(1.0-u.x)) *((float4)(u.y)) + d *((float4)(u.x)) *((float4)(u.y));
*((__global float4 *)((__global uchar*)dst + dstIdx)) = dst_data ;
if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y))
d = convertToWT(src[mad24(map_dataD.y, src_step, map_dataD.x + src_offset)]);
else
EXTRAPOLATE(map_dataD, d);
WT dst_data = a * (WT)(1 - u.x) * (WT)(1 - u.y) +
b * (WT)(u.x) * (WT)(1 - u.y) +
c * (WT)(1 - u.x) * (WT)(u.y) +
d * (WT)(u.x) * (WT)(u.y);
dst[dstIdx] = convertToT(dst_data);
} }
} }
#endif
...@@ -62,8 +62,7 @@ PARAM_TEST_CASE(FilterTestBase, MatType, ...@@ -62,8 +62,7 @@ PARAM_TEST_CASE(FilterTestBase, MatType,
int, // border type, or iteration int, // border type, or iteration
bool) // roi or not bool) // roi or not
{ {
int type, borderType; int type, borderType, ksize;
int ksize;
bool useRoi; bool useRoi;
Mat src, dst_whole, src_roi, dst_roi; Mat src, dst_whole, src_roi, dst_roi;
...@@ -92,8 +91,12 @@ PARAM_TEST_CASE(FilterTestBase, MatType, ...@@ -92,8 +91,12 @@ PARAM_TEST_CASE(FilterTestBase, MatType,
// Checks the device results against the host reference: both the whole destination
// matrix and its ROI view must match within `threshold` (default 0.0, i.e. exact).
// NOTE(review): two variants are fused on these lines -- one converts the oclMat
// objects with Mat(...), the other downloads them into local Mats (roi, whole)
// before comparing. The text is left exactly as found.
void Near(double threshold = 0.0) void Near(double threshold = 0.0)
{ {
EXPECT_MAT_NEAR(dst_whole, Mat(gdst_whole), threshold); Mat roi, whole;
EXPECT_MAT_NEAR(dst_roi, Mat(gdst_roi), threshold); gdst_whole.download(whole);
gdst_roi.download(roi);
EXPECT_MAT_NEAR(dst_whole, whole, threshold);
EXPECT_MAT_NEAR(dst_roi, roi, threshold);
} }
}; };
......
...@@ -55,1603 +55,492 @@ ...@@ -55,1603 +55,492 @@
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
using namespace cv;
using namespace std;
using namespace testing; using namespace testing;
using namespace std;
using namespace cv;
// Placeholder type value (-1) wrapped by NULL_TYPE below; presumably marks a test
// parameter slot that a given test does not use -- confirm against the test cases.
MatType nulltype = -1; ///////////////////////////////////////////////////////////////////////////////
// Parameter generators yielding exactly one value: the given type (ONE_TYPE) or
// the nulltype placeholder (NULL_TYPE).
#define ONE_TYPE(type) testing::ValuesIn(typeVector(type))
#define NULL_TYPE testing::ValuesIn(typeVector(nulltype))
// Wraps a single MatType in a one-element vector so it can be handed to
// testing::ValuesIn (see the ONE_TYPE / NULL_TYPE macros).
vector<MatType> typeVector(MatType type)
{
    return vector<MatType>(1, type);
}
// Pixel coordinate pair stored as two shorts; do_meanShift returns the converged
// window centre in this form.
struct COOR
{
    short x;
    short y;
};
// Runs the mean-shift iteration for one pixel of a 4-byte-per-pixel image
// (host-side reference implementation used by the tests).
//
//   x0, y0   starting pixel coordinates
//   sptr     pointer to the source pixel at (x0, y0); sstep is the row stride in bytes
//   dptr     pointer to the 4-byte destination pixel that receives the result
//   size     image size, used to clip the search window
//   sp       spatial window radius (window is [x0-sp, x0+sp] x [y0-sp, y0+sp])
//   sr       colour radius; a pixel contributes when its squared colour distance
//            over the first three channels is <= sr*sr
//   maxIter  maximum number of iterations
//   eps      stop threshold on |dx| + |dy| + squared colour shift
//   tab      lookup table indexed by (difference + 255); the callers in this file
//            fill it with (i - 255)^2, i.e. the squared difference
//
// Writes the converged colour to dptr (the fourth channel is copied from the
// starting pixel) and returns the converged window centre.
//
// NOTE(review): a few lines below carry trailing or stand-alone text
// (PARAM_TEST_CASE(ImgprocTestBase, ...), member declarations, "virtual void SetUp()")
// that appears to come from another revision of this file; it is left as found.
COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, Size size, int sp, int sr, int maxIter, float eps, int *tab) PARAM_TEST_CASE(ImgprocTestBase, MatType,
int, // blockSize
int, // border type
bool) // roi or not
{ {
int type, borderType, blockSize;
bool useRoi;
int isr2 = sr * sr; Mat src, dst_whole, src_roi, dst_roi;
int c0, c1, c2, c3; ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi;
int iter;
uchar *ptr = NULL;
uchar *pstart = NULL;
int revx = 0, revy = 0;
// Current centre colour starts as the colour of the starting pixel.
c0 = sptr[0];
c1 = sptr[1];
c2 = sptr[2];
c3 = sptr[3];
// iterate meanshift procedure
for(iter = 0; iter < maxIter; iter++ )
{
int count = 0;
int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
//mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
int minx = x0 - sp;
int miny = y0 - sp;
int maxx = x0 + sp;
int maxy = y0 + sp;
//deal with the image boundary
if(minx < 0) minx = 0;
if(miny < 0) miny = 0;
if(maxx >= size.width) maxx = size.width - 1;
if(maxy >= size.height) maxy = size.height - 1;
if(iter == 0)
{
pstart = sptr;
}
else
{
pstart = pstart + revy * sstep + (revx << 2); //point to the new position
}
ptr = pstart;
ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row
// The row increment rewinds ptr by the window width (4 bytes per pixel) before
// stepping to the next row.
for( int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2)) virtual void SetUp()
{
int rowCount = 0;
int x = minx;
#if CV_ENABLE_UNROLLED
// Unrolled variant: handles four pixels (16 bytes) per iteration.
for( ; x + 4 <= maxx; x += 4, ptr += 16)
{
int t0, t1, t2;
t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x;
rowCount++;
}
t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 1;
rowCount++;
}
t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 2;
rowCount++;
}
t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 3;
rowCount++;
}
}
#endif
// Remaining (or all, when not unrolled) pixels of the row, one at a time.
for(; x <= maxx; x++, ptr += 4)
{
int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x;
rowCount++;
}
}
if(rowCount == 0)
continue;
count += rowCount;
sy += y * rowCount;
}
// No pixel within the colour radius: keep the current centre and stop.
if( count == 0 )
break;
// New centre = integer mean of the accepted positions and colours.
int x1 = sx / count;
int y1 = sy / count;
s0 = s0 / count;
s1 = s1 / count;
s2 = s2 / count;
bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
//revise the pointer corresponding to the new (y0,x0)
revx = x1 - x0;
revy = y1 - y0;
x0 = x1;
y0 = y1;
c0 = s0;
c1 = s1;
c2 = s2;
if( stopFlag )
break;
} //for iter
dptr[0] = (uchar)c0;
dptr[1] = (uchar)c1;
dptr[2] = (uchar)c2;
dptr[3] = (uchar)c3;
COOR coor;
coor.x = (short)x0;
coor.y = (short)y0;
return coor;
}
// Host-side reference mean-shift filter: runs do_meanShift for every pixel of
// src_roi and writes the converged colour to the matching pixel of dst_roi.
//
// Requirements enforced below: src_roi is non-empty, 8-bit, 4-channel; dst_roi has
// the same width and height and a step that is a multiple of 4.
// sp / sr are the spatial and colour radii passed through to do_meanShift.
// crit supplies the iteration limit (defaults to 5 when MAX_ITER is not set, then
// clamped to [1, 100]) and the epsilon.
//
// NOTE(review): the loop at the end carries trailing statements (GET_PARAM
// assignments, doubled braces) that appear to come from another revision of this
// file; they are left exactly as found.
void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, TermCriteria crit)
{
if( src_roi.empty() )
CV_Error( CV_StsBadArg, "The input image is empty" );
if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) );
CV_Assert( !(dst_roi.step & 0x3) );
if( !(crit.type & TermCriteria::MAX_ITER) )
crit.maxCount = 5;
int maxIter = std::min(std::max(crit.maxCount, 1), 100);
float eps;
// NOTE(review): the 1.f default assigned here is always overwritten by the
// unconditional assignment that follows, so it never takes effect.
if( !(crit.type & TermCriteria::EPS) )
eps = 1.f;
eps = (float)std::max(crit.epsilon, 0.0);
// tab[d + 255] == d * d for d in [-255, 256]: squared channel difference lookup.
int tab[512];
for(int i = 0; i < 512; i++)
tab[i] = (i - 255) * (i - 255);
uchar *sptr = src_roi.data;
uchar *dptr = dst_roi.data;
int sstep = (int)src_roi.step;
int dstep = (int)dst_roi.step;
Size size = src_roi.size();
// The inner loop advances 4 bytes per pixel; the outer increment adds what is
// left of the row stride so both pointers land on the start of the next row.
for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
dptr += dstep - (size.width << 2))
{ {
for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4) type = GET_PARAM(0);
{ blockSize = GET_PARAM(1);
do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab); borderType = GET_PARAM(2);
} useRoi = GET_PARAM(3);
} }
}
// CPU reference for mean-shift processing: like the filtering reference, but
// additionally stores the COOR value returned by do_meanShift() for each pixel.
//   src_roi     - input image; must be non-empty, 8-bit and 4-channel
//   dst_roi     - output image; same cols/rows as src_roi
//   dstCoor_roi - per-pixel coordinate output; same cols/rows as src_roi, step
//                 must be a multiple of 4 bytes; written as pairs of shorts
//   sp, sr      - forwarded unchanged to do_meanShift()
//                 (presumably the spatial and colour window radii - TODO confirm)
//   crit        - termination criteria; taken by value, maxCount is patched locally
// Raises CV_StsBadArg / CV_StsUnsupportedFormat through CV_Error on bad input.
void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, TermCriteria crit)
{
// NOTE(review): `void random_roi()` at the end of the next line is not part of
// this function; it looks like text fused in from another column.
if( src_roi.empty() ) void random_roi()
CV_Error( CV_StsBadArg, "The input image is empty" );
if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) &&
(src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows));
CV_Assert( !(dstCoor_roi.step & 0x3) );
// Default to 5 iterations when MAX_ITER is not set, then clamp to [1, 100].
if( !(crit.type & TermCriteria::MAX_ITER) )
crit.maxCount = 5;
int maxIter = std::min(std::max(crit.maxCount, 1), 100);
float eps;
// NOTE(review): the 1.f assigned below is overwritten unconditionally by the
// statement that follows it, so eps always ends up as max(crit.epsilon, 0.0).
// This looks like a missing `else`; confirm before changing.
if( !(crit.type & TermCriteria::EPS) )
eps = 1.f;
eps = (float)std::max(crit.epsilon, 0.0);
// Lookup table of squares: tab[d + 255] == d * d for d in [-255, 255].
int tab[512];
for(int i = 0; i < 512; i++)
tab[i] = (i - 255) * (i - 255);
uchar *sptr = src_roi.data;
uchar *dptr = dst_roi.data;
short *dCoorptr = (short *)dstCoor_roi.data;
int sstep = (int)src_roi.step;
int dstep = (int)dst_roi.step;
// step is in bytes; halve it because dCoorptr walks the buffer in shorts.
int dCoorstep = (int)dstCoor_roi.step >> 1;
Size size = src_roi.size();
// The inner loop advances sptr/dptr by 4 bytes and dCoorptr by 2 shorts per
// pixel, so the per-row increments only skip what is left of each row.
for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1))
// NOTE(review): the trailing tokens on the next lines (the second `{`,
// `Size roiSize = ...`, `Border srcBorder = ...`, `randomSubMat(...)`,
// `Border dstBorder = ...`) are not part of this function's logic; they look
// like text fused in from another column.
{ {
for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2) Size roiSize = randomSize(1, MAX_VALUE);
{ Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
*((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab); randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
}
}
} Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, type, 5, 16);
PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bool) generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
{ generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
int type1, type2, type3, type4, type5;
Scalar val;
// set up roi
int roicols;
int roirows;
int src1x;
int src1y;
int src2x;
int src2y;
int dstx;
int dsty;
int dst1x;
int dst1y;
int maskx;
int masky;
//mat
Mat mat1;
Mat mat2;
Mat mask;
Mat dst;
Mat dst1; //bak, for two outputs
//mat with roi
Mat mat1_roi;
Mat mat2_roi;
Mat mask_roi;
Mat dst_roi;
Mat dst1_roi; //bak
//ocl mat
ocl::oclMat clmat1;
ocl::oclMat clmat2;
ocl::oclMat clmask;
ocl::oclMat cldst;
ocl::oclMat cldst1; //bak
//ocl mat with roi
ocl::oclMat clmat1_roi;
ocl::oclMat clmat2_roi;
ocl::oclMat clmask_roi;
ocl::oclMat cldst_roi;
ocl::oclMat cldst1_roi;
// Fixture setup: reads test parameters 0..4 and, for each one that is not
// `nulltype`, fills the matching host matrix with randomMat() at
// MWIDTH x MHEIGHT and assigns it to its ocl::oclMat counterpart
// (presumably an upload to the device - TODO confirm).
// NOTE(review): no GET_PARAM(5) is read in this method.
virtual void SetUp()
{
type1 = GET_PARAM(0);
type2 = GET_PARAM(1);
type3 = GET_PARAM(2);
type4 = GET_PARAM(3);
type5 = GET_PARAM(4);
Size size(MWIDTH, MHEIGHT);
double min = 1, max = 20;
if(type1 != nulltype)
{
mat1 = randomMat(size, type1, min, max, false);
clmat1 = mat1;
}
if(type2 != nulltype)
{
mat2 = randomMat(size, type2, min, max, false);
clmat2 = mat2;
}
if(type3 != nulltype)
{
dst = randomMat(size, type3, min, max, false);
cldst = dst;
}
if(type4 != nulltype)
{
dst1 = randomMat(size, type4, min, max, false);
cldst1 = dst1;
}
// Unlike the other parameters, type5 is not used as a matrix type: the mask is
// always CV_8UC1 and type5 is passed to threshold() as the threshold type.
if(type5 != nulltype)
{
mask = randomMat(size, CV_8UC1, 0, 2, false);
threshold(mask, mask, 0.5, 255., type5);
clmask = mask;
}
// Random 4-component scalar, each component drawn from uniform(-10.0, 10.0).
val = Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
// NOTE(review): the second `}` on the next line does not close anything opened
// in this method; it looks like text fused in from another column.
} }
void random_roi() void Near(double threshold = 0.0)
{ {
#ifdef RANDOMROI Mat whole, roi;
//randomize ROI gdst_whole.download(whole);
roicols = rng.uniform(1, mat1.cols); gdst_roi.download(roi);
roirows = rng.uniform(1, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
src1y = rng.uniform(0, mat1.rows - roirows);
src2x = rng.uniform(0, mat2.cols - roicols);
src2y = rng.uniform(0, mat2.rows - roirows);
dstx = rng.uniform(0, dst.cols - roicols);
dsty = rng.uniform(0, dst.rows - roirows);
dst1x = rng.uniform(0, dst1.cols - roicols);
dst1y = rng.uniform(0, dst1.rows - roirows);
maskx = rng.uniform(0, mask.cols - roicols);
masky = rng.uniform(0, mask.rows - roirows);
#else
roicols = mat1.cols;
roirows = mat1.rows;
src1x = 0;
src1y = 0;
src2x = 0;
src2y = 0;
dstx = 0;
dsty = 0;
dst1x = 0;
dst1y = 0;
maskx = 0;
masky = 0;
#endif
if(type1 != nulltype)
{
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
}
if(type2 != nulltype)
{
mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
clmat2_roi = clmat2(Rect(src2x, src2y, roicols, roirows));
}
if(type3 != nulltype)
{
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
cldst_roi = cldst(Rect(dstx, dsty, roicols, roirows));
}
if(type4 != nulltype)
{
dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows));
cldst1_roi = cldst1(Rect(dst1x, dst1y, roicols, roirows));
}
if(type5 != nulltype)
{
mask_roi = mask(Rect(maskx, masky, roicols, roirows));
clmask_roi = clmask(Rect(maskx, masky, roicols, roirows));
}
}
void Near(double threshold) EXPECT_MAT_NEAR(dst_whole, whole, threshold);
{ EXPECT_MAT_NEAR(dst_roi, roi, threshold);
Mat cpu_cldst;
cldst.download(cpu_cldst);
EXPECT_MAT_NEAR(dst, cpu_cldst, threshold);
} }
}; };
////////////////////////////////equalizeHist//////////////////////////////////////////
typedef ImgprocTestBase EqualizeHist;

// Compares ocl::equalizeHist with the host equalizeHist over LOOP_TIMES ROIs.
// The comparison runs only when the source is CV_8UC1 and the destination has
// the same type; any other parameter combination just prints a notice.
OCL_TEST_P(EqualizeHist, Mat)
{
    const bool typesSupported = mat1.type() == CV_8UC1 && mat1.type() == dst.type();
    if (!typesSupported)
    {
        cout << "Unsupported type" << endl;
        EXPECT_DOUBLE_EQ(0.0, 0.0); // trivially true; the skipped case still records one assertion
        return;
    }

    for (int iteration = 0; iteration < LOOP_TIMES; ++iteration)
    {
        random_roi();

        // Host reference first, then the OpenCL implementation on the same ROI.
        equalizeHist(mat1_roi, dst_roi);
        ocl::equalizeHist(clmat1_roi, cldst_roi);

        Near(1.1);
    }
}
////////////////////////////////copyMakeBorder//////////////////////////////////////////// ////////////////////////////////copyMakeBorder////////////////////////////////////////////
typedef ImgprocTestBase CopyMakeBorder; PARAM_TEST_CASE(CopyMakeBorder, MatDepth, // depth
Channels, // channels
// Compares ocl::copyMakeBorder with cv::copyMakeBorder for five border types,
// LOOP_TIMES ROIs each, with BORDER_ISOLATED added and a fill value of Scalar(1.0).
// The four border widths are drawn once per test, not once per iteration.
// NOTE(review): on the first six lines below, everything after the left-hand
// statement (`bool, // isolated or not`, `Border, ...`, `bool) ...`, the second
// `{`, `int type, borderType;`, `bool useRoi;`) is not part of this test; it
// looks like text fused in from another column.
OCL_TEST_P(CopyMakeBorder, Mat) bool, // isolated or not
Border, // border type
bool) // roi or not
{ {
int bordertype[] = {BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101}; int type, borderType;
int top = rng.uniform(0, 10); bool useRoi;
int bottom = rng.uniform(0, 10);
int left = rng.uniform(0, 10);
int right = rng.uniform(0, 10);
// Source and destination must share a type; otherwise only a notice is printed.
if (mat1.type() != dst.type())
{
cout << "Unsupported type" << endl;
EXPECT_DOUBLE_EQ(0.0, 0.0);
}
else
{
for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++)
for(int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
#ifdef RANDOMROI
// NOTE(review): `&&` binds tighter than `||`, so the border-type test guards
// only the `cols <= left` comparison. The other three size comparisons skip the
// iteration for every border type, including CONSTANT and REPLICATE. Possibly
// all four comparisons were meant to be parenthesised together - confirm.
if(((bordertype[i] != BORDER_CONSTANT) && (bordertype[i] != BORDER_REPLICATE)) && (mat1_roi.cols <= left) || (mat1_roi.cols <= right) || (mat1_roi.rows <= top) || (mat1_roi.rows <= bottom))
{
continue;
}
// Grow the destination ROI by the border widths only when the enlarged ROI
// still fits inside the whole destination matrix; otherwise skip this iteration.
if((dstx >= left) && (dsty >= top) && (dstx + cldst_roi.cols + right <= cldst_roi.wholecols) && (dsty + cldst_roi.rows + bottom <= cldst_roi.wholerows))
{
dst_roi.adjustROI(top, bottom, left, right);
cldst_roi.adjustROI(top, bottom, left, right);
}
else
{
continue;
}
#endif
cv::copyMakeBorder(mat1_roi, dst_roi, top, bottom, left, right, bordertype[i] | BORDER_ISOLATED, Scalar(1.0));
ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i] | BORDER_ISOLATED, Scalar(1.0));
Mat cpu_cldst;
// Without RANDOMROI the ROI outputs are compared; with it, the whole matrices.
#ifndef RANDOMROI
cldst_roi.download(cpu_cldst);
EXPECT_MAT_NEAR(dst_roi, cpu_cldst, 0.0);
#else
cldst.download(cpu_cldst);
EXPECT_MAT_NEAR(dst, cpu_cldst, 0.0);
#endif
}
}
}
Border border;
Scalar val;
////////////////////////////////cornerMinEigenVal////////////////////////////////////////// Mat src, dst_whole, src_roi, dst_roi;
ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi;
struct CornerMinEigenVal : ImgprocTestBase {};
OCL_TEST_P(CornerMinEigenVal, Mat) virtual void SetUp()
{
for(int j = 0; j < LOOP_TIMES; j++)
{ {
type = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1));
borderType = GET_PARAM(3);
random_roi(); if (GET_PARAM(2))
int blockSize = 3, apertureSize = 3;//1 + 2 * (rand() % 4); borderType |= BORDER_ISOLATED;
//int borderType = BORDER_CONSTANT;
//int borderType = BORDER_REPLICATE;
int borderType = BORDER_REFLECT;
cornerMinEigenVal(mat1_roi, dst_roi, blockSize, apertureSize, borderType);
ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType);
Near(1.);
}
}
useRoi = GET_PARAM(4);
////////////////////////////////cornerHarris//////////////////////////////////////////
typedef ImgprocTestBase CornerHarris;
// Compares ocl::cornerHarris with the host cornerHarris over LOOP_TIMES ROIs,
// using fixed arguments (blockSize 3, apertureSize 3, k = 2, BORDER_REFLECT)
// and a tolerance of 1.0 passed to Near().
OCL_TEST_P(CornerHarris, Mat)
{
for(int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
int blockSize = 3, apertureSize = 3; //1 + 2 * (rand() % 4);
double k = 2;
// Only BORDER_REFLECT is exercised; the other two border types are commented out.
//int borderType = BORDER_CONSTANT;
//int borderType = BORDER_REPLICATE;
int borderType = BORDER_REFLECT;
cornerHarris(mat1_roi, dst_roi, blockSize, apertureSize, k, borderType);
ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, k, borderType);
Near(1.);
// NOTE(review): the second `}` on the next line does not close anything opened
// in this test; it looks like text fused in from another column.
} }
}
void random_roi()
////////////////////////////////integral/////////////////////////////////////////////////
typedef ImgprocTestBase Integral;
OCL_TEST_P(Integral, Mat1)
{
for(int j = 0; j < LOOP_TIMES; j++)
{ {
random_roi(); Size roiSize = randomSize(1, MAX_VALUE);
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
ocl::integral(clmat1_roi, cldst_roi); Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
integral(mat1_roi, dst_roi); randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, type, 5, 16);
Near(0);
}
}
OCL_TEST_P(Integral, Mat2)
{
for(int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
ocl::integral(clmat1_roi, cldst_roi, cldst1_roi); generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
integral(mat1_roi, dst_roi, dst1_roi); generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
Near(0);
Mat cpu_cldst1; border = randomBorder(0, 10);
cldst1.download(cpu_cldst1); val = randomScalar(-MAX_VALUE, MAX_VALUE);
EXPECT_MAT_NEAR(dst1, cpu_cldst1, 0.0);
} }
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// warpAffine & warpPerspective
PARAM_TEST_CASE(WarpTestBase, MatType, int)
{
int type;
Size size;
int interpolation;
//src mat
Mat mat1;
Mat dst;
// set up roi void Near(double threshold = 0.0)
int src_roicols;
int src_roirows;
int dst_roicols;
int dst_roirows;
int src1x;
int src1y;
int dstx;
int dsty;
//src mat with roi
Mat mat1_roi;
Mat dst_roi;
//ocl dst mat for testing
ocl::oclMat gdst_whole;
//ocl mat with roi
ocl::oclMat gmat1;
ocl::oclMat gdst;
virtual void SetUp()
{ {
type = GET_PARAM(0); Mat whole, roi;
interpolation = GET_PARAM(1); gdst_whole.download(whole);
size = Size(MWIDTH, MHEIGHT); gdst_roi.download(roi);
mat1 = randomMat(size, type, 5, 16, false);
dst = randomMat(size, type, 5, 16, false);
}
void random_roi() EXPECT_MAT_NEAR(dst_whole, whole, threshold);
{ EXPECT_MAT_NEAR(dst_roi, roi, threshold);
#ifdef RANDOMROI
//randomize ROI
src_roicols = rng.uniform(1, mat1.cols);
src_roirows = rng.uniform(1, mat1.rows);
dst_roicols = rng.uniform(1, dst.cols);
dst_roirows = rng.uniform(1, dst.rows);
src1x = rng.uniform(0, mat1.cols - src_roicols);
src1y = rng.uniform(0, mat1.rows - src_roirows);
dstx = rng.uniform(0, dst.cols - dst_roicols);
dsty = rng.uniform(0, dst.rows - dst_roirows);
#else
src_roicols = mat1.cols;
src_roirows = mat1.rows;
dst_roicols = dst.cols;
dst_roirows = dst.rows;
src1x = 0;
src1y = 0;
dstx = 0;
dsty = 0;
#endif
mat1_roi = mat1(Rect(src1x, src1y, src_roicols, src_roirows));
dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows));
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
gmat1 = mat1_roi;
} }
}; };
/////warpAffine OCL_TEST_P(CopyMakeBorder, Mat)
typedef WarpTestBase WarpAffine;
OCL_TEST_P(WarpAffine, Mat)
{ {
static const double coeffs[2][3] = for (int i = 0; i < LOOP_TIMES; ++i)
{
{cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
{sin(CV_PI / 6), cos(CV_PI / 6), -100.0}
};
Mat M(2, 3, CV_64F, (void *)coeffs);
for(int j = 0; j < LOOP_TIMES; j++)
{ {
random_roi(); random_roi();
warpAffine(mat1_roi, dst_roi, M, size, interpolation); cv::copyMakeBorder(src_roi, dst_roi, border.top, border.bot, border.lef, border.rig, borderType, val);
ocl::warpAffine(gmat1, gdst, M, size, interpolation); ocl::copyMakeBorder(gsrc_roi, gdst_roi, border.top, border.bot, border.lef, border.rig, borderType, val);
Mat cpu_dst; Near();
gdst_whole.download(cpu_dst);
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0);
} }
} }
////////////////////////////////equalizeHist//////////////////////////////////////////////
// warpPerspective typedef ImgprocTestBase EqualizeHist;
typedef WarpTestBase WarpPerspective;
OCL_TEST_P(WarpPerspective, Mat) OCL_TEST_P(EqualizeHist, Mat)
{ {
static const double coeffs[3][3] = for (int j = 0; j < LOOP_TIMES; j++)
{
{cos(3.14 / 6), -sin(3.14 / 6), 100.0},
{sin(3.14 / 6), cos(3.14 / 6), -100.0},
{0.0, 0.0, 1.0}
};
Mat M(3, 3, CV_64F, (void *)coeffs);
for(int j = 0; j < LOOP_TIMES; j++)
{ {
random_roi(); random_roi();
warpPerspective(mat1_roi, dst_roi, M, size, interpolation); equalizeHist(src_roi, dst_roi);
ocl::warpPerspective(gmat1, gdst, M, size, interpolation); ocl::equalizeHist(gsrc_roi, gdst_roi);
Mat cpu_dst; Near(1.1);
gdst_whole.download(cpu_dst);
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0);
} }
} }
///////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////cornerMinEigenVal//////////////////////////////////////////
// remap
//////////////////////////////////////////////////////////////////////////////////////////////////
PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int)
{
int srcType;
int map1Type;
int map2Type;
Scalar val;
int interpolation;
int bordertype;
Mat src;
Mat dst;
Mat map1;
Mat map2;
//std::vector<ocl::Info> oclinfo;
int src_roicols;
int src_roirows;
int dst_roicols;
int dst_roirows;
int map1_roicols;
int map1_roirows;
int map2_roicols;
int map2_roirows;
int srcx;
int srcy;
int dstx;
int dsty;
int map1x;
int map1y;
int map2x;
int map2y;
Mat src_roi;
Mat dst_roi;
Mat map1_roi;
Mat map2_roi;
//ocl mat for testing
ocl::oclMat gdst;
//ocl mat with roi
ocl::oclMat gsrc_roi;
ocl::oclMat gdst_roi;
ocl::oclMat gmap1_roi;
ocl::oclMat gmap2_roi;
virtual void SetUp()
{
srcType = GET_PARAM(0);
map1Type = GET_PARAM(1);
map2Type = GET_PARAM(2);
interpolation = GET_PARAM(3);
bordertype = GET_PARAM(4);
Size srcSize = Size(MWIDTH, MHEIGHT);
Size map1Size = Size(MWIDTH, MHEIGHT);
double min = 5, max = 16;
if(srcType != nulltype)
{
src = randomMat(srcSize, srcType, min, max, false);
}
if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype))
{
map1 = randomMat(map1Size, map1Type, min, max, false);
}
else if (map1Type == CV_32FC1 && map2Type == CV_32FC1)
{
map1 = randomMat(map1Size, map1Type, min, max, false);
map2 = randomMat(map1Size, map1Type, min, max, false);
}
else
{
cout << "The wrong input type" << endl;
return;
}
dst = randomMat(map1Size, srcType, min, max, false); typedef ImgprocTestBase CornerMinEigenVal;
switch (src.channels())
{
case 1:
val = Scalar(rng.uniform(0.0, 10.0), 0, 0, 0);
break;
case 2:
val = Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0, 0);
break;
case 3:
val = Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0);
break;
case 4:
val = Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0));
break;
}
} OCL_TEST_P(CornerMinEigenVal, Mat)
void random_roi() {
for (int j = 0; j < LOOP_TIMES; j++)
{ {
dst_roicols = rng.uniform(1, dst.cols); random_roi();
dst_roirows = rng.uniform(1, dst.rows);
src_roicols = rng.uniform(1, src.cols);
src_roirows = rng.uniform(1, src.rows);
srcx = rng.uniform(0, src.cols - src_roicols);
srcy = rng.uniform(0, src.rows - src_roirows);
dstx = rng.uniform(0, dst.cols - dst_roicols);
dsty = rng.uniform(0, dst.rows - dst_roirows);
map1_roicols = dst_roicols;
map1_roirows = dst_roirows;
map2_roicols = dst_roicols;
map2_roirows = dst_roirows;
map1x = dstx;
map1y = dsty;
map2x = dstx;
map2y = dsty;
if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype))
{
map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows));
gmap1_roi = map1_roi;
}
else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) int apertureSize = 3;
{
map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows));
gmap1_roi = map1_roi;
map2_roi = map2(Rect(map2x, map2y, map2_roicols, map2_roirows));
gmap2_roi = map2_roi;
}
src_roi = src(Rect(srcx, srcy, src_roicols, src_roirows));
dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows));
gsrc_roi = src_roi;
gdst = dst;
gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows));
}
};
// Compares ocl::remap with the host remap over LOOP_TIMES random ROIs, after
// rejecting the interpolation / map-type combinations listed in the first `if`.
// NOTE(review): the cornerMinEigenVal calls on the first two lines, `Near(1.0);`
// after the `for` header and the second `}` on the last two lines are not part
// of this test; they look like text fused in from another column.
OCL_TEST_P(Remap, Mat) cornerMinEigenVal(src_roi, dst_roi, blockSize, apertureSize, borderType);
{ ocl::cornerMinEigenVal(gsrc_roi, gdst_roi, blockSize, apertureSize, borderType);
if((interpolation == 1 && map1Type == CV_16SC2) || (map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1))
{
cout << "Don't support the dataType" << endl;
return;
}
// Only bordertype[0] (BORDER_CONSTANT) is used below; BORDER_REPLICATE is
// listed but never passed to remap.
int bordertype[] = {BORDER_CONSTANT, BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
for(int j = 0; j < LOOP_TIMES; j++) Near(1.0);
{
random_roi();
remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val);
ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val);
Mat cpu_dst;
gdst.download(cpu_dst);
// NOTE(review): there is no `else`, so the 2.0-tolerance comparison runs for
// every interpolation value; when interpolation == 0 both comparisons run.
if(interpolation == 0)
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0);
EXPECT_MAT_NEAR(dst, cpu_dst, 2.0);
} }
} }
////////////////////////////////cornerHarris//////////////////////////////////////////
typedef ImgprocTestBase CornerHarris;
///////////////////////////////////////////////////////////////////////////////////////////////// OCL_TEST_P(CornerHarris, Mat)
// resize
PARAM_TEST_CASE(Resize, MatType, Size, double, double, int)
{ {
int type; for (int j = 0; j < LOOP_TIMES; j++)
Size dsize;
double fx, fy;
int interpolation;
//src mat
Mat mat1;
Mat dst;
// set up roi
int src_roicols;
int src_roirows;
int dst_roicols;
int dst_roirows;
int src1x;
int src1y;
int dstx;
int dsty;
//src mat with roi
Mat mat1_roi;
Mat dst_roi;
//ocl dst mat for testing
ocl::oclMat gdst_whole;
//ocl mat with roi
ocl::oclMat gmat1;
ocl::oclMat gdst;
virtual void SetUp()
{ {
type = GET_PARAM(0); random_roi();
dsize = GET_PARAM(1);
fx = GET_PARAM(2);
fy = GET_PARAM(3);
interpolation = GET_PARAM(4);
Size size(MWIDTH, MHEIGHT);
if(dsize == Size() && !(fx > 0 && fy > 0))
{
cout << "invalid dsize and fx fy" << endl;
return;
}
if(dsize == Size()) int apertureSize = 3;
{ double k = 2.0;
dsize.width = (int)(size.width * fx);
dsize.height = (int)(size.height * fy);
}
mat1 = randomMat(size, type, 5, 16, false); cornerHarris(src_roi, dst_roi, blockSize, apertureSize, k, borderType);
dst = randomMat(dsize, type, 5, 16, false); ocl::cornerHarris(gsrc_roi, gdst_roi, blockSize, apertureSize, k, borderType);
Near(1.0);
} }
}
void random_roi() //////////////////////////////////integral/////////////////////////////////////////////////
{
#ifdef RANDOMROI
//randomize ROI
src_roicols = rng.uniform(1, mat1.cols);
src_roirows = rng.uniform(1, mat1.rows);
dst_roicols = (int)(src_roicols * fx);
dst_roirows = (int)(src_roirows * fy);
src1x = rng.uniform(0, mat1.cols - src_roicols);
src1y = rng.uniform(0, mat1.rows - src_roirows);
dstx = rng.uniform(0, dst.cols - dst_roicols);
dsty = rng.uniform(0, dst.rows - dst_roirows);
#else
src_roicols = mat1.cols;
src_roirows = mat1.rows;
dst_roicols = dst.cols;
dst_roirows = dst.rows;
src1x = 0;
src1y = 0;
dstx = 0;
dsty = 0;
#endif
dsize.width = dst_roicols;
dsize.height = dst_roirows;
mat1_roi = mat1(Rect(src1x, src1y, src_roicols, src_roirows));
dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows));
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
dsize.width = (int)(mat1_roi.size().width * fx);
dsize.height = (int)(mat1_roi.size().height * fy);
gmat1 = mat1_roi;
}
}; typedef ImgprocTestBase Integral;
OCL_TEST_P(Resize, Mat) OCL_TEST_P(Integral, Mat1)
{ {
for(int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
{ {
random_roi(); random_roi();
// resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation); ocl::integral(gsrc_roi, gdst_roi);
// ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); integral(src_roi, dst_roi);
if(dst_roicols < 1 || dst_roirows < 1) continue;
resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation);
ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
Mat cpu_dst; Near();
gdst_whole.download(cpu_dst);
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0);
} }
} }
// TODO wrong output type
///////////////////////////////////////////////////////////////////////////////////////////////// OCL_TEST_P(Integral, DISABLED_Mat2)
//threshold
PARAM_TEST_CASE(Threshold, MatType, ThreshOp)
{ {
int type; Mat dst1;
int threshOp; ocl::oclMat gdst1;
//src mat
Mat mat1;
Mat dst;
// set up roi
int roicols;
int roirows;
int src1x;
int src1y;
int dstx;
int dsty;
//src mat with roi for (int j = 0; j < LOOP_TIMES; j++)
Mat mat1_roi;
Mat dst_roi;
//ocl dst mat for testing
ocl::oclMat gdst_whole;
//ocl mat with roi
ocl::oclMat gmat1;
ocl::oclMat gdst;
virtual void SetUp()
{
type = GET_PARAM(0);
threshOp = GET_PARAM(1);
Size size(MWIDTH, MHEIGHT);
mat1 = randomMat(size, type, 5, 16, false);
dst = randomMat(size, type, 5, 16, false);
}
void random_roi()
{
#ifdef RANDOMROI
//randomize ROI
roicols = rng.uniform(1, mat1.cols);
roirows = rng.uniform(1, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
src1y = rng.uniform(0, mat1.rows - roirows);
dstx = rng.uniform(0, dst.cols - roicols);
dsty = rng.uniform(0, dst.rows - roirows);
#else
roicols = mat1.cols;
roirows = mat1.rows;
src1x = 0;
src1y = 0;
dstx = 0;
dsty = 0;
#endif
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat1 = mat1_roi;
}
};
OCL_TEST_P(Threshold, Mat)
{
for(int j = 0; j < LOOP_TIMES; j++)
{ {
random_roi(); random_roi();
double maxVal = randomDouble(20.0, 127.0);
double thresh = randomDouble(0.0, maxVal);
threshold(mat1_roi, dst_roi, thresh, maxVal, threshOp); integral(src_roi, dst1, dst_roi);
ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp); ocl::integral(gsrc_roi, gdst1, gdst_roi);
Mat cpu_dst; Near();
gdst_whole.download(cpu_dst);
EXPECT_MAT_NEAR(dst, cpu_dst, 1);
} }
} }
PARAM_TEST_CASE(MeanShiftTestBase, MatType, MatType, int, int, TermCriteria) ///////////////////////////////////////////////////////////////////////////////////////////////////
//// threshold
struct Threshold :
public ImgprocTestBase
{ {
int type, typeCoor; int thresholdType;
int sp, sr;
TermCriteria crit;
//src mat
Mat src;
Mat dst;
Mat dstCoor;
//set up roi
int roicols;
int roirows;
int srcx;
int srcy;
int dstx;
int dsty;
//src mat with roi
Mat src_roi;
Mat dst_roi;
Mat dstCoor_roi;
//ocl dst mat
ocl::oclMat gdst;
ocl::oclMat gdstCoor;
//ocl mat with roi
ocl::oclMat gsrc_roi;
ocl::oclMat gdst_roi;
ocl::oclMat gdstCoor_roi;
virtual void SetUp() virtual void SetUp()
{ {
type = GET_PARAM(0); type = GET_PARAM(0);
typeCoor = GET_PARAM(1); blockSize = GET_PARAM(1);
sp = GET_PARAM(2); thresholdType = GET_PARAM(2);
sr = GET_PARAM(3); useRoi = GET_PARAM(3);
crit = GET_PARAM(4);
// MWIDTH=256, MHEIGHT=256. defined in utility.hpp
Size size = Size(MWIDTH, MHEIGHT);
src = randomMat(size, type, 5, 16, false);
dst = randomMat(size, type, 5, 16, false);
dstCoor = randomMat(size, typeCoor, 5, 16, false);
}
void random_roi()
{
#ifdef RANDOMROI
//randomize ROI
roicols = rng.uniform(1, src.cols);
roirows = rng.uniform(1, src.rows);
srcx = rng.uniform(0, src.cols - roicols);
srcy = rng.uniform(0, src.rows - roirows);
dstx = rng.uniform(0, dst.cols - roicols);
dsty = rng.uniform(0, dst.rows - roirows);
#else
roicols = src.cols;
roirows = src.rows;
srcx = 0;
srcy = 0;
dstx = 0;
dsty = 0;
#endif
src_roi = src(Rect(srcx, srcy, roicols, roirows));
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
dstCoor_roi = dstCoor(Rect(dstx, dsty, roicols, roirows));
gdst = dst;
gdstCoor = dstCoor;
gsrc_roi = src_roi;
gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi
gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows));
} }
}; };
/////////////////////////meanShiftFiltering///////////////////////////// OCL_TEST_P(Threshold, Mat)
typedef MeanShiftTestBase MeanShiftFiltering;

// Compares ocl::meanShiftFiltering with the CPU reference meanShiftFiltering_
// over LOOP_TIMES ROIs. The whole destination matrix is compared (not just the
// ROI) and the result must match exactly (tolerance 0.0).
OCL_TEST_P(MeanShiftFiltering, Mat)
{
    for (int j = 0; j < LOOP_TIMES; j++)
    {
        random_roi();

        ::meanShiftFiltering_(src_roi, dst_roi, sp, sr, crit);
        ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);

        // Download once, after the OpenCL call. The earlier download that ran
        // before the computation was overwritten here before any use.
        Mat cpu_gdst;
        gdst.download(cpu_gdst);
        EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0);
    }
}
///////////////////////////meanShiftProc//////////////////////////////////
typedef MeanShiftTestBase MeanShiftProc;
OCL_TEST_P(MeanShiftProc, Mat)
{ {
for (int j = 0; j < LOOP_TIMES; j++)
for(int j = 0; j < LOOP_TIMES; j++)
{ {
random_roi(); random_roi();
Mat cpu_gdst; double maxVal = randomDouble(20.0, 127.0);
Mat cpu_gdstCoor; double thresh = randomDouble(0.0, maxVal);
meanShiftProc_(src_roi, dst_roi, dstCoor_roi, sp, sr, crit); threshold(src_roi, dst_roi, thresh, maxVal, thresholdType);
ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); ocl::threshold(gsrc_roi, gdst_roi, thresh, maxVal, thresholdType);
gdst.download(cpu_gdst); Near(1);
gdstCoor.download(cpu_gdstCoor);
EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0);
EXPECT_MAT_NEAR(dstCoor, cpu_gdstCoor, 0.0);
} }
} }
/////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
//hist // calcHist
void calcHistGold(const Mat &src, Mat &hist) static void calcHistGold(const Mat &src, Mat &hist)
{ {
hist.create(1, 256, CV_32SC1); hist = Mat(1, 256, CV_32SC1, Scalar::all(0));
hist.setTo(Scalar::all(0));
int *hist_row = hist.ptr<int>(); int * const hist_row = hist.ptr<int>();
for (int y = 0; y < src.rows; ++y) for (int y = 0; y < src.rows; ++y)
{ {
const uchar *src_row = src.ptr(y); const uchar * const src_row = src.ptr(y);
for (int x = 0; x < src.cols; ++x) for (int x = 0; x < src.cols; ++x)
++hist_row[src_row[x]]; ++hist_row[src_row[x]];
} }
} }
// Fixture for the histogram tests: a random MWIDTH x MHEIGHT source matrix plus
// a ROI selected by random_roi(); the histogram outputs have no ROI.
// NOTE(review): two MatType parameters are declared but only GET_PARAM(0) is
// read in SetUp().
// NOTE(review): `typedef ImgprocTestBase CalcHist;` at the end of the next line
// is not part of this fixture; it looks like text fused in from another column.
PARAM_TEST_CASE(HistTestBase, MatType, MatType) typedef ImgprocTestBase CalcHist;
{
int type_src;
//src mat
Mat src;
Mat dst_hist;
//set up roi
int roicols;
int roirows;
int srcx;
int srcy;
//src mat with roi
Mat src_roi;
//ocl dst mat, dst_hist and gdst_hist don't have roi
ocl::oclMat gdst_hist;
//ocl mat with roi
ocl::oclMat gsrc_roi;
// Reads the source type and creates the random source matrix
// (randomMat is called with bounds 0 and 256).
virtual void SetUp()
{
type_src = GET_PARAM(0);
Size size = Size(MWIDTH, MHEIGHT);
src = randomMat(size, type_src, 0, 256, false);
}
// Selects the source ROI and assigns it to gsrc_roi. With RANDOMROI defined
// the size and origin come from rng.uniform(); otherwise the ROI is the whole
// matrix.
void random_roi()
{
#ifdef RANDOMROI
//randomize ROI
roicols = rng.uniform(1, src.cols);
roirows = rng.uniform(1, src.rows);
srcx = rng.uniform(0, src.cols - roicols);
srcy = rng.uniform(0, src.rows - roirows);
#else
roicols = src.cols;
roirows = src.rows;
srcx = 0;
srcy = 0;
#endif
src_roi = src(Rect(srcx, srcy, roicols, roirows));
gsrc_roi = src_roi;
}
};
///////////////////////////calcHist///////////////////////////////////////
typedef HistTestBase CalcHist;
OCL_TEST_P(CalcHist, Mat) OCL_TEST_P(CalcHist, Mat)
{ {
for(int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
{ {
random_roi(); random_roi();
Mat cpu_hist; calcHistGold(src_roi, dst_roi);
ocl::calcHist(gsrc_roi, gdst_roi);
calcHistGold(src_roi, dst_hist);
ocl::calcHist(gsrc_roi, gdst_hist);
gdst_hist.download(cpu_hist); Near();
EXPECT_MAT_NEAR(dst_hist, cpu_hist, 0.0);
} }
} }
/////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////
// CLAHE //// CLAHE
PARAM_TEST_CASE(CLAHE_Test, Size, double) PARAM_TEST_CASE(CLAHETest, Size, double, bool)
{ {
Size gridSize; Size gridSize;
double clipLimit; double clipLimit;
bool useRoi;
Mat src; Mat src, dst_whole, src_roi, dst_roi;
Mat dst_gold; ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi;
ocl::oclMat g_src;
ocl::oclMat g_dst;
virtual void SetUp() virtual void SetUp()
{ {
gridSize = GET_PARAM(0); gridSize = GET_PARAM(0);
clipLimit = GET_PARAM(1); clipLimit = GET_PARAM(1);
useRoi = GET_PARAM(2);
src = randomMat(Size(MWIDTH, MHEIGHT), CV_8UC1, 0, 256, false);
g_src.upload(src);
} }
};
OCL_TEST_P(CLAHE_Test, Accuracy)
{
Ptr<CLAHE> clahe = ocl::createCLAHE(clipLimit, gridSize);
clahe->apply(g_src, g_dst);
Mat dst(g_dst);
Ptr<CLAHE> clahe_gold = createCLAHE(clipLimit, gridSize);
clahe_gold->apply(src, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, 1.0); void random_roi()
}
///////////////////////////Convolve//////////////////////////////////
PARAM_TEST_CASE(ConvolveTestBase, MatType, bool)
{
int type;
//src mat
Mat mat1;
Mat mat2;
Mat dst;
Mat dst1; //bak, for two outputs
// set up roi
int roicols;
int roirows;
int src1x;
int src1y;
int src2x;
int src2y;
int dstx;
int dsty;
//src mat with roi
Mat mat1_roi;
Mat mat2_roi;
Mat dst_roi;
Mat dst1_roi; //bak
//ocl dst mat for testing
ocl::oclMat gdst_whole;
ocl::oclMat gdst1_whole; //bak
//ocl mat with roi
ocl::oclMat gmat1;
ocl::oclMat gmat2;
ocl::oclMat gdst;
ocl::oclMat gdst1; //bak
virtual void SetUp()
{ {
type = GET_PARAM(0); Size roiSize = randomSize(std::max(gridSize.height, gridSize.width), MAX_VALUE);
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, CV_8UC1, 5, 256);
Size size(MWIDTH, MHEIGHT); Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, CV_8UC1, 5, 16);
mat1 = randomMat(size, type, 5, 16, false); generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
mat2 = randomMat(size, type, 5, 16, false); generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
dst = randomMat(size, type, 5, 16, false);
dst1 = randomMat(size, type, 5, 16, false);
} }
void random_roi()
void Near(double threshold = 0.0)
{ {
#ifdef RANDOMROI Mat whole, roi;
//randomize ROI gdst_whole.download(whole);
roicols = rng.uniform(1, mat1.cols); gdst_roi.download(roi);
roirows = rng.uniform(1, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
src1y = rng.uniform(0, mat1.rows - roirows);
dstx = rng.uniform(0, dst.cols - roicols);
dsty = rng.uniform(0, dst.rows - roirows);
#else
roicols = mat1.cols;
roirows = mat1.rows;
src1x = 0;
src1y = 0;
dstx = 0;
dsty = 0;
#endif
src2x = rng.uniform(0, mat2.cols - roicols);
src2y = rng.uniform(0, mat2.rows - roirows);
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows));
gdst_whole = dst;
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gdst1_whole = dst1;
gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
gmat1 = mat1_roi;
gmat2 = mat2_roi;
//end
}
EXPECT_MAT_NEAR(dst_whole, whole, threshold);
EXPECT_MAT_NEAR(dst_roi, roi, threshold);
}
}; };
typedef ConvolveTestBase Convolve; OCL_TEST_P(CLAHETest, Accuracy)
void conv2( Mat x, Mat y, Mat z)
{ {
int N1 = x.rows; for (int i = 0; i < LOOP_TIMES; ++i)
int M1 = x.cols; {
int N2 = y.rows; random_roi();
int M2 = y.cols;
Ptr<CLAHE> clahe = ocl::createCLAHE(clipLimit, gridSize);
clahe->apply(gsrc_roi, gdst_roi);
int i, j; Ptr<CLAHE> clahe_gold = createCLAHE(clipLimit, gridSize);
int m, n; clahe_gold->apply(src_roi, dst_roi);
Near(1.0);
}
}
float *kerneldata = (float *)(x.data); /////////////////////////////Convolve//////////////////////////////////
float *srcdata = (float *)(y.data);
float *dstdata = (float *)(z.data);
for(i = 0; i < N2; i++) static void convolve_gold(const Mat & src, const Mat & kernel, Mat & dst)
for(j = 0; j < M2; j++) {
for (int i = 0; i < src.rows; i++)
{
float * const dstptr = dst.ptr<float>(i);
for (int j = 0; j < src.cols; j++)
{ {
float temp = 0; float temp = 0;
for(m = 0; m < N1; m++)
for(n = 0; n < M1; n++) for (int m = 0; m < kernel.rows; m++)
{
const float * const kptr = kernel.ptr<float>(m);
for (int n = 0; n < kernel.cols; n++)
{ {
int r, c; int r = clipInt(i - kernel.rows / 2 + m, 0, src.rows - 1);
r = min(max((i - N1 / 2 + m), 0), N2 - 1); int c = clipInt(j - kernel.cols / 2 + n, 0, src.cols - 1);
c = min(max((j - M1 / 2 + n), 0), M2 - 1);
temp += kerneldata[m * (x.step >> 2) + n] * srcdata[r * (y.step >> 2) + c]; temp += src.ptr<float>(r)[c] * kptr[n];
} }
dstdata[i * (z.step >> 2) + j] = temp; }
dstptr[j] = temp;
} }
}
} }
typedef ImgprocTestBase Convolve;
OCL_TEST_P(Convolve, Mat) OCL_TEST_P(Convolve, Mat)
{ {
if(mat1.type() != CV_32FC1) Mat kernel, kernel_roi;
{ ocl::oclMat gkernel, gkernel_roi;
cout << "\tUnsupported type\t\n"; const Size roiSize(7, 7);
}
for(int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
{ {
random_roi(); random_roi();
ocl::oclMat temp1;
Mat kernel_cpu = mat2(Rect(0, 0, 7, 7));
temp1 = kernel_cpu;
conv2(kernel_cpu, mat1_roi, dst_roi); Border kernelBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
ocl::convolve(gmat1, temp1, gdst); randomSubMat(kernel, kernel_roi, roiSize, kernelBorder, type, 5, 16);
generateOclMat(gkernel, gkernel_roi, kernel, roiSize, kernelBorder);
Mat cpu_dst; convolve_gold(src_roi, kernel_roi, dst_roi);
gdst_whole.download(cpu_dst); ocl::convolve(gsrc_roi, gkernel_roi, gdst_roi);
EXPECT_MAT_NEAR(dst, cpu_dst, .1);
Near(1);
} }
} }
//////////////////////////////// ColumnSum ////////////////////////////////////// ////////////////////////////////// ColumnSum //////////////////////////////////////
PARAM_TEST_CASE(ColumnSum, Size) static void columnSum_gold(const Mat & src, Mat & dst)
{ {
Size size; float * prevdptr = dst.ptr<float>(0);
Mat src; const float * sptr = src.ptr<float>(0);
virtual void SetUp() for (int x = 0; x < src.cols; ++x)
prevdptr[x] = sptr[x];
for (int y = 1; y < src.rows; ++y)
{ {
size = GET_PARAM(0); sptr = src.ptr<float>(y);
} float * const dptr = dst.ptr<float>(y);
};
OCL_TEST_P(ColumnSum, Accuracy) for (int x = 0; x < src.cols; ++x)
{ dptr[x] = prevdptr[x] + sptr[x];
Mat src = randomMat(size, CV_32FC1, 0, 255);
ocl::oclMat d_dst;
ocl::oclMat d_src(src);
ocl::columnSum(d_src, d_dst); prevdptr = dptr;
}
}
Mat dst(d_dst); typedef ImgprocTestBase ColumnSum;
for (int j = 0; j < src.cols; ++j) OCL_TEST_P(ColumnSum, Accuracy)
{
for (int i = 0; i < LOOP_TIMES; ++i)
{ {
float gold = src.at<float>(0, j); random_roi();
float res = dst.at<float>(0, j);
ASSERT_NEAR(res, gold, 1e-5);
}
for (int i = 1; i < src.rows; ++i) columnSum_gold(src_roi, dst_roi);
{ ocl::columnSum(gsrc_roi, gdst_roi);
for (int j = 0; j < src.cols; ++j)
{ Near(1e-5);
float gold = src.at<float>(i, j) += src.at<float>(i - 1, j);
float res = dst.at<float>(i, j);
ASSERT_NEAR(res, gold, 1e-5);
}
} }
} }
///////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, EqualizeHist, Combine( INSTANTIATE_TEST_CASE_P(Imgproc, EqualizeHist, Combine(
ONE_TYPE(CV_8UC1), Values((MatType)CV_8UC1),
NULL_TYPE, Values(0), // not used
ONE_TYPE(CV_8UC1), Values(0), // not used
NULL_TYPE, Bool()));
NULL_TYPE,
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
NULL_TYPE,
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
NULL_TYPE,
NULL_TYPE,
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CornerMinEigenVal, Combine(
Values(CV_8UC1, CV_32FC1),
NULL_TYPE,
ONE_TYPE(CV_32FC1),
NULL_TYPE,
NULL_TYPE,
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CornerHarris, Combine( INSTANTIATE_TEST_CASE_P(Imgproc, CornerMinEigenVal, Combine(
Values(CV_8UC1, CV_32FC1), Values(CV_8UC1, CV_32FC1),
NULL_TYPE, Values(3), // TODO some fails when blockSize != 3 (for example 5)
ONE_TYPE(CV_32FC1), Values((int)BORDER_REFLECT, (int)BORDER_CONSTANT, (int)BORDER_REPLICATE), // TODO does not work with (int)BORDER_REFLECT101
NULL_TYPE, Bool()));
NULL_TYPE,
Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine(
Values((MatType)CV_8UC1), // TODO does not work properly with CV_32FC1
Values(3, 5),
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, Integral, Combine( Values((int)BORDER_REFLECT101, (int)BORDER_REFLECT, (int)BORDER_CONSTANT, (int)BORDER_REPLICATE),
ONE_TYPE(CV_8UC1), Bool()));
NULL_TYPE,
ONE_TYPE(CV_32SC1), INSTANTIATE_TEST_CASE_P(Imgproc, Integral, Combine(
ONE_TYPE(CV_32FC1), Values((MatType)CV_8UC1), // TODO does work with CV_32F, CV_64F
NULL_TYPE, Values(0), // not used
Values(false))); // Values(false) is the reserved parameter Values(0), // not used
Bool()));
INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values((MatType)INTER_NEAREST, (MatType)INTER_LINEAR,
(MatType)INTER_CUBIC, (MatType)(INTER_NEAREST | WARP_INVERSE_MAP),
(MatType)(INTER_LINEAR | WARP_INVERSE_MAP), (MatType)(INTER_CUBIC | WARP_INVERSE_MAP))));
INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine
(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values((MatType)INTER_NEAREST, (MatType)INTER_LINEAR,
(MatType)INTER_CUBIC, (MatType)(INTER_NEAREST | WARP_INVERSE_MAP),
(MatType)(INTER_LINEAR | WARP_INVERSE_MAP), (MatType)(INTER_CUBIC | WARP_INVERSE_MAP))));
INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(Size()),
Values(0.5, 1.5, 2), Values(0.5, 1.5, 2), Values((MatType)INTER_NEAREST, (MatType)INTER_LINEAR)));
INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine( INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine(
Values(CV_8UC1, CV_32FC1), Values(ThreshOp(THRESH_BINARY), Values(CV_8UC1, CV_32FC1),
ThreshOp(THRESH_BINARY_INV), ThreshOp(THRESH_TRUNC), Values(0),
ThreshOp(THRESH_TOZERO), ThreshOp(THRESH_TOZERO_INV)))); Values(ThreshOp(THRESH_BINARY),
ThreshOp(THRESH_BINARY_INV), ThreshOp(THRESH_TRUNC),
ThreshOp(THRESH_TOZERO), ThreshOp(THRESH_TOZERO_INV)),
INSTANTIATE_TEST_CASE_P(Imgproc, MeanShiftFiltering, Combine( Bool()));
ONE_TYPE(CV_8UC4),
ONE_TYPE(CV_16SC2), INSTANTIATE_TEST_CASE_P(Imgproc, CalcHist, Combine(
Values(5), Values((MatType)CV_8UC1),
Values(6), Values(0), // not used
Values(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 5, 1)) Values(0), // not used
)); Bool()));
INSTANTIATE_TEST_CASE_P(Imgproc, CLAHETest, Combine(
INSTANTIATE_TEST_CASE_P(Imgproc, MeanShiftProc, Combine( Values(Size(4, 4), Size(32, 8), Size(8, 64)),
ONE_TYPE(CV_8UC4), Values(0.0, 10.0, 62.0, 300.0),
ONE_TYPE(CV_16SC2), Bool()));
Values(5),
Values(6), INSTANTIATE_TEST_CASE_P(Imgproc, Convolve, Combine(
Values(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 5, 1)) Values((MatType)CV_32FC1),
)); Values(0), // not used
Values(0), // not used
INSTANTIATE_TEST_CASE_P(Imgproc, Remap, Combine( Bool()));
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(CV_32FC1, CV_16SC2, CV_32FC2), Values(-1, CV_32FC1), INSTANTIATE_TEST_CASE_P(Imgproc, ColumnSum, Combine(
Values((int)INTER_NEAREST, (int)INTER_LINEAR), Values(MatType(CV_32FC1)),
Values((int)BORDER_CONSTANT))); Values(0), // not used
Values(0), // not used
Bool()));
INSTANTIATE_TEST_CASE_P(histTestBase, CalcHist, Combine(
ONE_TYPE(CV_8UC1),
ONE_TYPE(CV_32SC1) //no use
));
INSTANTIATE_TEST_CASE_P(Imgproc, CLAHE_Test, Combine(
Values(Size(4, 4), Size(32, 8), Size(8, 64)),
Values(0.0, 10.0, 62.0, 300.0)));
INSTANTIATE_TEST_CASE_P(Imgproc, ColumnSum, DIFFERENT_SIZES); INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
testing::Range((MatDepth)CV_8U, (MatDepth)CV_USRTYPE1),
testing::Values((Channels)1, (Channels)4),
Bool(), // border isolated or not
Values((Border)BORDER_CONSTANT,
(Border)BORDER_REPLICATE,
(Border)BORDER_REFLECT,
(Border)BORDER_WRAP,
(Border)BORDER_REFLECT_101),
Bool()));
#endif // HAVE_OPENCL #endif // HAVE_OPENCL
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Shengen Yan, yanshengen@gmail.com
// Jiang Liyuan, lyuan001.good@163.com
// Rock Li, Rock.Li@amd.com
// Wu Zailong, bullet@yeah.net
// Xu Pang, pangxu010@163.com
// Sen Liu, swjtuls1987@126.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#ifdef HAVE_OPENCL
using namespace testing;
using namespace std;
using namespace cv;
// Pair of 16-bit pixel coordinates; do_meanShift returns one for the position it stopped at.
struct COOR
{
    short x; // column
    short y; // row
};
// Reference (CPU) mean-shift iteration for one pixel of an interleaved 4-byte-per-pixel image.
//
//   x0, y0   coordinates of the pixel being processed
//   sptr     pointer to the source pixel at (x0, y0); neighbours are addressed relative to it
//   dptr     pointer to the destination pixel that receives the final colour
//   sstep    source row stride in bytes
//   size     image size, used to clip the search window
//   sp       spatial radius: the window spans [x0 - sp, x0 + sp] x [y0 - sp, y0 + sp]
//   sr       colour radius: a neighbour is accepted when the sum of its squared per-channel
//            differences (first three channels) is <= sr * sr
//   maxIter  maximum number of iterations
//   eps      stop threshold applied to (|dx| + |dy| + squared colour shift)
//   tab      lookup table of squares, indexed as tab[difference + 255]
//
// Writes the converged first three channels plus the untouched fourth source channel to dptr
// and returns the coordinates the window ended on.
COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, Size size, int sp, int sr, int maxIter, float eps, int *tab)
{
    int isr2 = sr * sr;
    int c0, c1, c2, c3;
    int iter;
    uchar *ptr = NULL;
    uchar *pstart = NULL;
    int revx = 0, revy = 0;
    // current centre colour; c3 is only carried through to the output
    c0 = sptr[0];
    c1 = sptr[1];
    c2 = sptr[2];
    c3 = sptr[3];
    // iterate meanshift procedure
    for(iter = 0; iter < maxIter; iter++ )
    {
        int count = 0;
        int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
        // mean shift: process pixels in the window [x0 - sp, x0 + sp] x [y0 - sp, y0 + sp]
        int minx = x0 - sp;
        int miny = y0 - sp;
        int maxx = x0 + sp;
        int maxy = y0 + sp;
        // clip the window to the image boundary
        if(minx < 0) minx = 0;
        if(miny < 0) miny = 0;
        if(maxx >= size.width) maxx = size.width - 1;
        if(maxy >= size.height) maxy = size.height - 1;
        if(iter == 0)
        {
            pstart = sptr;
        }
        else
        {
            // Move to the new centre. revx may be negative, so multiply instead of shifting:
            // left-shifting a negative int is undefined behaviour before C++20.
            pstart = pstart + revy * sstep + revx * 4;
        }
        ptr = pstart;
        // Point to the first pixel of the window; (minx - x0) is <= 0, hence "* 4" and not "<< 2".
        ptr = ptr + (miny - y0) * sstep + (minx - x0) * 4;
        // After each row ptr has advanced by the window width, so rewind it and step one row down.
        for( int y = miny; y <= maxy; y++, ptr += sstep - (maxx - minx + 1) * 4)
        {
            int rowCount = 0;
            int x = minx;
#if CV_ENABLE_UNROLLED
            // four pixels (16 bytes) per iteration
            for( ; x + 4 <= maxx; x += 4, ptr += 16)
            {
                int t0, t1, t2;
                t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
                {
                    s0 += t0;
                    s1 += t1;
                    s2 += t2;
                    sx += x;
                    rowCount++;
                }
                t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
                {
                    s0 += t0;
                    s1 += t1;
                    s2 += t2;
                    sx += x + 1;
                    rowCount++;
                }
                t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
                {
                    s0 += t0;
                    s1 += t1;
                    s2 += t2;
                    sx += x + 2;
                    rowCount++;
                }
                t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
                {
                    s0 += t0;
                    s1 += t1;
                    s2 += t2;
                    sx += x + 3;
                    rowCount++;
                }
            }
#endif
            // remaining pixels of the row, one at a time
            for(; x <= maxx; x++, ptr += 4)
            {
                int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
                {
                    s0 += t0;
                    s1 += t1;
                    s2 += t2;
                    sx += x;
                    rowCount++;
                }
            }
            if(rowCount == 0)
                continue;
            count += rowCount;
            sy += y * rowCount;
        }
        // no neighbour within the colour radius: keep the current centre
        if( count == 0 )
            break;
        // integer means of the accepted positions and colours
        int x1 = sx / count;
        int y1 = sy / count;
        s0 = s0 / count;
        s1 = s1 / count;
        s2 = s2 / count;
        bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
                        tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
        // remember the shift so pstart can follow the new (y0, x0) on the next iteration
        revx = x1 - x0;
        revy = y1 - y0;
        x0 = x1;
        y0 = y1;
        c0 = s0;
        c1 = s1;
        c2 = s2;
        if( stopFlag )
            break;
    } //for iter
    dptr[0] = (uchar)c0;
    dptr[1] = (uchar)c1;
    dptr[2] = (uchar)c2;
    dptr[3] = (uchar)c3;
    COOR coor;
    coor.x = (short)x0;
    coor.y = (short)y0;
    return coor;
}
// CPU reference used to check ocl::meanShiftFiltering: runs do_meanShift on every pixel of
// src_roi and stores the resulting colour in dst_roi.
//
//   src_roi  8-bit, 4-channel source (anything else raises CV_Error)
//   dst_roi  destination; must have the same rows/cols as src_roi and a step that is a multiple of 4
//   sp, sr   spatial and colour radii forwarded to do_meanShift
//   crit     termination criteria; taken by value because missing parts are filled with defaults
//            (5 iterations, epsilon 1), after which the count is clamped to [1, 100] and epsilon to >= 0
void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, TermCriteria crit)
{
    if( src_roi.empty() )
        CV_Error( CV_StsBadArg, "The input image is empty" );
    if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
    CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) );
    CV_Assert( !(dst_roi.step & 0x3) );
    if( !(crit.type & TermCriteria::MAX_ITER) )
        crit.maxCount = 5;
    int maxIter = std::min(std::max(crit.maxCount, 1), 100);
    // Previously the default was stored into eps and then unconditionally overwritten by
    // crit.epsilon, so it never took effect. Put the default into crit.epsilon before clamping.
    if( !(crit.type & TermCriteria::EPS) )
        crit.epsilon = 1.0;
    float eps = (float)std::max(crit.epsilon, 0.0);
    // table of squares: tab[d + 255] == d * d for d in [-255, 256]
    int tab[512];
    for(int i = 0; i < 512; i++)
        tab[i] = (i - 255) * (i - 255);
    uchar *sptr = src_roi.data;
    uchar *dptr = dst_roi.data;
    int sstep = (int)src_roi.step;
    int dstep = (int)dst_roi.step;
    Size size = src_roi.size();
    // The inner loop advances both pointers by 4 bytes per pixel; at the end of a row they are
    // moved from "one past the last pixel" to the start of the next row.
    for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
            dptr += dstep - (size.width << 2))
    {
        for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4)
        {
            do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
        }
    }
}
// CPU reference used to check ocl::meanShiftProc: like meanShiftFiltering_, but additionally
// stores the coordinates returned by do_meanShift for every pixel.
//
//   src_roi      8-bit, 4-channel source (anything else raises CV_Error)
//   dst_roi      receives the resulting colours; same rows/cols as src_roi
//   dstCoor_roi  receives one COOR (two shorts) per pixel; same rows/cols as src_roi and a step
//                that is a multiple of 4
//   sp, sr       spatial and colour radii forwarded to do_meanShift
//   crit         termination criteria; taken by value because missing parts are filled with defaults
//                (5 iterations, epsilon 1), after which the count is clamped to [1, 100] and epsilon to >= 0
void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, TermCriteria crit)
{
    if( src_roi.empty() )
        CV_Error( CV_StsBadArg, "The input image is empty" );
    if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
    CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) &&
               (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows));
    CV_Assert( !(dstCoor_roi.step & 0x3) );
    if( !(crit.type & TermCriteria::MAX_ITER) )
        crit.maxCount = 5;
    int maxIter = std::min(std::max(crit.maxCount, 1), 100);
    // Previously the default was stored into eps and then unconditionally overwritten by
    // crit.epsilon, so it never took effect. Put the default into crit.epsilon before clamping.
    if( !(crit.type & TermCriteria::EPS) )
        crit.epsilon = 1.0;
    float eps = (float)std::max(crit.epsilon, 0.0);
    // table of squares: tab[d + 255] == d * d for d in [-255, 256]
    int tab[512];
    for(int i = 0; i < 512; i++)
        tab[i] = (i - 255) * (i - 255);
    uchar *sptr = src_roi.data;
    uchar *dptr = dst_roi.data;
    short *dCoorptr = (short *)dstCoor_roi.data;
    int sstep = (int)src_roi.step;
    int dstep = (int)dst_roi.step;
    int dCoorstep = (int)dstCoor_roi.step >> 1; // row stride in shorts, not bytes
    Size size = src_roi.size();
    // Per pixel the colour pointers advance 4 bytes and the coordinate pointer 2 shorts; at the
    // end of a row all three are moved to the start of the next row.
    for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
            dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1))
    {
        for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2)
        {
            *((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
        }
    }
}
//////////////////////////////// meanShift //////////////////////////////////////////
// Fixture shared by the meanShiftFiltering and meanShiftProc tests.
// Test parameters, in order: image type, coordinate-map type, spatial radius (sp),
// colour radius (sr), termination criteria, and whether to use non-trivial ROIs.
PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, TermCriteria, bool)
{
    int type, typeCoor;
    int sp, sr;
    TermCriteria crit;
    bool useRoi;
    // host (CPU) matrices: each whole matrix is paired with the sub-matrix the reference writes to
    Mat src, src_roi;
    Mat dst, dst_roi;
    Mat dstCoor, dstCoor_roi;
    // OpenCL counterparts of the host matrices above
    ocl::oclMat gsrc, gsrc_roi;
    ocl::oclMat gdst, gdst_roi;
    ocl::oclMat gdstCoor, gdstCoor_roi;
    // Reads the test parameters into the members above.
    virtual void SetUp()
    {
        type = GET_PARAM(0);
        typeCoor = GET_PARAM(1);
        sp = GET_PARAM(2);
        sr = GET_PARAM(3);
        crit = GET_PARAM(4);
        useRoi = GET_PARAM(5);
    }
    // Generates a fresh random source, destination and coordinate map (host and OpenCL).
    // All three share one ROI size; a border bound of 0 is requested when useRoi is false.
    void random_roi()
    {
        Size roiSize = randomSize(1, MAX_VALUE);
        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
        generateOclMat(gsrc, gsrc_roi, src, roiSize, srcBorder);
        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 5, 256);
        generateOclMat(gdst, gdst_roi, dst, roiSize, dstBorder);
        // the coordinate map reuses the destination border, so it has the same layout as dst
        randomSubMat(dstCoor, dstCoor_roi, roiSize, dstBorder, typeCoor, 5, 256);
        generateOclMat(gdstCoor, gdstCoor_roi, dstCoor, roiSize, dstBorder);
    }
    // Downloads the OpenCL colour output and compares both the whole matrix and the ROI
    // with the host results.
    void Near(double threshold = 0.0)
    {
        Mat whole, roi;
        gdst.download(whole);
        gdst_roi.download(roi);
        EXPECT_MAT_NEAR(dst, whole, threshold);
        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
    }
    // Same as Near(), but for the coordinate output.
    void Near1(double threshold = 0.0)
    {
        Mat whole, roi;
        gdstCoor.download(whole);
        gdstCoor_roi.download(roi);
        EXPECT_MAT_NEAR(dstCoor, whole, threshold);
        EXPECT_MAT_NEAR(dstCoor_roi, roi, threshold);
    }
};
/////////////////////////meanShiftFiltering/////////////////////////////
// Alias so the shared fixture can be registered under its own test-case name.
typedef meanShiftTestBase meanShiftFiltering;

// Runs the CPU reference and ocl::meanShiftFiltering on LOOP_TIMES random inputs and
// compares the outputs with the fixture's default threshold.
OCL_TEST_P(meanShiftFiltering, Mat)
{
    for (int iteration = 0; iteration < LOOP_TIMES; ++iteration)
    {
        random_roi();

        // expected result on the host
        meanShiftFiltering_(src_roi, dst_roi, sp, sr, crit);
        // result under test on the device
        ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);

        Near();
    }
}
///////////////////////////meanShiftProc//////////////////////////////////
// Alias so the shared fixture can be registered under its own test-case name.
typedef meanShiftTestBase meanShiftProc;

// Runs the CPU reference and ocl::meanShiftProc on LOOP_TIMES random inputs and compares
// both outputs (colours, then coordinates) with the fixture's default threshold.
OCL_TEST_P(meanShiftProc, Mat)
{
    for (int iteration = 0; iteration < LOOP_TIMES; ++iteration)
    {
        random_roi();

        // expected result on the host
        meanShiftProc_(src_roi, dst_roi, dstCoor_roi, sp, sr, crit);
        // result under test on the device
        ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit);

        Near();   // colour output
        Near1();  // coordinate output
    }
}
/////////////////////////////////////////////////////////////////////////////////////
// Parameter sets for the mean-shift tests. The argument order follows
// meanShiftTestBase::SetUp: type, typeCoor, sp, sr, crit, useRoi.
INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftFiltering, Combine(
                            Values((MatType)CV_8UC4),  // image type
                            Values((MatType)CV_16SC2), // coordinate-map type
                            Values(5),                 // sp
                            Values(6),                 // sr
                            Values(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 5, 1)), // crit
                            Bool()                     // useRoi
                        ));
INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine(
                            Values((MatType)CV_8UC4),  // image type
                            Values((MatType)CV_16SC2), // coordinate-map type
                            Values(5),                 // sp
                            Values(6),                 // sr
                            Values(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 5, 1)), // crit
                            Bool()                     // useRoi
                        ));
#endif // HAVE_OPENCL
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Shengen Yan, yanshengen@gmail.com
// Jiang Liyuan, lyuan001.good@163.com
// Rock Li, Rock.Li@amd.com
// Wu Zailong, bullet@yeah.net
// Xu Pang, pangxu010@163.com
// Sen Liu, swjtuls1987@126.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#ifdef HAVE_OPENCL
using namespace cv;
using namespace testing;
using namespace std;
// Sentinel map type meaning "no second map": the Remap fixture skips generating map2 when
// its map2Type equals this value.
static MatType noType = -1;
/////////////////////////////////////////////////////////////////////////////////////////////////
// warpAffine & warpPerspective
// Fixture shared by the WarpAffine and WarpPerspective tests.
// Test parameters, in order: matrix type, interpolation mode, whether to add
// WARP_INVERSE_MAP to the interpolation flags, and whether to use non-trivial ROIs.
PARAM_TEST_CASE(WarpTestBase, MatType, Interpolation, bool, bool)
{
    int type, interpolation;
    Size dsize; // output size passed to the warp calls; redrawn by random_roi()
    bool useRoi, mapInverse;
    // host matrices and their OpenCL counterparts
    Mat src, dst_whole, src_roi, dst_roi;
    ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi;
    // Reads the test parameters and folds mapInverse into the interpolation flags.
    virtual void SetUp()
    {
        type = GET_PARAM(0);
        interpolation = GET_PARAM(1);
        mapInverse = GET_PARAM(2);
        useRoi = GET_PARAM(3);
        if (mapInverse)
            interpolation |= WARP_INVERSE_MAP;
    }
    // Generates a fresh random source and destination (host and OpenCL) plus a random dsize.
    // A border bound of 0 is requested when useRoi is false.
    void random_roi()
    {
        Size roiSize = randomSize(1, MAX_VALUE);
        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
        generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
        generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
        // NOTE(review): dsize is drawn independently of roiSize, so it generally differs from the
        // size dst_roi was created with - confirm how the warp calls treat a mismatching destination.
        dsize = randomSize(1, MAX_VALUE);
    }
    // Downloads the OpenCL output and compares both the whole matrix and the ROI
    // with the host results.
    void Near(double threshold = 0.0)
    {
        Mat whole, roi;
        gdst_whole.download(whole);
        gdst_roi.download(roi);
        EXPECT_MAT_NEAR(dst_whole, whole, threshold);
        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
    }
};
/////warpAffine
// Alias so the shared fixture can be registered under its own test-case name.
typedef WarpTestBase WarpAffine;

// Applies one fixed 2x3 transform with the host warpAffine and with ocl::warpAffine on
// LOOP_TIMES random inputs; outputs must agree within a threshold of 1.0.
OCL_TEST_P(WarpAffine, Mat)
{
    // rotation by pi/6 combined with a (100, -100) translation
    static const double affineCoeffs[2][3] =
    {
        { cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 },
        { sin(CV_PI / 6), cos(CV_PI / 6), -100.0 }
    };
    // Mat header over the static array above
    static Mat warpMat(2, 3, CV_64FC1, (void *)affineCoeffs);

    for (int iteration = 0; iteration < LOOP_TIMES; ++iteration)
    {
        random_roi();

        warpAffine(src_roi, dst_roi, warpMat, dsize, interpolation);
        ocl::warpAffine(gsrc_roi, gdst_roi, warpMat, dsize, interpolation);

        Near(1.0);
    }
}
// warpPerspective
// Alias so the shared fixture can be registered under its own test-case name.
typedef WarpTestBase WarpPerspective;

// Applies one fixed 3x3 transform with the host warpPerspective and with ocl::warpPerspective
// on LOOP_TIMES random inputs; outputs must agree within a threshold of 1.0.
OCL_TEST_P(WarpPerspective, Mat)
{
    // same rotation/translation as the affine test, extended with the row (0, 0, 1)
    static const double perspectiveCoeffs[3][3] =
    {
        { cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 },
        { sin(CV_PI / 6), cos(CV_PI / 6), -100.0 },
        { 0.0, 0.0, 1.0 }
    };
    // Mat header over the static array above
    static Mat warpMat(3, 3, CV_64FC1, (void *)perspectiveCoeffs);

    for (int iteration = 0; iteration < LOOP_TIMES; ++iteration)
    {
        random_roi();

        warpPerspective(src_roi, dst_roi, warpMat, dsize, interpolation);
        ocl::warpPerspective(gsrc_roi, gdst_roi, warpMat, dsize, interpolation);

        Near(1.0);
    }
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// remap
// Fixture for the remap tests.
// Test parameters, in order: source depth, source channel count, (map1 type, map2 type) pair,
// border mode, and whether to use non-trivial ROIs. A map2 type equal to noType means the
// second map is not generated.
PARAM_TEST_CASE(Remap, MatDepth, Channels, pair<MatType, MatType>, Border, bool)
{
    int srcType, map1Type, map2Type;
    int borderType;
    bool useRoi;
    Scalar val; // border value passed to remap; redrawn by random_roi()
    // host matrices, each paired with the sub-matrix used by the test
    Mat src, src_roi;
    Mat dst, dst_roi;
    Mat map1, map1_roi;
    Mat map2, map2_roi;
    // OpenCL counterparts of the host matrices above
    ocl::oclMat gsrc, gsrc_roi;
    ocl::oclMat gdst, gdst_roi;
    ocl::oclMat gmap1, gmap1_roi;
    ocl::oclMat gmap2, gmap2_roi;
    // Reads the test parameters; the source type is built from depth and channel count.
    virtual void SetUp()
    {
        srcType = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1));
        map1Type = GET_PARAM(2).first;
        map2Type = GET_PARAM(2).second;
        borderType = GET_PARAM(3);
        useRoi = GET_PARAM(4);
    }
    // Generates a fresh random border value, source, destination and map(s), host and OpenCL.
    // The maps use the destination ROI size; a border bound of 0 is requested when useRoi is false.
    void random_roi()
    {
        val = randomScalar(-MAX_VALUE, MAX_VALUE);
        Size srcROISize = randomSize(1, MAX_VALUE);
        Size dstROISize = randomSize(1, MAX_VALUE);
        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(src, src_roi, srcROISize, srcBorder, srcType, 5, 256);
        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(dst, dst_roi, dstROISize, dstBorder, srcType, -MAX_VALUE, MAX_VALUE);
        // map values are drawn from [-4 * MAX_VALUE, 4 * MAX_VALUE], a wider range than the image sizes
        int mapMaxValue = MAX_VALUE << 2;
        Border map1Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(map1, map1_roi, dstROISize, map1Border, map1Type, -mapMaxValue, mapMaxValue);
        // the border for map2 is drawn even when map2 itself is not generated
        Border map2Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
        if (map2Type != noType)
            randomSubMat(map2, map2_roi, dstROISize, map2Border, map2Type, -mapMaxValue, mapMaxValue);
        generateOclMat(gsrc, gsrc_roi, src, srcROISize, srcBorder);
        generateOclMat(gdst, gdst_roi, dst, dstROISize, dstBorder);
        generateOclMat(gmap1, gmap1_roi, map1, dstROISize, map1Border);
        if (noType != map2Type)
            generateOclMat(gmap2, gmap2_roi, map2, dstROISize, map2Border);
    }
    // Downloads the OpenCL output and compares both the whole matrix and the ROI
    // with the host results.
    void Near(double threshold = 0.0)
    {
        Mat whole, roi;
        gdst.download(whole);
        gdst_roi.download(roi);
        EXPECT_MAT_NEAR(dst, whole, threshold);
        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
    }
};
// Alias so nearest-neighbour remap gets its own test case and parameter set.
typedef Remap Remap_INTER_NEAREST;

// Runs the host remap and ocl::remap with INTER_NEAREST on LOOP_TIMES random inputs;
// outputs must agree within a threshold of 1.0.
OCL_TEST_P(Remap_INTER_NEAREST, Mat)
{
    for (int iteration = 0; iteration < LOOP_TIMES; ++iteration)
    {
        random_roi();

        remap(src_roi, dst_roi, map1_roi, map2_roi, INTER_NEAREST, borderType, val);
        ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, INTER_NEAREST, borderType, val);

        Near(1.0);
    }
}
// Alias so bilinear remap gets its own test case and parameter set.
typedef Remap Remap_INTER_LINEAR;

// Runs cv::remap and ocl::remap with INTER_LINEAR on LOOP_TIMES random inputs;
// outputs must agree within a threshold of 2.0.
OCL_TEST_P(Remap_INTER_LINEAR, Mat)
{
    for (int iteration = 0; iteration < LOOP_TIMES; ++iteration)
    {
        random_roi();

        cv::remap(src_roi, dst_roi, map1_roi, map2_roi, INTER_LINEAR, borderType, val);
        ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, INTER_LINEAR, borderType, val);

        Near(2.0);
    }
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// resize
// Fixture for the resize test.
// Test parameters, in order: matrix type, horizontal scale (fx), vertical scale (fy),
// interpolation mode, and whether to use non-trivial ROIs.
PARAM_TEST_CASE(Resize, MatType, double, double, Interpolation, bool)
{
    int type, interpolation;
    double fx, fy;
    bool useRoi;
    // host matrices and their OpenCL counterparts
    Mat src, dst_whole, src_roi, dst_roi;
    ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi;
    // Reads the test parameters into the members above.
    virtual void SetUp()
    {
        type = GET_PARAM(0);
        fx = GET_PARAM(1);
        fy = GET_PARAM(2);
        interpolation = GET_PARAM(3);
        useRoi = GET_PARAM(4);
    }
    // Generates a fresh random source and a destination whose ROI is the source ROI scaled by
    // (fx, fy), host and OpenCL. A border bound of 0 is requested when useRoi is false.
    void random_roi()
    {
        // A scale factor below 1 can round a tiny source extent down to 0 (e.g. 1 * 0.5 under
        // round-half-to-even), which would leave an empty destination. Redraw the source size
        // until the scaled size is non-empty; no other random value has been drawn yet, so the
        // usual case consumes the generator exactly as before.
        Size srcRoiSize, dstRoiSize;
        do
        {
            srcRoiSize = randomSize(1, MAX_VALUE);
            dstRoiSize.width = cvRound(srcRoiSize.width * fx);
            dstRoiSize.height = cvRound(srcRoiSize.height * fy);
        }
        while (dstRoiSize.area() == 0);
        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(src, src_roi, srcRoiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(dst_whole, dst_roi, dstRoiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
        generateOclMat(gsrc_whole, gsrc_roi, src, srcRoiSize, srcBorder);
        generateOclMat(gdst_whole, gdst_roi, dst_whole, dstRoiSize, dstBorder);
    }
    // Downloads the OpenCL output and compares both the whole matrix and the ROI
    // with the host results.
    void Near(double threshold = 0.0)
    {
        Mat whole, roi;
        gdst_whole.download(whole);
        gdst_roi.download(roi);
        EXPECT_MAT_NEAR(dst_whole, whole, threshold);
        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
    }
};
// Runs the host resize and ocl::resize with the parameterised scale factors on LOOP_TIMES
// random inputs; the output size is left empty so it is derived from fx and fy.
// Outputs must agree within a threshold of 1.0.
OCL_TEST_P(Resize, Mat)
{
    for (int iteration = 0; iteration < LOOP_TIMES; ++iteration)
    {
        random_roi();

        resize(src_roi, dst_roi, Size(), fx, fy, interpolation);
        ocl::resize(gsrc_roi, gdst_roi, Size(), fx, fy, interpolation);

        Near(1.0);
    }
}
/////////////////////////////////////////////////////////////////////////////////////
// Parameter sets for the warp tests. The argument order of each Combine() follows the
// SetUp() of the corresponding fixture.

// WarpTestBase: type, interpolation, mapInverse, useRoi
INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpAffine, Combine(
                            Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
                            Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR, (Interpolation)INTER_CUBIC),
                            Bool(),
                            Bool()));
// WarpTestBase: type, interpolation, mapInverse, useRoi
INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpPerspective, Combine(
                            Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
                            Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR, (Interpolation)INTER_CUBIC),
                            Bool(),
                            Bool()));
// Remap: source depth, channels, (map1 type, map2 type), border mode, useRoi
INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_LINEAR, Combine(
                            Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
                            Values(1, 2, 3, 4),
                            Values(pair<MatType, MatType>((MatType)CV_32FC1, (MatType)CV_32FC1),
                                   pair<MatType, MatType>((MatType)CV_32FC2, noType)),
                            Values((Border)BORDER_CONSTANT,
                                   (Border)BORDER_REPLICATE,
                                   (Border)BORDER_WRAP,
                                   (Border)BORDER_REFLECT,
                                   (Border)BORDER_REFLECT_101),
                            Bool()));
// Remap: as above, with an additional CV_16SC2 single-map combination
INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_NEAREST, Combine(
                            Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
                            Values(1, 2, 3, 4),
                            Values(pair<MatType, MatType>((MatType)CV_32FC1, (MatType)CV_32FC1),
                                   pair<MatType, MatType>((MatType)CV_32FC2, noType),
                                   pair<MatType, MatType>((MatType)CV_16SC2, noType)),
                            Values((Border)BORDER_CONSTANT,
                                   (Border)BORDER_REPLICATE,
                                   (Border)BORDER_WRAP,
                                   (Border)BORDER_REFLECT,
                                   (Border)BORDER_REFLECT_101),
                            Bool()));
// Resize: type, fx, fy, interpolation, useRoi
INSTANTIATE_TEST_CASE_P(ImgprocWarp, Resize, Combine(
                            Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
                            Values(0.5, 1.5, 2.0),
                            Values(0.5, 1.5, 2.0),
                            Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR),
                            Bool()));
#endif // HAVE_OPENCL
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment