Commit e69d2c1b authored by Roman Donchenko's avatar Roman Donchenko Committed by OpenCV Buildbot

Merge pull request #1819 from ilya-lavrenov:ocl_resize_AREA

parents 5c77784a 198cd1a4
......@@ -185,6 +185,46 @@ PERF_TEST_P(resizeFixture, resize,
OCL_PERF_ELSE
}
typedef tuple<Size, MatType, double> resizeAreaParams;
typedef TestBaseWithParam<resizeAreaParams> resizeAreaFixture;
PERF_TEST_P(resizeAreaFixture, resize,
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
::testing::Values(0.3, 0.5, 0.6)))
{
const resizeAreaParams params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
double scale = get<2>(params);
const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
checkDeviceMaxMemoryAllocSize(srcSize, type);
Mat src(srcSize, type), dst;
dst.create(dstSize, type);
declare.in(src, WARMUP_RNG).out(dst);
if (RUN_OCL_IMPL)
{
ocl::oclMat oclSrc(src), oclDst(dstSize, type);
OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, cv::INTER_AREA);
oclDst.download(dst);
SANITY_CHECK(dst, 1 + DBL_EPSILON);
}
else if (RUN_PLAIN_IMPL)
{
TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, cv::INTER_AREA);
SANITY_CHECK(dst, 1 + DBL_EPSILON);
}
else
OCL_PERF_ELSE
}
///////////// remap////////////////////////
CV_ENUM(RemapInterType, INTER_NEAREST, INTER_LINEAR)
......
......@@ -280,22 +280,86 @@ namespace cv
////////////////////////////////////////////////////////////////////////////////////////////
// resize
static void resize_gpu( const oclMat &src, oclMat &dst, double fx, double fy, int interpolation)
static void computeResizeAreaTabs(int ssize, int dsize, double scale, int * const map_tab,
float * const alpha_tab, int * const ofs_tab)
{
float ifx = 1.f / fx, ify = 1.f / fy;
int k = 0, dx = 0;
for ( ; dx < dsize; dx++)
{
ofs_tab[dx] = k;
double fsx1 = dx * scale;
double fsx2 = fsx1 + scale;
double cellWidth = std::min(scale, ssize - fsx1);
int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2);
sx2 = std::min(sx2, ssize - 1);
sx1 = std::min(sx1, sx2);
if (sx1 - fsx1 > 1e-3)
{
map_tab[k] = sx1 - 1;
alpha_tab[k++] = (float)((sx1 - fsx1) / cellWidth);
}
for (int sx = sx1; sx < sx2; sx++)
{
map_tab[k] = sx;
alpha_tab[k++] = float(1.0 / cellWidth);
}
if (fsx2 - sx2 > 1e-3)
{
map_tab[k] = sx2;
alpha_tab[k++] = (float)(std::min(std::min(fsx2 - sx2, 1.), cellWidth) / cellWidth);
}
}
ofs_tab[dx] = k;
}
static void computeResizeAreaFastTabs(int * dmap_tab, int * smap_tab, int scale, int dcols, int scol)
{
for (int i = 0; i < dcols; ++i)
dmap_tab[i] = scale * i;
for (int i = 0, size = dcols * scale; i < size; ++i)
smap_tab[i] = std::min(scol - 1, i);
}
static void resize_gpu( const oclMat &src, oclMat &dst, double ifx, double ify, int interpolation)
{
float ifxf = (float)ifx, ifyf = (float)ify;
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
int ocn = interpolation == INTER_LINEAR ? dst.oclchannels() : -1;
int depth = interpolation == INTER_LINEAR ? dst.depth() : -1;
int ocn = dst.oclchannels(), depth = dst.depth();
const char * const interMap[] = { "NN", "LN", "CUBIC", "AREA", "LAN4" };
std::string kernelName = std::string("resize") + interMap[interpolation];
const char * const typeMap[] = { "uchar", "uchar", "ushort", "ushort", "int", "int", "double" };
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
const char * const channelMap[] = { "" , "", "2", "4", "4" };
std::string buildOption = format("-D %s -D T=%s%s", interMap[interpolation], typeMap[dst.depth()], channelMap[dst.oclchannels()]);
std::string buildOption = format("-D %s -D T=%s%s", interMap[interpolation], typeMap[depth], channelMap[ocn]);
int wdepth = std::max(src.depth(), CV_32F);
// check if fx, fy is integer and then we have inter area fast mode
int iscale_x = saturate_cast<int>(ifx);
int iscale_y = saturate_cast<int>(ify);
bool is_area_fast = std::abs(ifx - iscale_x) < DBL_EPSILON &&
std::abs(ify - iscale_y) < DBL_EPSILON;
if (is_area_fast)
wdepth = std::max(src.depth(), CV_32S);
if (interpolation != INTER_NEAREST)
{
buildOption += format(" -D WT=%s -D WTV=%s%s -D convertToWTV=convert_%s%s -D convertToT=convert_%s%s%s",
typeMap[wdepth], typeMap[wdepth], channelMap[ocn],
typeMap[wdepth], channelMap[ocn],
typeMap[src.depth()], channelMap[ocn], src.depth() <= CV_32S ? "_sat_rte" : "");
}
//TODO: improve this kernel
size_t blkSizeX = 16, blkSizeY = 16;
size_t glbSizeX;
if (src.type() == CV_8UC1 && interpolation == INTER_LINEAR)
......@@ -306,6 +370,50 @@ namespace cv
else
glbSizeX = dst.cols;
oclMat alphaOcl, mapOcl, tabofsOcl;
if (interpolation == INTER_AREA)
{
if (is_area_fast)
{
kernelName += "_FAST";
int wdepth2 = std::max(CV_32F, src.depth());
buildOption += format(" -D WT2V=%s%s -D convertToWT2V=convert_%s%s -D AREA_FAST -D XSCALE=%d -D YSCALE=%d -D SCALE=%f",
typeMap[wdepth2], channelMap[ocn], typeMap[wdepth2], channelMap[ocn],
iscale_x, iscale_y, 1.0f / (iscale_x * iscale_y));
int smap_tab_size = dst.cols * iscale_x + dst.rows * iscale_y;
AutoBuffer<int> dmap_tab(dst.cols + dst.rows), smap_tab(smap_tab_size);
int * dxmap_tab = dmap_tab, * dymap_tab = dxmap_tab + dst.cols;
int * sxmap_tab = smap_tab, * symap_tab = smap_tab + dst.cols * iscale_y;
computeResizeAreaFastTabs(dxmap_tab, sxmap_tab, iscale_x, dst.cols, src.cols);
computeResizeAreaFastTabs(dymap_tab, symap_tab, iscale_y, dst.rows, src.rows);
tabofsOcl = oclMat(1, dst.cols + dst.rows, CV_32SC1, (void *)dmap_tab);
mapOcl = oclMat(1, smap_tab_size, CV_32SC1, (void *)smap_tab);
}
else
{
Size ssize = src.size(), dsize = dst.size();
int xytab_size = (ssize.width + ssize.height) << 1;
int tabofs_size = dsize.height + dsize.width + 2;
AutoBuffer<int> _xymap_tab(xytab_size), _xyofs_tab(tabofs_size);
AutoBuffer<float> _xyalpha_tab(xytab_size);
int * xmap_tab = _xymap_tab, * ymap_tab = _xymap_tab + (ssize.width << 1);
float * xalpha_tab = _xyalpha_tab, * yalpha_tab = _xyalpha_tab + (ssize.width << 1);
int * xofs_tab = _xyofs_tab, * yofs_tab = _xyofs_tab + dsize.width + 1;
computeResizeAreaTabs(ssize.width, dsize.width, ifx, xmap_tab, xalpha_tab, xofs_tab);
computeResizeAreaTabs(ssize.height, dsize.height, ify, ymap_tab, yalpha_tab, yofs_tab);
// loading precomputed arrays to GPU
alphaOcl = oclMat(1, xytab_size, CV_32FC1, (void *)_xyalpha_tab);
mapOcl = oclMat(1, xytab_size, CV_32SC1, (void *)_xymap_tab);
tabofsOcl = oclMat(1, tabofs_size, CV_32SC1, (void *)_xyofs_tab);
}
}
size_t globalThreads[3] = { glbSizeX, dst.rows, 1 };
size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };
......@@ -320,8 +428,30 @@ namespace cv
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
if (wdepth == CV_64F)
{
args.push_back( make_pair(sizeof(cl_double), (void *)&ifx));
args.push_back( make_pair(sizeof(cl_double), (void *)&ify));
}
else
{
args.push_back( make_pair(sizeof(cl_float), (void *)&ifxf));
args.push_back( make_pair(sizeof(cl_float), (void *)&ifyf));
}
// precomputed tabs
if (!tabofsOcl.empty())
args.push_back( make_pair(sizeof(cl_mem), (void *)&tabofsOcl.data));
if (!mapOcl.empty())
args.push_back( make_pair(sizeof(cl_mem), (void *)&mapOcl.data));
if (!alphaOcl.empty())
args.push_back( make_pair(sizeof(cl_mem), (void *)&alphaOcl.data));
ocn = interpolation == INTER_LINEAR ? ocn : -1;
depth = interpolation == INTER_LINEAR ? depth : -1;
openCLExecuteKernel(src.clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args,
ocn, depth, buildOption.c_str());
......@@ -329,9 +459,14 @@ namespace cv
void resize(const oclMat &src, oclMat &dst, Size dsize, double fx, double fy, int interpolation)
{
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device does not support double");
return;
}
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4
|| src.type() == CV_32FC1 || src.type() == CV_32FC3 || src.type() == CV_32FC4);
CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST);
CV_Assert(dsize.area() > 0 || (fx > 0 && fy > 0));
if (dsize.area() == 0)
......@@ -345,9 +480,13 @@ namespace cv
fy = (double)dsize.height / src.rows;
}
double inv_fy = 1 / fy, inv_fx = 1 / fx;
CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST ||
(interpolation == INTER_AREA && inv_fx >= 1 && inv_fy >= 1));
dst.create(dsize, src.type());
resize_gpu( src, dst, fx, fy, interpolation);
resize_gpu( src, dst, inv_fx, inv_fy, interpolation);
}
////////////////////////////////////////////////////////////////////////
......
......@@ -296,7 +296,7 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
#elif defined NN
__kernel void resizeNN(__global T * dst, __global T * src,
int dst_offset, int src_offset,int dst_step, int src_step,
int dst_offset, int src_offset, int dst_step, int src_step,
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify)
{
int dx = get_global_id(0);
......@@ -315,4 +315,91 @@ __kernel void resizeNN(__global T * dst, __global T * src,
}
}
#elif defined AREA
#ifdef AREA_FAST
__kernel void resizeAREA_FAST(__global T * dst, __global T * src,
int dst_offset, int src_offset, int dst_step, int src_step,
int src_cols, int src_rows, int dst_cols, int dst_rows, WT ifx, WT ify,
__global const int * dmap_tab, __global const int * smap_tab)
{
int dx = get_global_id(0);
int dy = get_global_id(1);
if (dx < dst_cols && dy < dst_rows)
{
int dst_index = mad24(dy, dst_step, dst_offset + dx);
__global const int * xmap_tab = dmap_tab;
__global const int * ymap_tab = dmap_tab + dst_cols;
__global const int * sxmap_tab = smap_tab;
__global const int * symap_tab = smap_tab + XSCALE * dst_cols;
int sx = xmap_tab[dx], sy = ymap_tab[dy];
WTV sum = (WTV)(0);
#pragma unroll
for (int y = 0; y < YSCALE; ++y)
{
int src_index = mad24(symap_tab[y + sy], src_step, src_offset);
#pragma unroll
for (int x = 0; x < XSCALE; ++x)
sum += convertToWTV(src[src_index + sxmap_tab[sx + x]]);
}
dst[dst_index] = convertToT(convertToWT2V(sum) * (WT2V)(SCALE));
}
}
#else
__kernel void resizeAREA(__global T * dst, __global T * src,
int dst_offset, int src_offset, int dst_step, int src_step,
int src_cols, int src_rows, int dst_cols, int dst_rows, WT ifx, WT ify,
__global const int * ofs_tab, __global const int * map_tab,
__global const float * alpha_tab)
{
int dx = get_global_id(0);
int dy = get_global_id(1);
if (dx < dst_cols && dy < dst_rows)
{
int dst_index = mad24(dy, dst_step, dst_offset + dx);
__global const int * xmap_tab = map_tab;
__global const int * ymap_tab = (__global const int *)(map_tab + (src_cols << 1));
__global const float * xalpha_tab = alpha_tab;
__global const float * yalpha_tab = (__global const float *)(alpha_tab + (src_cols << 1));
__global const int * xofs_tab = ofs_tab;
__global const int * yofs_tab = (__global const int *)(ofs_tab + dst_cols + 1);
int xk0 = xofs_tab[dx], xk1 = xofs_tab[dx + 1];
int yk0 = yofs_tab[dy], yk1 = yofs_tab[dy + 1];
int sy0 = ymap_tab[yk0], sy1 = ymap_tab[yk1 - 1];
int sx0 = xmap_tab[xk0], sx1 = xmap_tab[xk1 - 1];
WTV sum = (WTV)(0), buf;
int src_index = mad24(sy0, src_step, src_offset);
for (int sy = sy0, yk = yk0; sy <= sy1; ++sy, src_index += src_step, ++yk)
{
WTV beta = (WTV)(yalpha_tab[yk]);
buf = (WTV)(0);
for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk)
{
WTV alpha = (WTV)(xalpha_tab[xk]);
buf += convertToWTV(src[src_index + sx]) * alpha;
}
sum += buf * beta;
}
dst[dst_index] = convertToT(sum);
}
}
#endif
#endif
......@@ -398,10 +398,7 @@ PARAM_TEST_CASE(Resize, MatType, double, double, Interpolation, bool)
dstRoiSize.height = cvRound(srcRoiSize.height * fy);
if (dstRoiSize.area() == 0)
{
random_roi();
return;
}
return random_roi();
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, srcRoiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
......@@ -480,11 +477,18 @@ INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_NEAREST, Combine(
(Border)BORDER_REFLECT_101),
Bool()));
INSTANTIATE_TEST_CASE_P(ImgprocWarp, Resize, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(0.5, 1.5, 2.0),
Values(0.5, 1.5, 2.0),
INSTANTIATE_TEST_CASE_P(ImgprocWarpResize, Resize, Combine(
Values((MatType)CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(0.7, 0.4, 2.0),
Values(0.3, 0.6, 2.0),
Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR),
Bool()));
INSTANTIATE_TEST_CASE_P(ImgprocWarpResizeArea, Resize, Combine(
Values((MatType)CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(0.7, 0.4, 0.5),
Values(0.3, 0.6, 0.5),
Values((Interpolation)INTER_AREA),
Bool()));
#endif // HAVE_OPENCL
......@@ -231,7 +231,7 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
return final_test_result;
}
void showDiff(const Mat& gold, const Mat& actual, double eps, bool alwaysShow)
void showDiff(const Mat& src, const Mat& gold, const Mat& actual, double eps, bool alwaysShow)
{
Mat diff, diff_thresh;
absdiff(gold, actual, diff);
......@@ -240,10 +240,18 @@ void showDiff(const Mat& gold, const Mat& actual, double eps, bool alwaysShow)
if (alwaysShow || cv::countNonZero(diff_thresh.reshape(1)) > 0)
{
#if 0
std::cout << "Src: " << std::endl << src << std::endl;
std::cout << "Reference: " << std::endl << gold << std::endl;
std::cout << "OpenCL: " << std::endl << actual << std::endl;
#endif
namedWindow("src", WINDOW_NORMAL);
namedWindow("gold", WINDOW_NORMAL);
namedWindow("actual", WINDOW_NORMAL);
namedWindow("diff", WINDOW_NORMAL);
imshow("src", src);
imshow("gold", gold);
imshow("actual", actual);
imshow("diff", diff);
......
......@@ -52,7 +52,7 @@ extern int LOOP_TIMES;
namespace cvtest {
void showDiff(const Mat& gold, const Mat& actual, double eps, bool alwaysShow = false);
void showDiff(const Mat& src, const Mat& gold, const Mat& actual, double eps, bool alwaysShow = false);
cv::ocl::oclMat createMat_ocl(cv::RNG& rng, Size size, int type, bool useRoi);
cv::ocl::oclMat loadMat_ocl(cv::RNG& rng, const Mat& m, bool useRoi);
......@@ -262,7 +262,7 @@ CV_ENUM(NormCode, NORM_INF, NORM_L1, NORM_L2, NORM_TYPE_MASK, NORM_RELATIVE, NOR
CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
CV_ENUM(MorphOp, MORPH_OPEN, MORPH_CLOSE, MORPH_GRADIENT, MORPH_TOPHAT, MORPH_BLACKHAT)
CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV)
CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC)
CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA)
CV_ENUM(Border, BORDER_REFLECT101, BORDER_REPLICATE, BORDER_CONSTANT, BORDER_REFLECT, BORDER_WRAP)
CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment