Commit 6ef94b52 authored by Andrey Pavlenko, committed by OpenCV Buildbot

Merge pull request #2536 from ilya-lavrenov:tapi_resize_linear

parents aa3c4ae5 31f864a2
This diff is collapsed.
......@@ -43,110 +43,140 @@
//
//M*/
#if defined DOUBLE_SUPPORT
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
// Number of fractional bits in the fixed-point interpolation weights.
#define INTER_RESIZE_COEF_BITS 11
// Fixed-point value of 1.0, i.e. 2^INTER_RESIZE_COEF_BITS.
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
// Twice INTER_RESIZE_COEF_BITS: the shift applied after multiplying a pixel
// by the product of two fixed-point weights.
#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)
// Index following x, limited to l-1 so it never passes the last element.
// The arguments are not parenthesised, so pass simple expressions only.
#define INC(x,l) min(x+1,l-1)
// NOTE(review): `noconvert` is defined twice below (function-like, then
// object-like); these look like the pre- and post-change lines of a diff
// shown together -- confirm which one the real file keeps.
#define noconvert(x) (x)
#define noconvert
// Pixel access helpers. For every channel count except 3 a pixel is read or
// written through a single pointer dereference; for 3 channels vload3/vstore3
// are used on the per-channel type instead.
// NOTE(review): each helper appears in two spellings (PIXTYPE/PIXTYPE1/PIXSIZE
// and T/T1/TSIZE); presumably old and new names from the same diff -- verify.
#if cn != 3
#define loadpix(addr) *(__global const PIXTYPE*)(addr)
#define storepix(val, addr) *(__global PIXTYPE*)(addr) = val
#define PIXSIZE ((int)sizeof(PIXTYPE))
#define loadpix(addr) *(__global const T *)(addr)
#define storepix(val, addr) *(__global T *)(addr) = val
// Size in bytes of one pixel of type T.
#define TSIZE (int)sizeof(T)
#else
#define loadpix(addr) vload3(0, (__global const PIXTYPE1*)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global PIXTYPE1*)(addr))
#define PIXSIZE ((int)sizeof(PIXTYPE1)*3)
#define loadpix(addr) vload3(0, (__global const T1 *)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
// Size in bytes of one pixel: cn elements of type T1. The expansion is not
// wrapped in parentheses, so be careful when using it inside larger expressions.
#define TSIZE (int)sizeof(T1)*cn
#endif
#if defined INTER_LINEAR
#ifdef INTER_LINEAR_INTEGER
// Bilinear resize kernel that takes its source offsets and interpolation
// weights from a precomputed table (`buffer`) instead of deriving them from
// scale factors. One work-item handles destination pixel (dx, dy), where dx
// and dy are the global ids in dimensions 0 and 1.
//
// Layout of `buffer` as read by this kernel:
//   int   xofs[dst_cols]      - source x offset for each destination column
//   int   yofs[dst_rows]      - source y offset for each destination row
//   short ialpha[2*dst_cols]  - two horizontal weights per destination column
//   short ibeta[2*dst_rows]   - two vertical weights per destination row
//
// NOTE(review): two signatures follow each other and the three float
// coordinate lines after the global-id reads use ifx/ify, which only the first
// signature declares. This looks like removed and added diff lines shown
// together -- confirm against the real file before relying on this text.
__kernel void resizeLN(__global const uchar* srcptr, int srcstep, int srcoffset,
int srcrows, int srccols,
__global uchar* dstptr, int dststep, int dstoffset,
int dstrows, int dstcols,
float ifx, float ify)
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__global const uchar * buffer)
{
int dx = get_global_id(0);
int dy = get_global_id(1);
float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f);
int x = floor(sx), y = floor(sy);
float u = sx - x, v = sy - y;
// Work-items outside the destination image do nothing.
if (dx < dst_cols && dy < dst_rows)
{
__global const int * xofs = (__global const int *)(buffer), * yofs = xofs + dst_cols;
__global const short * ialpha = (__global const short *)(yofs + dst_rows);
// ibeta must be derived before ialpha is advanced: it is located relative to
// the start of the alpha table (skip 2*dst_cols shorts, then 2*dy).
__global const short * ibeta = ialpha + ((dst_cols + dy) << 1);
ialpha += dx << 1;
// Both source rows are clamped to [0, src_rows - 1]. The column offset is
// used as stored; presumably the host keeps sx0 and sx0 + 1 in range -- verify.
int sx0 = xofs[dx], sy0 = clamp(yofs[dy], 0, src_rows - 1),
sy1 = clamp(yofs[dy] + 1, 0, src_rows - 1);
short a0 = ialpha[0], a1 = ialpha[1];
short b0 = ibeta[0], b1 = ibeta[1];
// Byte offsets of the left pixel in the upper and lower source rows.
int src_index0 = mad24(sy0, src_step, mad24(sx0, TSIZE, src_offset)),
src_index1 = mad24(sy1, src_step, mad24(sx0, TSIZE, src_offset));
// 2x2 neighbourhood: data0/data1 from row sy0, data2/data3 from row sy1;
// the "+ TSIZE" loads read the pixel immediately to the right.
WT data0 = convertToWT(loadpix(srcptr + src_index0));
WT data1 = convertToWT(loadpix(srcptr + src_index0 + TSIZE));
WT data2 = convertToWT(loadpix(srcptr + src_index1));
WT data3 = convertToWT(loadpix(srcptr + src_index1 + TSIZE));
// Horizontal blend, then vertical blend, with intermediate right shifts of
// 4 and 16 bits; the final rounded shift by 2 below brings the total to
// 22 bits, which equals CAST_BITS.
WT val = ( (((data0 * a0 + data1 * a1) >> 4) * b0) >> 16) +
( (((data2 * a0 + data3 * a1) >> 4) * b1) >> 16);
storepix(convertToDT((val + 2) >> 2),
dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
}
}
if ( x<0 ) x=0,u=0;
if ( x>=srccols ) x=srccols-1,u=0;
if ( y<0 ) y=0,v=0;
if ( y>=srcrows ) y=srcrows-1,v=0;
#elif defined INTER_LINEAR
int y_ = INC(y,srcrows);
int x_ = INC(x,srccols);
// Bilinear resize kernel that derives the source position from the scale
// factors ifx / ify. One work-item handles destination pixel (dx, dy), where
// dx and dy are the global ids in dimensions 0 and 1.
//
// When `depth <= 4` the four neighbours are blended with fixed-point weights
// scaled by INTER_RESIZE_COEF_SCALE and rounded back with CAST_BITS; otherwise
// they are blended directly in floating point.
//
// NOTE(review): many statements below occur twice under different names
// (srcstep/src_step, PIXSIZE/TSIZE, WORKTYPE/WT, PIXTYPE/T), and some use u/v
// or val before their declaration. This looks like removed and added diff
// lines shown together -- confirm against the real file.
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
float ifx, float ify)
{
int dx = get_global_id(0);
int dy = get_global_id(1);
#if depth <= 4
// Work-items outside the destination image do nothing.
if (dx < dst_cols && dy < dst_rows)
{
// Map the centre of the destination pixel into source coordinates.
float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f);
int x = floor(sx), y = floor(sy);
u = u * INTER_RESIZE_COEF_SCALE;
v = v * INTER_RESIZE_COEF_SCALE;
// Fractional parts: horizontal (u) and vertical (v) interpolation weights.
float u = sx - x, v = sy - y;
int U = rint(u);
int V = rint(v);
int U1 = rint(INTER_RESIZE_COEF_SCALE - u);
int V1 = rint(INTER_RESIZE_COEF_SCALE - v);
// Clamp the base coordinate into the source image; when clamped, the
// corresponding weight is zeroed so only the edge pixel contributes.
if ( x<0 ) x=0,u=0;
if ( x>=src_cols ) x=src_cols-1,u=0;
if ( y<0 ) y=0,v=0;
if ( y>=src_rows ) y=src_rows-1,v=0;
WORKTYPE data0 = convertToWT(loadpix(srcptr + mad24(y, srcstep, srcoffset + x*PIXSIZE)));
WORKTYPE data1 = convertToWT(loadpix(srcptr + mad24(y, srcstep, srcoffset + x_*PIXSIZE)));
WORKTYPE data2 = convertToWT(loadpix(srcptr + mad24(y_, srcstep, srcoffset + x*PIXSIZE)));
WORKTYPE data3 = convertToWT(loadpix(srcptr + mad24(y_, srcstep, srcoffset + x_*PIXSIZE)));
// Neighbouring row / column, limited to the last valid index.
int y_ = INC(y, src_rows);
int x_ = INC(x, src_cols);
WORKTYPE val = mul24((WORKTYPE)mul24(U1, V1), data0) + mul24((WORKTYPE)mul24(U, V1), data1) +
mul24((WORKTYPE)mul24(U1, V), data2) + mul24((WORKTYPE)mul24(U, V), data3);
#if depth <= 4
// Fixed-point path: scale the weights to INTER_RESIZE_COEF_SCALE and round
// them to integers (U, V and their complements U1, V1).
u = u * INTER_RESIZE_COEF_SCALE;
v = v * INTER_RESIZE_COEF_SCALE;
PIXTYPE uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
int U = rint(u);
int V = rint(v);
int U1 = rint(INTER_RESIZE_COEF_SCALE - u);
int V1 = rint(INTER_RESIZE_COEF_SCALE - v);
#else
float u1 = 1.f - u;
float v1 = 1.f - v;
WORKTYPE data0 = convertToWT(loadpix(srcptr + mad24(y, srcstep, srcoffset + x*PIXSIZE)));
WORKTYPE data1 = convertToWT(loadpix(srcptr + mad24(y, srcstep, srcoffset + x_*PIXSIZE)));
WORKTYPE data2 = convertToWT(loadpix(srcptr + mad24(y_, srcstep, srcoffset + x*PIXSIZE)));
WORKTYPE data3 = convertToWT(loadpix(srcptr + mad24(y_, srcstep, srcoffset + x_*PIXSIZE)));
// 2x2 neighbourhood: data0 = (y, x), data1 = (y, x_), data2 = (y_, x),
// data3 = (y_, x_).
WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))));
WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset))));
WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset))));
WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset))));
PIXTYPE uval = u1 * v1 * data0 + u * v1 * data1 + u1 * v *data2 + u * v *data3;
// Each pixel is multiplied by the product of one horizontal and one vertical
// weight, so the sum carries CAST_BITS fractional bits.
WT val = mul24((WT)mul24(U1, V1), data0) + mul24((WT)mul24(U, V1), data1) +
mul24((WT)mul24(U1, V), data2) + mul24((WT)mul24(U, V), data3);
// Add half of the divisor before shifting so the result is rounded.
T uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
#else
// Floating-point path: the complementary weights are 1 - u and 1 - v.
float u1 = 1.f - u;
float v1 = 1.f - v;
WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))));
WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset))));
WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset))));
WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset))));
T uval = u1 * v1 * data0 + u * v1 * data1 + u1 * v *data2 + u * v *data3;
#endif
if(dx < dstcols && dy < dstrows)
{
storepix(uval, dstptr + mad24(dy, dststep, dstoffset + dx*PIXSIZE));
// Write the interpolated pixel at byte offset dy*dst_step + dx*TSIZE + dst_offset.
storepix(uval, dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
}
}
#elif defined INTER_NEAREST
// Nearest-neighbour resize kernel. One work-item copies a single source pixel
// to destination pixel (dx, dy), where dx and dy are the global ids in
// dimensions 0 and 1. The source coordinate is the destination coordinate
// multiplied by ifx / ify, truncated toward zero and limited to the last
// source column / row.
//
// NOTE(review): the signature, the bounds check, the coordinate computation
// and the copy statement each appear twice with different identifier
// spellings (srcstep/src_step, PIXSIZE/TSIZE). This looks like removed and
// added diff lines shown together -- confirm against the real file.
__kernel void resizeNN(__global const uchar* srcptr, int srcstep, int srcoffset,
int srcrows, int srccols,
__global uchar* dstptr, int dststep, int dstoffset,
int dstrows, int dstcols,
__kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
float ifx, float ify)
{
int dx = get_global_id(0);
int dy = get_global_id(1);
if( dx < dstcols && dy < dstrows )
// Work-items outside the destination image do nothing.
if (dx < dst_cols && dy < dst_rows)
{
float s1 = dx*ifx;
float s2 = dy*ify;
int sx = min(convert_int_rtz(s1), srccols-1);
int sy = min(convert_int_rtz(s2), srcrows-1);
// Scale the destination coordinate into source space.
float s1 = dx * ifx;
float s2 = dy * ify;
// Truncate toward zero, then cap at the last valid source index.
int sx = min(convert_int_rtz(s1), src_cols - 1);
int sy = min(convert_int_rtz(s2), src_rows - 1);
storepix(loadpix(srcptr + mad24(sy, srcstep, srcoffset + sx*PIXSIZE)),
dstptr + mad24(dy, dststep, dstoffset + dx*PIXSIZE));
// Copy the pixel; offsets are in bytes (row * step + column * TSIZE + offset).
storepix(loadpix(srcptr + mad24(sy, src_step, mad24(sx, TSIZE, src_offset))),
dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
}
}
......@@ -179,10 +209,10 @@ __kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_
int src_index = mad24(symap_tab[y + sy], src_step, src_offset);
#pragma unroll
for (int x = 0; x < XSCALE; ++x)
sum += convertToWTV(loadpix(src + src_index + sxmap_tab[sx + x]*PIXSIZE));
sum += convertToWTV(loadpix(src + mad24(sxmap_tab[sx + x], TSIZE, src_index)));
}
storepix(convertToPIXTYPE(convertToWT2V(sum) * (WT2V)(SCALE)), dst + dst_index + dx*PIXSIZE);
storepix(convertToT(convertToWT2V(sum) * (WT2V)(SCALE)), dst + mad24(dx, TSIZE, dst_index));
}
}
......@@ -224,12 +254,12 @@ __kernel void resizeAREA(__global const uchar * src, int src_step, int src_offse
for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk)
{
WTV alpha = (WTV)(xalpha_tab[xk]);
buf += convertToWTV(loadpix(src + src_index + sx*PIXSIZE)) * alpha;
buf += convertToWTV(loadpix(src + mad24(sx, TSIZE, src_index))) * alpha;
}
sum += buf * beta;
}
storepix(convertToPIXTYPE(sum), dst + dst_index + dx*PIXSIZE);
storepix(convertToT(sum), dst + mad24(dx, TSIZE, dst_index));
}
}
......
......@@ -210,12 +210,15 @@ OCL_TEST_P(Resize, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
int depth = CV_MAT_DEPTH(type);
double eps = depth <= CV_32S ? 1 : 1e-2;
random_roi();
OCL_OFF(cv::resize(src_roi, dst_roi, Size(), fx, fy, interpolation));
OCL_ON(cv::resize(usrc_roi, udst_roi, Size(), fx, fy, interpolation));
Near(1.0);
Near(eps);
}
}
......@@ -328,8 +331,8 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpPerspective, Combine(
OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, Resize, Combine(
Values(CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, CV_32FC4),
Values(0.5, 1.5, 2.0),
Values(0.5, 1.5, 2.0),
Values(0.5, 1.5, 2.0, 0.2),
Values(0.5, 1.5, 2.0, 0.2),
Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR),
Bool()));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment