Commit 5864895e authored by Ilya Lavrenov

fixed ocl::copyMakeBorder

parent a54d36bd
......@@ -436,7 +436,7 @@ namespace cv
CV_Assert(top >= 0 && bottom >= 0 && left >= 0 && right >= 0);
if( _src.offset != 0 && (bordertype & BORDER_ISOLATED) == 0 )
if( (_src.wholecols != _src.cols || _src.wholerows != _src.rows) && (bordertype & BORDER_ISOLATED) == 0 )
Size wholeSize;
Point ofs;
......@@ -453,34 +453,25 @@ namespace cv
bordertype &= ~cv::BORDER_ISOLATED;
// TODO need to remove these conditions and fix the code
if (bordertype == cv::BORDER_REFLECT || bordertype == cv::BORDER_WRAP)
CV_Assert((_src.cols >= left) && (_src.cols >= right) && (_src.rows >= top) && (_src.rows >= bottom));
else if (bordertype == cv::BORDER_REFLECT_101)
CV_Assert((_src.cols > left) && (_src.cols > right) && (_src.rows > top) && (_src.rows > bottom));
dst.create(_src.rows + top + bottom, _src.cols + left + right, _src.type());
int srcStep = _src.step1() / _src.oclchannels(), dstStep = dst.step1() / dst.oclchannels();
int srcStep = _src.step / _src.elemSize(), dstStep = dst.step / dst.elemSize();
int srcOffset = _src.offset / _src.elemSize(), dstOffset = dst.offset / dst.elemSize();
int depth = _src.depth(), ochannels = _src.oclchannels();
size_t bordertype_index;
for(bordertype_index = 0; bordertype_index < sizeof(__bordertype) / sizeof(int); bordertype_index++)
if (__bordertype[bordertype_index] == bordertype)
int bordertype_index = -1;
for (int i = 0, end = sizeof(__bordertype) / sizeof(int); i < end; i++)
if (__bordertype[i] == bordertype)
bordertype_index = i;
if (bordertype_index == sizeof(__bordertype) / sizeof(int))
if (bordertype_index < 0)
CV_Error(CV_StsBadArg, "Unsupported border type");
string kernelName = "copymakeborder";
size_t localThreads[3] = {16, 16, 1};
size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
vector< pair<size_t, const void *> > args;
......@@ -503,12 +494,6 @@ namespace cv
typeMap[depth], channelMap[ochannels],
if (src.type() == CV_8UC1 && (dst.offset & 3) == 0 && (dst.cols & 3) == 0)
kernelName = "copymakeborder_C1_D0";
globalThreads[0] = dst.cols >> 2;
int cn = src.channels(), ocn = src.oclchannels();
int bufSize = src.elemSize1() * ocn;
AutoBuffer<uchar> _buf(bufSize);
......@@ -518,7 +503,7 @@ namespace cv
args.push_back( make_pair( bufSize , (void *)buf ));
openCLExecuteKernel(src.clCxt, &imgproc_copymakeboder, kernelName, globalThreads,
openCLExecuteKernel(src.clCxt, &imgproc_copymakeboder, "copymakeborder", globalThreads,
localThreads, args, -1, -1, buildOptions.c_str());
......@@ -35,173 +35,100 @@
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
//BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii
#define ELEM(i,l_edge,r_edge,elem1,elem2) (i)<(l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh
#define ADDR_L(i,l_edge,r_edge,addr) (i) < (l_edge) ? (l_edge) : (addr)
#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? (r_edge)-1 : (addr)
#define EXTRAPOLATE(x, y, v) v = scalar;
#elif defined BORDER_REPLICATE
#define EXTRAPOLATE(x, y, v) \
{ \
x = max(min(x, src_cols - 1), 0); \
y = max(min(y, src_rows - 1), 0); \
v = src[mad24(y, src_step, x + src_offset)]; \
#elif defined BORDER_WRAP
#define EXTRAPOLATE(x, y, v) \
{ \
if (x < 0) \
x -= ((x - src_cols + 1) / src_cols) * src_cols; \
if (x >= src_cols) \
x %= src_cols; \
if (y < 0) \
y -= ((y - src_rows + 1) / src_rows) * src_rows; \
if( y >= src_rows ) \
y %= src_rows; \
v = src[mad24(y, src_step, x + src_offset)]; \
#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb
#define ADDR_L(i,l_edge,r_edge,addr) (i) < (l_edge) ? -(i)-1 : (addr)
#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr)
#define DELTA int delta = 0
#define DELTA int delta = 1
//BORDER_REFLECT_101: gfedcb|abcdefgh|gfedcba
#define ADDR_L(i,l_edge,r_edge,addr) (i) < (l_edge) ? -(i) : (addr)
#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr)
#define EXTRAPOLATE(x, y, v) \
{ \
if (src_cols == 1) \
x = 0; \
else \
do \
{ \
if( x < 0 ) \
x = -x - 1 + delta; \
else \
x = src_cols - 1 - (x - src_cols) - delta; \
} \
while (x >= src_cols || x < 0); \
if (src_rows == 1) \
y = 0; \
else \
do \
{ \
if( y < 0 ) \
y = -y - 1 + delta; \
else \
y = src_rows - 1 - (y - src_rows) - delta; \
} \
while (y >= src_rows || y < 0); \
v = src[mad24(y, src_step, x + src_offset)]; \
#error No extrapolation method
//BORDER_WRAP: cdefgh|abcdefgh|abcdefg
#define ADDR_L(i,l_edge,r_edge,addr) (i) < (l_edge) ? (i)+(r_edge) : (addr)
#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? (i)-(r_edge) : (addr)
#define NEED_EXTRAPOLATION(gx, gy) (gx >= src_cols || gy >= src_rows || gx < 0 || gy < 0)
__kernel void copymakeborder
(__global const GENTYPE *src,
__global GENTYPE *dst,
const int dst_cols,
const int dst_rows,
const int src_cols,
const int src_rows,
const int src_step_in_pixel,
const int src_offset_in_pixel,
const int dst_step_in_pixel,
const int dst_offset_in_pixel,
const int top,
const int left,
const GENTYPE val
int dst_cols, int dst_rows,
int src_cols, int src_rows,
int src_step, int src_offset,
int dst_step, int dst_offset,
int top, int left, GENTYPE scalar)
int x = get_global_id(0);
int y = get_global_id(1);
int src_x = x-left;
int src_y = y-top;
int src_addr = mad24(src_y,src_step_in_pixel,src_x+src_offset_in_pixel);
int dst_addr = mad24(y,dst_step_in_pixel,x+dst_offset_in_pixel);
int con = (src_x >= 0) && (src_x < src_cols) && (src_y >= 0) && (src_y < src_rows);
dst[dst_addr] = src[src_addr];
//write the result to dst
if((x<dst_cols) && (y<dst_rows))
dst[dst_addr] = val;
int s_x,s_y;
//judge if read out of boundary
s_x= ADDR_L(src_x,0,src_cols,src_x);
s_x= ADDR_R(src_x,src_cols,s_x);
s_y= ADDR_L(src_y,0,src_rows,src_y);
s_y= ADDR_R(src_y,src_rows,s_y);
//write the result to dst
if((x<dst_cols) && (y<dst_rows))
dst[dst_addr] = src[src_addr];
__kernel void copymakeborder_C1_D0
(__global const uchar *src,
__global uchar *dst,
const int dst_cols,
const int dst_rows,
const int src_cols,
const int src_rows,
const int src_step_in_pixel,
const int src_offset_in_pixel,
const int dst_step_in_pixel,
const int dst_offset_in_pixel,
const int top,
const int left,
const uchar val
int x = get_global_id(0)<<2;
int y = get_global_id(1);
int src_x = x-left;
int src_y = y-top;
int src_addr = mad24(src_y,src_step_in_pixel,src_x+src_offset_in_pixel);
int dst_addr = mad24(y,dst_step_in_pixel,x+dst_offset_in_pixel);
int con = (src_x >= 0) && (src_x+3 < src_cols) && (src_y >= 0) && (src_y < src_rows);
if (x < dst_cols && y < dst_rows)
uchar4 tmp = vload4(0,src+src_addr);
*(__global uchar4*)(dst+dst_addr) = tmp;
//write the result to dst
if((((src_x<0) && (src_x+3>=0))||(src_x < src_cols) && (src_x+3 >= src_cols)) && (src_y >= 0) && (src_y < src_rows))
int4 addr;
uchar4 tmp;
addr.x = ((src_x < 0) || (src_x>= src_cols)) ? 0 : src_addr;
addr.y = ((src_x+1 < 0) || (src_x+1>= src_cols)) ? 0 : (src_addr+1);
addr.z = ((src_x+2 < 0) || (src_x+2>= src_cols)) ? 0 : (src_addr+2);
addr.w = ((src_x+3 < 0) || (src_x+3>= src_cols)) ? 0 : (src_addr+3);
tmp.x = src[addr.x];
tmp.y = src[addr.y];
tmp.z = src[addr.z];
tmp.w = src[addr.w];
tmp.x = (src_x >=0)&&(src_x < src_cols) ? tmp.x : val;
tmp.y = (src_x+1 >=0)&&(src_x +1 < src_cols) ? tmp.y : val;
tmp.z = (src_x+2 >=0)&&(src_x +2 < src_cols) ? tmp.z : val;
tmp.w = (src_x+3 >=0)&&(src_x +3 < src_cols) ? tmp.w : val;
*(__global uchar4*)(dst+dst_addr) = tmp;
else if((x<dst_cols) && (y<dst_rows))
*(__global uchar4*)(dst+dst_addr) = (uchar4)val;
int4 s_x;
int s_y;
//judge if read out of boundary
s_x.x= ADDR_L(src_x,0,src_cols,src_x);
s_x.y= ADDR_L(src_x+1,0,src_cols,src_x+1);
s_x.z= ADDR_L(src_x+2,0,src_cols,src_x+2);
s_x.w= ADDR_L(src_x+3,0,src_cols,src_x+3);
s_x.x= ADDR_R(src_x,src_cols,s_x.x);
s_x.y= ADDR_R(src_x+1,src_cols,s_x.y);
s_x.z= ADDR_R(src_x+2,src_cols,s_x.z);
s_x.w= ADDR_R(src_x+3,src_cols,s_x.w);
s_y= ADDR_L(src_y,0,src_rows,src_y);
s_y= ADDR_R(src_y,src_rows,s_y);
int4 src_addr4=mad24((int4)s_y,(int4)src_step_in_pixel,s_x+(int4)src_offset_in_pixel);
//write the result to dst
if((x<dst_cols) && (y<dst_rows))
int src_x = x - left;
int src_y = y - top;
int dst_index = mad24(y, dst_step, x + dst_offset);
if (NEED_EXTRAPOLATION(src_x, src_y))
EXTRAPOLATE(src_x, src_y, dst[dst_index])
uchar4 tmp;
tmp.x = src[src_addr4.x];
tmp.y = src[src_addr4.y];
tmp.z = src[src_addr4.z];
tmp.w = src[src_addr4.w];
*(__global uchar4*)(dst+dst_addr) = tmp;
int src_index = mad24(src_y, src_step, src_x + src_offset);
dst[dst_index] = src[src_index];
......@@ -144,7 +144,7 @@ PARAM_TEST_CASE(CopyMakeBorder, MatDepth, // depth
generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
border = randomBorder(0, 10);
border = randomBorder(0, MAX_VALUE << 2);
val = randomScalar(-MAX_VALUE, MAX_VALUE);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment