Commit 7193762d authored by Andrey Pavlenko, committed by OpenCV Buildbot

Merge pull request #2495 from ilya-lavrenov:tapi_flip

parents d18ebfa8 04884ebf
...@@ -482,9 +482,9 @@ enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS ...@@ -482,9 +482,9 @@ enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS
static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode ) static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
{ {
CV_Assert(flipCode >= - 1 && flipCode <= 1); CV_Assert(flipCode >= - 1 && flipCode <= 1);
int type = _src.type(), cn = CV_MAT_CN(type), flipType; int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), flipType;
if (cn > 4 || cn == 3) if (cn > 4)
return false; return false;
const char * kernelName; const char * kernelName;
...@@ -506,7 +506,8 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode ) ...@@ -506,7 +506,8 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
} }
ocl::Kernel k(kernelName, ocl::core::flip_oclsrc, ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
format( "-D type=%s", ocl::memopTypeToStr(type))); format( "-D T=%s -D T1=%s -D cn=%d", ocl::memopTypeToStr(type),
ocl::memopTypeToStr(depth), cn));
if (k.empty()) if (k.empty())
return false; return false;
......
...@@ -2679,17 +2679,17 @@ namespace cv { ...@@ -2679,17 +2679,17 @@ namespace cv {
static bool ocl_setIdentity( InputOutputArray _m, const Scalar& s ) static bool ocl_setIdentity( InputOutputArray _m, const Scalar& s )
{ {
int type = _m.type(), cn = CV_MAT_CN(type); int type = _m.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
if (cn == 3) sctype = CV_MAKE_TYPE(depth, cn == 3 ? 4 : cn);
return false;
ocl::Kernel k("setIdentity", ocl::core::set_identity_oclsrc, ocl::Kernel k("setIdentity", ocl::core::set_identity_oclsrc,
format("-D T=%s", ocl::memopTypeToStr(type))); format("-D T=%s -D T1=%s -D cn=%d -D ST=%s", ocl::memopTypeToStr(type),
ocl::memopTypeToStr(depth), cn, ocl::memopTypeToStr(sctype)));
if (k.empty()) if (k.empty())
return false; return false;
UMat m = _m.getUMat(); UMat m = _m.getUMat();
k.args(ocl::KernelArg::WriteOnly(m), ocl::KernelArg::Constant(Mat(1, 1, type, s))); k.args(ocl::KernelArg::WriteOnly(m), ocl::KernelArg::Constant(Mat(1, 1, sctype, s)));
size_t globalsize[2] = { m.cols, m.rows }; size_t globalsize[2] = { m.cols, m.rows };
return k.run(2, globalsize, NULL, false); return k.run(2, globalsize, NULL, false);
......
...@@ -39,10 +39,18 @@ ...@@ -39,10 +39,18 @@
// //
//M*/ //M*/
// Pixel-access macros for the flip kernels, chosen at build time from the `cn` macro.
// The next line is a side-by-side diff row: the left part is the removed `sizeoftype`
// macro, the right part opens the new conditional block.
#define sizeoftype ((int)sizeof(type)) #if cn != 3
// cn != 3: a pixel is loaded and stored as a single value of type T, and the
// per-pixel byte stride is sizeof(T).
#define loadpix(addr) *(__global const T *)(addr)
#define storepix(val, addr) *(__global T *)(addr) = val
#define TSIZE (int)sizeof(T)
#else
// cn == 3: a pixel is accessed as three consecutive T1 elements through
// vload3/vstore3, and the per-pixel byte stride is 3 * sizeof(T1).
// NOTE(review): presumably done because sizeof() of a 3-component vector type
// differs from the packed 3-element stride — confirm against the OpenCL C spec.
#define loadpix(addr) vload3(0, (__global const T1 *)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
#define TSIZE ((int)sizeof(T1)*3)
#endif
__kernel void arithm_flip_rows(__global const uchar* srcptr, int srcstep, int srcoffset, __kernel void arithm_flip_rows(__global const uchar * srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dststep, int dstoffset, __global uchar * dstptr, int dst_step, int dst_offset,
int rows, int cols, int thread_rows, int thread_cols) int rows, int cols, int thread_rows, int thread_cols)
{ {
int x = get_global_id(0); int x = get_global_id(0);
...@@ -50,19 +58,16 @@ __kernel void arithm_flip_rows(__global const uchar* srcptr, int srcstep, int sr ...@@ -50,19 +58,16 @@ __kernel void arithm_flip_rows(__global const uchar* srcptr, int srcstep, int sr
if (x < cols && y < thread_rows) if (x < cols && y < thread_rows)
{ {
__global const type* src0 = (__global const type*)(srcptr + mad24(y, srcstep, mad24(x, sizeoftype, srcoffset))); T src0 = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset)));
__global const type* src1 = (__global const type*)(srcptr + mad24(rows - y - 1, srcstep, mad24(x, sizeoftype, srcoffset))); T src1 = loadpix(srcptr + mad24(rows - y - 1, src_step, mad24(x, TSIZE, src_offset)));
__global type* dst0 = (__global type*)(dstptr + mad24(y, dststep, mad24(x, sizeoftype, dstoffset))); storepix(src1, dstptr + mad24(y, dst_step, mad24(x, TSIZE, dst_offset)));
__global type* dst1 = (__global type*)(dstptr + mad24(rows - y - 1, dststep, mad24(x, sizeoftype, dstoffset))); storepix(src0, dstptr + mad24(rows - y - 1, dst_step, mad24(x, TSIZE, dst_offset)));
dst0[0] = src1[0];
dst1[0] = src0[0];
} }
} }
__kernel void arithm_flip_rows_cols(__global const uchar* srcptr, int srcstep, int srcoffset, __kernel void arithm_flip_rows_cols(__global const uchar * srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dststep, int dstoffset, __global uchar * dstptr, int dst_step, int dst_offset,
int rows, int cols, int thread_rows, int thread_cols) int rows, int cols, int thread_rows, int thread_cols)
{ {
int x = get_global_id(0); int x = get_global_id(0);
...@@ -71,19 +76,16 @@ __kernel void arithm_flip_rows_cols(__global const uchar* srcptr, int srcstep, i ...@@ -71,19 +76,16 @@ __kernel void arithm_flip_rows_cols(__global const uchar* srcptr, int srcstep, i
if (x < cols && y < thread_rows) if (x < cols && y < thread_rows)
{ {
int x1 = cols - x - 1; int x1 = cols - x - 1;
__global const type* src0 = (__global const type*)(srcptr + mad24(y, srcstep, mad24(x, sizeoftype, srcoffset))); T src0 = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset)));
__global const type* src1 = (__global const type*)(srcptr + mad24(rows - y - 1, srcstep, mad24(x1, sizeoftype, srcoffset))); T src1 = loadpix(srcptr + mad24(rows - y - 1, src_step, mad24(x1, TSIZE, src_offset)));
__global type* dst0 = (__global type*)(dstptr + mad24(rows - y - 1, dststep, mad24(x1, sizeoftype, dstoffset)));
__global type* dst1 = (__global type*)(dstptr + mad24(y, dststep, mad24(x, sizeoftype, dstoffset)));
dst0[0] = src0[0]; storepix(src0, dstptr + mad24(rows - y - 1, dst_step, mad24(x1, TSIZE, dst_offset)));
dst1[0] = src1[0]; storepix(src1, dstptr + mad24(y, dst_step, mad24(x, TSIZE, dst_offset)));
} }
} }
__kernel void arithm_flip_cols(__global const uchar* srcptr, int srcstep, int srcoffset, __kernel void arithm_flip_cols(__global const uchar * srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dststep, int dstoffset, __global uchar * dstptr, int dst_step, int dst_offset,
int rows, int cols, int thread_rows, int thread_cols) int rows, int cols, int thread_rows, int thread_cols)
{ {
int x = get_global_id(0); int x = get_global_id(0);
...@@ -92,13 +94,10 @@ __kernel void arithm_flip_cols(__global const uchar* srcptr, int srcstep, int sr ...@@ -92,13 +94,10 @@ __kernel void arithm_flip_cols(__global const uchar* srcptr, int srcstep, int sr
if (x < thread_cols && y < rows) if (x < thread_cols && y < rows)
{ {
int x1 = cols - x - 1; int x1 = cols - x - 1;
__global const type* src0 = (__global const type*)(srcptr + mad24(y, srcstep, mad24(x, sizeoftype, srcoffset))); T src0 = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset)));
__global const type* src1 = (__global const type*)(srcptr + mad24(y, srcstep, mad24(x1, sizeoftype, srcoffset))); T src1 = loadpix(srcptr + mad24(y, src_step, mad24(x1, TSIZE, src_offset)));
__global type* dst0 = (__global type*)(dstptr + mad24(y, dststep, mad24(x1, sizeoftype, dstoffset)));
__global type* dst1 = (__global type*)(dstptr + mad24(y, dststep, mad24(x, sizeoftype, dstoffset)));
dst1[0] = src1[0]; storepix(src0, dstptr + mad24(y, dst_step, mad24(x1, TSIZE, dst_offset)));
dst0[0] = src0[0]; storepix(src1, dstptr + mad24(y, dst_step, mad24(x, TSIZE, dst_offset)));
} }
} }
...@@ -43,17 +43,28 @@ ...@@ -43,17 +43,28 @@
// //
//M*/ //M*/
// Pixel-access macros for the setIdentity kernel, chosen at build time from the
// `cn` macro. `scalar` is the value written on the diagonal; it is derived from
// the kernel argument `scalar_`.
#if cn != 3
// cn != 3: a pixel is loaded and stored as a single value of type T, the
// per-pixel byte stride is sizeof(T), and scalar_ is used as is.
#define loadpix(addr) *(__global const T *)(addr)
#define storepix(val, addr) *(__global T *)(addr) = val
#define TSIZE (int)sizeof(T)
#define scalar scalar_
#else
// cn == 3: a pixel is accessed as three consecutive T1 elements through
// vload3/vstore3, and the per-pixel byte stride is 3 * sizeof(T1).
// NOTE(review): presumably done because sizeof() of a 3-component vector type
// differs from the packed 3-element stride — confirm against the OpenCL C spec.
#define loadpix(addr) vload3(0, (__global const T1 *)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
#define TSIZE ((int)sizeof(T1)*3)
// Only the .x, .y and .z components of scalar_ are used to build the T value.
#define scalar (T)(scalar_.x, scalar_.y, scalar_.z)
#endif
// setIdentity kernel, shown as side-by-side diff rows: on each line the left part
// is the pre-change text and the right part is the post-change text.
// Each work-item handles one element (x, y): it writes `scalar` when x == y and
// (T)(0) otherwise, provided x < cols and y < rows.
// srcptr / src_step / src_offset: base byte pointer, row step and start offset used
// to compute the element address; rows / cols: bounds checked before writing.
__kernel void setIdentity(__global uchar * srcptr, int src_step, int src_offset, int rows, int cols, __kernel void setIdentity(__global uchar * srcptr, int src_step, int src_offset, int rows, int cols,
T scalar) ST scalar_)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
// Work-items outside the cols x rows range do nothing.
if (x < cols && y < rows) if (x < cols && y < rows)
{ {
// Byte offset of element (x, y): y * src_step + x * <pixel size> + src_offset.
// The old text uses sizeof(T) as the pixel size, the new text uses TSIZE.
int src_index = mad24(y, src_step, mad24(x, (int)sizeof(T), src_offset)); int src_index = mad24(y, src_step, mad24(x, TSIZE, src_offset));
__global T * src = (__global T *)(srcptr + src_index);
// Old text stores through a typed pointer; new text stores through storepix.
src[0] = x == y ? scalar : (T)(0); storepix(x == y ? scalar : (T)(0), srcptr + src_index);
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment