Commit 7f818e9b authored by Ilya Lavrenov

optimized UMat::copyTo with mask

parent 61267885
...@@ -44,14 +44,14 @@ ...@@ -44,14 +44,14 @@
#ifdef COPY_TO_MASK #ifdef COPY_TO_MASK
#define DEFINE_DATA \ #define DEFINE_DATA \
int src_index = mad24(y, src_step, mad24(x, (int)sizeof(T) * scn, src_offset)); \ int src_index = mad24(y, src_step, mad24(x, (int)sizeof(T1) * scn, src_offset)); \
int dst_index = mad24(y, dst_step, mad24(x, (int)sizeof(T) * scn, dst_offset)); \ int dst_index = mad24(y, dst_step, mad24(x, (int)sizeof(T1) * scn, dst_offset)); \
\ \
__global const T * src = (__global const T *)(srcptr + src_index); \ __global const T1 * src = (__global const T1 *)(srcptr + src_index); \
__global T * dst = (__global T *)(dstptr + dst_index) __global T1 * dst = (__global T1 *)(dstptr + dst_index)
__kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_offset, __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_offset,
__global const uchar * maskptr, int mask_step, int mask_offset, __global const uchar * mask, int mask_step, int mask_offset,
__global uchar * dstptr, int dst_step, int dst_offset, __global uchar * dstptr, int dst_step, int dst_offset,
int dst_rows, int dst_cols) int dst_rows, int dst_cols)
{ {
...@@ -60,8 +60,7 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of ...@@ -60,8 +60,7 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of
if (x < dst_cols && y < dst_rows) if (x < dst_cols && y < dst_rows)
{ {
int mask_index = mad24(y, mask_step, mad24(x, mcn, mask_offset)); mask += mad24(y, mask_step, mad24(x, mcn, mask_offset));
__global const uchar * mask = (__global const uchar *)(maskptr + mask_index);
#if mcn == 1 #if mcn == 1
if (mask[0]) if (mask[0])
...@@ -72,6 +71,16 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of ...@@ -72,6 +71,16 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of
for (int c = 0; c < scn; ++c) for (int c = 0; c < scn; ++c)
dst[c] = src[c]; dst[c] = src[c];
} }
#ifdef HAVE_DST_UNINIT
else
{
DEFINE_DATA;
#pragma unroll
for (int c = 0; c < scn; ++c)
dst[c] = (T1)(0);
}
#endif
#elif scn == mcn #elif scn == mcn
DEFINE_DATA; DEFINE_DATA;
...@@ -79,6 +88,10 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of ...@@ -79,6 +88,10 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of
for (int c = 0; c < scn; ++c) for (int c = 0; c < scn; ++c)
if (mask[c]) if (mask[c])
dst[c] = src[c]; dst[c] = src[c];
#ifdef HAVE_DST_UNINIT
else
dst[c] = (T1)(0);
#endif
#else #else
#error "(mcn == 1 || mcn == scn) should be true" #error "(mcn == 1 || mcn == scn) should be true"
#endif #endif
......
...@@ -678,16 +678,21 @@ void UMat::copyTo(OutputArray _dst, InputArray _mask) const ...@@ -678,16 +678,21 @@ void UMat::copyTo(OutputArray _dst, InputArray _mask) const
UMat dst = _dst.getUMat(); UMat dst = _dst.getUMat();
bool haveDstUninit = false;
if( prevu != dst.u ) // do not leave dst uninitialized if( prevu != dst.u ) // do not leave dst uninitialized
dst = Scalar(0); haveDstUninit = true;
ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, String opts = format("-D COPY_TO_MASK -D T1=%s -D scn=%d -D mcn=%d%s",
format("-D COPY_TO_MASK -D T=%s -D scn=%d -D mcn=%d", ocl::memopTypeToStr(depth()), cn, mcn,
ocl::memopTypeToStr(depth()), cn, mcn)); haveDstUninit ? " -D HAVE_DST_UNINIT" : "");
ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, opts);
if (!k.empty()) if (!k.empty())
{ {
k.args(ocl::KernelArg::ReadOnlyNoSize(*this), ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()), k.args(ocl::KernelArg::ReadOnlyNoSize(*this),
ocl::KernelArg::WriteOnly(dst)); ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
haveDstUninit ? ocl::KernelArg::WriteOnly(dst) :
ocl::KernelArg::ReadWrite(dst));
size_t globalsize[2] = { cols, rows }; size_t globalsize[2] = { cols, rows };
if (k.run(2, globalsize, NULL, false)) if (k.run(2, globalsize, NULL, false))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment