Commit c3100eeb authored by Alexander Karsakov

Fixed buffer initialization in reduce kernel. Enabled OCL version of reduce for…

Fixed buffer initialization in reduce kernel. Enabled OCL version of reduce for SUM, MAX, MIN modes.
parent 51195645
......@@ -3462,9 +3462,6 @@ static bool ocl_reduce(InputArray _src, OutputArray _dst,
if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
return false;
-if ((op == CV_REDUCE_SUM && sdepth == CV_32F) || op == CV_REDUCE_MIN || op == CV_REDUCE_MAX)
-return false;
if (op == CV_REDUCE_AVG)
{
if (sdepth < CV_32S && ddepth < CV_32S)
......
......@@ -108,7 +108,10 @@ __kernel void reduce_horz_opt(__global const uchar * srcptr, int src_step, int s
int src_index = mad24(y, src_step, mad24(x, (int)sizeof(srcT) * cn, src_offset));
__global const srcT * src = (__global const srcT *)(srcptr + src_index);
-bufT tmp[cn] = { INIT_VALUE };
+bufT tmp[cn];
+#pragma unroll
+for (int c = 0; c < cn; ++c)
+tmp[c] = INIT_VALUE;
int src_step_mul = BUF_COLS * cn;
for (int idx = x; idx < cols; idx += BUF_COLS, src += src_step_mul)
......@@ -140,7 +143,10 @@ __kernel void reduce_horz_opt(__global const uchar * srcptr, int src_step, int s
int dst_index = mad24(y, dst_step, dst_offset);
__global dstT * dst = (__global dstT *)(dstptr + dst_index);
-bufT tmp[cn] = { INIT_VALUE };
+bufT tmp[cn];
+#pragma unroll
+for (int c = 0; c < cn; ++c)
+tmp[c] = INIT_VALUE;
#pragma unroll
for (int xin = 0; xin < BUF_COLS / 2; xin ++)
......@@ -179,7 +185,10 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
int dst_index = mad24(x, (int)sizeof(dstT0) * cn, dst_offset);
__global dstT0 * dst = (__global dstT0 *)(dstptr + dst_index);
-dstT tmp[cn] = { INIT_VALUE };
+dstT tmp[cn];
+#pragma unroll
+for (int c = 0; c < cn; ++c)
+tmp[c] = INIT_VALUE;
for (int y = 0; y < rows; ++y, src_index += src_step)
{
......@@ -209,7 +218,10 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
__global const srcT * src = (__global const srcT *)(srcptr + src_index);
__global dstT * dst = (__global dstT *)(dstptr + dst_index);
-dstT tmp[cn] = { INIT_VALUE };
+dstT tmp[cn];
+#pragma unroll
+for (int c = 0; c < cn; ++c)
+tmp[c] = INIT_VALUE;
for (int x = 0; x < cols; ++x, src += cn)
{
......
......@@ -1704,7 +1704,7 @@ OCL_TEST_P(ReduceSum, Mat)
OCL_OFF(cv::reduce(src_roi, dst_roi, dim, CV_REDUCE_SUM, dtype));
OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, CV_REDUCE_SUM, dtype));
-double eps = ddepth <= CV_32S ? 1 : 1e-4;
+double eps = ddepth <= CV_32S ? 1 : 7e-4;
OCL_EXPECT_MATS_NEAR(dst, eps);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment