Commit 35198b84 authored by Li Peng's avatar Li Peng

morph ocl kernel for erode and dilate filter

This kernel is for CV_8UC1 format and 3x3 kernel size,
It is about 33% ~ 55% faster than current ocl kernel with below perf test

python ./modules/ts/misc/run.py -t imgproc --gtest_filter=OCL_ErodeFixture*
python ./modules/ts/misc/run.py -t imgproc --gtest_filter=OCL_DilateFixture*

Also add accuracy test cases for this kernel, the test command is

./bin/opencv_test_imgproc --gtest_filter=OCL_Filter/MorphFilter3x3*
Signed-off-by: 's avatarLi Peng <peng.li@intel.com>
parent 18327aed
...@@ -1477,6 +1477,78 @@ Ptr<Morph> Morph ::create(int op, int src_type, int dst_type, int max_width, int ...@@ -1477,6 +1477,78 @@ Ptr<Morph> Morph ::create(int op, int src_type, int dst_type, int max_width, int
#define ROUNDUP(sz, n) ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n))) #define ROUNDUP(sz, n) ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n)))
static bool ocl_morph3x3_8UC1( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor,
int op, int actual_op = -1, InputArray _extraMat = noArray())
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
Size ksize = _kernel.size();
Mat kernel8u;
String processing;
bool haveExtraMat = !_extraMat.empty();
CV_Assert(actual_op <= 3 || haveExtraMat);
_kernel.getMat().convertTo(kernel8u, CV_8U);
for (int y = 0; y < kernel8u.rows; ++y)
for (int x = 0; x < kernel8u.cols; ++x)
if (kernel8u.at<uchar>(y, x) != 0)
processing += format("PROCESS(%d,%d)", y, x);
if (anchor.x < 0)
anchor.x = ksize.width / 2;
if (anchor.y < 0)
anchor.y = ksize.height / 2;
if (actual_op < 0)
actual_op = op;
if (type != CV_8UC1 ||
!((_src.offset() == 0) && (_src.step() % 4 == 0)) ||
!((_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) ||
!(anchor.x == 1 && anchor.y == 1) ||
!(ksize.width == 3 && ksize.height == 3))
return false;
Size size = _src.size();
size_t globalsize[2] = { 0, 0 };
size_t localsize[2] = { 0, 0 };
globalsize[0] = size.width / 16;
globalsize[1] = size.height / 2;
static const char * const op2str[] = { "OP_ERODE", "OP_DILATE", NULL, NULL, "OP_GRADIENT", "OP_TOPHAT", "OP_BLACKHAT" };
String opts = format("-D PROCESS_ELEM_=%s -D %s%s", processing.c_str(), op2str[op],
actual_op == op ? "" : cv::format(" -D %s", op2str[actual_op]).c_str());
ocl::Kernel k;
k.create("morph3x3_8UC1_cols16_rows2", cv::ocl::imgproc::morph3x3_oclsrc, opts);
if (k.empty())
return false;
UMat src = _src.getUMat();
_dst.create(size, CV_MAKETYPE(depth, cn));
if (!(_dst.offset() == 0 && _dst.step() % 4 == 0))
return false;
UMat dst = _dst.getUMat();
UMat extraMat = _extraMat.getUMat();
int idxArg = k.set(0, ocl::KernelArg::PtrReadOnly(src));
idxArg = k.set(idxArg, (int)src.step);
idxArg = k.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
idxArg = k.set(idxArg, (int)dst.step);
idxArg = k.set(idxArg, (int)dst.rows);
idxArg = k.set(idxArg, (int)dst.cols);
if (haveExtraMat)
{
idxArg = k.set(idxArg, ocl::KernelArg::ReadOnlyNoSize(extraMat));
}
return k.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false);
}
static bool ocl_morphSmall( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor, int borderType, static bool ocl_morphSmall( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor, int borderType,
int op, int actual_op = -1, InputArray _extraMat = noArray()) int op, int actual_op = -1, InputArray _extraMat = noArray())
{ {
...@@ -1676,6 +1748,9 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel, ...@@ -1676,6 +1748,9 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
#endif #endif
) )
{ {
if (ocl_morph3x3_8UC1(_src, _dst, kernel, anchor, op, actual_op, _extraMat))
return true;
if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat)) if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat))
return true; return true;
} }
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifdef OP_ERODE
#define OP(m1, m2) min(m1, m2)
#define VAL UCHAR_MAX
#endif
#ifdef OP_DILATE
#define OP(m1, m2) max(m1, m2)
#define VAL 0
#endif
#if defined OP_GRADIENT || defined OP_TOPHAT || defined OP_BLACKHAT
#define EXTRA_PARAMS , __global const uchar * matptr, int mat_step, int mat_offset
#else
#define EXTRA_PARAMS
#endif
#define PROCESS(_y, _x) \
line_out[0] = OP(line_out[0], arr[_x + 3 * _y]); \
line_out[1] = OP(line_out[1], arr[_x + 3 * (_y + 1)]);
#define PROCESS_ELEM \
line_out[0] = (uchar16)VAL; \
line_out[1] = (uchar16)VAL; \
PROCESS_ELEM_
__kernel void morph3x3_8UC1_cols16_rows2(__global const uint* src, int src_step,
__global uint* dst, int dst_step,
int rows, int cols
EXTRA_PARAMS)
{
int block_x = get_global_id(0);
int y = get_global_id(1) * 2;
int ssx = 1, dsx = 1;
if ((block_x * 16) >= cols || y >= rows) return;
uchar a; uchar16 b; uchar c;
uchar d; uchar16 e; uchar f;
uchar g; uchar16 h; uchar i;
uchar j; uchar16 k; uchar l;
uchar16 line[4];
uchar16 line_out[2];
int src_index = block_x * 4 * ssx + (y - 1) * (src_step / 4);
line[0] = (y == 0) ? (uchar16)VAL: as_uchar16(vload4(0, src + src_index));
line[1] = as_uchar16(vload4(0, src + src_index + (src_step / 4)));
line[2] = as_uchar16(vload4(0, src + src_index + 2 * (src_step / 4)));
line[3] = (y == (rows - 2)) ? (uchar16)VAL: as_uchar16(vload4(0, src + src_index + 3 * (src_step / 4)));
__global uchar *src_p = (__global uchar *)src;
bool line_end = ((block_x + 1) * 16 == cols);
src_index = block_x * 16 * ssx + (y - 1) * src_step;
a = (block_x == 0 || y == 0) ? VAL : src_p[src_index - 1];
b = line[0];
c = (line_end || y == 0) ? VAL : src_p[src_index + 16];
d = (block_x == 0) ? VAL : src_p[src_index + src_step - 1];
e = line[1];
f = line_end ? VAL : src_p[src_index + src_step + 16];
g = (block_x == 0) ? VAL : src_p[src_index + 2 * src_step - 1];
h = line[2];
i = line_end ? VAL : src_p[src_index + 2 * src_step + 16];
j = (block_x == 0 || y == (rows - 2)) ? VAL : src_p[src_index + 3 * src_step - 1];
k = line[3];
l = (line_end || y == (rows - 2)) ? VAL : src_p[src_index + 3 * src_step + 16];
uchar16 arr[12];
arr[0] = (uchar16)(a, b.s01234567, b.s89ab, b.scde);
arr[1] = b;
arr[2] = (uchar16)(b.s12345678, b.s9abc, b.sdef, c);
arr[3] = (uchar16)(d, e.s01234567, e.s89ab, e.scde);
arr[4] = e;
arr[5] = (uchar16)(e.s12345678, e.s9abc, e.sdef, f);
arr[6] = (uchar16)(g, h.s01234567, h.s89ab, h.scde);
arr[7] = h;
arr[8] = (uchar16)(h.s12345678, h.s9abc, h.sdef, i);
arr[9] = (uchar16)(j, k.s01234567, k.s89ab, k.scde);
arr[10] = k;
arr[11] = (uchar16)(k.s12345678, k.s9abc, k.sdef, l);
PROCESS_ELEM;
int dst_index = block_x * 4 * dsx + y * (dst_step / 4);
#if defined OP_GRADIENT || defined OP_TOPHAT || defined OP_BLACKHAT
int mat_index = y * mat_step + block_x * 16 * ssx + mat_offset;
uchar16 val0 = vload16(0, matptr + mat_index);
uchar16 val1 = vload16(0, matptr + mat_index + mat_step);
#ifdef OP_GRADIENT
line_out[0] = convert_uchar16_sat(convert_int16(line_out[0]) - convert_int16(val0));
line_out[1] = convert_uchar16_sat(convert_int16(line_out[1]) - convert_int16(val1));
vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
#elif defined OP_TOPHAT
line_out[0] = convert_uchar16_sat(convert_int16(val0) - convert_int16(line_out[0]));
line_out[1] = convert_uchar16_sat(convert_int16(val1) - convert_int16(line_out[1]));
vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
#elif defined OP_BLACKHAT
line_out[0] = convert_uchar16_sat(convert_int16(line_out[0]) - convert_int16(val0));
line_out[1] = convert_uchar16_sat(convert_int16(line_out[1]) - convert_int16(val1));
vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
#endif
#else
vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
#endif
}
...@@ -273,6 +273,85 @@ OCL_TEST_P(Dilate, Mat) ...@@ -273,6 +273,85 @@ OCL_TEST_P(Dilate, Mat)
} }
} }
PARAM_TEST_CASE(MorphFilter3x3_cols16_rows2_Base, MatType,
int, // kernel size
Size, // dx, dy
BorderType, // border type
double, // optional parameter
bool, // roi or not
int) // width multiplier
{
int type, borderType, ksize;
Size size;
double param;
bool useRoi;
int widthMultiple;
TEST_DECLARE_INPUT_PARAMETER(src);
TEST_DECLARE_OUTPUT_PARAMETER(dst);
virtual void SetUp()
{
type = GET_PARAM(0);
ksize = GET_PARAM(1);
size = GET_PARAM(2);
borderType = GET_PARAM(3);
param = GET_PARAM(4);
useRoi = GET_PARAM(5);
widthMultiple = GET_PARAM(6);
}
void random_roi()
{
size = Size(3, 3);
Size roiSize = randomSize(size.width, MAX_VALUE, size.height, MAX_VALUE);
roiSize.width = std::max(size.width + 13, roiSize.width & (~0xf));
roiSize.height = std::max(size.height + 1, roiSize.height & (~0x1));
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -60, 70);
UMAT_UPLOAD_INPUT_PARAMETER(src);
UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
}
void Near()
{
Near(1, false);
}
void Near(double threshold, bool relative)
{
if (relative)
OCL_EXPECT_MATS_NEAR_RELATIVE(dst, threshold);
else
OCL_EXPECT_MATS_NEAR(dst, threshold);
}
};
typedef MorphFilter3x3_cols16_rows2_Base MorphFilter3x3_cols16_rows2;
OCL_TEST_P(MorphFilter3x3_cols16_rows2, Mat)
{
Size kernelSize(ksize, ksize);
int iterations = (int)param;
for (int j = 0; j < test_loop_times; j++)
{
random_roi();
Mat kernel = ksize==0 ? Mat() : randomMat(kernelSize, CV_8UC1, 0, 3);
OCL_OFF(cv::dilate(src_roi, dst_roi, kernel, Point(-1, -1), iterations) );
OCL_ON(cv::dilate(usrc_roi, udst_roi, kernel, Point(-1, -1), iterations) );
Near();
}
}
///////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////
// MorphologyEx // MorphologyEx
IMPLEMENT_PARAM_CLASS(MorphOp, int) IMPLEMENT_PARAM_CLASS(MorphOp, int)
...@@ -429,6 +508,15 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine( ...@@ -429,6 +508,15 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(
Bool(), Bool(),
Values(1))); // not used Values(1))); // not used
OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphFilter3x3_cols16_rows2, Combine(
Values((MatType)CV_8UC1),
Values(0, 3), // kernel size, 0 means kernel = Mat()
Values(Size(0, 0)), // not used
Values((BorderType)BORDER_CONSTANT),
Values(1.0, 2.0, 3.0),
Bool(),
Values(1))); // not used
OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine( OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(3, 5, 7), // kernel size Values(3, 5, 7), // kernel size
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment