Commit ba2eee9c authored by Andrey Pavlenko's avatar Andrey Pavlenko Committed by OpenCV Buildbot

Merge pull request #2522 from ilya-lavrenov:tapi_canny

parents 9c574538 6ba60a1e
...@@ -100,19 +100,29 @@ static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float ...@@ -100,19 +100,29 @@ static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float
low_thresh = std::min(32767.0f, low_thresh); low_thresh = std::min(32767.0f, low_thresh);
high_thresh = std::min(32767.0f, high_thresh); high_thresh = std::min(32767.0f, high_thresh);
if (low_thresh > 0) low_thresh *= low_thresh; if (low_thresh > 0)
if (high_thresh > 0) high_thresh *= high_thresh; low_thresh *= low_thresh;
if (high_thresh > 0)
high_thresh *= high_thresh;
} }
int low = cvFloor(low_thresh), high = cvFloor(high_thresh); int low = cvFloor(low_thresh), high = cvFloor(high_thresh);
Size esize(size.width + 2, size.height + 2); Size esize(size.width + 2, size.height + 2);
UMat mag; UMat mag;
size_t globalsize[2] = { size.width * cn, size.height }, localsize[2] = { 16, 16 }; size_t globalsize[2] = { size.width, size.height }, localsize[2] = { 16, 16 };
if (aperture_size == 3 && !_src.isSubmatrix()) if (aperture_size == 3 && !_src.isSubmatrix())
{ {
// Sobel calculation // Sobel calculation
ocl::Kernel calcSobelRowPassKernel("calcSobelRowPass", ocl::imgproc::canny_oclsrc); char cvt[2][40];
ocl::Kernel calcSobelRowPassKernel("calcSobelRowPass", ocl::imgproc::canny_oclsrc,
format("-D OP_SOBEL -D cn=%d -D shortT=%s -D ucharT=%s"
" -D convertToIntT=%s -D intT=%s -D convertToShortT=%s", cn,
ocl::typeToStr(CV_16SC(cn)),
ocl::typeToStr(CV_8UC(cn)),
ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]),
ocl::typeToStr(CV_32SC(cn)),
ocl::convertTypeStr(CV_32S, CV_16S, cn, cvt[1])));
if (calcSobelRowPassKernel.empty()) if (calcSobelRowPassKernel.empty())
return false; return false;
...@@ -126,58 +136,62 @@ static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float ...@@ -126,58 +136,62 @@ static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float
// magnitude calculation // magnitude calculation
ocl::Kernel magnitudeKernel("calcMagnitude_buf", ocl::imgproc::canny_oclsrc, ocl::Kernel magnitudeKernel("calcMagnitude_buf", ocl::imgproc::canny_oclsrc,
L2gradient ? " -D L2GRAD" : ""); format("-D cn=%d%s -D OP_MAG_BUF -D shortT=%s -D convertToIntT=%s -D intT=%s",
cn, L2gradient ? " -D L2GRAD" : "",
ocl::typeToStr(CV_16SC(cn)),
ocl::convertTypeStr(CV_16S, CV_32S, cn, cvt[0]),
ocl::typeToStr(CV_32SC(cn))));
if (magnitudeKernel.empty()) if (magnitudeKernel.empty())
return false; return false;
mag = UMat(esize, CV_32SC(cn), Scalar::all(0)); mag = UMat(esize, CV_32SC1, Scalar::all(0));
dx.create(size, CV_16SC(cn)); dx.create(size, CV_16SC(cn));
dy.create(size, CV_16SC(cn)); dy.create(size, CV_16SC(cn));
magnitudeKernel.args(ocl::KernelArg::ReadOnlyNoSize(dxBuf), ocl::KernelArg::ReadOnlyNoSize(dyBuf), magnitudeKernel.args(ocl::KernelArg::ReadOnlyNoSize(dxBuf), ocl::KernelArg::ReadOnlyNoSize(dyBuf),
ocl::KernelArg::WriteOnlyNoSize(dx), ocl::KernelArg::WriteOnlyNoSize(dy), ocl::KernelArg::WriteOnlyNoSize(dx), ocl::KernelArg::WriteOnlyNoSize(dy),
ocl::KernelArg::WriteOnlyNoSize(mag, cn), size.height, size.width); ocl::KernelArg::WriteOnlyNoSize(mag), size.height, size.width);
if (!magnitudeKernel.run(2, globalsize, localsize, false)) if (!magnitudeKernel.run(2, globalsize, localsize, false))
return false; return false;
} }
else else
{ {
dx.create(size, CV_16SC(cn)); Sobel(_src, dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
dy.create(size, CV_16SC(cn)); Sobel(_src, dy, CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
Sobel(_src, dx, CV_16SC1, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
Sobel(_src, dy, CV_16SC1, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
// magnitude calculation // magnitude calculation
ocl::Kernel magnitudeKernel("calcMagnitude", ocl::imgproc::canny_oclsrc, ocl::Kernel magnitudeKernel("calcMagnitude", ocl::imgproc::canny_oclsrc,
L2gradient ? " -D L2GRAD" : ""); format("-D OP_MAG -D cn=%d%s -D intT=int -D shortT=short -D convertToIntT=convert_int_sat",
cn, L2gradient ? " -D L2GRAD" : ""));
if (magnitudeKernel.empty()) if (magnitudeKernel.empty())
return false; return false;
mag = UMat(esize, CV_32SC(cn), Scalar::all(0)); mag = UMat(esize, CV_32SC1, Scalar::all(0));
magnitudeKernel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy), magnitudeKernel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy),
ocl::KernelArg::WriteOnlyNoSize(mag, cn), size.height, size.width); ocl::KernelArg::WriteOnlyNoSize(mag), size.height, size.width);
if (!magnitudeKernel.run(2, globalsize, NULL, false)) if (!magnitudeKernel.run(2, globalsize, NULL, false))
return false; return false;
} }
// map calculation // map calculation
ocl::Kernel calcMapKernel("calcMap", ocl::imgproc::canny_oclsrc); ocl::Kernel calcMapKernel("calcMap", ocl::imgproc::canny_oclsrc,
format("-D OP_MAP -D cn=%d", cn));
if (calcMapKernel.empty()) if (calcMapKernel.empty())
return false; return false;
UMat map(esize, CV_32SC(cn)); UMat map(esize, CV_32SC1);
calcMapKernel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy), calcMapKernel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy),
ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::WriteOnlyNoSize(map, cn), ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::WriteOnlyNoSize(map),
size.height, size.width, low, high); size.height, size.width, low, high);
if (!calcMapKernel.run(2, globalsize, localsize, false)) if (!calcMapKernel.run(2, globalsize, localsize, false))
return false; return false;
// local hysteresis thresholding // local hysteresis thresholding
ocl::Kernel edgesHysteresisLocalKernel("edgesHysteresisLocal", ocl::imgproc::canny_oclsrc); ocl::Kernel edgesHysteresisLocalKernel("edgesHysteresisLocal", ocl::imgproc::canny_oclsrc,
"-D OP_HYST_LOCAL");
if (edgesHysteresisLocalKernel.empty()) if (edgesHysteresisLocalKernel.empty())
return false; return false;
...@@ -193,7 +207,8 @@ static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float ...@@ -193,7 +207,8 @@ static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float
for ( ; ; ) for ( ; ; )
{ {
ocl::Kernel edgesHysteresisGlobalKernel("edgesHysteresisGlobal", ocl::imgproc::canny_oclsrc); ocl::Kernel edgesHysteresisGlobalKernel("edgesHysteresisGlobal", ocl::imgproc::canny_oclsrc,
"-D OP_HYST_GLOBAL");
if (edgesHysteresisGlobalKernel.empty()) if (edgesHysteresisGlobalKernel.empty())
return false; return false;
...@@ -221,14 +236,15 @@ static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float ...@@ -221,14 +236,15 @@ static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float
} }
// get edges // get edges
ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc); ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc, "-D OP_EDGES");
if (getEdgesKernel.empty()) if (getEdgesKernel.empty())
return false; return false;
_dst.create(size, CV_8UC(cn)); _dst.create(size, CV_8UC1);
UMat dst = _dst.getUMat(); UMat dst = _dst.getUMat();
getEdgesKernel.args(ocl::KernelArg::ReadOnlyNoSize(map), ocl::KernelArg::WriteOnly(dst)); getEdgesKernel.args(ocl::KernelArg::ReadOnlyNoSize(map), ocl::KernelArg::WriteOnly(dst));
return getEdgesKernel.run(2, globalsize, NULL, false); return getEdgesKernel.run(2, globalsize, NULL, false);
} }
...@@ -254,12 +270,12 @@ void cv::Canny( InputArray _src, OutputArray _dst, ...@@ -254,12 +270,12 @@ void cv::Canny( InputArray _src, OutputArray _dst,
} }
if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7))) if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7)))
CV_Error(CV_StsBadFlag, ""); CV_Error(CV_StsBadFlag, "Aperture size should be odd");
if (low_thresh > high_thresh) if (low_thresh > high_thresh)
std::swap(low_thresh, high_thresh); std::swap(low_thresh, high_thresh);
CV_OCL_RUN(_dst.isUMat() && cn == 1, CV_OCL_RUN(_dst.isUMat() && (cn == 1 || cn == 3),
ocl_Canny(_src, _dst, (float)low_thresh, (float)high_thresh, aperture_size, L2gradient, cn, size)) ocl_Canny(_src, _dst, (float)low_thresh, (float)high_thresh, aperture_size, L2gradient, cn, size))
Mat src = _src.getMat(), dst = _dst.getMat(); Mat src = _src.getMat(), dst = _dst.getMat();
......
...@@ -43,6 +43,18 @@ ...@@ -43,6 +43,18 @@
// //
//M*/ //M*/
#ifdef OP_SOBEL
#if cn != 3
#define loadpix(addr) convertToIntT(*(__global const ucharT *)(addr))
#define storepix(val, addr) *(__global shortT *)(addr) = convertToShortT(val)
#define shortSize (int)sizeof(shortT)
#else
#define loadpix(addr) convertToIntT(vload3(0, (__global const uchar *)(addr)))
#define storepix(val, addr) vstore3(convertToShortT(val), 0, (__global short *)(addr))
#define shortSize (int)sizeof(short) * cn
#endif
// Smoothing perpendicular to the derivative direction with a triangle filter // Smoothing perpendicular to the derivative direction with a triangle filter
// only support 3x3 Sobel kernel // only support 3x3 Sobel kernel
// h (-1) = 1, h (0) = 2, h (1) = 1 // h (-1) = 1, h (0) = 2, h (1) = 1
...@@ -54,9 +66,7 @@ ...@@ -54,9 +66,7 @@
// dx_buf output dx buffer // dx_buf output dx buffer
// dy_buf output dy buffer // dy_buf output dy buffer
__kernel void __attribute__((reqd_work_group_size(16, 16, 1))) __kernel void calcSobelRowPass(__global const uchar * src, int src_step, int src_offset, int rows, int cols,
calcSobelRowPass
(__global const uchar * src, int src_step, int src_offset, int rows, int cols,
__global uchar * dx_buf, int dx_buf_step, int dx_buf_offset, __global uchar * dx_buf, int dx_buf_step, int dx_buf_offset,
__global uchar * dy_buf, int dy_buf_step, int dy_buf_offset) __global uchar * dy_buf, int dy_buf_step, int dy_buf_offset)
{ {
...@@ -66,34 +76,39 @@ calcSobelRowPass ...@@ -66,34 +76,39 @@ calcSobelRowPass
int lidx = get_local_id(0); int lidx = get_local_id(0);
int lidy = get_local_id(1); int lidy = get_local_id(1);
__local int smem[16][18]; __local intT smem[16][18];
smem[lidy][lidx + 1] = src[mad24(src_step, min(gidy, rows - 1), gidx + src_offset)]; smem[lidy][lidx + 1] = loadpix(src + mad24(src_step, min(gidy, rows - 1), mad24(gidx, cn, src_offset)));
if (lidx == 0) if (lidx == 0)
{ {
smem[lidy][0] = src[mad24(src_step, min(gidy, rows - 1), max(gidx - 1, 0) + src_offset)]; smem[lidy][0] = loadpix(src + mad24(src_step, min(gidy, rows - 1), mad24(max(gidx - 1, 0), cn, src_offset)));
smem[lidy][17] = src[mad24(src_step, min(gidy, rows - 1), min(gidx + 16, cols - 1) + src_offset)]; smem[lidy][17] = loadpix(src + mad24(src_step, min(gidy, rows - 1), mad24(min(gidx + 16, cols - 1), cn, src_offset)));
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (gidy < rows && gidx < cols) if (gidy < rows && gidx < cols)
{ {
*(__global short *)(dx_buf + mad24(gidy, dx_buf_step, gidx * (int)sizeof(short) + dx_buf_offset)) = storepix(smem[lidy][lidx + 2] - smem[lidy][lidx],
smem[lidy][lidx + 2] - smem[lidy][lidx]; dx_buf + mad24(gidy, dx_buf_step, mad24(gidx, shortSize, dx_buf_offset)));
*(__global short *)(dy_buf + mad24(gidy, dy_buf_step, gidx * (int)sizeof(short) + dy_buf_offset)) = storepix(mad24(2, smem[lidy][lidx + 1], smem[lidy][lidx] + smem[lidy][lidx + 2]),
smem[lidy][lidx] + 2 * smem[lidy][lidx + 1] + smem[lidy][lidx + 2]; dy_buf + mad24(gidy, dy_buf_step, mad24(gidx, shortSize, dy_buf_offset)));
} }
} }
inline int calc(short x, short y) #elif defined OP_MAG_BUF || defined OP_MAG
inline intT calc(shortT x, shortT y)
{ {
#ifdef L2GRAD #ifdef L2GRAD
return x * x + y * y; intT intx = convertToIntT(x), inty = convertToIntT(y);
return intx * intx + inty * inty;
#else #else
return (x >= 0 ? x : -x) + (y >= 0 ? y : -y); return convertToIntT( (x >= (shortT)(0) ? x : -x) + (y >= (shortT)(0) ? y : -y) );
#endif #endif
} }
#ifdef OP_MAG
// calculate the magnitude of the filter pass combining both x and y directions // calculate the magnitude of the filter pass combining both x and y directions
// This is the non-buffered version(non-3x3 sobel) // This is the non-buffered version(non-3x3 sobel)
// //
...@@ -112,18 +127,43 @@ __kernel void calcMagnitude(__global const uchar * dxptr, int dx_step, int dx_of ...@@ -112,18 +127,43 @@ __kernel void calcMagnitude(__global const uchar * dxptr, int dx_step, int dx_of
if (y < rows && x < cols) if (y < rows && x < cols)
{ {
int dx_index = mad24(dx_step, y, x * (int)sizeof(short) + dx_offset); int dx_index = mad24(dx_step, y, mad24(x, (int)sizeof(short) * cn, dx_offset));
int dy_index = mad24(dy_step, y, x * (int)sizeof(short) + dy_offset); int dy_index = mad24(dy_step, y, mad24(x, (int)sizeof(short) * cn, dy_offset));
int mag_index = mad24(mag_step, y + 1, (x + 1) * (int)sizeof(int) + mag_offset); int mag_index = mad24(mag_step, y + 1, mad24(x + 1, (int)sizeof(int), mag_offset));
__global const short * dx = (__global const short *)(dxptr + dx_index); __global short * dx = (__global short *)(dxptr + dx_index);
__global const short * dy = (__global const short *)(dyptr + dy_index); __global short * dy = (__global short *)(dyptr + dy_index);
__global int * mag = (__global int *)(magptr + mag_index); __global int * mag = (__global int *)(magptr + mag_index);
mag[0] = calc(dx[0], dy[0]); int cmag = calc(dx[0], dy[0]);
#if cn > 1
short cx = dx[0], cy = dy[0];
int pmag;
#pragma unroll
for (int i = 1; i < cn; ++i)
{
pmag = calc(dx[i], dy[i]);
if (pmag > cmag)
cmag = pmag, cx = dx[i], cy = dy[i];
}
dx[0] = cx, dy[0] = cy;
#endif
mag[0] = cmag;
} }
} }
#elif defined OP_MAG_BUF
#if cn != 3
#define loadpix(addr) *(__global const shortT *)(addr)
#define shortSize (int)sizeof(shortT)
#else
#define loadpix(addr) vload3(0, (__global const short *)(addr))
#define shortSize (int)sizeof(short)*cn
#endif
// calculate the magnitude of the filter pass combining both x and y directions // calculate the magnitude of the filter pass combining both x and y directions
// This is the buffered version(3x3 sobel) // This is the buffered version(3x3 sobel)
// //
...@@ -132,59 +172,64 @@ __kernel void calcMagnitude(__global const uchar * dxptr, int dx_step, int dx_of ...@@ -132,59 +172,64 @@ __kernel void calcMagnitude(__global const uchar * dxptr, int dx_step, int dx_of
// dx direvitive in x direction output // dx direvitive in x direction output
// dy direvitive in y direction output // dy direvitive in y direction output
// mag magnitude direvitive of xy output // mag magnitude direvitive of xy output
__kernel void __attribute__((reqd_work_group_size(16, 16, 1))) __kernel void calcMagnitude_buf(__global const uchar * dx_buf, int dx_buf_step, int dx_buf_offset,
calcMagnitude_buf __global const uchar * dy_buf, int dy_buf_step, int dy_buf_offset,
(__global const short * dx_buf, int dx_buf_step, int dx_buf_offset, __global uchar * dx, int dx_step, int dx_offset,
__global const short * dy_buf, int dy_buf_step, int dy_buf_offset, __global uchar * dy, int dy_step, int dy_offset,
__global short * dx, int dx_step, int dx_offset, __global uchar * mag, int mag_step, int mag_offset, int rows, int cols)
__global short * dy, int dy_step, int dy_offset,
__global int * mag, int mag_step, int mag_offset,
int rows, int cols)
{ {
dx_buf_step /= sizeof(*dx_buf);
dx_buf_offset /= sizeof(*dx_buf);
dy_buf_step /= sizeof(*dy_buf);
dy_buf_offset /= sizeof(*dy_buf);
dx_step /= sizeof(*dx);
dx_offset /= sizeof(*dx);
dy_step /= sizeof(*dy);
dy_offset /= sizeof(*dy);
mag_step /= sizeof(*mag);
mag_offset /= sizeof(*mag);
int gidx = get_global_id(0); int gidx = get_global_id(0);
int gidy = get_global_id(1); int gidy = get_global_id(1);
int lidx = get_local_id(0); int lidx = get_local_id(0);
int lidy = get_local_id(1); int lidy = get_local_id(1);
__local short sdx[18][16]; __local shortT sdx[18][16];
__local short sdy[18][16]; __local shortT sdy[18][16];
sdx[lidy + 1][lidx] = dx_buf[gidx + min(gidy, rows - 1) * dx_buf_step + dx_buf_offset]; sdx[lidy + 1][lidx] = loadpix(dx_buf + mad24(min(gidy, rows - 1), dx_buf_step, mad24(gidx, shortSize, dx_buf_offset)));
sdy[lidy + 1][lidx] = dy_buf[gidx + min(gidy, rows - 1) * dy_buf_step + dy_buf_offset]; sdy[lidy + 1][lidx] = loadpix(dy_buf + mad24(min(gidy, rows - 1), dy_buf_step, mad24(gidx, shortSize, dy_buf_offset)));
if (lidy == 0) if (lidy == 0)
{ {
sdx[0][lidx] = dx_buf[gidx + min(max(gidy - 1, 0), rows - 1) * dx_buf_step + dx_buf_offset]; sdx[0][lidx] = loadpix(dx_buf + mad24(clamp(gidy - 1, 0, rows - 1), dx_buf_step, mad24(gidx, shortSize, dx_buf_offset)));
sdx[17][lidx] = dx_buf[gidx + min(gidy + 16, rows - 1) * dx_buf_step + dx_buf_offset]; sdx[17][lidx] = loadpix(dx_buf + mad24(min(gidy + 16, rows - 1), dx_buf_step, mad24(gidx, shortSize, dx_buf_offset)));
sdy[0][lidx] = dy_buf[gidx + min(max(gidy - 1, 0), rows - 1) * dy_buf_step + dy_buf_offset]; sdy[0][lidx] = loadpix(dy_buf + mad24(clamp(gidy - 1, 0, rows - 1), dy_buf_step, mad24(gidx, shortSize, dy_buf_offset)));
sdy[17][lidx] = dy_buf[gidx + min(gidy + 16, rows - 1) * dy_buf_step + dy_buf_offset]; sdy[17][lidx] = loadpix(dy_buf + mad24(min(gidy + 16, rows - 1), dy_buf_step, mad24(gidx, shortSize, dy_buf_offset)));
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (gidx < cols && gidy < rows) if (gidx < cols && gidy < rows)
{ {
short x = sdx[lidy][lidx] + 2 * sdx[lidy + 1][lidx] + sdx[lidy + 2][lidx]; shortT x = sdx[lidy + 1][lidx] * (shortT)(2) + sdx[lidy][lidx] + sdx[lidy + 2][lidx];
short y = -sdy[lidy][lidx] + sdy[lidy + 2][lidx]; shortT y = -sdy[lidy][lidx] + sdy[lidy + 2][lidx];
#if cn == 1
*(__global short *)(dx + mad24(gidy, dx_step, mad24(gidx, shortSize, dx_offset))) = x;
*(__global short *)(dy + mad24(gidy, dy_step, mad24(gidx, shortSize, dy_offset))) = y;
dx[gidx + gidy * dx_step + dx_offset] = x; *(__global int *)(mag + mad24(gidy + 1, mag_step, mad24(gidx + 1, (int)sizeof(int), mag_offset))) = calc(x, y);
dy[gidx + gidy * dy_step + dy_offset] = y; #elif cn == 3
intT magv = calc(x, y);
short cx = x.x, cy = y.x;
int cmag = magv.x;
mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] = calc(x, y); if (cmag < magv.y)
cx = x.y, cy = y.y, cmag = magv.y;
if (cmag < magv.z)
cx = x.z, cy = y.z, cmag = magv.z;
*(__global short *)(dx + mad24(gidy, dx_step, mad24(gidx, shortSize, dx_offset))) = cx;
*(__global short *)(dy + mad24(gidy, dy_step, mad24(gidx, shortSize, dy_offset))) = cy;
*(__global int *)(mag + mad24(gidy + 1, mag_step, mad24(gidx + 1, (int)sizeof(int), mag_offset))) = cmag;
#endif
} }
} }
#endif
#elif defined OP_MAP
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// 0.4142135623730950488016887242097 is tan(22.5) // 0.4142135623730950488016887242097 is tan(22.5)
...@@ -208,9 +253,7 @@ calcMagnitude_buf ...@@ -208,9 +253,7 @@ calcMagnitude_buf
// mag magnitudes calculated from calcMagnitude function // mag magnitudes calculated from calcMagnitude function
// map output containing raw edge types // map output containing raw edge types
__kernel void __attribute__((reqd_work_group_size(16,16,1))) __kernel void calcMap(__global const uchar * dx, int dx_step, int dx_offset,
calcMap(
__global const uchar * dx, int dx_step, int dx_offset,
__global const uchar * dy, int dy_step, int dy_offset, __global const uchar * dy, int dy_step, int dy_offset,
__global const uchar * mag, int mag_step, int mag_offset, __global const uchar * mag, int mag_step, int mag_offset,
__global uchar * map, int map_step, int map_offset, __global uchar * map, int map_step, int map_offset,
...@@ -227,7 +270,7 @@ calcMap( ...@@ -227,7 +270,7 @@ calcMap(
int grp_idx = get_global_id(0) & 0xFFFFF0; int grp_idx = get_global_id(0) & 0xFFFFF0;
int grp_idy = get_global_id(1) & 0xFFFFF0; int grp_idy = get_global_id(1) & 0xFFFFF0;
int tid = lidx + lidy * 16; int tid = mad24(lidy, 16, lidx);
int lx = tid % 18; int lx = tid % 18;
int ly = tid / 18; int ly = tid / 18;
...@@ -250,8 +293,8 @@ calcMap( ...@@ -250,8 +293,8 @@ calcMap(
if (m > low_thresh) if (m > low_thresh)
{ {
short xs = *(__global const short *)(dx + mad24(gidy, dx_step, dx_offset + (int)sizeof(short) * gidx)); short xs = *(__global const short *)(dx + mad24(gidy, dx_step, mad24(gidx, (int)sizeof(short) * cn, dx_offset)));
short ys = *(__global const short *)(dy + mad24(gidy, dy_step, dy_offset + (int)sizeof(short) * gidx)); short ys = *(__global const short *)(dy + mad24(gidy, dy_step, mad24(gidx, (int)sizeof(short) * cn, dy_offset)));
int x = abs(xs), y = abs(ys); int x = abs(xs), y = abs(ys);
int tg22x = x * TG22; int tg22x = x * TG22;
...@@ -278,13 +321,15 @@ calcMap( ...@@ -278,13 +321,15 @@ calcMap(
} }
} }
} }
*(__global int *)(map + mad24(map_step, gidy + 1, (gidx + 1) * (int)sizeof(int) + map_offset)) = edge_type; *(__global int *)(map + mad24(map_step, gidy + 1, mad24(gidx + 1, (int)sizeof(int), + map_offset))) = edge_type;
} }
} }
#undef CANNY_SHIFT #undef CANNY_SHIFT
#undef TG22 #undef TG22
#elif defined OP_HYST_LOCAL
struct PtrStepSz struct PtrStepSz
{ {
__global uchar * ptr; __global uchar * ptr;
...@@ -312,9 +357,7 @@ inline void set(struct PtrStepSz data, int y, int x, int value) ...@@ -312,9 +357,7 @@ inline void set(struct PtrStepSz data, int y, int x, int value)
// stack the potiential edge points found in this kernel call // stack the potiential edge points found in this kernel call
// counter the number of potiential edge points // counter the number of potiential edge points
__kernel void __attribute__((reqd_work_group_size(16,16,1))) __kernel void edgesHysteresisLocal(__global uchar * map_ptr, int map_step, int map_offset,
edgesHysteresisLocal
(__global uchar * map_ptr, int map_step, int map_offset,
__global ushort2 * st, __global unsigned int * counter, __global ushort2 * st, __global unsigned int * counter,
int rows, int cols) int rows, int cols)
{ {
...@@ -402,6 +445,8 @@ edgesHysteresisLocal ...@@ -402,6 +445,8 @@ edgesHysteresisLocal
} }
} }
#elif defined OP_HYST_GLOBAL
__constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1}; __constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
__constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1}; __constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
...@@ -409,8 +454,7 @@ __constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1}; ...@@ -409,8 +454,7 @@ __constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
#define stack_size 512 #define stack_size 512
#define map_index mad24(map_step, pos.y, pos.x * (int)sizeof(int)) #define map_index mad24(map_step, pos.y, pos.x * (int)sizeof(int))
__kernel void __attribute__((reqd_work_group_size(128, 1, 1))) __kernel void edgesHysteresisGlobal(__global uchar * map, int map_step, int map_offset,
edgesHysteresisGlobal(__global uchar * map, int map_step, int map_offset,
__global ushort2 * st1, __global ushort2 * st2, __global int * counter, __global ushort2 * st1, __global ushort2 * st2, __global int * counter,
int rows, int cols, int count) int rows, int cols, int count)
{ {
...@@ -492,6 +536,8 @@ edgesHysteresisGlobal(__global uchar * map, int map_step, int map_offset, ...@@ -492,6 +536,8 @@ edgesHysteresisGlobal(__global uchar * map, int map_step, int map_offset,
#undef map_index #undef map_index
#undef stack_size #undef stack_size
#elif defined OP_EDGES
// Get the edge result. egde type of value 2 will be marked as an edge point and set to 255. Otherwise 0. // Get the edge result. egde type of value 2 will be marked as an edge point and set to 255. Otherwise 0.
// map edge type mappings // map edge type mappings
// dst edge output // dst edge output
...@@ -504,7 +550,7 @@ __kernel void getEdges(__global const uchar * mapptr, int map_step, int map_offs ...@@ -504,7 +550,7 @@ __kernel void getEdges(__global const uchar * mapptr, int map_step, int map_offs
if (y < rows && x < cols) if (y < rows && x < cols)
{ {
int map_index = mad24(map_step, y + 1, (x + 1) * (int)sizeof(int) + map_offset); int map_index = mad24(map_step, y + 1, mad24(x + 1, (int)sizeof(int), map_offset));
int dst_index = mad24(dst_step, y, x + dst_offset); int dst_index = mad24(dst_step, y, x + dst_offset);
__global const int * map = (__global const int *)(mapptr + map_index); __global const int * map = (__global const int *)(mapptr + map_index);
...@@ -512,3 +558,5 @@ __kernel void getEdges(__global const uchar * mapptr, int map_step, int map_offs ...@@ -512,3 +558,5 @@ __kernel void getEdges(__global const uchar * mapptr, int map_step, int map_offs
dst[dst_index] = (uchar)(-(map[0] >> 1)); dst[dst_index] = (uchar)(-(map[0] >> 1));
} }
} }
#endif
...@@ -58,9 +58,9 @@ IMPLEMENT_PARAM_CLASS(AppertureSize, int) ...@@ -58,9 +58,9 @@ IMPLEMENT_PARAM_CLASS(AppertureSize, int)
IMPLEMENT_PARAM_CLASS(L2gradient, bool) IMPLEMENT_PARAM_CLASS(L2gradient, bool)
IMPLEMENT_PARAM_CLASS(UseRoi, bool) IMPLEMENT_PARAM_CLASS(UseRoi, bool)
PARAM_TEST_CASE(Canny, AppertureSize, L2gradient, UseRoi) PARAM_TEST_CASE(Canny, Channels, AppertureSize, L2gradient, UseRoi)
{ {
int apperture_size; int cn, apperture_size;
bool useL2gradient, use_roi; bool useL2gradient, use_roi;
TEST_DECLARE_INPUT_PARAMETER(src); TEST_DECLARE_INPUT_PARAMETER(src);
...@@ -68,19 +68,19 @@ PARAM_TEST_CASE(Canny, AppertureSize, L2gradient, UseRoi) ...@@ -68,19 +68,19 @@ PARAM_TEST_CASE(Canny, AppertureSize, L2gradient, UseRoi)
virtual void SetUp() virtual void SetUp()
{ {
apperture_size = GET_PARAM(0); cn = GET_PARAM(0);
useL2gradient = GET_PARAM(1); apperture_size = GET_PARAM(1);
use_roi = GET_PARAM(2); useL2gradient = GET_PARAM(2);
use_roi = GET_PARAM(3);
} }
void generateTestData() void generateTestData()
{ {
Mat img = readImage("shared/fruits.png", IMREAD_GRAYSCALE); Mat img = readImageType("shared/fruits.png", CV_8UC(cn));
ASSERT_FALSE(img.empty()) << "cann't load shared/fruits.png"; ASSERT_FALSE(img.empty()) << "cann't load shared/fruits.png";
Size roiSize = img.size(); Size roiSize = img.size();
int type = img.type(); int type = img.type();
ASSERT_EQ(CV_8UC1, type);
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, type, 2, 100); randomSubMat(src, src_roi, roiSize, srcBorder, type, 2, 100);
...@@ -108,6 +108,7 @@ OCL_TEST_P(Canny, Accuracy) ...@@ -108,6 +108,7 @@ OCL_TEST_P(Canny, Accuracy)
} }
OCL_INSTANTIATE_TEST_CASE_P(ImgProc, Canny, testing::Combine( OCL_INSTANTIATE_TEST_CASE_P(ImgProc, Canny, testing::Combine(
testing::Values(1, 3),
testing::Values(AppertureSize(3), AppertureSize(5)), testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true)), testing::Values(L2gradient(false), L2gradient(true)),
testing::Values(UseRoi(false), UseRoi(true)))); testing::Values(UseRoi(false), UseRoi(true))));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment