Commit d8c01828 authored by Andrey Pavlenko's avatar Andrey Pavlenko Committed by OpenCV Buildbot

Merge pull request #2491 from ilya-lavrenov:tapi_sep_filter

parents 157f35ef 2875ce60
......@@ -118,6 +118,8 @@ public:
virtual int kind() const;
virtual int dims(int i=-1) const;
virtual int cols(int i=-1) const;
virtual int rows(int i=-1) const;
virtual Size size(int i=-1) const;
virtual int sizend(int* sz, int i=-1) const;
virtual bool sameSize(const _InputArray& arr) const;
......
......@@ -592,7 +592,7 @@ protected:
CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf);
CV_EXPORTS const char* typeToStr(int t);
CV_EXPORTS const char* memopTypeToStr(int t);
CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1);
CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
......
......@@ -1416,6 +1416,16 @@ int _InputArray::kind() const
return flags & KIND_MASK;
}
int _InputArray::rows(int i) const
{
return size(i).height;
}
int _InputArray::cols(int i) const
{
return size(i).width;
}
Size _InputArray::size(int i) const
{
int k = kind();
......@@ -2078,45 +2088,45 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
}
void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransposed, int fixedDepthMask) const
void _OutputArray::create(int _rows, int _cols, int mtype, int i, bool allowTransposed, int fixedDepthMask) const
{
int k = kind();
if( k == MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((Mat*)obj)->size.operator()() == Size(cols, rows));
CV_Assert(!fixedSize() || ((Mat*)obj)->size.operator()() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((Mat*)obj)->type() == mtype);
((Mat*)obj)->create(rows, cols, mtype);
((Mat*)obj)->create(_rows, _cols, mtype);
return;
}
if( k == UMAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((UMat*)obj)->size.operator()() == Size(cols, rows));
CV_Assert(!fixedSize() || ((UMat*)obj)->size.operator()() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((UMat*)obj)->type() == mtype);
((UMat*)obj)->create(rows, cols, mtype);
((UMat*)obj)->create(_rows, _cols, mtype);
return;
}
if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(cols, rows));
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
((cuda::GpuMat*)obj)->create(rows, cols, mtype);
((cuda::GpuMat*)obj)->create(_rows, _cols, mtype);
return;
}
if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == Size(cols, rows));
CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype);
((ogl::Buffer*)obj)->create(rows, cols, mtype);
((ogl::Buffer*)obj)->create(_rows, _cols, mtype);
return;
}
if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == Size(cols, rows));
CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((cuda::CudaMem*)obj)->type() == mtype);
((cuda::CudaMem*)obj)->create(rows, cols, mtype);
((cuda::CudaMem*)obj)->create(_rows, _cols, mtype);
return;
}
int sizes[] = {rows, cols};
int sizes[] = {_rows, _cols};
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
}
......
......@@ -4307,7 +4307,7 @@ static std::string kerToStr(const Mat & k)
return stream.str();
}
String kernelToStr(InputArray _kernel, int ddepth)
String kernelToStr(InputArray _kernel, int ddepth, const char * name)
{
Mat kernel = _kernel.getMat().reshape(1, 1);
......@@ -4318,13 +4318,13 @@ String kernelToStr(InputArray _kernel, int ddepth)
if (ddepth != depth)
kernel.convertTo(kernel, ddepth);
typedef std::string (*func_t)(const Mat &);
static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>,kerToStr<short>,
typedef std::string (* func_t)(const Mat &);
static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
const func_t func = funcs[depth];
CV_Assert(func != 0);
return cv::format(" -D COEFF=%s", func(kernel).c_str());
return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
}
#define PROCESS_SRC(src) \
......
......@@ -211,7 +211,7 @@ OCL_PERF_TEST_P(SobelFixture, Sobel,
OCL_TEST_CYCLE() cv::Sobel(src, dst, -1, dx, dy);
SANITY_CHECK(dst);
SANITY_CHECK(dst, 1e-6);
}
///////////// Scharr ////////////////////////
......
This diff is collapsed.
......@@ -34,47 +34,36 @@
//
//
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
#define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1)
#define RADIUS 1
#if CN ==1
#define ALIGN (((RADIUS)+3)>>2<<2)
#elif CN==2
#define ALIGN (((RADIUS)+1)>>1<<1)
#elif CN==3
#define ALIGN (((RADIUS)+3)>>2<<2)
#elif CN==4
#define ALIGN (RADIUS)
#define READ_TIMES_ROW ((2*(RADIUS+LSIZE0)-1)/LSIZE0)
#endif
#define noconvert
/**********************************************************************************
These kernels are written for separable filters such as Sobel, Scharr, GaussianBlur.
Now(6/29/2011) the kernels only support 8U data type and the anchor of the convovle
kernel must be in the center. ROI is not supported either.
Each kernels read 4 elements(not 4 pixels), save them to LDS and read the data needed
from LDS to calculate the result.
The length of the convovle kernel supported is only related to the MAX size of LDS,
which is HW related.
Niko
6/29/2011
The info above maybe obsolete.
***********************************************************************************/
#if CN != 3
#define loadpix(addr) *(__global const srcT *)(addr)
#define storepix(val, addr) *(__global dstT *)(addr) = val
#define SRCSIZE (int)sizeof(srcT)
#define DSTSIZE (int)sizeof(dstT)
#else
#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
#define SRCSIZE (int)sizeof(srcT1)*3
#define DSTSIZE (int)sizeof(dstT1)*3
#endif
#define DIG(a) a,
__constant float mat_kernel[] = { COEFF };
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
(__global const GENTYPE_SRC * restrict src,
const int src_step_in_pixel,
const int src_whole_cols,
const int src_whole_rows,
__global GENTYPE_DST * dst,
const int dst_offset_in_pixel,
const int dst_step_in_pixel,
const int dst_cols,
const int dst_rows)
__kernel void col_filter(__global const uchar * src, int src_step, int src_offset, int src_whole_rows, int src_whole_cols,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
......@@ -82,38 +71,38 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
int l_x = get_local_id(0);
int l_y = get_local_id(1);
int start_addr = mad24(y, src_step_in_pixel, x);
int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);
int start_addr = mad24(y, src_step, x * SRCSIZE);
int end_addr = mad24(src_whole_rows - 1, src_step, src_whole_cols * SRCSIZE);
int i;
GENTYPE_SRC sum, temp[READ_TIMES_COL];
__local GENTYPE_SRC LDS_DAT[LSIZE1 * READ_TIMES_COL][LSIZE0 + 1];
srcT sum, temp[READ_TIMES_COL];
__local srcT LDS_DAT[LSIZE1 * READ_TIMES_COL][LSIZE0 + 1];
//read pixels from src
for(i = 0;i<READ_TIMES_COL;i++)
// read pixels from src
for (int i = 0; i < READ_TIMES_COL; ++i)
{
int current_addr = start_addr+i*LSIZE1*src_step_in_pixel;
int current_addr = mad24(i, LSIZE1 * src_step, start_addr);
current_addr = current_addr < end_addr ? current_addr : 0;
temp[i] = src[current_addr];
}
//save pixels to lds
for(i = 0;i<READ_TIMES_COL;i++)
{
LDS_DAT[l_y+i*LSIZE1][l_x] = temp[i];
temp[i] = loadpix(src + current_addr);
}
// save pixels to lds
for (int i = 0; i < READ_TIMES_COL; ++i)
LDS_DAT[mad24(i, LSIZE1, l_y)][l_x] = temp[i];
barrier(CLK_LOCAL_MEM_FENCE);
//read pixels from lds and calculate the result
sum = LDS_DAT[l_y+RADIUSY][l_x]*mat_kernel[RADIUSY];
for(i=1;i<=RADIUSY;i++)
// read pixels from lds and calculate the result
sum = LDS_DAT[l_y + RADIUSY][l_x] * mat_kernel[RADIUSY];
for (int i = 1; i <= RADIUSY; ++i)
{
temp[0]=LDS_DAT[l_y+RADIUSY-i][l_x];
temp[1]=LDS_DAT[l_y+RADIUSY+i][l_x];
sum += temp[0] * mat_kernel[RADIUSY-i]+temp[1] * mat_kernel[RADIUSY+i];
temp[0] = LDS_DAT[l_y + RADIUSY - i][l_x];
temp[1] = LDS_DAT[l_y + RADIUSY + i][l_x];
sum += mad(temp[0], mat_kernel[RADIUSY - i], temp[1] * mat_kernel[RADIUSY + i]);
}
//write the result to dst
if((x<dst_cols) & (y<dst_rows))
// write the result to dst
if (x < dst_cols && y < dst_rows)
{
start_addr = mad24(y, dst_step_in_pixel, x + dst_offset_in_pixel);
dst[start_addr] = convert_to_DST(sum);
start_addr = mad24(y, dst_step, mad24(DSTSIZE, x, dst_offset));
storepix(convertToDstT(sum), dst + start_addr);
}
}
This diff is collapsed.
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2014, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro for border type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
#ifdef BORDER_CONSTANT
// CCCCCC|abcdefgh|CCCCCCC
#define EXTRAPOLATE(x, maxV)
#elif defined BORDER_REPLICATE
// aaaaaa|abcdefgh|hhhhhhh
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = max(min((x), (maxV) - 1), 0); \
}
#elif defined BORDER_WRAP
// cdefgh|abcdefgh|abcdefg
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = ( (x) + (maxV) ) % (maxV); \
}
#elif defined BORDER_REFLECT
// fedcba|abcdefgh|hgfedcb
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min(((maxV)-1)*2-(x)+1, max((x),-(x)-1) ); \
}
#elif defined BORDER_REFLECT_101 || defined BORDER_REFLECT101
// gfedcb|abcdefgh|gfedcba
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min(((maxV)-1)*2-(x), max((x),-(x)) ); \
}
#else
#error No extrapolation method
#endif
#if CN != 3
#define loadpix(addr) *(__global const srcT *)(addr)
#define storepix(val, addr) *(__global dstT *)(addr) = val
#define SRCSIZE (int)sizeof(srcT)
#define DSTSIZE (int)sizeof(dstT)
#else
#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
#define SRCSIZE (int)sizeof(srcT1)*3
#define DSTSIZE (int)sizeof(dstT1)*3
#endif
#define SRC(_x,_y) convertToWT(loadpix(Src + mad24(_y, src_step, SRCSIZE * _x)))
#ifdef BORDER_CONSTANT
// CCCCCC|abcdefgh|CCCCCCC
#define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
#else
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
#endif
#define noconvert
// horizontal and vertical filter kernels
// should be defined on host during compile time to avoid overhead
#define DIG(a) a,
__constant float mat_kernelX[] = { KERNEL_MATRIX_X };
__constant float mat_kernelY[] = { KERNEL_MATRIX_Y };
__kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int srcOffsetY, int height, int width,
__global uchar* Dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
// RADIUSX, RADIUSY are filter dimensions
// BLK_X, BLK_Y are local wrogroup sizes
// all these should be defined on host during compile time
// first lsmem array for source pixels used in first pass,
// second lsmemDy for storing first pass results
__local WT lsmem[BLK_Y + 2 * RADIUSY][BLK_X + 2 * RADIUSX];
__local WT lsmemDy[BLK_Y][BLK_X + 2 * RADIUSX];
// get local and global ids - used as image and local memory array indexes
int lix = get_local_id(0);
int liy = get_local_id(1);
int x = get_global_id(0);
int y = get_global_id(1);
// calculate pixel position in source image taking image offset into account
int srcX = x + srcOffsetX - RADIUSX;
int srcY = y + srcOffsetY - RADIUSY;
int xb = srcX;
int yb = srcY;
// extrapolate coordinates, if needed
// and read my own source pixel into local memory
// with account for extra border pixels, which will be read by starting workitems
int clocY = liy;
int cSrcY = srcY;
do
{
int yb = cSrcY;
EXTRAPOLATE(yb, (height));
int clocX = lix;
int cSrcX = srcX;
do
{
int xb = cSrcX;
EXTRAPOLATE(xb,(width));
lsmem[clocY][clocX] = ELEM(xb, yb, (width), (height), 0 );
clocX += BLK_X;
cSrcX += BLK_X;
}
while(clocX < BLK_X+(RADIUSX*2));
clocY += BLK_Y;
cSrcY += BLK_Y;
}
while (clocY < BLK_Y+(RADIUSY*2));
barrier(CLK_LOCAL_MEM_FENCE);
// do vertical filter pass
// and store intermediate results to second local memory array
int i, clocX = lix;
WT sum = 0.0f;
do
{
sum = 0.0f;
for (i=0; i<=2*RADIUSY; i++)
sum = mad(lsmem[liy+i][clocX], mat_kernelY[i], sum);
lsmemDy[liy][clocX] = sum;
clocX += BLK_X;
}
while(clocX < BLK_X+(RADIUSX*2));
barrier(CLK_LOCAL_MEM_FENCE);
// if this pixel happened to be out of image borders because of global size rounding,
// then just return
if( x >= dst_cols || y >=dst_rows )
return;
// do second horizontal filter pass
// and calculate final result
sum = 0.0f;
for (i=0; i<=2*RADIUSX; i++)
sum = mad(lsmemDy[liy][lix+i], mat_kernelX[i], sum);
//store result into destination image
storepix(convertToDstT(sum), Dst + mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset)));
}
......@@ -306,7 +306,7 @@ OCL_TEST_P(MorphologyEx, Mat)
(int)BORDER_REFLECT|BORDER_ISOLATED, (int)BORDER_WRAP|BORDER_ISOLATED, \
(int)BORDER_REFLECT_101|BORDER_ISOLATED*/) // WRAP and ISOLATED are not supported by cv:: version
#define FILTER_TYPES Values(CV_8UC1, CV_8UC2, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4)
#define FILTER_TYPES Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4)
OCL_INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
Values((MatType)CV_8UC1),
......
......@@ -75,33 +75,24 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
void random_roi()
{
Size ksize = randomSize(kernelMinSize, kernelMaxSize);
if (1 != (ksize.width % 2))
if (1 != ksize.width % 2)
ksize.width++;
if (1 != (ksize.height % 2))
if (1 != ksize.height % 2)
ksize.height++;
Mat temp = randomMat(Size(ksize.width, 1), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE);
cv::normalize(temp, kernelX, 1.0, 0.0, NORM_L1);
temp = randomMat(Size(1, ksize.height), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE);
cv::normalize(temp, kernelY, 1.0, 0.0, NORM_L1);
Size roiSize = randomSize(ksize.width, MAX_VALUE, ksize.height, MAX_VALUE);
int rest = roiSize.width % 4;
if (0 != rest)
roiSize.width += (4 - rest);
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
rest = srcBorder.lef % 4;
if (0 != rest)
srcBorder.lef += (4 - rest);
rest = srcBorder.rig % 4;
if (0 != rest)
srcBorder.rig += (4 - rest);
randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
anchor.x = -1;
anchor.y = -1;
anchor.x = anchor.y = -1;
UMAT_UPLOAD_INPUT_PARAMETER(src);
UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
......@@ -115,7 +106,7 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
OCL_TEST_P(SepFilter2D, Mat)
{
for (int j = 0; j < test_loop_times; j++)
for (int j = 0; j < test_loop_times + 3; j++)
{
random_roi();
......@@ -126,11 +117,10 @@ OCL_TEST_P(SepFilter2D, Mat)
}
}
OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SepFilter2D,
Combine(
Values(CV_8U, CV_32F),
Values(1, 4),
OCL_ALL_CHANNELS,
Values(
(BorderType)BORDER_CONSTANT,
(BorderType)BORDER_REPLICATE,
......
......@@ -1014,10 +1014,8 @@ namespace
return;
#ifdef HAVE_OPENCL
if (isUmat_ && curFrame_.channels() == 1)
if (isUmat_)
curFrame_.copyTo(ucurFrame_);
else
isUmat_ = false;
#endif
++storePos_;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment