Commit d8c01828 authored by Andrey Pavlenko, committed by OpenCV Buildbot

Merge pull request #2491 from ilya-lavrenov:tapi_sep_filter

parents 157f35ef 2875ce60
...@@ -118,6 +118,8 @@ public: ...@@ -118,6 +118,8 @@ public:
virtual int kind() const; virtual int kind() const;
virtual int dims(int i=-1) const; virtual int dims(int i=-1) const;
virtual int cols(int i=-1) const;
virtual int rows(int i=-1) const;
virtual Size size(int i=-1) const; virtual Size size(int i=-1) const;
virtual int sizend(int* sz, int i=-1) const; virtual int sizend(int* sz, int i=-1) const;
virtual bool sameSize(const _InputArray& arr) const; virtual bool sameSize(const _InputArray& arr) const;
......
...@@ -592,7 +592,7 @@ protected: ...@@ -592,7 +592,7 @@ protected:
CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf); CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf);
CV_EXPORTS const char* typeToStr(int t); CV_EXPORTS const char* typeToStr(int t);
CV_EXPORTS const char* memopTypeToStr(int t); CV_EXPORTS const char* memopTypeToStr(int t);
CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1); CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info); CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
......
...@@ -1416,6 +1416,16 @@ int _InputArray::kind() const ...@@ -1416,6 +1416,16 @@ int _InputArray::kind() const
return flags & KIND_MASK; return flags & KIND_MASK;
} }
int _InputArray::rows(int i) const
{
return size(i).height;
}
int _InputArray::cols(int i) const
{
return size(i).width;
}
Size _InputArray::size(int i) const Size _InputArray::size(int i) const
{ {
int k = kind(); int k = kind();
...@@ -2078,45 +2088,45 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int ...@@ -2078,45 +2088,45 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask); create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
} }
void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransposed, int fixedDepthMask) const void _OutputArray::create(int _rows, int _cols, int mtype, int i, bool allowTransposed, int fixedDepthMask) const
{ {
int k = kind(); int k = kind();
if( k == MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 ) if( k == MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{ {
CV_Assert(!fixedSize() || ((Mat*)obj)->size.operator()() == Size(cols, rows)); CV_Assert(!fixedSize() || ((Mat*)obj)->size.operator()() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((Mat*)obj)->type() == mtype); CV_Assert(!fixedType() || ((Mat*)obj)->type() == mtype);
((Mat*)obj)->create(rows, cols, mtype); ((Mat*)obj)->create(_rows, _cols, mtype);
return; return;
} }
if( k == UMAT && i < 0 && !allowTransposed && fixedDepthMask == 0 ) if( k == UMAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{ {
CV_Assert(!fixedSize() || ((UMat*)obj)->size.operator()() == Size(cols, rows)); CV_Assert(!fixedSize() || ((UMat*)obj)->size.operator()() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((UMat*)obj)->type() == mtype); CV_Assert(!fixedType() || ((UMat*)obj)->type() == mtype);
((UMat*)obj)->create(rows, cols, mtype); ((UMat*)obj)->create(_rows, _cols, mtype);
return; return;
} }
if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 ) if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{ {
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(cols, rows)); CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype); CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
((cuda::GpuMat*)obj)->create(rows, cols, mtype); ((cuda::GpuMat*)obj)->create(_rows, _cols, mtype);
return; return;
} }
if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 ) if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{ {
CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == Size(cols, rows)); CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype); CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype);
((ogl::Buffer*)obj)->create(rows, cols, mtype); ((ogl::Buffer*)obj)->create(_rows, _cols, mtype);
return; return;
} }
if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 ) if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{ {
CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == Size(cols, rows)); CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((cuda::CudaMem*)obj)->type() == mtype); CV_Assert(!fixedType() || ((cuda::CudaMem*)obj)->type() == mtype);
((cuda::CudaMem*)obj)->create(rows, cols, mtype); ((cuda::CudaMem*)obj)->create(_rows, _cols, mtype);
return; return;
} }
int sizes[] = {rows, cols}; int sizes[] = {_rows, _cols};
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask); create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
} }
......
...@@ -4307,7 +4307,7 @@ static std::string kerToStr(const Mat & k) ...@@ -4307,7 +4307,7 @@ static std::string kerToStr(const Mat & k)
return stream.str(); return stream.str();
} }
String kernelToStr(InputArray _kernel, int ddepth) String kernelToStr(InputArray _kernel, int ddepth, const char * name)
{ {
Mat kernel = _kernel.getMat().reshape(1, 1); Mat kernel = _kernel.getMat().reshape(1, 1);
...@@ -4318,13 +4318,13 @@ String kernelToStr(InputArray _kernel, int ddepth) ...@@ -4318,13 +4318,13 @@ String kernelToStr(InputArray _kernel, int ddepth)
if (ddepth != depth) if (ddepth != depth)
kernel.convertTo(kernel, ddepth); kernel.convertTo(kernel, ddepth);
typedef std::string (*func_t)(const Mat &); typedef std::string (* func_t)(const Mat &);
static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>,kerToStr<short>, static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 }; kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
const func_t func = funcs[depth]; const func_t func = funcs[depth];
CV_Assert(func != 0); CV_Assert(func != 0);
return cv::format(" -D COEFF=%s", func(kernel).c_str()); return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
} }
#define PROCESS_SRC(src) \ #define PROCESS_SRC(src) \
......
...@@ -211,7 +211,7 @@ OCL_PERF_TEST_P(SobelFixture, Sobel, ...@@ -211,7 +211,7 @@ OCL_PERF_TEST_P(SobelFixture, Sobel,
OCL_TEST_CYCLE() cv::Sobel(src, dst, -1, dx, dy); OCL_TEST_CYCLE() cv::Sobel(src, dst, -1, dx, dy);
SANITY_CHECK(dst); SANITY_CHECK(dst, 1e-6);
} }
///////////// Scharr //////////////////////// ///////////// Scharr ////////////////////////
......
This diff is collapsed.
...@@ -34,47 +34,36 @@ ...@@ -34,47 +34,36 @@
// //
// //
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
#define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1) #define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1)
#define RADIUS 1 #define RADIUS 1
#if CN ==1
#define ALIGN (((RADIUS)+3)>>2<<2)
#elif CN==2
#define ALIGN (((RADIUS)+1)>>1<<1)
#elif CN==3
#define ALIGN (((RADIUS)+3)>>2<<2)
#elif CN==4
#define ALIGN (RADIUS)
#define READ_TIMES_ROW ((2*(RADIUS+LSIZE0)-1)/LSIZE0)
#endif
#define noconvert #define noconvert
/********************************************************************************** #if CN != 3
These kernels are written for separable filters such as Sobel, Scharr, GaussianBlur. #define loadpix(addr) *(__global const srcT *)(addr)
Now(6/29/2011) the kernels only support 8U data type and the anchor of the convovle #define storepix(val, addr) *(__global dstT *)(addr) = val
kernel must be in the center. ROI is not supported either. #define SRCSIZE (int)sizeof(srcT)
Each kernels read 4 elements(not 4 pixels), save them to LDS and read the data needed #define DSTSIZE (int)sizeof(dstT)
from LDS to calculate the result. #else
The length of the convovle kernel supported is only related to the MAX size of LDS, #define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
which is HW related. #define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
Niko #define SRCSIZE (int)sizeof(srcT1)*3
6/29/2011 #define DSTSIZE (int)sizeof(dstT1)*3
The info above maybe obsolete. #endif
***********************************************************************************/
#define DIG(a) a, #define DIG(a) a,
__constant float mat_kernel[] = { COEFF }; __constant float mat_kernel[] = { COEFF };
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter __kernel void col_filter(__global const uchar * src, int src_step, int src_offset, int src_whole_rows, int src_whole_cols,
(__global const GENTYPE_SRC * restrict src, __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
const int src_step_in_pixel,
const int src_whole_cols,
const int src_whole_rows,
__global GENTYPE_DST * dst,
const int dst_offset_in_pixel,
const int dst_step_in_pixel,
const int dst_cols,
const int dst_rows)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
...@@ -82,38 +71,38 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter ...@@ -82,38 +71,38 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
int l_x = get_local_id(0); int l_x = get_local_id(0);
int l_y = get_local_id(1); int l_y = get_local_id(1);
int start_addr = mad24(y, src_step_in_pixel, x); int start_addr = mad24(y, src_step, x * SRCSIZE);
int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols); int end_addr = mad24(src_whole_rows - 1, src_step, src_whole_cols * SRCSIZE);
int i; srcT sum, temp[READ_TIMES_COL];
GENTYPE_SRC sum, temp[READ_TIMES_COL]; __local srcT LDS_DAT[LSIZE1 * READ_TIMES_COL][LSIZE0 + 1];
__local GENTYPE_SRC LDS_DAT[LSIZE1 * READ_TIMES_COL][LSIZE0 + 1];
//read pixels from src // read pixels from src
for(i = 0;i<READ_TIMES_COL;i++) for (int i = 0; i < READ_TIMES_COL; ++i)
{ {
int current_addr = start_addr+i*LSIZE1*src_step_in_pixel; int current_addr = mad24(i, LSIZE1 * src_step, start_addr);
current_addr = current_addr < end_addr ? current_addr : 0; current_addr = current_addr < end_addr ? current_addr : 0;
temp[i] = src[current_addr]; temp[i] = loadpix(src + current_addr);
}
//save pixels to lds
for(i = 0;i<READ_TIMES_COL;i++)
{
LDS_DAT[l_y+i*LSIZE1][l_x] = temp[i];
} }
// save pixels to lds
for (int i = 0; i < READ_TIMES_COL; ++i)
LDS_DAT[mad24(i, LSIZE1, l_y)][l_x] = temp[i];
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
//read pixels from lds and calculate the result
sum = LDS_DAT[l_y+RADIUSY][l_x]*mat_kernel[RADIUSY]; // read pixels from lds and calculate the result
for(i=1;i<=RADIUSY;i++) sum = LDS_DAT[l_y + RADIUSY][l_x] * mat_kernel[RADIUSY];
for (int i = 1; i <= RADIUSY; ++i)
{ {
temp[0]=LDS_DAT[l_y+RADIUSY-i][l_x]; temp[0] = LDS_DAT[l_y + RADIUSY - i][l_x];
temp[1]=LDS_DAT[l_y+RADIUSY+i][l_x]; temp[1] = LDS_DAT[l_y + RADIUSY + i][l_x];
sum += temp[0] * mat_kernel[RADIUSY-i]+temp[1] * mat_kernel[RADIUSY+i]; sum += mad(temp[0], mat_kernel[RADIUSY - i], temp[1] * mat_kernel[RADIUSY + i]);
} }
//write the result to dst
if((x<dst_cols) & (y<dst_rows)) // write the result to dst
if (x < dst_cols && y < dst_rows)
{ {
start_addr = mad24(y, dst_step_in_pixel, x + dst_offset_in_pixel); start_addr = mad24(y, dst_step, mad24(DSTSIZE, x, dst_offset));
dst[start_addr] = convert_to_DST(sum); storepix(convertToDstT(sum), dst + start_addr);
} }
} }
This diff is collapsed.
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2014, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro for border type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
// EXTRAPOLATE(x, maxV) rewrites the coordinate x in place so that it refers to a
// pixel inside [0, maxV) according to the border mode selected at build time.
// Each variant is wrapped in do { } while (0) so that the macro behaves as a
// single statement (safe inside an unbraced if/else) and requires a trailing ';'.
#ifdef BORDER_CONSTANT
// CCCCCC|abcdefgh|CCCCCCC
// The coordinate is left untouched; out-of-range positions are handled by ELEM.
#define EXTRAPOLATE(x, maxV)
#elif defined BORDER_REPLICATE
// aaaaaa|abcdefgh|hhhhhhh
#define EXTRAPOLATE(x, maxV) \
    do \
    { \
        (x) = max(min((x), (maxV) - 1), 0); \
    } while (0)
#elif defined BORDER_WRAP
// cdefgh|abcdefgh|abcdefg
// NOTE(review): the result is only non-negative while x >= -maxV.
#define EXTRAPOLATE(x, maxV) \
    do \
    { \
        (x) = ( (x) + (maxV) ) % (maxV); \
    } while (0)
#elif defined BORDER_REFLECT
// fedcba|abcdefgh|hgfedcb
#define EXTRAPOLATE(x, maxV) \
    do \
    { \
        (x) = min(((maxV)-1)*2-(x)+1, max((x),-(x)-1) ); \
    } while (0)
#elif defined BORDER_REFLECT_101 || defined BORDER_REFLECT101
// gfedcb|abcdefgh|gfedcba
#define EXTRAPOLATE(x, maxV) \
    do \
    { \
        (x) = min(((maxV)-1)*2-(x), max((x),-(x)) ); \
    } while (0)
#else
#error No extrapolation method
#endif
// Pixel access helpers working on byte addresses.
//   loadpix(addr)       - reads one source pixel located at byte address addr
//   storepix(val, addr) - writes one destination pixel to byte address addr
//   SRCSIZE / DSTSIZE   - size in bytes of one source / destination pixel
// Three-channel pixels go through vload3/vstore3 on the scalar element type
// (srcT1/dstT1) and are sized as 3 elements, i.e. they are treated as tightly
// packed rather than as padded 3-component vectors.
// All expansions are fully parenthesized so they compose safely inside larger
// expressions.
#if CN != 3
#define loadpix(addr) (*(__global const srcT *)(addr))
#define storepix(val, addr) (*(__global dstT *)(addr) = (val))
#define SRCSIZE ((int)sizeof(srcT))
#define DSTSIZE ((int)sizeof(dstT))
#else
#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
#define storepix(val, addr) vstore3((val), 0, (__global dstT1 *)(addr))
#define SRCSIZE ((int)sizeof(srcT1) * 3)
#define DSTSIZE ((int)sizeof(dstT1) * 3)
#endif
// SRC(_x, _y): loads the source pixel at column _x, row _y (src_step is the row
// stride in bytes) and converts it to the working type via convertToWT.
// Relies on `Src` and `src_step` being in scope at the point of use.
#define SRC(_x,_y) convertToWT(loadpix(Src + mad24((_y), src_step, SRCSIZE * (_x))))
#ifdef BORDER_CONSTANT
// CCCCCC|abcdefgh|CCCCCCC
// Positions outside [0, r_edge) x [0, t_edge) yield const_v instead of a memory read.
// Bitwise '|' is kept on purpose: every operand is a 0/1 comparison result.
#define ELEM(_x,_y,r_edge,t_edge,const_v) ((((_x) < 0) | ((_x) >= (r_edge)) | ((_y) < 0) | ((_y) >= (t_edge))) ? (const_v) : SRC((_x),(_y)))
#else
// Coordinates are expected to be already remapped by EXTRAPOLATE; the edge and
// constant arguments are ignored.
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
#endif
// `noconvert` expands to nothing, so a conversion macro defined as `noconvert`
// turns `convertToX(v)` into plain `(v)`.
// NOTE(review): presumably the host passes `noconvert` when source and working
// types already match - confirm against the host-side build options.
#define noconvert
// horizontal and vertical filter kernels
// should be defined on host during compile time to avoid overhead
// DIG(a) emits `a,` so a sequence of DIG(...) tokens forms an initializer list.
// NOTE(review): KERNEL_MATRIX_X / KERNEL_MATRIX_Y are expected to be such
// sequences supplied through build options - confirm against the host code.
#define DIG(a) a,
// Horizontal filter coefficients; sep_filter reads indices 0 .. 2*RADIUSX.
__constant float mat_kernelX[] = { KERNEL_MATRIX_X };
// Vertical filter coefficients; sep_filter reads indices 0 .. 2*RADIUSY.
__constant float mat_kernelY[] = { KERNEL_MATRIX_Y };
// Separable 2D filter executed in a single kernel launch.
//
// Each work-group loads a (BLK_Y + 2*RADIUSY) x (BLK_X + 2*RADIUSX) tile of the
// source into local memory, filters it vertically with mat_kernelY into a second
// local array, then filters that horizontally with mat_kernelX and writes one
// destination pixel per work-item.
//
// Parameters:
//   Src, src_step           - source base pointer and row stride, both in bytes
//   srcOffsetX, srcOffsetY  - offset (in pixels) added to the global id to obtain
//                             the source coordinate
//   height, width           - bounds passed to EXTRAPOLATE / ELEM for border handling
//   Dst, dst_step           - destination base pointer and row stride, both in bytes
//   dst_offset              - byte offset of the first destination pixel
//   dst_rows, dst_cols      - destination extent; work-items outside it write nothing
__kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int srcOffsetY, int height, int width,
                         __global uchar* Dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
    // RADIUSX, RADIUSY are filter dimensions
    // BLK_X, BLK_Y are local workgroup sizes
    // all these should be defined on host during compile time
    // first lsmem array for source pixels used in first pass,
    // second lsmemDy for storing first pass results
    __local WT lsmem[BLK_Y + 2 * RADIUSY][BLK_X + 2 * RADIUSX];
    __local WT lsmemDy[BLK_Y][BLK_X + 2 * RADIUSX];

    // get local and global ids - used as image and local memory array indexes
    int lix = get_local_id(0);
    int liy = get_local_id(1);

    int x = get_global_id(0);
    int y = get_global_id(1);

    // calculate pixel position in source image taking image offset into account
    int srcX = x + srcOffsetX - RADIUSX;
    int srcY = y + srcOffsetY - RADIUSY;

    // extrapolate coordinates, if needed
    // and read my own source pixel into local memory
    // with account for extra border pixels, which will be read by starting workitems
    int clocY = liy;
    int cSrcY = srcY;
    do
    {
        int yb = cSrcY;
        EXTRAPOLATE(yb, (height));

        int clocX = lix;
        int cSrcX = srcX;
        do
        {
            int xb = cSrcX;
            EXTRAPOLATE(xb,(width));
            lsmem[clocY][clocX] = ELEM(xb, yb, (width), (height), 0 );

            clocX += BLK_X;
            cSrcX += BLK_X;
        }
        while (clocX < BLK_X + (RADIUSX * 2));

        clocY += BLK_Y;
        cSrcY += BLK_Y;
    }
    while (clocY < BLK_Y + (RADIUSY * 2));
    barrier(CLK_LOCAL_MEM_FENCE);

    // do vertical filter pass
    // and store intermediate results to second local memory array
    int clocX = lix;
    do
    {
        WT colSum = 0.0f;
        for (int i = 0; i <= 2 * RADIUSY; i++)
            colSum = mad(lsmem[liy + i][clocX], mat_kernelY[i], colSum);
        lsmemDy[liy][clocX] = colSum;

        clocX += BLK_X;
    }
    while (clocX < BLK_X + (RADIUSX * 2));
    barrier(CLK_LOCAL_MEM_FENCE);

    // if this pixel happened to be out of image borders because of global size rounding,
    // then just return; this check must stay after the last barrier so that every
    // work-item of the group reaches both barriers
    if (x >= dst_cols || y >= dst_rows)
        return;

    // do second horizontal filter pass
    // and calculate final result
    WT sum = 0.0f;
    for (int i = 0; i <= 2 * RADIUSX; i++)
        sum = mad(lsmemDy[liy][lix + i], mat_kernelX[i], sum);

    // store result into destination image
    storepix(convertToDstT(sum), Dst + mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset)));
}
...@@ -306,7 +306,7 @@ OCL_TEST_P(MorphologyEx, Mat) ...@@ -306,7 +306,7 @@ OCL_TEST_P(MorphologyEx, Mat)
(int)BORDER_REFLECT|BORDER_ISOLATED, (int)BORDER_WRAP|BORDER_ISOLATED, \ (int)BORDER_REFLECT|BORDER_ISOLATED, (int)BORDER_WRAP|BORDER_ISOLATED, \
(int)BORDER_REFLECT_101|BORDER_ISOLATED*/) // WRAP and ISOLATED are not supported by cv:: version (int)BORDER_REFLECT_101|BORDER_ISOLATED*/) // WRAP and ISOLATED are not supported by cv:: version
#define FILTER_TYPES Values(CV_8UC1, CV_8UC2, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4) #define FILTER_TYPES Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4)
OCL_INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine( OCL_INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
Values((MatType)CV_8UC1), Values((MatType)CV_8UC1),
......
...@@ -75,33 +75,24 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool) ...@@ -75,33 +75,24 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
void random_roi() void random_roi()
{ {
Size ksize = randomSize(kernelMinSize, kernelMaxSize); Size ksize = randomSize(kernelMinSize, kernelMaxSize);
if (1 != (ksize.width % 2)) if (1 != ksize.width % 2)
ksize.width++; ksize.width++;
if (1 != (ksize.height % 2)) if (1 != ksize.height % 2)
ksize.height++; ksize.height++;
Mat temp = randomMat(Size(ksize.width, 1), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE); Mat temp = randomMat(Size(ksize.width, 1), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE);
cv::normalize(temp, kernelX, 1.0, 0.0, NORM_L1); cv::normalize(temp, kernelX, 1.0, 0.0, NORM_L1);
temp = randomMat(Size(1, ksize.height), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE); temp = randomMat(Size(1, ksize.height), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE);
cv::normalize(temp, kernelY, 1.0, 0.0, NORM_L1); cv::normalize(temp, kernelY, 1.0, 0.0, NORM_L1);
Size roiSize = randomSize(ksize.width, MAX_VALUE, ksize.height, MAX_VALUE); Size roiSize = randomSize(ksize.width, MAX_VALUE, ksize.height, MAX_VALUE);
int rest = roiSize.width % 4;
if (0 != rest)
roiSize.width += (4 - rest);
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
rest = srcBorder.lef % 4;
if (0 != rest)
srcBorder.lef += (4 - rest);
rest = srcBorder.rig % 4;
if (0 != rest)
srcBorder.rig += (4 - rest);
randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE); randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE); randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
anchor.x = -1; anchor.x = anchor.y = -1;
anchor.y = -1;
UMAT_UPLOAD_INPUT_PARAMETER(src); UMAT_UPLOAD_INPUT_PARAMETER(src);
UMAT_UPLOAD_OUTPUT_PARAMETER(dst); UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
...@@ -115,7 +106,7 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool) ...@@ -115,7 +106,7 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
OCL_TEST_P(SepFilter2D, Mat) OCL_TEST_P(SepFilter2D, Mat)
{ {
for (int j = 0; j < test_loop_times; j++) for (int j = 0; j < test_loop_times + 3; j++)
{ {
random_roi(); random_roi();
...@@ -126,11 +117,10 @@ OCL_TEST_P(SepFilter2D, Mat) ...@@ -126,11 +117,10 @@ OCL_TEST_P(SepFilter2D, Mat)
} }
} }
OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SepFilter2D, OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SepFilter2D,
Combine( Combine(
Values(CV_8U, CV_32F), Values(CV_8U, CV_32F),
Values(1, 4), OCL_ALL_CHANNELS,
Values( Values(
(BorderType)BORDER_CONSTANT, (BorderType)BORDER_CONSTANT,
(BorderType)BORDER_REPLICATE, (BorderType)BORDER_REPLICATE,
......
...@@ -1014,10 +1014,8 @@ namespace ...@@ -1014,10 +1014,8 @@ namespace
return; return;
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
if (isUmat_ && curFrame_.channels() == 1) if (isUmat_)
curFrame_.copyTo(ucurFrame_); curFrame_.copyTo(ucurFrame_);
else
isUmat_ = false;
#endif #endif
++storePos_; ++storePos_;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment