Commit 82d619a3 authored by Vladislav Vinogradov's avatar Vladislav Vinogradov

fixed gpu::filter2D

parent fd312341
...@@ -284,12 +284,11 @@ CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, co ...@@ -284,12 +284,11 @@ CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, co
//! returns 2D filter with the specified kernel //! returns 2D filter with the specified kernel
//! supports CV_8UC1 and CV_8UC4 types //! supports CV_8UC1 and CV_8UC4 types
CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Size& ksize, CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
Point anchor = Point(-1, -1));
//! returns the non-separable linear filter engine //! returns the non-separable linear filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel,
const Point& anchor = Point(-1,-1)); Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT);
//! returns the primitive row filter with the specified kernel. //! returns the primitive row filter with the specified kernel.
//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type. //! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type.
...@@ -367,7 +366,7 @@ CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& ...@@ -367,7 +366,7 @@ CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat&
Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null()); Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
//! applies non-separable 2D linear filter to the image //! applies non-separable 2D linear filter to the image
CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), Stream& stream = Stream::Null()); CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
//! applies separable 2D linear filter to the image //! applies separable 2D linear filter to the image
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
......
...@@ -900,49 +900,100 @@ namespace cv { namespace gpu { namespace device ...@@ -900,49 +900,100 @@ namespace cv { namespace gpu { namespace device
__constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE]; __constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE];
texture<float, cudaTextureType2D, cudaReadModeElementType> filter2DTex(0, cudaFilterModePoint, cudaAddressModeClamp); template <class SrcT, typename D>
__global__ void filter2D(const SrcT src, DevMem2D_<D> dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY)
__global__ void filter2D(int ofsX, int ofsY, PtrStepf dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY, const BrdReflect101<float> brd)
{ {
typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t;
const int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y; const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x > brd.last_col || y > brd.last_row) if (x >= dst.cols || y >= dst.rows)
return; return;
float res = 0; sum_t res = VecTraits<sum_t>::all(0);
int kInd = 0; int kInd = 0;
for (int i = 0; i < kHeight; ++i) for (int i = 0; i < kHeight; ++i)
{ {
for (int j = 0; j < kWidth; ++j) for (int j = 0; j < kWidth; ++j)
{ res = res + src(y - anchorY + i, x - anchorX + j) * c_filter2DKernel[kInd++];
const int srcX = ofsX + brd.idx_col(x - anchorX + j);
const int srcY = ofsY + brd.idx_row(y - anchorY + i);
res += tex2D(filter2DTex, srcX, srcY) * c_filter2DKernel[kInd++];
}
} }
dst.ptr(y)[x] = res; dst(y, x) = saturate_cast<D>(res);
} }
template <typename T, typename D, template <typename> class Brd> struct Filter2DCaller;
#define IMPLEMENT_FILTER2D_TEX_READER(type) \
texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
struct tex_filter2D_ ## type ## _reader \
{ \
typedef type elem_type; \
typedef int index_type; \
const int xoff; \
const int yoff; \
tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \
} \
}; \
template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \
{ \
static void call(const DevMem2D_< type > srcWhole, int xoff, int yoff, DevMem2D_<D> dst, \
int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \
{ \
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
dim3 block(16, 16); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_filter2D_ ## type , srcWhole); \
tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \
Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \
BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); \
filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kWidth, kHeight, anchorX, anchorY); \
cudaSafeCall( cudaGetLastError() ); \
if (stream == 0) \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};
IMPLEMENT_FILTER2D_TEX_READER(uchar);
IMPLEMENT_FILTER2D_TEX_READER(uchar4);
IMPLEMENT_FILTER2D_TEX_READER(ushort);
IMPLEMENT_FILTER2D_TEX_READER(ushort4);
IMPLEMENT_FILTER2D_TEX_READER(float);
IMPLEMENT_FILTER2D_TEX_READER(float4);
#undef IMPLEMENT_FILTER2D_TEX_READER
void filter2D_gpu(DevMem2Df src, int ofsX, int ofsY, DevMem2Df dst, int kWidth, int kHeight, int anchorX, int anchorY, float* kernel, cudaStream_t stream) template <typename T, typename D>
void filter2D_gpu(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst,
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
int borderMode, const float* borderValue, cudaStream_t stream)
{ {
cudaSafeCall(cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); typedef void (*func_t)(const DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
static const func_t funcs[] =
const dim3 block(16, 16); {
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); Filter2DCaller<T, D, BrdReflect101>::call,
Filter2DCaller<T, D, BrdReplicate>::call,
bindTexture(&filter2DTex, src); Filter2DCaller<T, D, BrdConstant>::call,
Filter2DCaller<T, D, BrdReflect>::call,
BrdReflect101<float> brd(dst.rows, dst.cols); Filter2DCaller<T, D, BrdWrap>::call
};
filter2D<<<grid, block, 0, stream>>>(ofsX, ofsY, dst, kWidth, kHeight, anchorX, anchorY, brd); cudaSafeCall(cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
cudaSafeCall(cudaGetLastError());
if (stream == 0) funcs[borderMode](static_cast< DevMem2D_<T> >(srcWhole), ofsX, ofsY, static_cast< DevMem2D_<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream);
cudaSafeCall(cudaDeviceSynchronize());
} }
template void filter2D_gpu<uchar, uchar>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<uchar4, uchar4>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<ushort, ushort>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<ushort4, ushort4>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<float, float>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<float4, float4>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
} // namespace imgproc } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device { }}} // namespace cv { namespace gpu { namespace device {
...@@ -58,8 +58,8 @@ Ptr<FilterEngine_GPU> cv::gpu::createBoxFilter_GPU(int, int, const Size&, const ...@@ -58,8 +58,8 @@ Ptr<FilterEngine_GPU> cv::gpu::createBoxFilter_GPU(int, int, const Size&, const
Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); } Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); } Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, GpuMat&, const Point&, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); } Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, GpuMat&, const Point&, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); } Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int, int, const Mat&, Point, int) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int, int, const Mat&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); } Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int, int, const Mat&, Point, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); } Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); } Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); } Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
...@@ -78,7 +78,7 @@ void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nog ...@@ -78,7 +78,7 @@ void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nog
void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, GpuMat&, Point, int, Stream&) { throw_nogpu(); } void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, GpuMat&, Point, int, Stream&) { throw_nogpu(); }
void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); } void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); }
void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, GpuMat&, GpuMat&, Point, int, Stream&) { throw_nogpu(); } void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, GpuMat&, GpuMat&, Point, int, Stream&) { throw_nogpu(); }
void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point, Stream&) { throw_nogpu(); } void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point, int, Stream&) { throw_nogpu(); }
void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, int, int) { throw_nogpu(); } void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, int, int) { throw_nogpu(); }
void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, GpuMat&, Point, int, int, Stream&) { throw_nogpu(); } void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, GpuMat&, Point, int, int, Stream&) { throw_nogpu(); }
void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double, int, int) { throw_nogpu(); } void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double, int, int) { throw_nogpu(); }
...@@ -663,7 +663,10 @@ namespace cv { namespace gpu { namespace device ...@@ -663,7 +663,10 @@ namespace cv { namespace gpu { namespace device
{ {
namespace imgproc namespace imgproc
{ {
void filter2D_gpu(DevMem2Df src, int ofsX, int ofsY, DevMem2Df dst, int kWidth, int kHeight, int anchorX, int anchorY, float* kernel, cudaStream_t stream); template <typename T, typename D>
void filter2D_gpu(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst,
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
int borderMode, const float* borderValue, cudaStream_t stream);
} }
}}} }}}
...@@ -705,10 +708,16 @@ namespace ...@@ -705,10 +708,16 @@ namespace
nppFilter2D_t func; nppFilter2D_t func;
}; };
struct GpuLinearFilter : public BaseFilter_GPU typedef void (*gpuFilter2D_t)(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst,
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
int borderMode, const float* borderValue, cudaStream_t stream);
struct GpuFilter2D : public BaseFilter_GPU
{ {
GpuLinearFilter(Size ksize_, Point anchor_, const GpuMat& kernel_) : GpuFilter2D(Size ksize_, Point anchor_, gpuFilter2D_t func_, const GpuMat& kernel_, int brd_type_) :
BaseFilter_GPU(ksize_, anchor_), kernel(kernel_) {} BaseFilter_GPU(ksize_, anchor_), func(func_), kernel(kernel_), brd_type(brd_type_)
{
}
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
{ {
...@@ -719,30 +728,32 @@ namespace ...@@ -719,30 +728,32 @@ namespace
src.locateROI(wholeSize, ofs); src.locateROI(wholeSize, ofs);
GpuMat srcWhole(wholeSize, src.type(), src.datastart); GpuMat srcWhole(wholeSize, src.type(), src.datastart);
filter2D_gpu(srcWhole, ofs.x, ofs.y, dst, ksize.width, ksize.height, anchor.x, anchor.y, kernel.ptr<float>(), StreamAccessor::getStream(stream)); static const Scalar_<float> zero = Scalar_<float>::all(0.0f);
func(srcWhole, ofs.x, ofs.y, dst, ksize.width, ksize.height, anchor.x, anchor.y, kernel.ptr<float>(), brd_type, zero.val, StreamAccessor::getStream(stream));
} }
gpuFilter2D_t func;
GpuMat kernel; GpuMat kernel;
int brd_type;
}; };
} }
Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Size& ksize, Point anchor) Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor, int brd_type)
{ {
CV_Assert(srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1); using namespace cv::gpu::device::imgproc;
CV_Assert(dstType == srcType);
if (srcType == CV_32FC1) int sdepth = CV_MAT_DEPTH(srcType);
{ int scn = CV_MAT_CN(srcType);
CV_Assert(ksize.width * ksize.height <= 16 * 16);
GpuMat gpu_krnl; CV_Assert(sdepth == CV_8U || sdepth == CV_16U || sdepth == CV_32F);
normalizeKernel(kernel, gpu_krnl, CV_32F); CV_Assert(scn == 1 || scn == 4);
CV_Assert(dstType == srcType);
CV_Assert(brd_type == BORDER_REFLECT101 || brd_type == BORDER_REPLICATE || brd_type == BORDER_CONSTANT || brd_type == BORDER_REFLECT || brd_type == BORDER_WRAP);
normalizeAnchor(anchor, ksize); Size ksize = kernel.size();
return Ptr<BaseFilter_GPU>(new GpuLinearFilter(ksize, anchor, gpu_krnl)); #if 0
} if ((srcType == CV_8UC1 || srcType == CV_8UC4) && brd_type == BORDER_CONSTANT)
else
{ {
static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R}; static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R};
...@@ -754,27 +765,64 @@ Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const ...@@ -754,27 +765,64 @@ Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const
return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)])); return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)]));
} }
#endif
CV_Assert(ksize.width * ksize.height <= 16 * 16);
int gpuBorderType;
CV_Assert( tryConvertToGpuBorderType(brd_type, gpuBorderType) );
GpuMat gpu_krnl;
normalizeKernel(kernel, gpu_krnl, CV_32F);
normalizeAnchor(anchor, ksize);
gpuFilter2D_t func = 0;
switch (srcType)
{
case CV_8UC1:
func = filter2D_gpu<uchar, uchar>;
break;
case CV_8UC4:
func = filter2D_gpu<uchar4, uchar4>;
break;
case CV_16UC1:
func = filter2D_gpu<ushort, ushort>;
break;
case CV_16UC4:
func = filter2D_gpu<ushort4, ushort4>;
break;
case CV_32FC1:
func = filter2D_gpu<float, float>;
break;
case CV_32FC4:
func = filter2D_gpu<float4, float4>;
break;
}
return Ptr<BaseFilter_GPU>(new GpuFilter2D(ksize, anchor, func, gpu_krnl, gpuBorderType));
} }
Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Point& anchor) Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor, int borderType)
{ {
Size ksize = kernel.size(); Ptr<BaseFilter_GPU> linearFilter = getLinearFilter_GPU(srcType, dstType, kernel, anchor, borderType);
Ptr<BaseFilter_GPU> linearFilter = getLinearFilter_GPU(srcType, dstType, kernel, ksize, anchor);
return createFilter2D_GPU(linearFilter, srcType, dstType); return createFilter2D_GPU(linearFilter, srcType, dstType);
} }
void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor, Stream& stream) void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor, int borderType, Stream& stream)
{ {
if( ddepth < 0 ) if (ddepth < 0)
ddepth = src.depth(); ddepth = src.depth();
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels())); int dst_type = CV_MAKE_TYPE(ddepth, src.channels());
Ptr<FilterEngine_GPU> f = createLinearFilter_GPU(src.type(), dst.type(), kernel, anchor); Ptr<FilterEngine_GPU> f = createLinearFilter_GPU(src.type(), dst_type, kernel, anchor, borderType);
f->apply(src, dst, src.type() == CV_32FC1 ? Rect(0, 0, src.cols, src.rows) : Rect(0, 0, -1, -1), stream); dst.create(src.size(), dst_type);
f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream);
} }
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
......
...@@ -505,13 +505,14 @@ INSTANTIATE_TEST_CASE_P(GPU_Filter, MorphEx, testing::Combine( ...@@ -505,13 +505,14 @@ INSTANTIATE_TEST_CASE_P(GPU_Filter, MorphEx, testing::Combine(
///////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////
// Filter2D // Filter2D
PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, UseRoi) PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi)
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
int type; int type;
cv::Size ksize; cv::Size ksize;
cv::Point anchor; cv::Point anchor;
int borderType;
bool useRoi; bool useRoi;
cv::Mat img; cv::Mat img;
...@@ -524,7 +525,8 @@ PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, ...@@ -524,7 +525,8 @@ PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor,
type = GET_PARAM(2); type = GET_PARAM(2);
ksize = GET_PARAM(3); ksize = GET_PARAM(3);
anchor = GET_PARAM(4); anchor = GET_PARAM(4);
useRoi = GET_PARAM(5); borderType = GET_PARAM(5);
useRoi = GET_PARAM(6);
cv::gpu::setDevice(devInfo.deviceID()); cv::gpu::setDevice(devInfo.deviceID());
} }
...@@ -533,26 +535,24 @@ PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, ...@@ -533,26 +535,24 @@ PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor,
TEST_P(Filter2D, Accuracy) TEST_P(Filter2D, Accuracy)
{ {
cv::Mat src = randomMat(size, type); cv::Mat src = randomMat(size, type);
cv::Mat kernel = cv::Mat::ones(ksize.height, ksize.width, CV_32FC1); cv::Mat kernel = randomMat(cv::Size(ksize.width, ksize.height), CV_32FC1, 0.0, 1.0);
cv::gpu::GpuMat dst = createMat(size, type, useRoi); cv::gpu::GpuMat dst = createMat(size, type, useRoi);
cv::gpu::filter2D(loadMat(src, useRoi), dst, -1, kernel, anchor); cv::gpu::filter2D(loadMat(src, useRoi), dst, -1, kernel, anchor, borderType);
cv::Mat dst_gold; cv::Mat dst_gold;
cv::filter2D(src, dst_gold, -1, kernel, anchor, 0, type == CV_32FC1 ? cv::BORDER_DEFAULT : cv::BORDER_CONSTANT); cv::filter2D(src, dst_gold, -1, kernel, anchor, 0, borderType);
if (type == CV_32FC1) EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1.0);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-1);
else
EXPECT_MAT_NEAR(getInnerROI(dst_gold, ksize), getInnerROI(dst, ksize), 0.0);
} }
INSTANTIATE_TEST_CASE_P(GPU_Filter, Filter2D, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Filter, Filter2D, testing::Combine(
ALL_DEVICES, ALL_DEVICES,
DIFFERENT_SIZES, DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)), testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))), testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))),
testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))), testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))),
testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)),
WHOLE_SUBMAT)); WHOLE_SUBMAT));
} // namespace } // namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment