Commit 9da6d789 authored by Vladislav Vinogradov

optimized GPU filters, added buffered versions for different filters

parent 340e23a4
......@@ -340,6 +340,8 @@ namespace cv
//! returns the separable filter engine with the specified filters
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType);
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType, GpuMat& buf);
//! returns horizontal 1D box filter
//! supports only CV_8UC1 source type and CV_32FC1 sum type
......@@ -367,6 +369,8 @@ namespace cv
//! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel,
const Point& anchor = Point(-1,-1), int iterations = 1);
CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf,
const Point& anchor = Point(-1,-1), int iterations = 1);
//! returns 2D filter with the specified kernel
//! supports CV_8UC1 and CV_8UC4 types
......@@ -386,7 +390,7 @@ namespace cv
//! OpenCV version supports only CV_32F as buffer depth and
//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel,
int anchor = -1, int borderType = BORDER_CONSTANT);
int anchor = -1, int borderType = BORDER_DEFAULT);
//! returns the primitive column filter with the specified kernel.
//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 dst type.
......@@ -397,20 +401,27 @@ namespace cv
//! OpenCV version supports only CV_32F as buffer depth and
//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel,
int anchor = -1, int borderType = BORDER_CONSTANT);
int anchor = -1, int borderType = BORDER_DEFAULT);
//! returns the separable linear filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
const Mat& columnKernel, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
int columnBorderType = -1);
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
const Mat& columnKernel, GpuMat& buf, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
int columnBorderType = -1);
//! returns filter engine for the generalized Sobel operator
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize,
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, GpuMat& buf,
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
//! returns the Gaussian filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0,
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
//! returns maximum filter
CV_EXPORTS Ptr<BaseFilter_GPU> getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
......@@ -426,31 +437,42 @@ namespace cv
static inline void blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null()) { boxFilter(src, dst, -1, ksize, anchor, stream); }
//! erodes the image (applies the local minimum operator)
CV_EXPORTS void erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
//! dilates the image (applies the local maximum operator)
CV_EXPORTS void dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
//! applies an advanced morphological operation to the image
CV_EXPORTS void morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
//! applies non-separable 2D linear filter to the image
CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), Stream& stream = Stream::Null());
//! applies separable 2D linear filter to the image
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf,
Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
//! applies generalized Sobel operator to the image
CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1,
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, int ksize = 3, double scale = 1,
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
//! applies the vertical or horizontal Scharr operator to the image
CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1,
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, double scale = 1,
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
//! smooths the image using Gaussian filter.
CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0,
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
//! applies Laplacian operator to the image
......
......@@ -101,17 +101,15 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize, linearFilter, testing::Combine(test
SANITY_CHECK(dst_host);
}
PERF_TEST_P(DevInfo_Size_MatType_KernelSize_BorderMode, separableLinearFilter, testing::Combine(testing::ValuesIn(devices()),
testing::Values(GPU_TYPICAL_MAT_SIZES),
testing::Values(CV_8UC1, CV_8UC4, CV_16SC3, CV_32FC1),
testing::Values(3, 5),
testing::Values((int)BORDER_REFLECT101, (int)BORDER_CONSTANT)))
PERF_TEST_P(DevInfo_Size_MatType_KernelSize, separableLinearFilter, testing::Combine(testing::ValuesIn(devices()),
testing::Values(GPU_TYPICAL_MAT_SIZES),
testing::Values(CV_8UC1, CV_8UC4, CV_32FC1),
testing::Values(3, 5)))
{
DeviceInfo devInfo = std::tr1::get<0>(GetParam());
Size size = std::tr1::get<1>(GetParam());
int type = std::tr1::get<2>(GetParam());
int ksize = std::tr1::get<3>(GetParam());
int borderMode = std::tr1::get<4>(GetParam());
setDevice(devInfo.deviceID());
......@@ -123,7 +121,7 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize_BorderMode, separableLinearFilter, t
GpuMat dst(size, type);
Mat kernel = getGaussianKernel(ksize, 0.5, CV_32F);
Ptr<FilterEngine_GPU> filter = createSeparableLinearFilter_GPU(type, type, kernel, kernel, Point(-1,-1), borderMode);
Ptr<FilterEngine_GPU> filter = createSeparableLinearFilter_GPU(type, type, kernel, kernel, Point(-1,-1));
declare.time(1.0).iterations(100);
......
......@@ -12,6 +12,7 @@
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
......@@ -51,49 +52,64 @@ using namespace cv::gpu::device;
#define MAX_KERNEL_SIZE 16
#define BLOCK_DIM_X 16
#define BLOCK_DIM_Y 16
#define BLOCK_DIM_Y 8
#define RESULT_STEPS 8
#define HALO_STEPS 1
namespace filter_krnls_column
namespace filter_column
{
__constant__ float cLinearKernel[MAX_KERNEL_SIZE];
__constant__ float c_kernel[MAX_KERNEL_SIZE];
void loadLinearKernel(const float kernel[], int ksize)
void loadKernel(const float kernel[], int ksize)
{
cudaSafeCall( cudaMemcpyToSymbol(cLinearKernel, kernel, ksize * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) );
}
template <int ksize, typename T, typename D, typename B>
template <int KERNEL_SIZE, typename T, typename D, typename B>
__global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep_<D> dst, int anchor, const B b)
{
__shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
const int x = BLOCK_DIM_X * blockIdx.x + threadIdx.x;
const int y = BLOCK_DIM_Y * blockIdx.y + threadIdx.y;
__shared__ T smem[BLOCK_DIM_X][(RESULT_STEPS + 2 * HALO_STEPS) * BLOCK_DIM_Y + 1];
T* sDataColumn = smem + threadIdx.x;
//Offset to the upper halo edge
const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x;
const int y = (blockIdx.y * RESULT_STEPS - HALO_STEPS) * BLOCK_DIM_Y + threadIdx.y;
if (x < src.cols)
{
const T* srcCol = src.ptr() + x;
const T* src_col = src.ptr() + x;
sDataColumn[ threadIdx.y * BLOCK_DIM_X] = b.at_low(y - BLOCK_DIM_Y, srcCol, src.step);
sDataColumn[(threadIdx.y + BLOCK_DIM_Y) * BLOCK_DIM_X] = b.at_high(y, srcCol, src.step);
sDataColumn[(threadIdx.y + BLOCK_DIM_Y * 2) * BLOCK_DIM_X] = b.at_high(y + BLOCK_DIM_Y, srcCol, src.step);
//Main data
#pragma unroll
for(int i = HALO_STEPS; i < HALO_STEPS + RESULT_STEPS; ++i)
smem[threadIdx.x][threadIdx.y + i * BLOCK_DIM_Y] = b.at_high(y + i * BLOCK_DIM_Y, src_col, src.step);
//Upper halo
#pragma unroll
for(int i = 0; i < HALO_STEPS; ++i)
smem[threadIdx.x][threadIdx.y + i * BLOCK_DIM_Y] = b.at_low(y + i * BLOCK_DIM_Y, src_col, src.step);
//Lower halo
#pragma unroll
for(int i = HALO_STEPS + RESULT_STEPS; i < HALO_STEPS + RESULT_STEPS + HALO_STEPS; ++i)
smem[threadIdx.x][threadIdx.y + i * BLOCK_DIM_Y]= b.at_high(y + i * BLOCK_DIM_Y, src_col, src.step);
__syncthreads();
if (y < src.rows)
#pragma unroll
for(int i = HALO_STEPS; i < HALO_STEPS + RESULT_STEPS; ++i)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
sum_t sum = VecTraits<sum_t>::all(0);
sDataColumn += (threadIdx.y + BLOCK_DIM_Y - anchor) * BLOCK_DIM_X;
#pragma unroll
for(int i = 0; i < ksize; ++i)
sum = sum + sDataColumn[i * BLOCK_DIM_X] * cLinearKernel[i];
for(int j = 0; j < KERNEL_SIZE; ++j)
sum = sum + smem[threadIdx.x][threadIdx.y + i * BLOCK_DIM_Y + j - anchor] * c_kernel[j];
dst.ptr(y)[x] = saturate_cast<D>(sum);
int dstY = y + i * BLOCK_DIM_Y;
if (dstY < src.rows)
dst.ptr(dstY)[x] = saturate_cast<D>(sum);
}
}
}
......@@ -103,13 +119,13 @@ namespace cv { namespace gpu { namespace filters
{
template <int ksize, typename T, typename D, template<typename> class B>
void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
{
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
{
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, RESULT_STEPS * BLOCK_DIM_Y));
B<T> b(src.rows);
filter_krnls_column::linearColumnFilter<ksize, T, D><<<grid, threads, 0, stream>>>(src, dst, anchor, b);
filter_column::linearColumnFilter<ksize, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, b);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
......@@ -219,7 +235,7 @@ namespace cv { namespace gpu { namespace filters
}
};
filter_krnls_column::loadLinearKernel(kernel, ksize);
filter_column::loadKernel(kernel, ksize);
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
}
......
......@@ -12,6 +12,7 @@
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
......@@ -51,64 +52,85 @@ using namespace cv::gpu::device;
#define MAX_KERNEL_SIZE 16
#define BLOCK_DIM_X 16
#define BLOCK_DIM_Y 16
#define BLOCK_DIM_Y 4
#define RESULT_STEPS 8
#define HALO_STEPS 1
namespace filter_krnls_row
namespace filter_row
{
__constant__ float cLinearKernel[MAX_KERNEL_SIZE];
__constant__ float c_kernel[MAX_KERNEL_SIZE];
void loadLinearKernel(const float kernel[], int ksize)
void loadKernel(const float kernel[], int ksize)
{
cudaSafeCall( cudaMemcpyToSymbol(cLinearKernel, kernel, ksize * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) );
}
template <typename T, size_t size> struct SmemType_
namespace detail
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type smem_t;
};
template <typename T> struct SmemType_<T, 4>
{
typedef T smem_t;
};
template <typename T, size_t size> struct SmemType
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type smem_t;
};
template <typename T> struct SmemType<T, 4>
{
typedef T smem_t;
};
}
template <typename T> struct SmemType
{
typedef typename SmemType_<T, sizeof(T)>::smem_t smem_t;
typedef typename detail::SmemType<T, sizeof(T)>::smem_t smem_t;
};
template <int ksize, typename T, typename D, typename B>
template <int KERNEL_SIZE, typename T, typename D, typename B>
__global__ void linearRowFilter(const DevMem2D_<T> src, PtrStep_<D> dst, int anchor, const B b)
{
typedef typename SmemType<T>::smem_t smem_t;
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
__shared__ smem_t smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];
const int x = BLOCK_DIM_X * blockIdx.x + threadIdx.x;
const int y = BLOCK_DIM_Y * blockIdx.y + threadIdx.y;
__shared__ smem_t smem[BLOCK_DIM_Y][(RESULT_STEPS + 2 * HALO_STEPS) * BLOCK_DIM_X];
smem_t* sDataRow = smem + threadIdx.y * BLOCK_DIM_X * 3;
//Offset to the left halo edge
const int x = (blockIdx.x * RESULT_STEPS - HALO_STEPS) * BLOCK_DIM_X + threadIdx.x;
const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
if (y < src.rows)
{
const T* rowSrc = src.ptr(y);
const T* src_row = src.ptr(y);
//Load main data
#pragma unroll
for(int i = HALO_STEPS; i < HALO_STEPS + RESULT_STEPS; ++i)
smem[threadIdx.y][threadIdx.x + i * BLOCK_DIM_X] = b.at_high(i * BLOCK_DIM_X + x, src_row);
//Load left halo
#pragma unroll
for(int i = 0; i < HALO_STEPS; ++i)
smem[threadIdx.y][threadIdx.x + i * BLOCK_DIM_X] = b.at_low(i * BLOCK_DIM_X + x, src_row);
sDataRow[threadIdx.x ] = b.at_low(x - BLOCK_DIM_X, rowSrc);
sDataRow[threadIdx.x + BLOCK_DIM_X ] = b.at_high(x, rowSrc);
sDataRow[threadIdx.x + BLOCK_DIM_X * 2] = b.at_high(x + BLOCK_DIM_X, rowSrc);
//Load right halo
#pragma unroll
for(int i = HALO_STEPS + RESULT_STEPS; i < HALO_STEPS + RESULT_STEPS + HALO_STEPS; ++i)
smem[threadIdx.y][threadIdx.x + i * BLOCK_DIM_X] = b.at_high(i * BLOCK_DIM_X + x, src_row);
__syncthreads();
if (x < src.cols)
D* dst_row = dst.ptr(y);
#pragma unroll
for(int i = HALO_STEPS; i < HALO_STEPS + RESULT_STEPS; ++i)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
sum_t sum = VecTraits<sum_t>::all(0);
sDataRow += threadIdx.x + BLOCK_DIM_X - anchor;
#pragma unroll
for(int i = 0; i < ksize; ++i)
sum = sum + sDataRow[i] * cLinearKernel[i];
for (int j = 0; j < KERNEL_SIZE; ++j)
sum = sum + smem[threadIdx.y][threadIdx.x + i * BLOCK_DIM_X + j - anchor] * c_kernel[j];
dst.ptr(y)[x] = saturate_cast<D>(sum);
int dstX = x + i * BLOCK_DIM_X;
if (dstX < src.cols)
dst_row[dstX] = saturate_cast<D>(sum);
}
}
}
......@@ -119,13 +141,14 @@ namespace cv { namespace gpu { namespace filters
template <int ksize, typename T, typename D, template<typename> class B>
void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
{
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
typedef typename filter_row::SmemType<T>::smem_t smem_t;
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 grid(divUp(src.cols, RESULT_STEPS * BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
typedef typename filter_krnls_row::SmemType<T>::smem_t smem_t;
B<smem_t> b(src.cols);
filter_krnls_row::linearRowFilter<ksize, T, D><<<grid, threads, 0, stream>>>(src, dst, anchor, b);
filter_row::linearRowFilter<ksize, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, b);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
......@@ -235,7 +258,7 @@ namespace cv { namespace gpu { namespace filters
}
};
filter_krnls_row::loadLinearKernel(kernel, ksize);
filter_row::loadKernel(kernel, ksize);
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
}
......
This diff is collapsed.
......@@ -737,32 +737,6 @@ TEST(resize)
}
TEST(Sobel)
{
Mat src, dst;
gpu::GpuMat d_src, d_dst;
for (int size = 2000; size <= 4000; size += 1000)
{
SUBTEST << "size " << size << ", 32F";
gen(src, size, size, CV_32F, 0, 1);
dst.create(size, size, CV_32F);
CPU_ON;
Sobel(src, dst, dst.depth(), 1, 1);
CPU_OFF;
d_src = src;
d_dst.create(size, size, CV_32F);
GPU_ON;
gpu::Sobel(d_src, d_dst, d_dst.depth(), 1, 1);
GPU_OFF;
}
}
TEST(cvtColor)
{
Mat src, dst;
......@@ -1068,26 +1042,28 @@ TEST(solvePnPRansac)
TEST(GaussianBlur)
{
for (int size = 1000; size < 10000; size += 3000)
for (int size = 1000; size <= 4000; size += 1000)
{
SUBTEST << "16SC3, size " << size;
SUBTEST << "8UC1, size " << size;
Mat src; gen(src, size, size, CV_16SC3, 0, 256);
Mat src; gen(src, size, size, CV_8UC1, 0, 256);
Mat dst(src.size(), src.type());
CPU_ON;
GaussianBlur(src, dst, Size(5,5), 0);
GaussianBlur(src, dst, Size(3, 3), 1);
CPU_OFF;
gpu::GpuMat d_src(src);
gpu::GpuMat d_dst(src.size(), src.type());
gpu::GpuMat d_buf;
gpu::GaussianBlur(d_src, d_dst, Size(3, 3), d_buf, 1);
GPU_ON;
gpu::GaussianBlur(d_src, d_dst, Size(5,5), 0);
gpu::GaussianBlur(d_src, d_dst, Size(3, 3), d_buf, 1);
GPU_OFF;
}
for (int size = 1000; size < 10000; size += 3000)
for (int size = 1000; size <= 4000; size += 1000)
{
SUBTEST << "8UC4, size " << size;
......@@ -1095,14 +1071,37 @@ TEST(GaussianBlur)
Mat dst(src.size(), src.type());
CPU_ON;
GaussianBlur(src, dst, Size(5,5), 0);
GaussianBlur(src, dst, Size(3, 3), 1);
CPU_OFF;
gpu::GpuMat d_src(src);
gpu::GpuMat d_dst(src.size(), src.type());
gpu::GpuMat d_buf;
gpu::GaussianBlur(d_src, d_dst, Size(3, 3), d_buf, 1);
GPU_ON;
gpu::GaussianBlur(d_src, d_dst, Size(3, 3), d_buf, 1);
GPU_OFF;
}
for (int size = 1000; size <= 4000; size += 1000)
{
SUBTEST << "32FC1, size " << size;
Mat src; gen(src, size, size, CV_32FC1, 0, 1);
Mat dst(src.size(), src.type());
CPU_ON;
GaussianBlur(src, dst, Size(3, 3), 1);
CPU_OFF;
gpu::GpuMat d_src(src);
gpu::GpuMat d_dst(src.size(), src.type());
gpu::GpuMat d_buf;
gpu::GaussianBlur(d_src, d_dst, Size(3, 3), d_buf, 1);
GPU_ON;
gpu::GaussianBlur(d_src, d_dst, Size(5,5), 0);
gpu::GaussianBlur(d_src, d_dst, Size(3, 3), d_buf, 1);
GPU_OFF;
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment