Commit e746b3e8 authored by Vladislav Vinogradov's avatar Vladislav Vinogradov

added buffered version of pyrDown and pyrUp

added stream support to downsample, upsample, pyrUp and pyrDown
parent cf42f308
This diff is collapsed.
......@@ -908,29 +908,31 @@ namespace cv { namespace gpu { namespace imgproc
// Host-side launcher for the downsampling kernel (defined above in this file).
// Writes dst from src on the given CUDA stream; for the default stream (0)
// it blocks until completion so execution errors surface immediately, while
// a non-default stream stays fully asynchronous.
// NOTE(review): element type T and channel count cn must match the layout of
// src/dst — presumably guaranteed by the host wrapper; confirm at call sites.
template <typename T, int cn>
void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream)
{
    dim3 threads(32, 8);
    // One thread per destination pixel, grid rounded up to cover the edges.
    dim3 grid(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y));

    downsampleKernel<T,cn><<<grid, threads, 0, stream>>>(DevMem2D_<T>(src), DevMem2D_<T>(dst));
    cudaSafeCall(cudaGetLastError());  // catch launch-configuration errors

    if (stream == 0)
        cudaSafeCall(cudaDeviceSynchronize());  // sync only in blocking (default-stream) mode
}

template void downsampleCaller<uchar,1>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void downsampleCaller<uchar,2>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void downsampleCaller<uchar,3>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void downsampleCaller<uchar,4>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void downsampleCaller<short,1>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void downsampleCaller<short,2>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void downsampleCaller<short,3>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void downsampleCaller<short,4>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void downsampleCaller<float,1>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void downsampleCaller<float,2>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void downsampleCaller<float,3>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void downsampleCaller<float,4>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
//////////////////////////////////////////////////////////////////////////
......@@ -952,29 +954,31 @@ namespace cv { namespace gpu { namespace imgproc
// Host-side launcher for the upsampling kernel (defined above in this file).
// Writes dst from src on the given CUDA stream; for the default stream (0)
// it blocks until completion so execution errors surface immediately, while
// a non-default stream stays fully asynchronous.
// NOTE(review): element type T and channel count cn must match the layout of
// src/dst — presumably guaranteed by the host wrapper; confirm at call sites.
template <typename T, int cn>
void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream)
{
    dim3 threads(32, 8);
    // One thread per destination pixel, grid rounded up to cover the edges.
    dim3 grid(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y));

    upsampleKernel<T,cn><<<grid, threads, 0, stream>>>(DevMem2D_<T>(src), DevMem2D_<T>(dst));
    cudaSafeCall(cudaGetLastError());  // catch launch-configuration errors

    if (stream == 0)
        cudaSafeCall(cudaDeviceSynchronize());  // sync only in blocking (default-stream) mode
}

template void upsampleCaller<uchar,1>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void upsampleCaller<uchar,2>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void upsampleCaller<uchar,3>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void upsampleCaller<uchar,4>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void upsampleCaller<short,1>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void upsampleCaller<short,2>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void upsampleCaller<short,3>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void upsampleCaller<short,4>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void upsampleCaller<float,1>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void upsampleCaller<float,2>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void upsampleCaller<float,3>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
template void upsampleCaller<float,4>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
//////////////////////////////////////////////////////////////////////////
......
......@@ -79,10 +79,14 @@ void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int) { throw_nogpu(); }
void cv::gpu::ConvolveBuf::create(Size, Size) { throw_nogpu(); }
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); }
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); }
void cv::gpu::downsample(const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::upsample(const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::pyrDown(const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::pyrUp(const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::downsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::upsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::pyrDown(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::PyrDownBuf::create(Size, int) { throw_nogpu(); }
void cv::gpu::pyrDown(const GpuMat&, GpuMat&, PyrDownBuf&, Stream&) { throw_nogpu(); }
void cv::gpu::pyrUp(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::PyrUpBuf::create(Size, int) { throw_nogpu(); }
void cv::gpu::pyrUp(const GpuMat&, GpuMat&, PyrUpBuf&, Stream&) { throw_nogpu(); }
......@@ -1413,15 +1417,15 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
namespace cv { namespace gpu { namespace imgproc
{
    // Implemented in the CUDA source (imgproc.cu); runs on 'stream' and
    // synchronizes only when stream == 0 (the default stream).
    template <typename T, int cn>
    void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
}}}
void cv::gpu::downsample(const GpuMat& src, GpuMat& dst)
void cv::gpu::downsample(const GpuMat& src, GpuMat& dst, Stream& stream)
{
CV_Assert(src.depth() < CV_64F && src.channels() <= 4);
typedef void (*Caller)(const DevMem2D, DevMem2D);
typedef void (*Caller)(const DevMem2D, DevMem2D, cudaStream_t stream);
static const Caller callers[6][4] =
{{imgproc::downsampleCaller<uchar,1>, imgproc::downsampleCaller<uchar,2>,
imgproc::downsampleCaller<uchar,3>, imgproc::downsampleCaller<uchar,4>},
......@@ -1437,7 +1441,7 @@ void cv::gpu::downsample(const GpuMat& src, GpuMat& dst)
CV_Error(CV_StsUnsupportedFormat, "bad number of channels");
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
caller(src, dst.reshape(1));
caller(src, dst.reshape(1), StreamAccessor::getStream(stream));
}
......@@ -1447,15 +1451,15 @@ void cv::gpu::downsample(const GpuMat& src, GpuMat& dst)
namespace cv { namespace gpu { namespace imgproc
{
    // Implemented in the CUDA source (imgproc.cu); runs on 'stream' and
    // synchronizes only when stream == 0 (the default stream).
    template <typename T, int cn>
    void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
}}}
void cv::gpu::upsample(const GpuMat& src, GpuMat& dst)
void cv::gpu::upsample(const GpuMat& src, GpuMat& dst, Stream& stream)
{
CV_Assert(src.depth() < CV_64F && src.channels() <= 4);
typedef void (*Caller)(const DevMem2D, DevMem2D);
typedef void (*Caller)(const DevMem2D, DevMem2D, cudaStream_t stream);
static const Caller callers[6][5] =
{{imgproc::upsampleCaller<uchar,1>, imgproc::upsampleCaller<uchar,2>,
imgproc::upsampleCaller<uchar,3>, imgproc::upsampleCaller<uchar,4>},
......@@ -1471,31 +1475,73 @@ void cv::gpu::upsample(const GpuMat& src, GpuMat& dst)
CV_Error(CV_StsUnsupportedFormat, "bad number of channels");
dst.create(src.rows*2, src.cols*2, src.type());
caller(src, dst.reshape(1));
caller(src, dst.reshape(1), StreamAccessor::getStream(stream));
}
//////////////////////////////////////////////////////////////////////////////
// pyrDown
void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst)
void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream)
{
Mat ker = getGaussianKernel(5, 0, std::max(CV_32F, src.depth()));
GpuMat buf;
sepFilter2D(src, buf, src.depth(), ker, ker);
downsample(buf, dst);
PyrDownBuf buf;
pyrDown(src, dst, buf, stream);
}
cv::Mat cv::gpu::PyrDownBuf::ker;
void cv::gpu::PyrDownBuf::create(Size image_size, int image_type_)
{
if (ker.empty() || image_type_ != image_type)
ker = getGaussianKernel(5, 0, std::max(CV_32F, CV_MAT_DEPTH(image_type_)));
ensureSizeIsEnough(image_size.height, image_size.width, image_type_, buf);
if (filter.empty() || image_type_ != image_type)
{
image_type = image_type_;
filter = createSeparableLinearFilter_GPU(image_type, image_type, ker, ker);
}
}
void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, PyrDownBuf& buf, Stream& stream)
{
buf.create(src.size(), src.type());
buf.filter->apply(src, buf.buf, Rect(0, 0, src.cols, src.rows), stream);
downsample(buf.buf, dst, stream);
}
//////////////////////////////////////////////////////////////////////////////
// pyrUp
void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst)
void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream)
{
PyrUpBuf buf;
pyrUp(src, dst, buf, stream);
}
cv::Mat cv::gpu::PyrUpBuf::ker;
void cv::gpu::PyrUpBuf::create(Size image_size, int image_type_)
{
if (ker.empty() || image_type_ != image_type)
ker = getGaussianKernel(5, 0, std::max(CV_32F, CV_MAT_DEPTH(image_type_))) * 2;
ensureSizeIsEnough(image_size.height * 2, image_size.width * 2, image_type_, buf);
if (filter.empty() || image_type_ != image_type)
{
image_type = image_type_;
filter = createSeparableLinearFilter_GPU(image_type, image_type, ker, ker);
}
}
void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, PyrUpBuf& buf, Stream& stream)
{
GpuMat buf;
upsample(src, buf);
Mat ker = getGaussianKernel(5, 0, std::max(CV_32F, src.depth())) * 2;
sepFilter2D(buf, dst, buf.depth(), ker, ker);
buf.create(src.size(), src.type());
upsample(src, buf.buf, stream);
buf.filter->apply(buf.buf, dst, Rect(0, 0, buf.buf.cols, buf.buf.rows), stream);
}
#endif /* !defined (HAVE_CUDA) */
......
This diff is collapsed.
......@@ -866,3 +866,51 @@ TEST(GaussianBlur)
GPU_OFF;
}
}
// Benchmarks CPU vs GPU pyrDown over decreasing square image sizes.
TEST(pyrDown)
{
    gpu::PyrDownBuf buf;  // reused across iterations to amortize allocation

    for (int size = 4000; size >= 1000; size -= 1000)
    {
        SUBTEST << "size " << size;

        // BUGFIX: use the loop's 'size' for the image dimensions. The
        // original hard-coded 1000x1000, so every iteration benchmarked the
        // same image while SUBTEST reported different sizes.
        Mat src; gen(src, size, size, CV_16SC3, 0, 256);
        Mat dst(Size(src.cols / 2, src.rows / 2), src.type());

        CPU_ON;
        pyrDown(src, dst);
        CPU_OFF;

        gpu::GpuMat d_src(src);
        gpu::GpuMat d_dst(Size(src.cols / 2, src.rows / 2), src.type());

        GPU_ON;
        gpu::pyrDown(d_src, d_dst, buf);
        GPU_OFF;
    }
}
// Benchmarks CPU vs GPU pyrUp over decreasing square image sizes.
TEST(pyrUp)
{
    gpu::PyrUpBuf buf;  // reused across iterations to amortize allocation

    for (int size = 4000; size >= 1000; size -= 1000)
    {
        SUBTEST << "size " << size;

        // BUGFIX: use the loop's 'size' for the image dimensions. The
        // original hard-coded 1000x1000, so every iteration benchmarked the
        // same image while SUBTEST reported different sizes.
        Mat src; gen(src, size, size, CV_16SC3, 0, 256);
        Mat dst(Size(src.cols * 2, src.rows * 2), src.type());

        CPU_ON;
        pyrUp(src, dst);
        CPU_OFF;

        gpu::GpuMat d_src(src);
        gpu::GpuMat d_dst(Size(src.cols * 2, src.rows * 2), src.type());

        GPU_ON;
        gpu::pyrUp(d_src, d_dst, buf);
        GPU_OFF;
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment