implemented gpu::resize for all types

b2079d6d · Vladislav Vinogradov · acac27d8 · b2079d6d · b2079d6d · b2079d6d
Commit b2079d6d authored Sep 12, 2011 by Vladislav Vinogradov
6 changed files
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -633,8 +633,7 @@ namespace cv
        CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null());
        //! resizes the image
-        //! Supports INTER_NEAREST, INTER_LINEAR
+        //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
-        //! supports CV_8UC1, CV_8UC4 types
        CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
        //! warps the image using affine transformation

--- a/modules/gpu/perf/perf_imgproc.cpp
+++ b/modules/gpu/perf/perf_imgproc.cpp
@@ -245,8 +245,8 @@ PERF_TEST_P(DevInfo_Size_MatType, threshold, testing::Combine(testing::ValuesIn(
 PERF_TEST_P(DevInfo_Size_MatType_Interpolation_SizeCoeff, resize, testing::Combine(testing::ValuesIn(devices()),
                                                                                   testing::Values(GPU_TYPICAL_MAT_SIZES), 
-                                                                                   testing::Values(CV_8UC1, CV_8UC4),
+                                                                                   testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                                                                                   testing::Values((int)INTER_NEAREST, (int)INTER_LINEAR),
+                                                                                   testing::Values((int)INTER_NEAREST, (int)INTER_LINEAR, (int)INTER_CUBIC),
                                                                                   testing::Values(0.5, 2.0)))
 {
    DeviceInfo devInfo = std::tr1::get<0>(GetParam());
@@ -264,7 +264,7 @@ PERF_TEST_P(DevInfo_Size_MatType_Interpolation_SizeCoeff, resize, testing::Combi
    GpuMat src(src_host);
    GpuMat dst;
-    declare.time(0.5).iterations(100);
+    declare.time(1.0).iterations(100);
    SIMPLE_TEST_CYCLE()
    {

--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -272,14 +272,28 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q,
 ////////////////////////////////////////////////////////////////////////
 // resize
+namespace cv { namespace gpu {  namespace imgproc // declaration consumed by cv::gpu::resize below
+{ // resize_gpu is only declared here; no definition is visible in this hunk (presumably in cuda/imgproc.cu -- confirm)
+    template <typename T> void resize_gpu(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream); // T is the pixel type; the callers table below instantiates it for uchar..float4
+}}}
 void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
 {
-    static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR/*, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS*/};
+    using namespace cv::gpu::imgproc;
-    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
+    typedef void (*caller_t)(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);
-    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR/* || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);
+    static const caller_t callers[6][4] = 
+    {
+        {resize_gpu<uchar>, resize_gpu<uchar2>, resize_gpu<uchar3>, resize_gpu<uchar4>},
+        {resize_gpu<schar>, resize_gpu<char2>, resize_gpu<char3>, resize_gpu<char4>},
+        {resize_gpu<ushort>, resize_gpu<ushort2>, resize_gpu<ushort3>, resize_gpu<ushort4>},
+        {resize_gpu<short>, resize_gpu<short2>, resize_gpu<short3>, resize_gpu<short4>},
+        {resize_gpu<int>, resize_gpu<int2>, resize_gpu<int3>, resize_gpu<int4>},
+        {resize_gpu<float>, resize_gpu<float2>, resize_gpu<float3>, resize_gpu<float4>}
+    };
-    CV_Assert( src.size().area() > 0 );
+    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
+    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
    CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
    if( dsize == Size() )
@@ -294,34 +308,43 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
    dst.create(dsize, src.type());
-    NppiSize srcsz;
-    srcsz.width  = src.cols;
-    srcsz.height = src.rows;
-    NppiRect srcrect;
-    srcrect.x = srcrect.y = 0;
-    srcrect.width  = src.cols;
-    srcrect.height = src.rows;
-    NppiSize dstsz;
-    dstsz.width  = dst.cols;
-    dstsz.height = dst.rows;
    cudaStream_t stream = StreamAccessor::getStream(s);
-    NppStreamHandler h(stream);
+    if ((src.type() == CV_8UC1 || src.type() == CV_8UC4) && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR))
-    if (src.type() == CV_8UC1)
    {
-        nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,
+        static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
+        NppiSize srcsz;
+        srcsz.width  = src.cols;
+        srcsz.height = src.rows;
+        NppiRect srcrect;
+        srcrect.x = srcrect.y = 0;
+        srcrect.width  = src.cols;
+        srcrect.height = src.rows;
+        NppiSize dstsz;
+        dstsz.width  = dst.cols;
+        dstsz.height = dst.rows;
+        NppStreamHandler h(stream);
+        if (src.type() == CV_8UC1)
+        {
+            nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,
+                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
+        }
+        else
+        {
+            nppSafeCall( nppiResize_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,
+                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
+        }
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
    }
    else
    {
-        nppSafeCall( nppiResize_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,
+        callers[src.depth()][src.channels() - 1](src, static_cast<float>(fx), static_cast<float>(fy), dst, interpolation, stream);
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
    }
-    if (stream == 0)
-        cudaSafeCall( cudaDeviceSynchronize() );
 }
 ////////////////////////////////////////////////////////////////////////

--- a/modules/gpu/test/test_imgproc.cpp
+++ b/modules/gpu/test/test_imgproc.cpp
@@ -137,7 +137,7 @@ struct Resize : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int
        size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
-        src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
+        src = cvtest::randomMat(rng, size, type, 0.0, CV_MAT_DEPTH(type) == CV_32F ? 1.0 : 255.0, false);
        cv::resize(src, dst_gold1, cv::Size(), 2.0, 2.0, interpolation);
        cv::resize(src, dst_gold2, cv::Size(), 0.5, 0.5, interpolation);
@@ -146,7 +146,7 @@ struct Resize : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int
 TEST_P(Resize, Accuracy)
 {
-    static const char* interpolations[] = {"INTER_NEAREST", "INTER_LINEAR"};
+    static const char* interpolations[] = {"INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC"};
    const char* interpolationStr = interpolations[interpolation];
    PRINT_PARAM(devInfo);
@@ -169,14 +169,14 @@ TEST_P(Resize, Accuracy)
        gpuRes2.download(dst2);
    );
-    EXPECT_MAT_SIMILAR(dst_gold1, dst1, 0.5);
+    EXPECT_MAT_SIMILAR(dst_gold1, dst1, 0.2);
-    EXPECT_MAT_SIMILAR(dst_gold2, dst2, 0.5);
+    EXPECT_MAT_SIMILAR(dst_gold2, dst2, 0.2);
 }
 INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
                        testing::ValuesIn(devices()), 
-                        testing::Values(CV_8UC1, CV_8UC4), 
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), // 1-, 3- and 4-channel images for both 8-bit and float depths (CV_8UC1 was listed twice, CV_8UC3 was missing)
-                        testing::Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR)));
+                        testing::Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR, (int)cv::INTER_CUBIC))); // indices must stay in sync with the interpolations[] name table in TEST_P(Resize, Accuracy)
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // remap

--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -592,9 +592,81 @@ TEST(resize)
    for (int size = 1000; size <= 3000; size += 1000)
    {
-        SUBTEST << "size " << size;
+        SUBTEST << "size " << size << ", 8UC1, up";
+        gen(src, size, size, CV_8U, 0, 256);
+        dst.create(size * 2, size * 2, CV_8U);
+        CPU_ON;
+        resize(src, dst, dst.size());
+        CPU_OFF;
+        d_src = src;
+        d_dst.create(size * 2, size * 2, CV_8U);
+        GPU_ON;
+        gpu::resize(d_src, d_dst, d_dst.size());
+        GPU_OFF;
+    }
+    for (int size = 1000; size <= 3000; size += 1000)
+    {
+        SUBTEST << "size " << size << ", 8UC1, down";
        gen(src, size, size, CV_8U, 0, 256);
+        dst.create(size / 2, size / 2, CV_8U);
+        CPU_ON;
+        resize(src, dst, dst.size());
+        CPU_OFF;
+        d_src = src;
+        d_dst.create(size / 2, size / 2, CV_8U);
+        GPU_ON;
+        gpu::resize(d_src, d_dst, d_dst.size());
+        GPU_OFF;
+    }
+    // Benchmark: 2x upscale of a size x size 8UC3 image, CPU resize vs gpu::resize.
+    for (int size = 1000; size <= 3000; size += 1000)
+    {
+        SUBTEST << "size " << size << ", 8UC3, up";
+        gen(src, size, size, CV_8UC3, 0, 256);
+        // Pre-allocate with the source type: a CV_8U buffer does not match the
+        // 3-channel result, so resize() would reallocate it inside the timed region.
+        dst.create(size * 2, size * 2, CV_8UC3);
+        CPU_ON;
+        resize(src, dst, dst.size());
+        CPU_OFF;
+        d_src = src;
+        // Same reasoning on the device side: gpu::resize calls dst.create(dsize, src.type()).
+        d_dst.create(size * 2, size * 2, CV_8UC3);
+        GPU_ON;
+        gpu::resize(d_src, d_dst, d_dst.size());
+        GPU_OFF;
+    }
+    // Benchmark: 2x downscale of a size x size 8UC3 image, CPU resize vs gpu::resize.
+    for (int size = 1000; size <= 3000; size += 1000)
+    {
+        SUBTEST << "size " << size << ", 8UC3, down";
+        gen(src, size, size, CV_8UC3, 0, 256);
+        // Pre-allocate with the source type: a CV_8U buffer does not match the
+        // 3-channel result, so resize() would reallocate it inside the timed region.
+        dst.create(size / 2, size / 2, CV_8UC3);
+        CPU_ON;
+        resize(src, dst, dst.size());
+        CPU_OFF;
+        d_src = src;
+        // Same reasoning on the device side: gpu::resize calls dst.create(dsize, src.type()).
+        d_dst.create(size / 2, size / 2, CV_8UC3);
+        GPU_ON;
+        gpu::resize(d_src, d_dst, d_dst.size());
+        GPU_OFF;
+    }
+    for (int size = 1000; size <= 3000; size += 1000)
+    {
+        SUBTEST << "size " << size << ", 8UC4, up";
+        gen(src, size, size, CV_8UC4, 0, 256);
        dst.create(size * 2, size * 2, CV_8U);
        CPU_ON;
@@ -604,6 +676,60 @@ TEST(resize)
        d_src = src;
        d_dst.create(size * 2, size * 2, CV_8U);
+        GPU_ON;
+        gpu::resize(d_src, d_dst, d_dst.size());
+        GPU_OFF;
+    }
+    // Benchmark: 2x downscale of a size x size 8UC4 image, CPU resize vs gpu::resize.
+    for (int size = 1000; size <= 3000; size += 1000)
+    {
+        SUBTEST << "size " << size << ", 8UC4, down";
+        gen(src, size, size, CV_8UC4, 0, 256);
+        // Pre-allocate with the source type: a CV_8U buffer does not match the
+        // 4-channel result, so resize() would reallocate it inside the timed region.
+        dst.create(size / 2, size / 2, CV_8UC4);
+        CPU_ON;
+        resize(src, dst, dst.size());
+        CPU_OFF;
+        d_src = src;
+        // Same reasoning on the device side: gpu::resize calls dst.create(dsize, src.type()).
+        d_dst.create(size / 2, size / 2, CV_8UC4);
+        GPU_ON;
+        gpu::resize(d_src, d_dst, d_dst.size());
+        GPU_OFF;
+    }
+    // Benchmark: 2x upscale of a size x size 32FC1 image, CPU resize vs gpu::resize.
+    for (int size = 1000; size <= 3000; size += 1000)
+    {
+        SUBTEST << "size " << size << ", 32FC1, up";
+        gen(src, size, size, CV_32FC1, 0, 256);
+        // Pre-allocate with the source type: a CV_8U buffer does not match the
+        // float result, so resize() would reallocate it inside the timed region.
+        dst.create(size * 2, size * 2, CV_32FC1);
+        CPU_ON;
+        resize(src, dst, dst.size());
+        CPU_OFF;
+        d_src = src;
+        // Same reasoning on the device side: gpu::resize calls dst.create(dsize, src.type()).
+        d_dst.create(size * 2, size * 2, CV_32FC1);
+        GPU_ON;
+        gpu::resize(d_src, d_dst, d_dst.size());
+        GPU_OFF;
+    }
+    for (int size = 1000; size <= 3000; size += 1000)
+    {
+        SUBTEST << "size " << size << ", 32FC1, down";
+        gen(src, size, size, CV_32FC1, 0, 256);
+        dst.create(size / 2, size / 2, CV_8U);
+        CPU_ON;
+        resize(src, dst, dst.size());
+        CPU_OFF;
+        d_src = src;
+        d_dst.create(size / 2, size / 2, CV_8U);
        GPU_ON;
        gpu::resize(d_src, d_dst, d_dst.size());
        GPU_OFF;