implemented gpu::reduce

8b23c792 · Vladislav Vinogradov · ce35a6d8 · 8b23c792 · 8b23c792 · 8b23c792
Commit 8b23c792 authored Sep 22, 2011 by Vladislav Vinogradov
4 changed files
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -860,6 +860,9 @@ namespace cv
        //! counts non-zero array elements
        CV_EXPORTS int countNonZero(const GpuMat& src, GpuMat& buf);
+        //! reduces a matrix to a single-row vector: dim == 0 gives one element per column, dim == 1 one element per row; reduceOp is CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX or CV_REDUCE_MIN; dtype < 0 keeps the source depth
+        CV_EXPORTS void reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null());
        ///////////////////////////// Calibration 3D //////////////////////////////////

--- a/modules/gpu/src/cuda/matrix_reductions.cu
+++ b/modules/gpu/src/cuda/matrix_reductions.cu
--- a/modules/gpu/src/matrix_reductions.cpp
+++ b/modules/gpu/src/matrix_reductions.cpp
@@ -63,6 +63,7 @@ void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*, const G
 void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*, const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
 int cv::gpu::countNonZero(const GpuMat&) { throw_nogpu(); return 0; }
 int cv::gpu::countNonZero(const GpuMat&, GpuMat&) { throw_nogpu(); return 0; }
+void cv::gpu::reduce(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_nogpu(); } // stub: only calls throw_nogpu() (presumably the build-without-CUDA branch - confirm against the enclosing #if)
 #else
@@ -598,4 +599,150 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
    return caller(src, buf);
 }
+//////////////////////////////////////////////////////////////////////////////
+// reduce
+namespace cv { namespace gpu { namespace mathfunc { // forward declarations only; definitions are presumably in cuda/matrix_reductions.cu - confirm
+    template <typename T, typename S, typename D> void reduceRows_gpu(const DevMem2D& src, const DevMem2D& dst, int reduceOp, cudaStream_t stream); // used for dim == 0 on single-channel views of src and dst
+    template <typename T, typename S, typename D> void reduceCols_gpu(const DevMem2D& src, int cn, const DevMem2D& dst, int reduceOp, cudaStream_t stream); // used for dim == 1; cn receives src.channels()
+}}}
+// Reduces a GPU matrix to a single-row vector.
+//
+//   src      - input matrix; depth must not exceed CV_32F and it may have at most 4 channels.
+//   dst      - output; (re)created as 1 x src.cols when dim == 0 and as 1 x src.rows when dim == 1,
+//              with src.channels() channels and the requested depth.
+//   dim      - 0 dispatches to reduceRows_gpu, 1 dispatches to reduceCols_gpu.
+//   reduceOp - CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX or CV_REDUCE_MIN.
+//   dtype    - requested output depth; a negative value selects the source depth. Only the depth
+//              bits are used, so a full type such as CV_32FC3 is accepted as well.
+//   stream   - stream whose cudaStream_t handle is forwarded to the launcher.
+//
+// Raises CV_StsUnsupportedFormat when the (source depth, destination depth) pair has no entry
+// in the dispatch tables below (null entries mark combinations that are not instantiated).
+void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int dtype, Stream& stream)
+{
+    using namespace cv::gpu::mathfunc;
+    // Strip any channel bits from dtype: the channel count always comes from src.
+    const int ddepth = dtype < 0 ? src.depth() : CV_MAT_DEPTH(dtype);
+    // The tables below are 6 x 6 (CV_8U .. CV_32F), so both depths must stay within that range.
+    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4 && ddepth <= CV_32F);
+    CV_Assert(dim == 0 || dim == 1);
+    CV_Assert(reduceOp == CV_REDUCE_SUM || reduceOp == CV_REDUCE_AVG || reduceOp == CV_REDUCE_MAX || reduceOp == CV_REDUCE_MIN);
+    dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKETYPE(ddepth, src.channels()));
+    if (dim == 0)
+    {
+        // Indexed as callers[source depth][destination depth].
+        typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, int reduceOp, cudaStream_t stream);
+        static const caller_t callers[6][6] = 
+        {
+            {
+                reduceRows_gpu<unsigned char, int, unsigned char>,
+                0/*reduceRows_gpu<unsigned char, int, signed char>*/,
+                0/*reduceRows_gpu<unsigned char, int, unsigned short>*/,
+                0/*reduceRows_gpu<unsigned char, int, short>*/,
+                reduceRows_gpu<unsigned char, int, int>,
+                reduceRows_gpu<unsigned char, int, float>
+            },
+            {
+                0/*reduceRows_gpu<signed char, int, unsigned char>*/,
+                0/*reduceRows_gpu<signed char, int, signed char>*/,
+                0/*reduceRows_gpu<signed char, int, unsigned short>*/,
+                0/*reduceRows_gpu<signed char, int, short>*/,
+                0/*reduceRows_gpu<signed char, int, int>*/,
+                0/*reduceRows_gpu<signed char, int, float>*/
+            },
+            {
+                0/*reduceRows_gpu<unsigned short, int, unsigned char>*/,
+                0/*reduceRows_gpu<unsigned short, int, signed char>*/,
+                reduceRows_gpu<unsigned short, int, unsigned short>,
+                0/*reduceRows_gpu<unsigned short, int, short>*/,
+                reduceRows_gpu<unsigned short, int, int>,
+                reduceRows_gpu<unsigned short, int, float>
+            },
+            {
+                0/*reduceRows_gpu<short, int, unsigned char>*/,
+                0/*reduceRows_gpu<short, int, signed char>*/,
+                0/*reduceRows_gpu<short, int, unsigned short>*/,
+                reduceRows_gpu<short, int, short>,
+                reduceRows_gpu<short, int, int>,
+                reduceRows_gpu<short, int, float>
+            },
+            {
+                0/*reduceRows_gpu<int, int, unsigned char>*/,
+                0/*reduceRows_gpu<int, int, signed char>*/,
+                0/*reduceRows_gpu<int, int, unsigned short>*/,
+                0/*reduceRows_gpu<int, int, short>*/,
+                reduceRows_gpu<int, int, int>,
+                reduceRows_gpu<int, int, float>
+            },
+            {
+                0/*reduceRows_gpu<float, float, unsigned char>*/,
+                0/*reduceRows_gpu<float, float, signed char>*/,
+                0/*reduceRows_gpu<float, float, unsigned short>*/,
+                0/*reduceRows_gpu<float, float, short>*/,
+                0/*reduceRows_gpu<float, float, int>*/,
+                reduceRows_gpu<float, float, float>
+            }
+        };
+        const caller_t func = callers[src.depth()][dst.depth()];
+        if (!func)
+            CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of input and output array formats");
+        // Both matrices are passed as single-channel views.
+        func(src.reshape(1), dst.reshape(1), reduceOp, StreamAccessor::getStream(stream));
+    }
+    else
+    {
+        // Indexed as callers[source depth][destination depth].
+        typedef void (*caller_t)(const DevMem2D& src, int cn, const DevMem2D& dst, int reduceOp, cudaStream_t stream);
+        static const caller_t callers[6][6] = 
+        {
+            {
+                reduceCols_gpu<unsigned char, int, unsigned char>,
+                0/*reduceCols_gpu<unsigned char, int, signed char>*/,
+                0/*reduceCols_gpu<unsigned char, int, unsigned short>*/,
+                0/*reduceCols_gpu<unsigned char, int, short>*/,
+                reduceCols_gpu<unsigned char, int, int>,
+                reduceCols_gpu<unsigned char, int, float>
+            },
+            {
+                0/*reduceCols_gpu<signed char, int, unsigned char>*/,
+                0/*reduceCols_gpu<signed char, int, signed char>*/,
+                0/*reduceCols_gpu<signed char, int, unsigned short>*/,
+                0/*reduceCols_gpu<signed char, int, short>*/,
+                0/*reduceCols_gpu<signed char, int, int>*/,
+                0/*reduceCols_gpu<signed char, int, float>*/
+            },
+            {
+                0/*reduceCols_gpu<unsigned short, int, unsigned char>*/,
+                0/*reduceCols_gpu<unsigned short, int, signed char>*/,
+                reduceCols_gpu<unsigned short, int, unsigned short>,
+                0/*reduceCols_gpu<unsigned short, int, short>*/,
+                reduceCols_gpu<unsigned short, int, int>,
+                reduceCols_gpu<unsigned short, int, float>
+            },
+            {
+                0/*reduceCols_gpu<short, int, unsigned char>*/,
+                0/*reduceCols_gpu<short, int, signed char>*/,
+                0/*reduceCols_gpu<short, int, unsigned short>*/,
+                reduceCols_gpu<short, int, short>,
+                reduceCols_gpu<short, int, int>,
+                reduceCols_gpu<short, int, float>
+            },
+            {
+                0/*reduceCols_gpu<int, int, unsigned char>*/,
+                0/*reduceCols_gpu<int, int, signed char>*/,
+                0/*reduceCols_gpu<int, int, unsigned short>*/,
+                0/*reduceCols_gpu<int, int, short>*/,
+                reduceCols_gpu<int, int, int>,
+                reduceCols_gpu<int, int, float>
+            },
+            {
+                0/*reduceCols_gpu<float, float, unsigned char>*/,
+                0/*reduceCols_gpu<float, float, signed char>*/,
+                0/*reduceCols_gpu<float, float, unsigned short>*/,
+                0/*reduceCols_gpu<float, float, short>*/,
+                0/*reduceCols_gpu<float, float, int>*/,
+                reduceCols_gpu<float, float, float>
+            }
+        };
+        const caller_t func = callers[src.depth()][dst.depth()];
+        if (!func)
+            CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of input and output array formats");
+        // Multi-channel layout is kept; the channel count is passed explicitly.
+        func(src, src.channels(), dst, reduceOp, StreamAccessor::getStream(stream));
+    }
+}
 #endif
--- a/modules/gpu/test/test_arithm.cpp
+++ b/modules/gpu/test/test_arithm.cpp
@@ -1788,4 +1788,76 @@ INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, testing::Combine(
                        testing::ValuesIn(types(CV_8U, CV_64F, 1, 1)),
                        testing::ValuesIn(types(CV_8U, CV_64F, 1, 1))));
+//////////////////////////////////////////////////////////////////////////////
+// reduce
+// Fixture for the cv::gpu::reduce accuracy test.
+// Test parameters, in tuple order: device, source matrix type, dim, reduceOp.
+struct Reduce : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int, int> >
+{
+    cv::gpu::DeviceInfo devInfo;
+    int type;           // source matrix type (depth and channels)
+    int dim;            // dim argument passed to both reduce implementations
+    int reduceOp;       // one of the CV_REDUCE_* values
+    cv::Size size;      // randomly chosen source size
+    cv::Mat src;        // random source matrix
+    cv::Mat dst_gold;   // CPU reference result, laid out as a single row
+    // Selects the device, generates a random source and computes the CPU reference.
+    virtual void SetUp() 
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+        dim = std::tr1::get<2>(GetParam());
+        reduceOp = std::tr1::get<3>(GetParam());
+        cv::gpu::setDevice(devInfo.deviceID());
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        size = cv::Size(rng.uniform(100, 400), rng.uniform(100, 400));
+        src = cvtest::randomMat(rng, size, type, 0.0, 255.0, false);
+        // SUM and AVG are computed into CV_32F; MAX and MIN keep the source depth.
+        cv::reduce(src, dst_gold, dim, reduceOp, reduceOp == CV_REDUCE_SUM || reduceOp == CV_REDUCE_AVG ? CV_32F : CV_MAT_DEPTH(type));
+        // For dim == 1 the CPU result is a column, while the GPU result is compared as a single
+        // row. reshape() re-views the same continuous data as one row without touching the
+        // cv::Mat header fields by hand.
+        if (dim == 1)
+            dst_gold = dst_gold.reshape(dst_gold.channels(), 1);
+    }
+};
+// Runs cv::gpu::reduce on the device and compares the downloaded result with the CPU reference.
+TEST_P(Reduce, Accuracy) 
+{
+    // Indexed directly by the reduceOp value.
+    static const char* reduceOpStrs[] = {"CV_REDUCE_SUM", "CV_REDUCE_AVG", "CV_REDUCE_MAX", "CV_REDUCE_MIN"};
+    const char* reduceOpStr = reduceOpStrs[reduceOp];
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(dim);
+    PRINT_PARAM(reduceOpStr);
+    PRINT_PARAM(size);
+    // SUM and AVG are written to a CV_32F destination; MAX and MIN keep the source depth.
+    const bool toFloat = (reduceOp == CV_REDUCE_SUM) || (reduceOp == CV_REDUCE_AVG);
+    const int dstDepth = toFloat ? CV_32F : CV_MAT_DEPTH(type);
+    cv::Mat dst;
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_src(src);
+        cv::gpu::GpuMat dev_dst;
+        cv::gpu::reduce(dev_src, dev_dst, dim, reduceOp, dstDepth);
+        dev_dst.download(dst);
+    );
+    // CV_32F results get a small tolerance; same-depth MAX/MIN must match exactly.
+    double norm = 0.0;
+    if (toFloat)
+        norm = 1e-1;
+    EXPECT_MAT_NEAR(dst_gold, dst, norm);
+}
+INSTANTIATE_TEST_CASE_P(Arithm, Reduce, testing::Combine( // generators below follow the fixture's tuple order
+                        testing::ValuesIn(devices()), // tuple element 0: device the fixture selects with setDevice
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4), // tuple element 1: source matrix type
+                        testing::Values(0, 1), // tuple element 2: dim
+                        testing::Values((int)CV_REDUCE_SUM, (int)CV_REDUCE_AVG, (int)CV_REDUCE_MAX, (int)CV_REDUCE_MIN))); // tuple element 3: reduceOp
 #endif // HAVE_CUDA