Optimized memory requirements for gpu::minMax's buffers; added support for compute capability 1.0

48183f10 · Alexey Spizhevoy · c4654620 · 48183f10 · 48183f10 · 48183f10
Commit 48183f10 authored Nov 25, 2010 by Alexey Spizhevoy
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 89 additions and 46 deletions

arithm.cpp modules/gpu/src/arithm.cpp +65 -41

mathfunc.cu modules/gpu/src/cuda/mathfunc.cu +0 -0

arithm.cpp tests/gpu/src/arithm.cpp +24 -5

No files found.
--- a/modules/gpu/src/arithm.cpp
+++ b/modules/gpu/src/arithm.cpp
@@ -490,44 +490,64 @@ Scalar cv::gpu::sum(const GpuMat& src)
 ////////////////////////////////////////////////////////////////////////
 // minMax
-namespace cv { namespace gpu { namespace mathfunc {
+namespace cv { namespace gpu { namespace mathfunc { namespace minmax {
+    // Forward declarations only; the definitions are not in this file
+    // (presumably in cuda/mathfunc.cu - TODO confirm).
+    // Reports, through the reference out-parameters, the column/row dimensions of the
+    // two scratch buffers that the callers below need for the given element size.
+    void get_buf_size_required(int elem_size, int& b1cols, int& b1rows, 
+                               int& b2cols, int& b2rows);
+    // Computes the min/max of src into *minval / *maxval; minval_buf and maxval_buf
+    // are caller-provided scratch buffers.
    template <typename T> 
-    void min_max_caller(const DevMem2D src, double* minval, double* maxval);
+    void min_max_caller(const DevMem2D src, double* minval, double* maxval, 
-}}}
+                        unsigned char* minval_buf, unsigned char* maxval_buf);
+    // Same interface as min_max_caller; cv::gpu::minMax selects this variant for
+    // devices that do not qualify for min_max_caller.
+    template <typename T> 
+    void min_max_caller_2steps(const DevMem2D src, double* minval, double* maxval, 
+                               unsigned char* minval_buf, unsigned char* maxval_buf);
+}}}}
 void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)
 {
-    GpuMat src_ = src.reshape(1);
+    // Finds the minimum and maximum over all elements of src; channels are flattened
+    // with reshape(1). Either output pointer may be null when that value is not needed.
+    // Raises CV_StsBadArg for element types not handled by the selected path.
+    using namespace mathfunc::minmax;
+    // The callers take plain pointers, so substitute local dummies for null outputs.
+    double minVal_;
+    if (!minVal) minVal = &minVal_;
    double maxVal_;
-    if (!maxVal) 
+    if (!maxVal) maxVal = &maxVal_;
-        maxVal = &maxVal_;
+    GpuMat src_ = src.reshape(1);
+    // Allocate GPU buffers
+    // NOTE(review): sized from src.elemSize() (all channels) although the callers see the
+    // single-channel src_; presumably an over-estimate for multi-channel input - confirm.
+    Size b1size, b2size;
+    get_buf_size_required(src.elemSize(), b1size.width, b1size.height, b2size.width, b2size.height);
+    GpuMat b1(b1size, CV_8U), b2(b2size, CV_8U);
+    int major, minor;
+    getComputeCapability(getDevice(), major, minor);
-    switch (src_.type())
+    // Compare (major, minor) as a version pair: capability 1.1 or newer, including
+    // x.0 devices with major > 1, uses min_max_caller; only 1.0 uses the 2-step variant.
+    if (major > 1 || (major == 1 && minor >= 1))
    {
-    case CV_8U:
+        switch (src_.type())
-        mathfunc::min_max_caller<unsigned char>(src_, minVal, maxVal);
+        {
-        break;
+        case CV_8U: min_max_caller<unsigned char>(src_, minVal, maxVal, b1.data, b2.data); break;
-    case CV_8S:
+        case CV_8S: min_max_caller<signed char>(src_, minVal, maxVal, b1.data, b2.data); break;
-        mathfunc::min_max_caller<signed char>(src_, minVal, maxVal);
+        case CV_16U: min_max_caller<unsigned short>(src_, minVal, maxVal, b1.data, b2.data); break;
-        break;
+        case CV_16S: min_max_caller<signed short>(src_, minVal, maxVal, b1.data, b2.data); break;
-    case CV_16U:
+        case CV_32S: min_max_caller<int>(src_, minVal, maxVal, b1.data, b2.data); break;
-        mathfunc::min_max_caller<unsigned short>(src_, minVal, maxVal);
+        case CV_32F: min_max_caller<float>(src_, minVal, maxVal, b1.data, b2.data); break;
-        break;
+        case CV_64F: min_max_caller<double>(src_, minVal, maxVal, b1.data, b2.data); break;
-    case CV_16S:
+        default: CV_Error(CV_StsBadArg, "Unsupported type");
-        mathfunc::min_max_caller<signed short>(src_, minVal, maxVal);
+        }
-        break;
+    }
-    case CV_32S:
+    else
-        mathfunc::min_max_caller<int>(src_, minVal, maxVal);
+    {
-        break;
+        // No CV_64F case on this path: double input reaches the default branch and errors out.
+        switch (src_.type())
-    case CV_32F:
+        {
-        mathfunc::min_max_caller<float>(src_, minVal, maxVal);
+        case CV_8U: min_max_caller_2steps<unsigned char>(src_, minVal, maxVal, b1.data, b2.data); break;
-        break;
+        case CV_8S: min_max_caller_2steps<signed char>(src_, minVal, maxVal, b1.data, b2.data); break;
-    case CV_64F:
+        case CV_16U: min_max_caller_2steps<unsigned short>(src_, minVal, maxVal, b1.data, b2.data); break;
-        mathfunc::min_max_caller<double>(src_, minVal, maxVal);
+        case CV_16S: min_max_caller_2steps<signed short>(src_, minVal, maxVal, b1.data, b2.data); break;
-        break;
+        case CV_32S: min_max_caller_2steps<int>(src_, minVal, maxVal, b1.data, b2.data); break;
-    default:
+        case CV_32F: min_max_caller_2steps<float>(src_, minVal, maxVal, b1.data, b2.data); break;
-        CV_Error(CV_StsBadArg, "Unsupported type");
+        default: CV_Error(CV_StsBadArg, "Unsupported type");
+        }
    }
 }
@@ -535,14 +555,18 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)
 ////////////////////////////////////////////////////////////////////////
 // minMaxLoc
-namespace cv { namespace gpu { namespace mathfunc {
+namespace cv { namespace gpu { namespace mathfunc { namespace minmaxloc {
+    // Forward declaration only; the definition is not in this file
+    // (presumably in cuda/mathfunc.cu - TODO confirm).
+    // Writes the min/max values of src to *minval / *maxval and their coordinates
+    // to the minloc*/maxloc* out-pointers.
    template <typename T> 
-    void min_max_loc_caller(const DevMem2D src, double* minval, double* maxval, int* minlocx, int* minlocy,
+    void min_max_loc_caller(const DevMem2D src, double* minval, double* maxval, 
-                                                                                int* maxlocx, int* maxlocy);
+                            int* minlocx, int* minlocy, int* maxlocx, int* maxlocy);
-}}}
+}}}}
 void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc)
 {
+    using namespace mathfunc::minmaxloc;
    CV_Assert(src.channels() == 1);
    double maxVal_;
@@ -557,25 +581,25 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
    switch (src.type())
    {
    case CV_8U:
-        mathfunc::min_max_loc_caller<unsigned char>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
+        min_max_loc_caller<unsigned char>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
        break;
    case CV_8S:
-        mathfunc::min_max_loc_caller<signed char>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
+        min_max_loc_caller<signed char>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
        break;
    case CV_16U:
-        mathfunc::min_max_loc_caller<unsigned short>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
+        min_max_loc_caller<unsigned short>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
        break;
    case CV_16S:
-        mathfunc::min_max_loc_caller<signed short>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
+        min_max_loc_caller<signed short>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
        break;
    case CV_32S:
-        mathfunc::min_max_loc_caller<int>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
+        min_max_loc_caller<int>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
        break;
    case CV_32F:
-        mathfunc::min_max_loc_caller<float>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
+        min_max_loc_caller<float>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
        break;
    case CV_64F:
-        mathfunc::min_max_loc_caller<double>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
+        min_max_loc_caller<double>(src, minVal, maxVal, &minLoc->x, &minLoc->y, &maxLoc->x, &maxLoc->y);
        break;
    default:
        CV_Error(CV_StsBadArg, "Unsupported type");

--- a/modules/gpu/src/cuda/mathfunc.cu
+++ b/modules/gpu/src/cuda/mathfunc.cu
--- a/tests/gpu/src/arithm.cpp
+++ b/tests/gpu/src/arithm.cpp
@@ -678,8 +678,14 @@ struct CV_GpuMinMaxTest: public CvTest
    void run(int)
    {
+        int depth_end;
+        int major, minor;
+        cv::gpu::getComputeCapability(getDevice(), major, minor);
+        minor = 0;
+        if (minor >= 1) depth_end = CV_64F; else depth_end = CV_32F;
        for (int cn = 1; cn <= 4; ++cn)
-            for (int depth = CV_8U; depth <= CV_64F; ++depth)
+            for (int depth = CV_8U; depth <= depth_end; ++depth)
            {
                int rows = 1, cols = 3;
                test(rows, cols, cn, depth);
@@ -703,10 +709,11 @@ struct CV_GpuMinMaxTest: public CvTest
        }
        double minVal, maxVal;
+        cv::Point minLoc, maxLoc;
        Mat src_ = src.reshape(1);
        if (depth != CV_8S)
        {
-            cv::Point minLoc, maxLoc;
            cv::minMaxLoc(src_, &minVal, &maxVal, &minLoc, &maxLoc);
        }
        else 
@@ -727,8 +734,16 @@ struct CV_GpuMinMaxTest: public CvTest
        cv::Point minLoc_, maxLoc_;        
        cv::gpu::minMax(cv::gpu::GpuMat(src), &minVal_, &maxVal_);
-        CHECK(minVal == minVal_, CvTS::FAIL_INVALID_OUTPUT);
+        if (abs(minVal - minVal_) > 1e-3f)
-        CHECK(maxVal == maxVal_, CvTS::FAIL_INVALID_OUTPUT);
+        {
+            ts->printf(CvTS::CONSOLE, "\nfail: minVal=%f minVal_=%f rows=%d cols=%d depth=%d cn=%d\n", minVal, minVal_, rows, cols, depth, cn);
+            ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
+        }
+        if (abs(maxVal - maxVal_) > 1e-3f)
+        {
+            ts->printf(CvTS::CONSOLE, "\nfail: maxVal=%f maxVal_=%f rows=%d cols=%d depth=%d cn=%d\n", maxVal, maxVal_, rows, cols, depth, cn);
+            ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
+        }
    }  
 };
@@ -742,7 +757,11 @@ struct CV_GpuMinMaxLocTest: public CvTest
    void run(int)
    {
-        for (int depth = CV_8U; depth <= CV_64F; ++depth)
+        int depth_end;
+        int major, minor;
+        cv::gpu::getComputeCapability(getDevice(), major, minor);
+        if (minor >= 1) depth_end = CV_64F; else depth_end = CV_32F;
+        for (int depth = CV_8U; depth <= depth_end; ++depth)
        {
            int rows = 1, cols = 3;
            test(rows, cols, depth);