Commit cbb132cc authored by Alexey Spizhevoy

added ensureSizeIsEnough into gpu module, updated reduction methods

parent f3a26568
@@ -12,4 +12,24 @@ Creates continuous matrix in GPU memory.
 \cvarg{m}{Destination matrix. Will be only reshaped if it has proper type and area (\texttt{rows} $\times$ \texttt{cols}).}
 \end{description}
+Also the following wrappers are available:
+\cvdefCpp{GpuMat createContinuous(int rows, int cols, int type);\newline
+void createContinuous(Size size, int type, GpuMat\& m);\newline
+GpuMat createContinuous(Size size, int type);}
 A matrix is called continuous if its elements are stored continuously, i.e. without gaps at the end of each row.
+\cvCppFunc{gpu::ensureSizeIsEnough}
+Ensures that the matrix size is big enough and the matrix has the proper type. The function doesn't reallocate memory if the matrix already has the proper attributes.
+\cvdefCpp{void ensureSizeIsEnough(int rows, int cols, int type, GpuMat\& m);}
+\begin{description}
+\cvarg{rows}{Minimum desired number of rows.}
+\cvarg{cols}{Minimum desired number of columns.}
+\cvarg{type}{Desired matrix type.}
+\cvarg{m}{Destination matrix.}
+\end{description}
+Also the following wrapper is available:
+\cvdefCpp{void ensureSizeIsEnough(Size size, int type, GpuMat\& m);}
\ No newline at end of file
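
For reference, a minimal usage sketch of the two functions documented above, assuming the 2.x-era cv::gpu API and header layout; the sizes and the function name bufferDemo are illustrative only:

#include <opencv2/gpu/gpu.hpp>

void bufferDemo()
{
    using namespace cv;
    using namespace cv::gpu;

    // A continuous 480x640 8-bit matrix: no gaps at the end of each row,
    // so step equals cols * elemSize().
    GpuMat cont = createContinuous(480, 640, CV_8U);

    // Grow a work buffer on demand: the first call allocates a 200x200
    // matrix, the second returns immediately because 100x100 already fits.
    // Note that buf keeps its 200x200 size; the function never shrinks it.
    GpuMat buf;
    ensureSizeIsEnough(200, 200, CV_8U, buf);
    ensureSizeIsEnough(100, 100, CV_8U, buf);
}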
@@ -252,9 +252,13 @@ namespace cv
 #include "GpuMat_BetaDeprecated.hpp"
 #endif
-//! creates continuous GPU matrix
+//! Creates continuous GPU matrix
 CV_EXPORTS void createContinuous(int rows, int cols, int type, GpuMat& m);
+//! Ensures that the size of the given matrix is not less than (rows, cols)
+//! and that the matrix type matches the specified one
+CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);
 //////////////////////////////// CudaMem ////////////////////////////////
 // CudaMem is a limited cv::Mat with page-locked memory allocation.
 // Page-locked memory is only needed for async and faster copying to the GPU.
...
@@ -364,6 +364,10 @@ inline GpuMat createContinuous(Size size, int type)
     return m;
 }
+inline void ensureSizeIsEnough(Size size, int type, GpuMat& m)
+{
+    ensureSizeIsEnough(size.height, size.width, type, m);
+}
 ///////////////////////////////////////////////////////////////////////
@@ -401,6 +405,7 @@ inline CudaMem::CudaMem(const Mat& m, int _alloc_type) : flags(0), rows(0), cols
 inline CudaMem::~CudaMem()
 {
     release();
 }
 inline CudaMem& CudaMem::operator = (const CudaMem& m)
...
@@ -551,6 +551,13 @@ void cv::gpu::createContinuous(int rows, int cols, int type, GpuMat& m)
     m = m.reshape(0, rows);
 }
+void cv::gpu::ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m)
+{
+    if (m.type() == type && m.rows >= rows && m.cols >= cols)
+        return;
+    m.create(rows, cols, type);
+}
 ///////////////////////////////////////////////////////////////////////
 //////////////////////////////// CudaMem //////////////////////////////
...
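
The rationale for the new function: GpuMat::create reallocates whenever the requested dimensions differ from the current ones, while ensureSizeIsEnough reuses any existing buffer that is at least as large and of the matching type. Below is a sketch of the caching pattern this enables in the reduction methods updated next; processFrames is a hypothetical helper, while sum(src, buf) is the buffered overload this commit changes:

#include <opencv2/gpu/gpu.hpp>
#include <vector>

cv::Scalar processFrames(const std::vector<cv::gpu::GpuMat>& frames)
{
    cv::Scalar total(0, 0, 0, 0);
    cv::gpu::GpuMat buf; // allocated on the first call, then reused

    for (size_t i = 0; i < frames.size(); ++i)
    {
        // Same-sized frames mean ensureSizeIsEnough inside sum() is a
        // no-op after the first iteration: no per-frame GPU reallocation.
        cv::Scalar s = cv::gpu::sum(frames[i], buf);
        for (int c = 0; c < 4; ++c)
            total[c] += s[c];
    }
    return total;
}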
@@ -159,7 +159,7 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
     Size bufSize;
     sum::get_buf_size_required(src.cols, src.rows, src.channels(), bufSize.width, bufSize.height);
-    buf.create(bufSize, CV_8U);
+    ensureSizeIsEnough(bufSize, CV_8U, buf);
     Caller caller = callers[hasAtomicsSupport(getDevice())][src.depth()];
     if (!caller) CV_Error(CV_StsBadArg, "sum: unsupported type");
@@ -192,7 +192,7 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
     Size bufSize;
     sum::get_buf_size_required(src.cols, src.rows, src.channels(), bufSize.width, bufSize.height);
-    buf.create(bufSize, CV_8U);
+    ensureSizeIsEnough(bufSize, CV_8U, buf);
     Caller caller = callers[hasAtomicsSupport(getDevice())][src.depth()];
     if (!caller) CV_Error(CV_StsBadArg, "sqrSum: unsupported type");
@@ -265,7 +265,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
     Size bufSize;
     get_buf_size_required(src.cols, src.rows, src.elemSize(), bufSize.width, bufSize.height);
-    buf.create(bufSize, CV_8U);
+    ensureSizeIsEnough(bufSize, CV_8U, buf);
     if (mask.empty())
     {
@@ -292,31 +292,31 @@ namespace cv { namespace gpu { namespace mathfunc { namespace minmaxloc {
     template <typename T>
     void min_max_loc_caller(const DevMem2D src, double* minval, double* maxval,
-                            int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf);
+                            int minloc[2], int maxloc[2], PtrStep valBuf, PtrStep locBuf);
     template <typename T>
     void min_max_loc_mask_caller(const DevMem2D src, const PtrStep mask, double* minval, double* maxval,
-                                 int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf);
+                                 int minloc[2], int maxloc[2], PtrStep valBuf, PtrStep locBuf);
     template <typename T>
     void min_max_loc_multipass_caller(const DevMem2D src, double* minval, double* maxval,
-                                      int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf);
+                                      int minloc[2], int maxloc[2], PtrStep valBuf, PtrStep locBuf);
     template <typename T>
     void min_max_loc_mask_multipass_caller(const DevMem2D src, const PtrStep mask, double* minval, double* maxval,
-                                           int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf);
+                                           int minloc[2], int maxloc[2], PtrStep valBuf, PtrStep locBuf);
 }}}}
 void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat& mask)
 {
-    GpuMat valbuf, locbuf;
-    minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valbuf, locbuf);
+    GpuMat valBuf, locBuf;
+    minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf);
 }
 void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
-                        const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf)
+                        const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
 {
     using namespace mathfunc::minmaxloc;
@@ -348,23 +348,23 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
     int minLoc_[2];
     int maxLoc_[2];
-    Size valbuf_size, locbuf_size;
-    get_buf_size_required(src.cols, src.rows, src.elemSize(), valbuf_size.width,
-                          valbuf_size.height, locbuf_size.width, locbuf_size.height);
-    valbuf.create(valbuf_size, CV_8U);
-    locbuf.create(locbuf_size, CV_8U);
+    Size valBufSize, locBufSize;
+    get_buf_size_required(src.cols, src.rows, src.elemSize(), valBufSize.width,
+                          valBufSize.height, locBufSize.width, locBufSize.height);
+    ensureSizeIsEnough(valBufSize, CV_8U, valBuf);
+    ensureSizeIsEnough(locBufSize, CV_8U, locBuf);
     if (mask.empty())
     {
         Caller caller = callers[hasAtomicsSupport(getDevice())][src.type()];
         if (!caller) CV_Error(CV_StsBadArg, "minMaxLoc: unsupported type");
-        caller(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf);
+        caller(src, minVal, maxVal, minLoc_, maxLoc_, valBuf, locBuf);
     }
     else
     {
         MaskedCaller caller = masked_callers[hasAtomicsSupport(getDevice())][src.type()];
         if (!caller) CV_Error(CV_StsBadArg, "minMaxLoc: unsupported type");
-        caller(src, mask, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf);
+        caller(src, mask, minVal, maxVal, minLoc_, maxLoc_, valBuf, locBuf);
     }
     if (minLoc) { minLoc->x = minLoc_[0]; minLoc->y = minLoc_[1]; }
@@ -411,9 +411,9 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
     CV_Assert(src.channels() == 1);
     CV_Assert(src.type() != CV_64F || hasNativeDoubleSupport(getDevice()));
-    Size buf_size;
-    get_buf_size_required(src.cols, src.rows, buf_size.width, buf_size.height);
-    buf.create(buf_size, CV_8U);
+    Size bufSize;
+    get_buf_size_required(src.cols, src.rows, bufSize.width, bufSize.height);
+    ensureSizeIsEnough(bufSize, CV_8U, buf);
     Caller caller = callers[hasAtomicsSupport(getDevice())][src.type()];
     if (!caller) CV_Error(CV_StsBadArg, "countNonZero: unsupported type");
...
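
To close, an illustrative call of the buffered minMaxLoc overload shown above; the empty mask and the function name locateExtrema are assumptions, and valBuf/locBuf are sized on first use and reused on subsequent calls:

#include <opencv2/gpu/gpu.hpp>

void locateExtrema(const cv::gpu::GpuMat& src)
{
    double minVal = 0, maxVal = 0;
    cv::Point minLoc, maxLoc;
    cv::gpu::GpuMat valBuf, locBuf;

    // Unmasked search; pass a non-empty GpuMat mask to restrict the search.
    cv::gpu::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc,
                       cv::gpu::GpuMat(), valBuf, locBuf);
}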