switched to Input/Output Array in reduction operations

8fcef225 · Vladislav Vinogradov · c52d5696 · 8fcef225 · 8fcef225 · 8fcef225
Commit 8fcef225 authored Apr 26, 2013 by Vladislav Vinogradov
11 changed files
--- a/modules/gpu/src/cascadeclassifier.cpp
+++ b/modules/gpu/src/cascadeclassifier.cpp
@@ -458,7 +458,7 @@ public:
                // generate integral for scale
                gpu::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR);
-                gpu::integralBuffered(src, sint, buff);
+                gpu::integral(src, sint, buff);
                // calculate job
                int totalWidth = level.workArea.width / step;

--- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp
+++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
@@ -209,85 +209,150 @@ inline void LUT(InputArray src, InputArray lut, OutputArray dst, Stream& stream)
 CV_EXPORTS void copyMakeBorder(InputArray src, OutputArray dst, int top, int bottom, int left, int right, int borderType,
                               Scalar value = Scalar(), Stream& stream = Stream::Null());
-//! implements generalized matrix product algorithm GEMM from BLAS
-CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha,
-    const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null());
-//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values
-CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0,
-                          int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat());
-CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double a, double b,
-                          int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf);
 //! computes norm of array
 //! supports NORM_INF, NORM_L1, NORM_L2
 //! supports all matrices except 64F
-CV_EXPORTS double norm(const GpuMat& src1, int normType=NORM_L2);
+CV_EXPORTS double norm(InputArray src1, int normType, InputArray mask, GpuMat& buf);
-CV_EXPORTS double norm(const GpuMat& src1, int normType, GpuMat& buf);
+static inline double norm(InputArray src, int normType) // convenience overload: no mask and no caller-supplied buffer
-CV_EXPORTS double norm(const GpuMat& src1, int normType, const GpuMat& mask, GpuMat& buf);
+{
+    GpuMat buf; // temporary passed as the buf argument; destroyed when this call returns
+    return norm(src, normType, GpuMat(), buf); // forwards to the (src, normType, mask, buf) overload with a default-constructed GpuMat as mask
+}
+static inline double norm(InputArray src, int normType, GpuMat& buf) // convenience overload: caller supplies buf, no mask argument
+{
+    return norm(src, normType, GpuMat(), buf); // forwards to the (src, normType, mask, buf) overload with a default-constructed GpuMat as mask
+}
 //! computes norm of the difference between two arrays
 //! supports NORM_INF, NORM_L1, NORM_L2
 //! supports only CV_8UC1 type
-CV_EXPORTS double norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2);
+CV_EXPORTS double norm(InputArray src1, InputArray src2, GpuMat& buf, int normType=NORM_L2);
+static inline double norm(InputArray src1, InputArray src2, int normType=NORM_L2) // two-array convenience overload: no caller-supplied buffer
+{
+    GpuMat buf; // temporary passed as the buf argument; destroyed when this call returns
+    return norm(src1, src2, buf, normType); // forwards to the (src1, src2, buf, normType) overload; note buf precedes normType there
+}
 //! computes sum of array elements
 //! supports only single channel images
-CV_EXPORTS Scalar sum(const GpuMat& src);
+CV_EXPORTS Scalar sum(InputArray src, InputArray mask, GpuMat& buf);
-CV_EXPORTS Scalar sum(const GpuMat& src, GpuMat& buf);
+static inline Scalar sum(InputArray src) // convenience overload: no mask and no caller-supplied buffer
-CV_EXPORTS Scalar sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
+{
+    GpuMat buf; // temporary passed as the buf argument; destroyed when this call returns
+    return sum(src, GpuMat(), buf); // forwards to the (src, mask, buf) overload with a default-constructed GpuMat as mask
+}
+static inline Scalar sum(InputArray src, GpuMat& buf) // convenience overload: caller supplies buf, no mask argument
+{
+    return sum(src, GpuMat(), buf); // forwards to the (src, mask, buf) overload with a default-constructed GpuMat as mask
+}
 //! computes sum of array elements absolute values
 //! supports only single channel images
-CV_EXPORTS Scalar absSum(const GpuMat& src);
+CV_EXPORTS Scalar absSum(InputArray src, InputArray mask, GpuMat& buf);
-CV_EXPORTS Scalar absSum(const GpuMat& src, GpuMat& buf);
+static inline Scalar absSum(InputArray src) // convenience overload: no mask and no caller-supplied buffer
-CV_EXPORTS Scalar absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
+{
+    GpuMat buf; // temporary passed as the buf argument; destroyed when this call returns
+    return absSum(src, GpuMat(), buf); // forwards to the (src, mask, buf) overload with a default-constructed GpuMat as mask
+}
+static inline Scalar absSum(InputArray src, GpuMat& buf) // convenience overload: caller supplies buf, no mask argument
+{
+    return absSum(src, GpuMat(), buf); // forwards to the (src, mask, buf) overload with a default-constructed GpuMat as mask
+}
 //! computes squared sum of array elements
 //! supports only single channel images
-CV_EXPORTS Scalar sqrSum(const GpuMat& src);
+CV_EXPORTS Scalar sqrSum(InputArray src, InputArray mask, GpuMat& buf);
-CV_EXPORTS Scalar sqrSum(const GpuMat& src, GpuMat& buf);
+static inline Scalar sqrSum(InputArray src) // convenience overload: no mask and no caller-supplied buffer
-CV_EXPORTS Scalar sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
+{
+    GpuMat buf; // temporary passed as the buf argument; destroyed when this call returns
+    return sqrSum(src, GpuMat(), buf); // forwards to the (src, mask, buf) overload with a default-constructed GpuMat as mask
+}
+static inline Scalar sqrSum(InputArray src, GpuMat& buf) // convenience overload: caller supplies buf, no mask argument
+{
+    return sqrSum(src, GpuMat(), buf); // forwards to the (src, mask, buf) overload with a default-constructed GpuMat as mask
+}
 //! finds global minimum and maximum array elements and returns their values
-CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal=0, const GpuMat& mask=GpuMat());
+CV_EXPORTS void minMax(InputArray src, double* minVal, double* maxVal, InputArray mask, GpuMat& buf);
-CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf);
+static inline void minMax(InputArray src, double* minVal, double* maxVal=0, InputArray mask=noArray()) // convenience overload: maxVal and mask are optional, no caller-supplied buffer
+{
+    GpuMat buf; // temporary passed as the buf argument; destroyed when this call returns
+    minMax(src, minVal, maxVal, mask, buf); // forwards to the (src, minVal, maxVal, mask, buf) overload
+}
 //! finds global minimum and maximum array elements and returns their values with locations
-CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0,
+CV_EXPORTS void minMaxLoc(InputArray src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
-                          const GpuMat& mask=GpuMat());
+                          InputArray mask, GpuMat& valbuf, GpuMat& locbuf);
-CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
+static inline void minMaxLoc(InputArray src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, // convenience overload: every output after minVal is optional
-                          const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf);
+                             InputArray mask=noArray()) // mask defaults to noArray()
+{
+    GpuMat valBuf, locBuf; // temporaries passed as the valbuf/locbuf arguments; destroyed when this call returns
+    minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf); // forwards to the overload that takes explicit value and location buffers
+}
 //! counts non-zero array elements
-CV_EXPORTS int countNonZero(const GpuMat& src);
+CV_EXPORTS int countNonZero(InputArray src, GpuMat& buf);
-CV_EXPORTS int countNonZero(const GpuMat& src, GpuMat& buf);
+static inline int countNonZero(const GpuMat& src) // convenience overload: no caller-supplied buffer. NOTE(review): takes const GpuMat& while the sibling wrappers take InputArray - confirm this is intentional
+{
+    GpuMat buf; // temporary passed as the buf argument; destroyed when this call returns
+    return countNonZero(src, buf); // forwards to the (src, buf) overload
+}
 //! reduces a matrix to a vector
-CV_EXPORTS void reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null());
+CV_EXPORTS void reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null());
 //! computes mean value and standard deviation of all or selected array elements
 //! supports only CV_8UC1 type
-CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev);
+CV_EXPORTS void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
-//! buffered version
+static inline void meanStdDev(InputArray src, Scalar& mean, Scalar& stddev) // convenience overload: no caller-supplied buffer
-CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
+{
+    GpuMat buf; // temporary passed as the buf argument; destroyed when this call returns
+    meanStdDev(src, mean, stddev, buf); // forwards to the (mtx, mean, stddev, buf) overload
+}
 //! computes the standard deviation of integral images
 //! supports only CV_32SC1 source type and CV_32FC1 sqr type
 //! output will have CV_32FC1 type
-CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null());
+CV_EXPORTS void rectStdDev(InputArray src, InputArray sqr, OutputArray dst, Rect rect, Stream& stream = Stream::Null());
+//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values
+CV_EXPORTS void normalize(InputArray src, OutputArray dst, double alpha, double beta,
+                          int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf);
+static inline void normalize(InputArray src, OutputArray dst, double alpha = 1, double beta = 0, // convenience overload: all parameters after dst have defaults
+                             int norm_type = NORM_L2, int dtype = -1, InputArray mask = noArray()) // no caller-supplied buffers
+{
+    GpuMat norm_buf; // temporary passed as the norm_buf argument; destroyed when this call returns
+    GpuMat cvt_buf; // temporary passed as the cvt_buf argument; destroyed when this call returns
+    normalize(src, dst, alpha, beta, norm_type, dtype, mask, norm_buf, cvt_buf); // forwards to the overload that takes both buffers explicitly
+}
 //! computes the integral image
 //! sum will have CV_32S type, but will contain unsigned int values
 //! supports only CV_8UC1 source type
-CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null());
+CV_EXPORTS void integral(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null());
-//! buffered version
+static inline void integralBuffered(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null()) // now an inline wrapper rather than an exported function. NOTE(review): presumably kept so existing integralBuffered callers still compile - confirm
-CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& stream = Stream::Null());
+{
+    integral(src, sum, buffer, stream); // forwards all arguments unchanged to the buffered integral overload
+}
+static inline void integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null()) // convenience overload: no caller-supplied buffer
+{
+    GpuMat buffer; // temporary destroyed when this call returns. NOTE(review): confirm that is safe when a non-default stream is passed
+    integral(src, sum, buffer, stream); // forwards to the (src, sum, buffer, stream) overload
+}
 //! computes squared integral image
 //! result matrix will have 64F type, but will contain 64U values
 //! supports source images of 8UC1 type only
-CV_EXPORTS void sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null());
+CV_EXPORTS void sqrIntegral(InputArray src, OutputArray sqsum, GpuMat& buf, Stream& stream = Stream::Null());
+static inline void sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null()) // convenience overload: no caller-supplied buffer
+{
+    GpuMat buffer; // temporary destroyed when this call returns. NOTE(review): confirm that is safe when a non-default stream is passed
+    sqrIntegral(src, sqsum, buffer, stream); // forwards to the (src, sqsum, buf, stream) overload
+}
+//! implements generalized matrix product algorithm GEMM from BLAS
+CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha,
+    const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null());
 //! performs per-element multiplication of two full (not packed) Fourier spectrums
 //! supports 32FC2 matrixes only (interleaved format)

--- a/modules/gpuarithm/perf/perf_arithm.cpp
+++ b/modules/gpuarithm/perf/perf_arithm.cpp
@@ -265,7 +265,7 @@ PERF_TEST_P(Sz, Integral,
        cv::gpu::GpuMat dst;
        cv::gpu::GpuMat d_buf;
-        TEST_CYCLE() cv::gpu::integralBuffered(d_src, dst, d_buf);
+        TEST_CYCLE() cv::gpu::integral(d_src, dst, d_buf);
        GPU_SANITY_CHECK(dst);
    }
@@ -293,9 +293,9 @@ PERF_TEST_P(Sz, IntegralSqr,
    if (PERF_RUN_GPU())
    {
        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        cv::gpu::GpuMat dst, buf;
-        TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst);
+        TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst, buf);
        GPU_SANITY_CHECK(dst);
    }

--- a/modules/gpuarithm/perf/perf_reductions.cpp
+++ b/modules/gpuarithm/perf/perf_reductions.cpp
@@ -108,9 +108,10 @@ PERF_TEST_P(Sz_Norm, NormDiff,
    {
        const cv::gpu::GpuMat d_src1(src1);
        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat d_buf;
        double gpu_dst;
-        TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src1, d_src2, normType);
+        TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src1, d_src2, d_buf, normType);
        SANITY_CHECK(gpu_dst);

--- a/modules/gpuarithm/src/arithm.cpp
+++ b/modules/gpuarithm/src/arithm.cpp
@@ -49,11 +49,6 @@ using namespace cv::gpu;
 void cv::gpu::gemm(const GpuMat&, const GpuMat&, double, const GpuMat&, double, GpuMat&, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool, Stream&) { throw_no_cuda(); }
 void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool, Stream&) { throw_no_cuda(); }
@@ -294,116 +289,6 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
 #endif
 }
-////////////////////////////////////////////////////////////////////////
-// integral
-void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s)
-{
-    GpuMat buffer;
-    gpu::integralBuffered(src, sum, buffer, s);
-}
-namespace cv { namespace gpu { namespace cudev
-{
-    namespace imgproc
-    {
-        void shfl_integral_gpu(const PtrStepSzb& img, PtrStepSz<unsigned int> integral, cudaStream_t stream);
-    }
-}}}
-void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s)
-{
-    CV_Assert(src.type() == CV_8UC1);
-    cudaStream_t stream = StreamAccessor::getStream(s);
-    cv::Size whole;
-    cv::Point offset;
-    src.locateROI(whole, offset);
-    if (deviceSupports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
-        && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))
-    {
-        ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);
-        cv::gpu::cudev::imgproc::shfl_integral_gpu(src, buffer, stream);
-        sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
-        sum.setTo(Scalar::all(0), s);
-        GpuMat inner = sum(Rect(1, 1, src.cols, src.rows));
-        GpuMat res = buffer(Rect(0, 0, src.cols, src.rows));
-        res.copyTo(inner, s);
-    }
-    else
-    {
-#ifndef HAVE_OPENCV_GPULEGACY
-    throw_no_cuda();
-#else
-        sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
-        NcvSize32u roiSize;
-        roiSize.width = src.cols;
-        roiSize.height = src.rows;
-        cudaDeviceProp prop;
-        cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
-        Ncv32u bufSize;
-        ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
-        ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);
-        NppStStreamHandler h(stream);
-        ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step),
-            sum.ptr<Ncv32u>(), static_cast<int>(sum.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-#endif
-    }
-}
-//////////////////////////////////////////////////////////////////////////////
-// sqrIntegral
-void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
-{
-#ifndef HAVE_OPENCV_GPULEGACY
-    (void) src;
-    (void) sqsum;
-    (void) s;
-    throw_no_cuda();
-#else
-    CV_Assert(src.type() == CV_8U);
-    NcvSize32u roiSize;
-    roiSize.width = src.cols;
-    roiSize.height = src.rows;
-    cudaDeviceProp prop;
-    cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
-    Ncv32u bufSize;
-    ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));
-    GpuMat buf(1, bufSize, CV_8U);
-    cudaStream_t stream = StreamAccessor::getStream(s);
-    NppStStreamHandler h(stream);
-    sqsum.create(src.rows + 1, src.cols + 1, CV_64F);
-    ncvSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), static_cast<int>(src.step),
-            sqsum.ptr<Ncv64u>(0), static_cast<int>(sqsum.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));
-    if (stream == 0)
-        cudaSafeCall( cudaDeviceSynchronize() );
-#endif
-}
 //////////////////////////////////////////////////////////////////////////////
 // mulSpectrums
@@ -650,8 +535,6 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
    (void) stream;
    throw_no_cuda();
 #else
-    using namespace cv::gpu::cudev::imgproc;
    CV_Assert(image.type() == CV_32F);
    CV_Assert(templ.type() == CV_32F);

--- a/modules/gpuarithm/src/reductions.cpp
+++ b/modules/gpuarithm/src/reductions.cpp
--- a/modules/gpuimgproc/src/match_template.cpp
+++ b/modules/gpuimgproc/src/match_template.cpp
@@ -268,7 +268,7 @@ namespace
            buf.image_sums.resize(1);
            gpu::integral(image, buf.image_sums[0], stream);
-            unsigned int templ_sum = (unsigned int)sum(templ)[0];
+            unsigned int templ_sum = (unsigned int)gpu::sum(templ)[0];
            matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, buf.image_sums[0], templ_sum, result, StreamAccessor::getStream(stream));
        }
        else

--- a/modules/nonfree/src/surf_gpu.cpp
+++ b/modules/nonfree/src/surf_gpu.cpp
@@ -142,13 +142,13 @@ namespace
            bindImgTex(img);
-            gpu::integralBuffered(img, surf_.sum, surf_.intBuffer);
+            gpu::integral(img, surf_.sum, surf_.intBuffer);
            sumOffset = bindSumTex(surf_.sum);
            if (use_mask)
            {
                gpu::min(mask, 1.0, surf_.mask1);
-                gpu::integralBuffered(surf_.mask1, surf_.maskSum, surf_.intBuffer);
+                gpu::integral(surf_.mask1, surf_.maskSum, surf_.intBuffer);
                maskOffset = bindMaskSumTex(surf_.maskSum);
            }
        }

--- a/samples/gpu/driver_api_multi.cpp
+++ b/samples/gpu/driver_api_multi.cpp
@@ -138,7 +138,7 @@ void Worker::operator()(int device_id) const
    gpu::transpose(d_src, d_dst);
    // Check results
-    bool passed = norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
+    bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
    std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
        << (passed ? "passed" : "FAILED") << endl;

--- a/samples/gpu/farneback_optical_flow.cpp
+++ b/samples/gpu/farneback_optical_flow.cpp
@@ -22,9 +22,9 @@ inline T mapVal(T x, T a, T b, T c, T d)
 static void colorizeFlow(const Mat &u, const Mat &v, Mat &dst)
 {
    double uMin, uMax;
-    minMaxLoc(u, &uMin, &uMax, 0, 0);
+    cv::minMaxLoc(u, &uMin, &uMax, 0, 0);
    double vMin, vMax;
-    minMaxLoc(v, &vMin, &vMax, 0, 0);
+    cv::minMaxLoc(v, &vMin, &vMax, 0, 0);
    uMin = ::abs(uMin); uMax = ::abs(uMax);
    vMin = ::abs(vMin); vMax = ::abs(vMax);
    float dMax = static_cast<float>(::max(::max(uMin, uMax), ::max(vMin, vMax)));

--- a/samples/gpu/multi.cpp
+++ b/samples/gpu/multi.cpp
@@ -95,7 +95,7 @@ void Worker::operator()(int device_id) const
    gpu::transpose(d_src, d_dst);
    // Check results
-    bool passed = norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
+    bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
    std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
        << (passed ? "passed" : "FAILED") << endl;