Commit 37d39bd9 authored by Vladislav Vinogradov's avatar Vladislav Vinogradov

Added implementation and test for the GPU version of subtract, multiply, divide, transpose, absdiff, threshold, compare, meanStdDev, norm, based on NPP.
parent 88a7a8f5
...@@ -204,6 +204,9 @@ namespace cv
template<typename _Tp> _Tp* ptr(int y=0);
template<typename _Tp> const _Tp* ptr(int y=0) const;
//! matrix transposition
GpuMat t() const;
/*! includes several bit-fields:
- the magic signature
- continuity flag
...@@ -343,7 +346,34 @@ namespace cv
////////////////////////////// Arithmetics ///////////////////////////////////
//! adds one matrix to another (c = a + b)
CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! subtracts one matrix from another (c = a - b)
CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! computes element-wise product of the two arrays (c = a * b)
CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! computes element-wise quotient of the two arrays (c = a / b)
CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! transposes the matrix
CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst);
//! computes element-wise absolute difference of two arrays (c = abs(a - b))
CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! applies a fixed threshold to the image.
//! Currently supports only the THRESH_TRUNC threshold type and single-channel float sources.
CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int thresholdType);
//! compares elements of two arrays (c = a <cmpop> b)
//! CMP_NE is not supported yet.
CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop);
//! computes mean value and standard deviation of all or selected array elements
CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev);
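//! computes the norm of an array, or of the difference of two arrays (supports NORM_INF, NORM_L1, NORM_L2)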
CV_EXPORTS double norm(const GpuMat& src1, int normType=NORM_L2);
CV_EXPORTS double norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2);
////////////////////////////// Image processing //////////////////////////////
// DST[x,y] = SRC[xmap[x,y],ymap[x,y]] with bilinear interpolation.
...
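
For context, a minimal usage sketch of the new arithmetic API (not part of the commit; the file names are hypothetical, a CUDA build is assumed, and the final download relies on the same GpuMat-to-Mat conversion the test below uses):

cv::Mat a = cv::imread("left.png", 0);           // 8UC1 input; file name is hypothetical
cv::Mat b = cv::imread("right.png", 0);
cv::gpu::GpuMat ga(a), gb(b), gc;                // constructing from a Mat uploads to the device
cv::gpu::subtract(ga, gb, gc);                   // c = a - b, computed by NPP
double d = cv::gpu::norm(ga, gb, cv::NORM_L1);   // L1 distance between the two inputs
cv::Mat c(gc);                                   // downloads the result back to host memory
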
...@@ -335,6 +335,13 @@ template<typename _Tp> inline const _Tp* GpuMat::ptr(int y) const
return (const _Tp*)(data + step*y);
}
inline GpuMat GpuMat::t() const
{
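// delegates to gpu::transpose, which currently supports CV_8UC1 matrices only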
GpuMat tmp;
transpose(*this, tmp);
return tmp;
}
static inline void swap( GpuMat& a, GpuMat& b ) { a.swap(b); }
...
...@@ -49,18 +49,153 @@ using namespace std;
#if !defined (HAVE_CUDA)
void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { throw_nogpu(); }
void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { throw_nogpu(); }
void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { throw_nogpu(); }
void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { throw_nogpu(); }
void cv::gpu::transpose(const GpuMat& src1, GpuMat& dst) { throw_nogpu(); }
void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { throw_nogpu(); }
double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int thresholdType) { throw_nogpu(); return 0.0; }
void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop) { throw_nogpu(); }
void cv::gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev) { throw_nogpu(); }
double cv::gpu::norm(const GpuMat& src1, int normType) { throw_nogpu(); return 0.0; }
double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) { throw_nogpu(); return 0.0; }
#else /* !defined (HAVE_CUDA) */
namespace
{
typedef NppStatus (*npp_binary_func_8u_scale_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep,
NppiSize oSizeROI, int nScaleFactor);
typedef NppStatus (*npp_binary_func_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst,
int nDstStep, NppiSize oSizeROI);
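// Shared dispatcher for the NPP binary element-wise functions: it selects the
// 8u C1, 8u C4 or 32f C1 variant from the source type. The 8-bit variants take
// a scale factor, which is passed as 0 (no scaling of the result).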
void nppFuncCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
npp_binary_func_8u_scale_t npp_func_8uc1, npp_binary_func_8u_scale_t npp_func_8uc4, npp_binary_func_32f_t npp_func_32fc1)
{
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32FC1);
dst.create( src1.size(), src1.type() );
NppiSize sz;
sz.width = src1.cols;
sz.height = src1.rows;
if (src1.depth() == CV_8U)
{
if (src1.channels() == 1)
{
npp_func_8uc1((const Npp8u*)src1.ptr<char>(), src1.step,
(const Npp8u*)src2.ptr<char>(), src2.step,
(Npp8u*)dst.ptr<char>(), dst.step, sz, 0);
}
else
{
npp_func_8uc4((const Npp8u*)src1.ptr<char>(), src1.step,
(const Npp8u*)src2.ptr<char>(), src2.step,
(Npp8u*)dst.ptr<char>(), dst.step, sz, 0);
}
}
else //if (src1.depth() == CV_32F)
{
npp_func_32fc1((const Npp32f*)src1.ptr<float>(), src1.step,
(const Npp32f*)src2.ptr<float>(), src2.step,
(Npp32f*)dst.ptr<float>(), dst.step, sz);
}
}
}
void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
nppFuncCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32f_C1R);
}
void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
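// NPP's Sub computes pSrc2 - pSrc1, so the operands are passed in reverse order.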
nppFuncCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32f_C1R);
}
void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
nppFuncCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32f_C1R);
}
void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
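// Likewise, NPP's Div computes pSrc2 / pSrc1, hence the swapped operand order.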
nppFuncCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32f_C1R);
}
void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
{
CV_Assert(src.type() == CV_8UC1);
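// note the swapped dimensions: a rows x cols source transposes into a cols x rows destination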
dst.create( src.cols, src.rows, src.type() );
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
nppiTranspose_8u_C1R((const Npp8u*)src.ptr<char>(), src.step, (Npp8u*)dst.ptr<char>(), dst.step, sz);
}
void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert((src1.depth() == CV_8U || src1.depth() == CV_32F) && src1.channels() == 1);
dst.create( src1.size(), src1.type() );
NppiSize sz;
sz.width = src1.cols;
sz.height = src1.rows;
if (src1.depth() == CV_8U)
{
nppiAbsDiff_8u_C1R((const Npp8u*)src1.ptr<char>(), src1.step,
(const Npp8u*)src2.ptr<char>(), src2.step,
(Npp8u*)dst.ptr<char>(), dst.step, sz);
}
else //if (src1.depth() == CV_32F)
{
nppiAbsDiff_32f_C1R((const Npp32f*)src1.ptr<float>(), src1.step,
(const Npp32f*)src2.ptr<float>(), src2.step,
(Npp32f*)dst.ptr<float>(), dst.step, sz);
}
}
double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double /*maxVal*/, int thresholdType)
{
CV_Assert(src.type() == CV_32FC1 && thresholdType == THRESH_TRUNC);
dst.create( src.size(), src.type() );
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
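// With NPP_CMP_GREATER, values above thresh are replaced by thresh, which
// matches THRESH_TRUNC semantics; maxVal is therefore unused.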
nppiThreshold_32f_C1R((const Npp32f*)src.ptr<float>(), src.step,
(Npp32f*)dst.ptr<float>(), dst.step, sz, (Npp32f)thresh, NPP_CMP_GREATER);
return thresh;
}
void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop)
{
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert((src1.type() == CV_8UC4 || src1.type() == CV_32FC1) && cmpop != CMP_NE);
dst.create( src1.size(), CV_8UC1 );
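// indexed by OpenCV's CMP_* constants (CMP_EQ=0 ... CMP_LE=4); CMP_NE (5) has no entry, hence the assertion above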
static const NppCmpOp nppCmpOp[] = { NPP_CMP_EQ, NPP_CMP_GREATER, NPP_CMP_GREATER_EQ, NPP_CMP_LESS, NPP_CMP_LESS_EQ };
NppiSize sz;
sz.width = src1.cols;
...@@ -68,25 +203,57 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
if (src1.depth() == CV_8U)
{
nppiCompare_8u_C4R((const Npp8u*)src1.ptr<char>(), src1.step,
(const Npp8u*)src2.ptr<char>(), src2.step,
(Npp8u*)dst.ptr<char>(), dst.step, sz, nppCmpOp[cmpop]);
}
else //if (src1.depth() == CV_32F)
{
nppiCompare_32f_C1R((const Npp32f*)src1.ptr<float>(), src1.step,
(const Npp32f*)src2.ptr<float>(), src2.step,
(Npp8u*)dst.ptr<char>(), dst.step, sz, nppCmpOp[cmpop]);
}
}
void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
{
CV_Assert(src.type() == CV_8UC1);
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
nppiMean_StdDev_8u_C1R((const Npp8u*)src.ptr<char>(), src.step, sz, mean.val, stddev.val);
}
double cv::gpu::norm(const GpuMat& src1, int normType)
{
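// reduce to the two-source case by measuring the distance to a zero matrix of the same type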
return norm(src1, GpuMat(src1.size(), src1.type(), Scalar::all(0.0)), normType);
}
double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
{
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert((src1.type() == CV_8UC1) && (normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2));
typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
NppiSize oSizeROI, Npp64f* pRetVal);
static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
NppiSize sz;
sz.width = src1.cols;
sz.height = src1.rows;
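// NORM_INF (1), NORM_L1 (2) and NORM_L2 (4) shift down to table indices 0, 1 and 2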
int funcIdx = normType >> 1;
Npp64f retVal[3];
npp_norm_diff_func[funcIdx]((const Npp8u*)src1.ptr<char>(), src1.step,
(const Npp8u*)src2.ptr<char>(), src2.step,
sz, retVal);
return retVal[0];
}
#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
...@@ -55,7 +55,7 @@
#include <vector>
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#if defined(HAVE_CUDA)
...
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "gputest.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
using namespace cv;
using namespace std;
using namespace gpu;
class CV_GpuNppImageAdditionTest : public CvTest
{
public:
CV_GpuNppImageAdditionTest();
~CV_GpuNppImageAdditionTest();
protected:
void run(int);
int test8UC1(const Mat& imgL, const Mat& imgR);
int test8UC4(const Mat& imgL, const Mat& imgR);
int test32FC1(const Mat& imgL, const Mat& imgR);
int test(const Mat& imgL, const Mat& imgR);
int CheckNorm(const Mat& m1, const Mat& m2);
};
CV_GpuNppImageAdditionTest::CV_GpuNppImageAdditionTest(): CvTest( "GPU-NppImageAddition", "add" )
{
}
CV_GpuNppImageAdditionTest::~CV_GpuNppImageAdditionTest() {}
int CV_GpuNppImageAdditionTest::test8UC1(const Mat& imgL, const Mat& imgR)
{
cv::Mat imgL_C1;
cv::Mat imgR_C1;
cvtColor(imgL, imgL_C1, CV_BGR2GRAY);
cvtColor(imgR, imgR_C1, CV_BGR2GRAY);
return test(imgL_C1, imgR_C1);
}
int CV_GpuNppImageAdditionTest::test8UC4(const Mat& imgL, const Mat& imgR)
{
cv::Mat imgL_C4;
cv::Mat imgR_C4;
cvtColor(imgL, imgL_C4, CV_BGR2BGRA);
cvtColor(imgR, imgR_C4, CV_BGR2BGRA);
return test(imgL_C4, imgR_C4);
}
int CV_GpuNppImageAdditionTest::test32FC1( const Mat& imgL, const Mat& imgR )
{
cv::Mat imgL_C1;
cv::Mat imgR_C1;
cvtColor(imgL, imgL_C1, CV_BGR2GRAY);
cvtColor(imgR, imgR_C1, CV_BGR2GRAY);
imgL_C1.convertTo(imgL_C1, CV_32F);
imgR_C1.convertTo(imgR_C1, CV_32F);
return test(imgL_C1, imgR_C1);
}
int CV_GpuNppImageAdditionTest::test( const Mat& imgL, const Mat& imgR )
{
cv::Mat cpuAdd;
cv::add(imgL, imgR, cpuAdd);
GpuMat gpuL(imgL);
GpuMat gpuR(imgR);
GpuMat gpuAdd;
cv::gpu::add(gpuL, gpuR, gpuAdd);
return CheckNorm(cpuAdd, gpuAdd);
}
int CV_GpuNppImageAdditionTest::CheckNorm(const Mat& m1, const Mat& m2)
{
double ret = norm(m1, m2);
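// tolerate small rounding differences between the CPU and GPU results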
if (ret < 1.0)
{
return CvTS::OK;
}
else
{
ts->printf(CvTS::LOG, "\nNorm: %f\n", ret);
return CvTS::FAIL_GENERIC;
}
}
void CV_GpuNppImageAdditionTest::run( int )
{
//load images
cv::Mat img_l = cv::imread(std::string(ts->get_data_path()) + "stereobm/aloe-L.png");
cv::Mat img_r = cv::imread(std::string(ts->get_data_path()) + "stereobm/aloe-R.png");
if (img_l.empty() || img_r.empty())
{
ts->set_failed_test_info(CvTS::FAIL_MISSING_TEST_DATA);
return;
}
//run tests
int testResult = test8UC1(img_l, img_r);
if (testResult != CvTS::OK)
{
ts->set_failed_test_info(testResult);
return;
}
testResult = test8UC4(img_l, img_r);
if (testResult != CvTS::OK)
{
ts->set_failed_test_info(testResult);
return;
}
testResult = test32FC1(img_l, img_r);
if (testResult != CvTS::OK)
{
ts->set_failed_test_info(testResult);
return;
}
ts->set_failed_test_info(CvTS::OK);
}
CV_GpuNppImageAdditionTest CV_GpuNppImageAddition_test;
\ No newline at end of file