refactored gpu module, added vec math operators for uint, added support of 2 channel images into gpu::sum (removed support of double)

refactored gpu module, added vec math operators for uint, added support of 2 channel images into gpu::sum (removed support of double)

refactored gpu module, added vec math operators for uint, added support of 2 channel images into gpu::sum (removed support of double)
refactored gpu module, added vec math operators for uint, added support of 2 channel images into gpu::sum (removed support of double)
d8a7ff1e · Alexey Spizhevoy · e5eec31b · d8a7ff1e · d8a7ff1e · d8a7ff1e
Commit d8a7ff1e authored Dec 15, 2010 by Alexey Spizhevoy
Showing with 107 additions and 14 deletions

arithm.cpp modules/gpu/src/arithm.cpp +12 -13

mathfunc.cu modules/gpu/src/cuda/mathfunc.cu +0 -0

vecmath.hpp modules/gpu/src/opencv2/gpu/device/vecmath.hpp +85 -0

arithm.cpp tests/gpu/src/arithm.cpp +10 -1

No files found.
--- a/modules/gpu/src/arithm.cpp
+++ b/modules/gpu/src/arithm.cpp
@@ -486,10 +486,10 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
 namespace cv { namespace gpu { namespace mathfunc
 {
    template <typename T>
-    void sum_caller(const DevMem2D src, PtrStep buf, double* sum);
+    void sum_caller(const DevMem2D src, PtrStep buf, double* sum, int cn);
    template <typename T>
-    void sum_multipass_caller(const DevMem2D src, PtrStep buf, double* sum);
+    void sum_multipass_caller(const DevMem2D src, PtrStep buf, double* sum, int cn);
    template <typename T>
    void sqsum_caller(const DevMem2D src, PtrStep buf, double* sum);
@@ -499,7 +499,7 @@ namespace cv { namespace gpu { namespace mathfunc
    namespace sum
    {
-        void get_buf_size_required(int cols, int rows, int& bufcols, int& bufrows);
+        void get_buf_size_required(int cols, int rows, int cn, int& bufcols, int& bufrows);
    }
 }}}
@@ -512,27 +512,26 @@ Scalar cv::gpu::sum(const GpuMat& src)
 Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf) 
 {
    using namespace mathfunc;
-    CV_Assert(src.channels() == 1);
-    typedef void (*Caller)(const DevMem2D, PtrStep, double*);
+    typedef void (*Caller)(const DevMem2D, PtrStep, double*, int);
    static const Caller callers[2][7] = 
        { { sum_multipass_caller<unsigned char>, sum_multipass_caller<char>, 
            sum_multipass_caller<unsigned short>, sum_multipass_caller<short>, 
            sum_multipass_caller<int>, sum_multipass_caller<float>, 0 },
          { sum_caller<unsigned char>, sum_caller<char>, 
            sum_caller<unsigned short>, sum_caller<short>, 
-            sum_caller<int>, sum_caller<float>, sum_caller<double> } };
+            sum_caller<int>, sum_caller<float>, 0 } };
    Size bufSize;
-    sum::get_buf_size_required(src.cols, src.rows, bufSize.width, bufSize.height); 
+    sum::get_buf_size_required(src.cols, src.rows, src.channels(), bufSize.width, bufSize.height); 
    buf.create(bufSize, CV_8U);
-    Caller caller = callers[hasAtomicsSupport(getDevice())][src.type()];
+    Caller caller = callers[hasAtomicsSupport(getDevice())][src.depth()];
    if (!caller) CV_Error(CV_StsBadArg, "sum: unsupported type");
-    double result;
+    double result[4];
-    caller(src, buf, &result);
+    caller(src, buf, result, src.channels());
-    return result;
+    return Scalar(result[0], result[1], result[2], result[3]);
 }
 Scalar cv::gpu::sqrSum(const GpuMat& src) 
@@ -553,10 +552,10 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
            sqsum_multipass_caller<int>, sqsum_multipass_caller<float>, 0 },
          { sqsum_caller<unsigned char>, sqsum_caller<char>, 
            sqsum_caller<unsigned short>, sqsum_caller<short>, 
-            sqsum_caller<int>, sqsum_caller<float>, sqsum_caller<double> } };
+            sqsum_caller<int>, sqsum_caller<float>, 0 } };
    Size bufSize;
-    sum::get_buf_size_required(src.cols, src.rows, bufSize.width, bufSize.height); 
+    sum::get_buf_size_required(src.cols, src.rows, 1, bufSize.width, bufSize.height); 
    buf.create(bufSize, CV_8U);
    Caller caller = callers[hasAtomicsSupport(getDevice())][src.type()];

--- a/modules/gpu/src/cuda/mathfunc.cu
+++ b/modules/gpu/src/cuda/mathfunc.cu
--- a/modules/gpu/src/opencv2/gpu/device/vecmath.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/vecmath.hpp
@@ -866,6 +866,91 @@ namespace cv
                return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
            }
+            static __device__  uint1 operator+(const uint1& a, const uint1& b)
+            {
+                return make_uint1(a.x + b.x);
+            }
+            static __device__  uint1 operator-(const uint1& a, const uint1& b)
+            {
+                return make_uint1(a.x - b.x);
+            }
+            static __device__  uint1 operator*(const uint1& a, const uint1& b)
+            {
+                return make_uint1(a.x * b.x);
+            }
+            static __device__  uint1 operator/(const uint1& a, const uint1& b)
+            {
+                return make_uint1(a.x / b.x);
+            }
+            static __device__ float1 operator*(const uint1& a, float s)
+            {
+                return make_float1(a.x * s);
+            }
+            static __device__  uint2 operator+(const uint2& a, const uint2& b)
+            {
+                return make_uint2(a.x + b.x, a.y + b.y);
+            }
+            static __device__  uint2 operator-(const uint2& a, const uint2& b)
+            {
+                return make_uint2(a.x - b.x, a.y - b.y);
+            }
+            static __device__  uint2 operator*(const uint2& a, const uint2& b)
+            {
+                return make_uint2(a.x * b.x, a.y * b.y);
+            }
+            static __device__  uint2 operator/(const uint2& a, const uint2& b)
+            {
+                return make_uint2(a.x / b.x, a.y / b.y);
+            }
+            static __device__ float2 operator*(const uint2& a, float s)
+            {
+                return make_float2(a.x * s, a.y * s);
+            }
+            static __device__  uint3 operator+(const uint3& a, const uint3& b)
+            {
+                return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
+            }
+            static __device__  uint3 operator-(const uint3& a, const uint3& b)
+            {
+                return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
+            }
+            static __device__  uint3 operator*(const uint3& a, const uint3& b)
+            {
+                return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
+            }
+            static __device__  uint3 operator/(const uint3& a, const uint3& b)
+            {
+                return make_uint3(a.x / b.x, a.y / b.y, a.z / b.z);
+            }
+            static __device__ float3 operator*(const uint3& a, float s)
+            {
+                return make_float3(a.x * s, a.y * s, a.z * s);
+            }
+            static __device__  uint4 operator+(const uint4& a, const uint4& b)
+            {
+                return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
+            }
+            static __device__  uint4 operator-(const uint4& a, const uint4& b)
+            {
+                return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
+            }
+            static __device__  uint4 operator*(const uint4& a, const uint4& b)
+            {
+                return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
+            }
+            static __device__  uint4 operator/(const uint4& a, const uint4& b)
+            {
+                return make_uint4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
+            }
+            static __device__ float4 operator*(const uint4& a, float s)
+            {
+                return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
+            }
            static __device__  float1 operator+(const float1& a, const float1& b)
            {
                return make_float1(a.x + b.x);

--- a/tests/gpu/src/arithm.cpp
+++ b/tests/gpu/src/arithm.cpp
@@ -942,9 +942,18 @@ struct CV_GpuSumTest: CvTest
            Scalar a, b;
            double max_err = 1e-5;
-            int typemax = hasNativeDoubleSupport(getDevice()) ? CV_64F : CV_32F;
+            int typemax = CV_32F;
            for (int type = CV_8U; type <= typemax; ++type) 
            {
+                gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 2), src);
+                a = sum(src);
+                b = sum(GpuMat(src));
+                if (abs(a[0] - b[0]) + abs(a[1] - b[1]) > src.size().area() * max_err)
+                {
+                    ts->printf(CvTS::CONSOLE, "cols: %d, rows: %d, expected: %f, actual: %f\n", src.cols, src.rows, a[0], b[0]);
+                    ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
+                    return;
+                }
                gen(1 + rand() % 500, 1 + rand() % 500, type, src);
                a = sum(src);
                b = sum(GpuMat(src));