Commit 17d90143 authored by Vladislav Vinogradov

added per-element min/max to gpu module.

fixed compile error in transform.
parent d96c5ebb
@@ -546,6 +546,26 @@ namespace cv
CV_EXPORTS GpuMat operator & (const GpuMat& src1, const GpuMat& src2);
CV_EXPORTS GpuMat operator ^ (const GpuMat& src1, const GpuMat& src2);
//! computes per-element minimum of two arrays (dst = min(src1, src2))
CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst);
//! Async version
CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream);
//! computes per-element minimum of array and scalar (dst = min(src1, src2))
CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst);
//! Async version
CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream);
//! computes per-element maximum of two arrays (dst = max(src1, src2))
CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst);
//! Async version
CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream);
//! computes per-element maximum of array and scalar (dst = max(src1, src2))
CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst);
//! Async version
CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream);
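These declarations mirror the CPU-side cv::min/cv::max interface. A brief usage sketch follows; it is a hypothetical example (the header path, matrix sizes, and the 128.0 threshold are assumptions, not part of the commit):
// Hypothetical usage sketch for the new per-element min/max API (not part of the commit).
#include <opencv2/core/core.hpp>
#include <opencv2/gpu/gpu.hpp>   // gpu module header of this era; path assumed

int main()
{
    cv::Mat a(480, 640, CV_8UC1, cv::Scalar(10));    // synthetic single-channel inputs
    cv::Mat b(480, 640, CV_8UC1, cv::Scalar(200));

    cv::gpu::GpuMat ga(a), gb(b), gmin, gmax, gclamped;

    cv::gpu::min(ga, gb, gmin);          // dst = min(src1, src2), element-wise
    cv::gpu::max(ga, gb, gmax);          // dst = max(src1, src2), element-wise
    cv::gpu::min(ga, 128.0, gclamped);   // element-wise min against a scalar

    cv::gpu::Stream stream;              // async variants enqueue work on a Stream
    cv::gpu::max(ga, gb, gmax, stream);
    stream.waitForCompletion();

    cv::Mat hostMin;
    gmin.download(hostMin);              // copy the result back to host memory
    return 0;
}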
////////////////////////////// Image processing //////////////////////////////
@@ -98,6 +98,14 @@ cv::gpu::GpuMat cv::gpu::operator ~ (const GpuMat&) { throw_nogpu(); return GpuM
cv::gpu::GpuMat cv::gpu::operator | (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
cv::gpu::GpuMat cv::gpu::operator & (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
void cv::gpu::min(const GpuMat&, double, GpuMat&) { throw_nogpu(); }
void cv::gpu::min(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu(); }
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
void cv::gpu::max(const GpuMat&, double, GpuMat&) { throw_nogpu(); }
void cv::gpu::max(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */
@@ -1119,5 +1127,144 @@ cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat& src1, const GpuMat& src2)
return dst;
}
//////////////////////////////////////////////////////////////////////////////
// min/max
namespace cv { namespace gpu { namespace mathfunc
{
template <typename T>
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
template <typename T>
void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
template <typename T>
void min_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream);
template <typename T>
void max_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream);
}}}
namespace
{
template <typename T>
void min_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
{
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
dst.create(src1.size(), src1.type());
mathfunc::min_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
}
template <typename T>
void min_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
{
dst.create(src1.size(), src1.type());
mathfunc::min_gpu<T>(src1.reshape(1), src2, dst.reshape(1), stream);
}
template <typename T>
void max_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
{
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
dst.create(src1.size(), src1.type());
mathfunc::max_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
}
template <typename T>
void max_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
{
dst.create(src1.size(), src1.type());
mathfunc::max_gpu<T>(src1.reshape(1), src2, dst.reshape(1), stream);
}
}
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
{
min_caller<uchar>, min_caller<char>, min_caller<ushort>, min_caller<short>, min_caller<int>,
min_caller<float>, min_caller<double>
};
funcs[src1.depth()](src1, src2, dst, 0);
}
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream)
{
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
{
min_caller<uchar>, min_caller<char>, min_caller<ushort>, min_caller<short>, min_caller<int>,
min_caller<float>, min_caller<double>
};
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
}
void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst)
{
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
{
min_caller<uchar>, min_caller<char>, min_caller<ushort>, min_caller<short>, min_caller<int>,
min_caller<float>, min_caller<double>
};
funcs[src1.depth()](src1, src2, dst, 0);
}
void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream)
{
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
{
min_caller<uchar>, min_caller<char>, min_caller<ushort>, min_caller<short>, min_caller<int>,
min_caller<float>, min_caller<double>
};
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
}
void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
{
max_caller<uchar>, max_caller<char>, max_caller<ushort>, max_caller<short>, max_caller<int>,
max_caller<float>, max_caller<double>
};
funcs[src1.depth()](src1, src2, dst, 0);
}
void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream)
{
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
{
max_caller<uchar>, max_caller<char>, max_caller<ushort>, max_caller<short>, max_caller<int>,
max_caller<float>, max_caller<double>
};
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
}
void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst)
{
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
{
max_caller<uchar>, max_caller<char>, max_caller<ushort>, max_caller<short>, max_caller<int>,
max_caller<float>, max_caller<double>
};
funcs[src1.depth()](src1, src2, dst, 0);
}
void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream)
{
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
static const func_t funcs[] =
{
max_caller<uchar>, max_caller<char>, max_caller<ushort>, max_caller<short>, max_caller<int>,
max_caller<float>, max_caller<double>
};
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
}
#endif /* !defined (HAVE_CUDA) */
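Each of the host wrappers above uses the same dispatch idiom: a static table of per-type callers indexed by the matrix depth (CV_8U through CV_64F). A stripped-down sketch of that pattern, with purely illustrative names:
// Sketch of the depth-indexed dispatch table used by the wrappers above (names are illustrative).
#include <cstdio>

typedef void (*caller_t)(int depth);

template <typename T>
void caller(int depth) { std::printf("dispatched to the caller for depth %d\n", depth); }

// Order matches the OpenCV depth codes: CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
static const caller_t callers[] =
{
    caller<unsigned char>, caller<signed char>, caller<unsigned short>,
    caller<short>, caller<int>, caller<float>, caller<double>
};

void dispatch(int depth) { callers[depth](depth); }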
@@ -43,6 +43,7 @@
#include "cuda_shared.hpp"
#include "transform.hpp"
#include "limits_gpu.hpp"
#include "saturate_cast.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
@@ -1295,4 +1296,127 @@ namespace cv { namespace gpu { namespace mathfunc
template void transpose_gpu<short2 >(const DevMem2D& src, const DevMem2D& dst);
template void transpose_gpu<int >(const DevMem2D& src, const DevMem2D& dst);
template void transpose_gpu<float >(const DevMem2D& src, const DevMem2D& dst);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// min/max
struct MinOp
{
template <typename T>
__device__ T operator()(T a, T b)
{
return min(a, b);
}
__device__ float operator()(float a, float b)
{
return fmin(a, b);
}
__device__ double operator()(double a, double b)
{
return fmin(a, b);
}
};
struct MaxOp
{
template <typename T>
__device__ T operator()(T a, T b)
{
return max(a, b);
}
__device__ float operator()(float a, float b)
{
return fmax(a, b);
}
__device__ double operator()(double a, double b)
{
return fmax(a, b);
}
};
struct ScalarMinOp
{
double s;
explicit ScalarMinOp(double s_) : s(s_) {}
template <typename T>
__device__ T operator()(T a)
{
return saturate_cast<T>(fmin((double)a, s));
}
};
struct ScalarMaxOp
{
double s;
explicit ScalarMaxOp(double s_) : s(s_) {}
template <typename T>
__device__ T operator()(T a)
{
return saturate_cast<T>(fmax((double)a, s));
}
};
template <typename T>
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
{
MinOp op;
transform(src1, src2, dst, op, stream);
}
template void min_gpu<uchar >(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
template void min_gpu<char >(const DevMem2D_<char>& src1, const DevMem2D_<char>& src2, const DevMem2D_<char>& dst, cudaStream_t stream);
template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
template void min_gpu<short >(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
template void min_gpu<int >(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
template void min_gpu<float >(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
template void min_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
template <typename T>
void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
{
MaxOp op;
transform(src1, src2, dst, op, stream);
}
template void max_gpu<uchar >(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
template void max_gpu<char >(const DevMem2D_<char>& src1, const DevMem2D_<char>& src2, const DevMem2D_<char>& dst, cudaStream_t stream);
template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
template void max_gpu<short >(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
template void max_gpu<int >(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
template void max_gpu<float >(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
template void max_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
template <typename T>
void min_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream)
{
ScalarMinOp op(src2);
transform(src1, dst, op, stream);
}
template void min_gpu<uchar >(const DevMem2D& src1, double src2, const DevMem2D& dst, cudaStream_t stream);
template void min_gpu<char >(const DevMem2D_<char>& src1, double src2, const DevMem2D_<char>& dst, cudaStream_t stream);
template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, double src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
template void min_gpu<short >(const DevMem2D_<short>& src1, double src2, const DevMem2D_<short>& dst, cudaStream_t stream);
template void min_gpu<int >(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);
template void min_gpu<float >(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);
template void min_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
template <typename T>
void max_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream)
{
ScalarMaxOp op(src2);
transform(src1, dst, op, stream);
}
template void max_gpu<uchar >(const DevMem2D& src1, double src2, const DevMem2D& dst, cudaStream_t stream);
template void max_gpu<char >(const DevMem2D_<char>& src1, double src2, const DevMem2D_<char>& dst, cudaStream_t stream);
template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, double src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
template void max_gpu<short >(const DevMem2D_<short>& src1, double src2, const DevMem2D_<short>& dst, cudaStream_t stream);
template void max_gpu<int >(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);
template void max_gpu<float >(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);
template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
}}}
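The scalar functors above widen each element to double, apply fmin/fmax against the scalar, and saturate the result back to T with saturate_cast. A rough host-side illustration of that clamping for uchar elements (plain C++, written as an assumption of the intended semantics, not the device saturate_cast implementation):
#include <algorithm>
#include <cmath>

// Host-side sketch of what ScalarMinOp does for uchar elements:
// take the minimum in double precision, then saturate back to [0, 255].
unsigned char scalar_min_u8(unsigned char a, double s)
{
    double v = std::fmin(static_cast<double>(a), s);
    v = std::max(0.0, std::min(v, 255.0));        // saturate to the uchar range
    return static_cast<unsigned char>(v + 0.5);   // round to nearest, like saturate_cast
}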
@@ -64,7 +64,7 @@ namespace cv { namespace gpu { namespace device
//! Transform kernels
- template <typename T, typename D, typename Mask, typename UnOp>
+ template <typename T, typename D, typename UnOp, typename Mask>
static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -77,7 +77,7 @@ namespace cv { namespace gpu { namespace device
}
}
- template <typename T1, typename T2, typename D, typename Mask, typename BinOp>
+ template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, const Mask mask, BinOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -105,7 +105,7 @@ namespace cv
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
- device::transform<T, D, UnOp><<<grid, threads, 0, stream>>>(src, dst, device::NoMask(), op);
+ device::transform<T, D><<<grid, threads, 0, stream>>>(src, dst, device::NoMask(), op);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
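The parameter reorder above is presumably the compile error fix named in the commit message: with the old order <T, D, Mask, UnOp>, the caller's explicit arguments transform<T, D, UnOp> bound the functor type to Mask, so the NoMask() argument no longer matched; with Mask moved last and only <T, D> spelled out, both the functor and the mask are deduced from the call. A minimal standalone illustration of that deduction rule (hypothetical names, not OpenCV code):
// Explicit template arguments bind left to right; parameters not spelled out are deduced.
template <typename T, typename D, typename UnOp, typename Mask>
void transform_like(T /*src*/, D /*dst*/, Mask /*mask*/, UnOp /*op*/) {}

struct NoMask {};
struct Negate { int operator()(int v) const { return -v; } };

int main()
{
    // Only T and D are explicit; UnOp and Mask are deduced from the call arguments.
    transform_like<int, int>(1, 2, NoMask(), Negate());

    // With the old order <T, D, Mask, UnOp>, the equivalent call
    //     transform_like<int, int, Negate>(1, 2, NoMask(), Negate());
    // would pin Mask to Negate and then reject the NoMask() argument, which is the
    // kind of error this hunk removes.
    return 0;
}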
@@ -949,6 +949,30 @@ struct CV_GpuCountNonZeroTest: CvTest
}
};
////////////////////////////////////////////////////////////////////////////////
// min/max
struct CV_GpuImageMinMaxTest : public CV_GpuArithmTest
{
CV_GpuImageMinMaxTest() : CV_GpuArithmTest( "GPU-ImageMinMax", "min/max" ) {}
int test( const Mat& mat1, const Mat& mat2 )
{
cv::Mat cpuMinRes, cpuMaxRes;
cv::min(mat1, mat2, cpuMinRes);
cv::max(mat1, mat2, cpuMaxRes);
GpuMat gpu1(mat1);
GpuMat gpu2(mat2);
GpuMat gpuMinRes, gpuMaxRes;
cv::gpu::min(gpu1, gpu2, gpuMinRes);
cv::gpu::max(gpu1, gpu2, gpuMaxRes);
return CheckNorm(cpuMinRes, gpuMinRes) == CvTS::OK && CheckNorm(cpuMaxRes, gpuMaxRes) == CvTS::OK ?
CvTS::OK : CvTS::FAIL_GENERIC;
}
};
/////////////////////////////////////////////////////////////////////////////
/////////////////// tests registration /////////////////////////////////////
@@ -979,3 +1003,4 @@ CV_GpuNppImagePolarToCartTest CV_GpuNppImagePolarToCart_test;
CV_GpuMinMaxTest CV_GpuMinMaxTest_test;
CV_GpuMinMaxLocTest CV_GpuMinMaxLocTest_test;
CV_GpuCountNonZeroTest CV_CountNonZero_test;
CV_GpuImageMinMaxTest CV_GpuImageMinMax_test;