Fixed GPU arithmetic functions (results mismatched the CPU version)

9034a2d6 · Vladislav Vinogradov · 15902284 · 9034a2d6 · 9034a2d6 · 9034a2d6
Commit 9034a2d6 authored Mar 26, 2012 by Vladislav Vinogradov
5 changed files
--- a/modules/gpu/src/cuda/element_operations.cu
+++ b/modules/gpu/src/cuda/element_operations.cu
@@ -488,11 +488,29 @@ namespace cv { namespace gpu { namespace device

    template <typename T, typename D> struct Multiply : binary_function<T, T, D>
    {
-        Multiply(double scale_) : scale(scale_) {}
+        Multiply(float scale_) : scale(scale_) {}
        __device__ __forceinline__ D operator ()(T a, T b) const
        {
            return saturate_cast<D>(scale * a * b);
        }
+        const float scale;
+    };
+    template <typename T> struct Multiply<T, double> : binary_function<T, T, double>
+    {
+        Multiply(double scale_) : scale(scale_) {}
+        __device__ __forceinline__ double operator ()(T a, T b) const
+        {
+            return scale * a * b;
+        }
+        const double scale;
+    };
+    template <> struct Multiply<int, int> : binary_function<int, int, int>
+    {
+        Multiply(double scale_) : scale(scale_) {}
+        __device__ __forceinline__ int operator ()(int a, int b) const
+        {
+            return saturate_cast<int>(scale * a * b);
+        }
        const double scale;
    };

@@ -517,11 +535,36 @@ namespace cv { namespace gpu { namespace device
        enum { smart_shift = 4 };
    };

+    template <typename T, typename D> struct MultiplyCaller
+    {
+        static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
+        {
+            Multiply<T, D> op(static_cast<float>(scale));
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
+        }
+    };
+    template <typename T> struct MultiplyCaller<T, double>
+    {
+        static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
+        {
+            cudaSafeCall( cudaSetDoubleForDevice(&scale) );
+            Multiply<T, double> op(scale);
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<double>)dst, op, WithOutMask(), stream);
+        }
+    };
+    template <> struct MultiplyCaller<int, int>
+    {
+        static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
+        {
+            cudaSafeCall( cudaSetDoubleForDevice(&scale) );
+            Multiply<int, int> op(scale);
+            cv::gpu::device::transform((DevMem2D_<int>)src1, (DevMem2D_<int>)src2, (DevMem2D_<int>)dst, op, WithOutMask(), stream);
+        }
+    };
+
    template <typename T, typename D> void multiply_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
    {
-        cudaSafeCall( cudaSetDoubleForDevice(&scale) );
-        Multiply<T, D> op(scale);
-        cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
+        MultiplyCaller<T, D>::call(src1, src2, dst, scale, stream);
    }

    template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
@@ -729,7 +772,7 @@ namespace cv { namespace gpu { namespace device
        Divide(double scale_) : scale(scale_) {}
        __device__ __forceinline__ D operator ()(T a, T b) const
        {
-            return b != 0 ? saturate_cast<D>(scale * a / b) : 0;
+            return b != 0 ? saturate_cast<D>(a * scale / b) : 0;
        }
        const double scale;
    };

--- a/modules/gpu/src/element_operations.cpp
+++ b/modules/gpu/src/element_operations.cpp
--- a/modules/gpu/src/stereobm.cpp
+++ b/modules/gpu/src/stereobm.cpp
@@ -55,7 +55,7 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&,

 #else /* !defined (HAVE_CUDA) */

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
    namespace stereobm
    {
@@ -65,10 +65,13 @@ namespace cv { namespace gpu { namespace device
    }
 }}}

-const float defaultAvgTexThreshold = 3;
+namespace
+{
+    const float defaultAvgTexThreshold = 3;
+}

 cv::gpu::StereoBM_GPU::StereoBM_GPU()
-    : preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold)  
+    : preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold)
 {
 }

@@ -100,9 +103,9 @@ namespace
    {
        using namespace ::cv::gpu::device::stereobm;

-        CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
-        CV_DbgAssert(left.type() == CV_8UC1);
-        CV_DbgAssert(right.type() == CV_8UC1);
+        CV_Assert(left.rows == right.rows && left.cols == right.cols);
+        CV_Assert(left.type() == CV_8UC1);
+        CV_Assert(right.type() == CV_8UC1);

        disparity.create(left.size(), CV_8U);
        minSSD.create(left.size(), CV_32S);
@@ -115,7 +118,7 @@ namespace
            leBuf.create( left.size(),  left.type());
            riBuf.create(right.size(), right.type());

-		    prefilter_xsobel( left, leBuf, 31, stream);
+            prefilter_xsobel( left, leBuf, 31, stream);
            prefilter_xsobel(right, riBuf, 31, stream);

            le_for_bm = leBuf;

--- a/modules/gpu/test/test_core.cpp
+++ b/modules/gpu/test/test_core.cpp
--- a/modules/highgui/src/window_gtk.cpp
+++ b/modules/highgui/src/window_gtk.cpp
@@ -254,7 +254,7 @@ static void cvImageWidget_set_size(GtkWidget * widget, int max_width, int max_he
 }

 static void
-cvImageWidget_size_allocate (GtkWidget     *widget,  
+cvImageWidget_size_allocate (GtkWidget     *widget,
                        GtkAllocation *allocation)
 {
  CvImageWidget *image_widget;
@@ -719,7 +719,7 @@ namespace
        void generateBitmapFont(const std::string& family, int height, int weight, bool italic, bool underline, int start, int count, int base) const;

        bool isGlContextInitialized() const;
-        
+
        PFNGLGENBUFFERSPROC    glGenBuffersExt;
        PFNGLDELETEBUFFERSPROC glDeleteBuffersExt;

@@ -866,22 +866,22 @@ namespace

        CV_FUNCNAME( "GlFuncTab_GTK::generateBitmapFont" );

-        __BEGIN__;        
-        
+        __BEGIN__;
+
        fontDecr = pango_font_description_new();
-        
+
        pango_font_description_set_size(fontDecr, height);
-        
+
        pango_font_description_set_family_static(fontDecr, family.c_str());
-        
+
        pango_font_description_set_weight(fontDecr, static_cast<PangoWeight>(weight));
-        
+
        pango_font_description_set_style(fontDecr, italic ? PANGO_STYLE_ITALIC : PANGO_STYLE_NORMAL);
-                
+
        pangoFont = gdk_gl_font_use_pango_font(fontDecr, start, count, base);
-        
+
        pango_font_description_free(fontDecr);
-        
+
        if (!pangoFont)
            CV_ERROR(CV_OpenGlApiCallError, "Can't create font");

@@ -960,13 +960,13 @@ namespace

    void releaseGlContext(CvWindow* window)
    {
-        CV_FUNCNAME( "releaseGlContext" );
+        //CV_FUNCNAME( "releaseGlContext" );

-        __BEGIN__;
+        //__BEGIN__;

        window->useGl = false;

-        __END__;
+        //__END__;
    }

    void drawGl(CvWindow* window)