fixed bug in gpu::transformSmart with mask

6c1de21a · Vladislav Vinogradov · cec641fb
Commit 6c1de21a authored Jan 22, 2012 by Vladislav Vinogradov
Showing 3 changed files with 21 additions and 13 deletions

matrix_operations.cu modules/core/src/cuda/matrix_operations.cu +14 -9

gpumat.cpp modules/core/src/gpumat.cpp +5 -2

transform_detail.hpp modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp +2 -2

No files found.
--- a/modules/core/src/cuda/matrix_operations.cu
+++ b/modules/core/src/cuda/matrix_operations.cu
@@ -59,27 +59,32 @@ namespace cv { namespace gpu { namespace device
    ////////////////////////////////// CopyTo /////////////////////////////////
    ///////////////////////////////////////////////////////////////////////////
-    template <typename T> void copyToWithMask(DevMem2Db src, DevMem2Db dst, DevMem2Db mask, int channels, cudaStream_t stream)
+    template <typename T> void copyToWithMask(DevMem2Db src, DevMem2Db dst, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream)
-    {        
+    {
-        cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, identity<T>(), SingleMaskChannels(mask, channels), stream);
+        if (colorMask)
+            cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, identity<T>(), SingleMask(mask), stream);
+        else
+            cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
    }
-    void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int depth, int channels, DevMem2Db mask, cudaStream_t stream)
+    void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int elemSize1, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream)
    {
-        typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, DevMem2Db mask, int channels, cudaStream_t stream);
+        typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream);
        static func_t tab[] =
        {
+            0,
            copyToWithMask<unsigned char>,
-            copyToWithMask<signed char>,
            copyToWithMask<unsigned short>,
-            copyToWithMask<short>,
+            0,
            copyToWithMask<int>,
-            copyToWithMask<float>,
+            0,
+            0,
+            0,
            copyToWithMask<double>
        };
-        tab[depth](src, dst, mask, channels, stream);
+        tab[elemSize1](src, dst, cn, mask, colorMask, stream);
    }
    ///////////////////////////////////////////////////////////////////////////

--- a/modules/core/src/gpumat.cpp
+++ b/modules/core/src/gpumat.cpp
@@ -348,7 +348,7 @@ namespace
 namespace cv { namespace gpu { namespace device
 {
-    void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int depth, int channels, DevMem2Db mask, cudaStream_t stream);
+    void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int elemSize1, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream);
    template <typename T>
    void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream);
@@ -405,7 +405,10 @@ namespace cv { namespace gpu
 {
    CV_EXPORTS void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0)
    {
-        cv::gpu::device::copyToWithMask_gpu(src.reshape(1), dst.reshape(1), src.depth(), src.channels(), mask, stream);
+        CV_Assert(src.size() == dst.size() && src.type() == dst.type());
+        CV_Assert(src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels()));
+        cv::gpu::device::copyToWithMask_gpu(src.reshape(1), dst.reshape(1), src.elemSize1(), src.channels(), mask.reshape(1), mask.channels() != 1, stream);
    }
    CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst)

--- a/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp
@@ -221,7 +221,7 @@ namespace cv { namespace gpu { namespace device
                if (x_shifted + ft::smart_shift - 1 < src_.cols)
                {
                    const read_type src_n_el = ((const read_type*)src)[x];
-                    write_type dst_n_el;
+                    write_type dst_n_el = ((const write_type*)dst)[x];
                    OpUnroller<ft::smart_shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
@@ -273,7 +273,7 @@ namespace cv { namespace gpu { namespace device
                {
                    const read_type1 src1_n_el = ((const read_type1*)src1)[x];
                    const read_type2 src2_n_el = ((const read_type2*)src2)[x];
-                    write_type dst_n_el;
+                    write_type dst_n_el = ((const write_type*)dst)[x];
                    OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);