Fixed implementation of GpuMat::setTo()

2fe75ea3 · Andrey Morozov · f37ac8e4 · 2fe75ea3 · 2fe75ea3
Commit 2fe75ea3 authored Jul 21, 2010 by Andrey Morozov
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 53 additions and 173 deletions

matrix_operations.cu modules/gpu/src/cuda/matrix_operations.cu +15 -13

operator_set_to.cpp tests/gpu/src/operator_set_to.cpp +38 -160

No files found.
--- a/modules/gpu/src/cuda/matrix_operations.cu
+++ b/modules/gpu/src/cuda/matrix_operations.cu
@@ -76,19 +76,21 @@ namespace mat_operators
    };

    template <typename T, int channels>
-    __device__ size_t GetIndex(size_t i, int cols, int rows, int step)
+    __device__ size_t GetIndex(size_t i, int cols, int step)
    {
-        return ((i / static_cast<size_t>(cols))*static_cast<size_t>(step) / static_cast<size_t>(sizeof(T))) +
-                (i % static_cast<size_t>(rows))*static_cast<size_t>(channels) ;
+        size_t ret =    (i / static_cast<size_t>(cols))*static_cast<size_t>(step) / static_cast<size_t>(sizeof(T)) +
+                        (i % static_cast<size_t>(cols))*static_cast<size_t>(channels);
+        return  ret;
    }

    template <typename T, int channels>
    __global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step)
    {
        size_t i = (blockIdx.x * blockDim.x + threadIdx.x);
+
        if (i < cols * rows)
        {
-            unroll<T, channels>::unroll_set(mat, GetIndex<T,channels>(i, cols, rows, step));
+            unroll<T, channels>::unroll_set(mat, GetIndex<T,channels>(i, cols, step));
        }
    }

@@ -97,7 +99,7 @@ namespace mat_operators
    {
        size_t i = (blockIdx.x * blockDim.x + threadIdx.x);
        if (i < cols * rows)
-            unroll<T, channels>::unroll_set_with_mask(mat, mask[i], GetIndex<T,channels>(i, cols, rows, step));
+            unroll<T, channels>::unroll_set_with_mask(mat, mask[i], GetIndex<T,channels>(i, cols, step));
    }
 }

@@ -105,10 +107,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl
 {
    // download scalar to constant memory
    float data[4];
-    data[0] = scalar[0];
-    data[1] = scalar[1];
-    data[2] = scalar[2];
-    data[3] = scalar[3];
+    data[0] = static_cast<float>(scalar[0]);
+    data[1] = static_cast<float>(scalar[1]);
+    data[2] = static_cast<float>(scalar[2]);
+    data[3] = static_cast<float>(scalar[3]);
    cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));

    dim3 threadsPerBlock(256,1,1);
@@ -144,10 +146,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl
 extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int elemSize1, int channels)
 {
    float data[4];
-    data[0] = scalar[0];
-    data[1] = scalar[1];
-    data[2] = scalar[2];
-    data[3] = scalar[3];
+    data[0] = static_cast<float>(scalar[0]);
+    data[1] = static_cast<float>(scalar[1]);
+    data[2] = static_cast<float>(scalar[2]);
+    data[3] = static_cast<float>(scalar[3]);
    cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));

    dim3 threadsPerBlock(256, 1, 1);

--- a/tests/gpu/src/operator_set_to.cpp
+++ b/tests/gpu/src/operator_set_to.cpp
@@ -21,6 +21,8 @@ class CV_GpuMatOpSetTo : public CvTest
        void print_mat(gpu::GpuMat & mat, std::string name = "gpu mat");
        void run(int);

+        bool compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat);
+
        bool test_cv_8u_c1();
        bool test_cv_8u_c2();
        bool test_cv_8u_c3();
@@ -34,15 +36,15 @@ class CV_GpuMatOpSetTo : public CvTest
        bool test_cv_32f_c4();

    private:
-        int w;
-        int h;
+        int rows;
+        int cols;
        Scalar s;
 };

 CV_GpuMatOpSetTo::CV_GpuMatOpSetTo(): CvTest( "GpuMatOperatorSetTo", "setTo" )
 {
-    w = 100;
-    h = 100;
+    rows = 127;
+    cols = 129;

    s.val[0] = 128.0;
    s.val[1] = 128.0;
@@ -66,13 +68,16 @@ void CV_GpuMatOpSetTo::print_mat(gpu::GpuMat & mat, std::string name)
    print_mat(newmat, name);
 }

-bool CV_GpuMatOpSetTo::test_cv_8u_c1()
+bool CV_GpuMatOpSetTo::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat)
 {
-    Mat cpumat(w, h, CV_8U, Scalar::all(0));
-    GpuMat gpumat(cpumat);
-
+    //int64 time = getTickCount();
    cpumat.setTo(s);
+    //int64 time1 = getTickCount();
    gpumat.setTo(s);
+    //int64 time2 = getTickCount();
+    //std::cout << "\ntime cpu:" << double((time1 - time) / getTickFrequency());
+    //std::cout << "\ntime gpu:" << double((time2 - time1) / getTickFrequency());
+    //std::cout << "\n";

 #ifdef PRINT_MATRIX
    print_mat(cpumat);
@@ -82,7 +87,7 @@ bool CV_GpuMatOpSetTo::test_cv_8u_c1()

    double ret = norm(cpumat, gpumat);

-    if (ret < 0.1)
+    if (ret < 1.0)
        return true;
    else
    {
@@ -91,205 +96,78 @@ bool CV_GpuMatOpSetTo::test_cv_8u_c1()
    }
 }

-bool CV_GpuMatOpSetTo::test_cv_8u_c2()
+
+bool CV_GpuMatOpSetTo::test_cv_8u_c1()
 {
-    Mat cpumat(w, h, CV_8UC2, Scalar::all(0));
+    Mat cpumat(rows, cols, CV_8U, Scalar::all(0));
    GpuMat gpumat(cpumat);

-    cpumat.setTo(s);
-    gpumat.setTo(s);
-
-#ifdef PRINT_MATRIX
-    print_mat(cpumat);
-    print_mat(gpumat);
-    cv::waitKey(0);
-#endif
+    return compare_matrix(cpumat, gpumat);
+}

-    double ret = norm(cpumat, gpumat);
+bool CV_GpuMatOpSetTo::test_cv_8u_c2()
+{
+    Mat cpumat(rows, cols, CV_8UC2, Scalar::all(0));
+    GpuMat gpumat(cpumat);

-    if (ret < 0.1)
-        return true;
-    else
-    {
-        std::cout << "return : " << ret << "\n";
-        return false;
-    }
+    return compare_matrix(cpumat, gpumat);
 }

 bool CV_GpuMatOpSetTo::test_cv_8u_c3()
 {
-    Mat cpumat(w, h, CV_8UC3, Scalar::all(0));
+    Mat cpumat(rows, cols, CV_8UC3, Scalar::all(0));
    GpuMat gpumat(cpumat);

-    cpumat.setTo(s);
-    gpumat.setTo(s);
-
-#ifdef PRINT_MATRIX
-    print_mat(cpumat);
-    print_mat(gpumat);
-    cv::waitKey(0);
-#endif
-
-    double ret = norm(cpumat, gpumat);
-
-    if (ret < 0.1)
-        return true;
-    else
-    {
-        std::cout << "return : " << ret << "\n";
-        return false;
-    }
+    return compare_matrix(cpumat, gpumat);
 }

 bool CV_GpuMatOpSetTo::test_cv_8u_c4()
 {
-    Mat cpumat(w, h, CV_8UC4, Scalar::all(0));
+    Mat cpumat(rows, cols, CV_8UC4, Scalar::all(0));
    GpuMat gpumat(cpumat);

-    cpumat.setTo(s);
-    gpumat.setTo(s);
-
-#ifdef PRINT_MATRIX
-    print_mat(cpumat);
-    print_mat(gpumat);
-    cv::waitKey(0);
-#endif
-
-    double ret = norm(cpumat, gpumat);
-
-    if (ret < 0.1)
-        return true;
-    else
-    {
-        std::cout << "return : " << ret << "\n";
-        return false;
-    }
+    return compare_matrix(cpumat, gpumat);
 }

 bool CV_GpuMatOpSetTo::test_cv_16u_c4()
 {
-    Mat cpumat(w, h, CV_16UC4, Scalar::all(0));
+    Mat cpumat(rows, cols, CV_16UC4, Scalar::all(0));
    GpuMat gpumat(cpumat);

-    cpumat.setTo(s);
-    gpumat.setTo(s);
-
-#ifdef PRINT_MATRIX
-    print_mat(cpumat);
-    print_mat(gpumat);
-    cv::waitKey(0);
-#endif
-
-    double ret = norm(cpumat, gpumat);
-
-    if (ret < 0.1)
-        return true;
-    else
-    {
-        std::cout << "return : " << ret << "\n";
-        return false;
-    }
+    return compare_matrix(cpumat, gpumat);
 }


 bool CV_GpuMatOpSetTo::test_cv_32f_c1()
 {
-    Mat cpumat(w, h, CV_32F, Scalar::all(0));
+    Mat cpumat(rows, cols, CV_32F, Scalar::all(0));
    GpuMat gpumat(cpumat);

-    cpumat.setTo(s);
-    gpumat.setTo(s);
-
-#ifdef PRINT_MATRIX
-    print_mat(cpumat);
-    print_mat(gpumat);
-    cv::waitKey(0);
-#endif
-
-    double ret = norm(cpumat, gpumat);
-
-    if (ret < 0.1)
-        return true;
-    else
-    {
-        std::cout << "return : " << ret << "\n";
-        return false;
-    }
+    return compare_matrix(cpumat, gpumat);
 }

 bool CV_GpuMatOpSetTo::test_cv_32f_c2()
 {
-    Mat cpumat(w, h, CV_32FC2, Scalar::all(0));
+    Mat cpumat(rows, cols, CV_32FC2, Scalar::all(0));
    GpuMat gpumat(cpumat);

-    cpumat.setTo(s);
-    gpumat.setTo(s);
-
-#ifdef PRINT_MATRIX
-    print_mat(cpumat);
-    print_mat(gpumat);
-    cv::waitKey(0);
-#endif
-
-    double ret = norm(cpumat, gpumat);
-
-    if (ret < 0.1)
-        return true;
-    else
-    {
-        std::cout << "return : " << ret;
-        return false;
-    }
+    return compare_matrix(cpumat, gpumat);
 }

 bool CV_GpuMatOpSetTo::test_cv_32f_c3()
 {
-    Mat cpumat(w, h, CV_32FC3, Scalar::all(0));
+    Mat cpumat(rows, cols, CV_32FC3, Scalar::all(0));
    GpuMat gpumat(cpumat);

-    cpumat.setTo(s);
-    gpumat.setTo(s);
-
-#ifdef PRINT_MATRIX
-    print_mat(cpumat);
-    print_mat(gpumat);
-    cv::waitKey(0);
-#endif
-
-    double ret = norm(cpumat, gpumat);
-
-    if (ret < 0.1)
-        return true;
-    else
-    {
-        std::cout << "return : " << ret;
-        return false;
-    }
+    return compare_matrix(cpumat, gpumat);
 }

 bool CV_GpuMatOpSetTo::test_cv_32f_c4()
 {
-    Mat cpumat(w, h, CV_32FC4, Scalar::all(0));
+    Mat cpumat(rows, cols, CV_32FC4, Scalar::all(0));
    GpuMat gpumat(cpumat);

-    cpumat.setTo(s);
-    gpumat.setTo(s);
-
-#ifdef PRINT_MATRIX
-    print_mat(cpumat);
-    print_mat(gpumat);
-    cv::waitKey(0);
-#endif
-
-    double ret = norm(cpumat, gpumat);
-
-    if (ret < 0.1)
-        return true;
-    else
-    {
-        std::cout << "return : " << ret << "\n";
-        return false;
-    }
+    return compare_matrix(cpumat, gpumat);
 }

 void CV_GpuMatOpSetTo::run( int /* start_from */)