Fixed bugs in the GPU filter engine (incorrect buffer type) and in the vector saturate_cast.

Changed the buffer type in linear filters to float. Added support for 1-channel images to linear filters. Added support for the BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types to GPU linear filters. Minor fix in tests. Updated comments in gpu.hpp.

Fixed bugs in the GPU filter engine (incorrect buffer type) and in the vector saturate_cast.
Changed the buffer type in linear filters to float. Added support for 1-channel images to linear filters. Added support for the BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types to GPU linear filters. Minor fix in tests. Updated comments in gpu.hpp.
49ec8ba7 · Vladislav Vinogradov · 108ab940 · 49ec8ba7 · 49ec8ba7 · 49ec8ba7
Commit 49ec8ba7 authored Dec 13, 2010 by Vladislav Vinogradov
9 changed files
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -388,7 +388,7 @@ namespace cv
        CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c);
        //! transposes the matrix
-        //! supports only CV_8UC1 type
+        //! supports CV_8UC1, CV_8SC1, CV_8UC4, CV_8SC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32FC1 type
        CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst);
        //! computes element-wise absolute difference of two arrays (c = abs(a - b))
@@ -725,11 +725,11 @@ namespace cv
        };
        //! returns the non-separable filter engine with the specified filter
-        CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
+        CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D, int srcType, int dstType);
        //! returns the separable filter engine with the specified filters
        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter, 
-            const Ptr<BaseColumnFilter_GPU>& columnFilter);
+            const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType);
        //! returns horizontal 1D box filter
        //! supports only CV_8UC1 source type and CV_32FC1 sum type
@@ -767,23 +767,40 @@ namespace cv
        CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, 
            const Point& anchor = Point(-1,-1));
-        //! returns the primitive row filter with the specified kernel
+        //! returns the primitive row filter with the specified kernel.
+        //! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type.
+        //! there are two versions of the algorithm: NPP and OpenCV.
+        //! the NPP version is called when srcType == CV_8UC1 or srcType == CV_8UC4 and bufType == srcType,
+        //! otherwise the OpenCV version is called.
+        //! NPP supports only BORDER_CONSTANT border type.
+        //! OpenCV version supports only CV_32F as buffer depth and 
+        //! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
        CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, 
-            int anchor = -1);
+            int anchor = -1, int borderType = BORDER_CONSTANT);
-        //! returns the primitive column filter with the specified kernel
+        //! returns the primitive column filter with the specified kernel.
+        //! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 dst type.
+        //! there are two versions of the algorithm: NPP and OpenCV.
+        //! the NPP version is called when dstType == CV_8UC1 or dstType == CV_8UC4 and bufType == dstType,
+        //! otherwise the OpenCV version is called.
+        //! NPP supports only BORDER_CONSTANT border type.
+        //! OpenCV version supports only CV_32F as buffer depth and 
+        //! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
        CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, 
-            int anchor = -1);
+            int anchor = -1, int borderType = BORDER_CONSTANT);
        //! returns the separable linear filter engine
        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, 
-            const Mat& columnKernel, const Point& anchor = Point(-1,-1));
+            const Mat& columnKernel, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
+            int columnBorderType = -1);
        //! returns filter engine for the generalized Sobel operator
-        CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize);
+        CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, 
+            int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
        //! returns the Gaussian filter engine
-        CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0);
+        CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, 
+            int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
        //! returns maximum filter
        CV_EXPORTS Ptr<BaseFilter_GPU> getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
@@ -812,16 +829,19 @@ namespace cv
        //! applies separable 2D linear filter to the image
        CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, 
-            Point anchor = Point(-1,-1));
+            Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
        //! applies generalized Sobel operator to the image
-        CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1);
+        CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, 
+            int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
        //! applies the vertical or horizontal Scharr operator to the image
-        CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1);
+        CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1, 
+            int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
        //! smooths the image using Gaussian filter.
-        CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0);
+        CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0, 
+            int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
        //! applies Laplacian operator to the image
        //! supports only ksize = 1 and ksize = 3

--- a/modules/gpu/src/arithm.cpp
+++ b/modules/gpu/src/arithm.cpp
@@ -277,12 +277,12 @@ namespace cv { namespace gpu { namespace mathfunc
 void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
 {
-    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_8SC4 
+    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8SC1 || src.type() == CV_8UC4 || src.type() == CV_8SC4 
        || src.type() == CV_16UC2 || src.type() == CV_16SC2 || src.type() == CV_32SC1 || src.type() == CV_32FC1);
    dst.create( src.cols, src.rows, src.type() );
-    if (src.type() == CV_8UC1)
+    if (src.type() == CV_8UC1 || src.type() == CV_8SC1)
    {
        NppiSize sz;
        sz.width  = src.cols;

--- a/modules/gpu/src/cuda/filters.cu
+++ b/modules/gpu/src/cuda/filters.cu
--- a/modules/gpu/src/cuda/internal_shared.hpp
+++ b/modules/gpu/src/cuda/internal_shared.hpp
@@ -59,7 +59,8 @@ namespace cv
        enum 
        {
            BORDER_REFLECT101_GPU = 0,
-            BORDER_REPLICATE_GPU
+            BORDER_REPLICATE_GPU,
+            BORDER_CONSTANT_GPU
        };
        // Converts CPU border extrapolation mode into GPU internal analogue.

--- a/modules/gpu/src/filtering.cpp
+++ b/modules/gpu/src/filtering.cpp
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -972,6 +972,12 @@ bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
        gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;
        return true;
    }
+    if (cpuBorderType == cv::BORDER_CONSTANT)
+    {
+        gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;
+        return true;
+    }
    return false;
 }

--- a/modules/gpu/src/opencv2/gpu/device/vecmath.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/vecmath.hpp
--- a/tests/gpu/src/brute_force_matcher.cpp
+++ b/tests/gpu/src/brute_force_matcher.cpp
@@ -107,7 +107,7 @@ protected:
            if (!compareMatches(matchesCPU, matchesGPU))
            {
-                ts->printf(CvTS::LOG, "Match FAIL");
+                ts->printf(CvTS::LOG, "Match FAIL\n");
                ts->set_failed_test_info(CvTS::FAIL_MISMATCH);
                return;
            }
@@ -119,7 +119,7 @@ protected:
            if (!compareMatches(knnMatchesCPU, knnMatchesGPU))
            {
-                ts->printf(CvTS::LOG, "KNN Match FAIL");
+                ts->printf(CvTS::LOG, "KNN Match FAIL\n");
                ts->set_failed_test_info(CvTS::FAIL_MISMATCH);
                return;
            }
@@ -131,7 +131,7 @@ protected:
            if (!compareMatches(radiusMatchesCPU, radiusMatchesGPU))
            {
-                ts->printf(CvTS::LOG, "Radius Match FAIL");
+                ts->printf(CvTS::LOG, "Radius Match FAIL\n");
                ts->set_failed_test_info(CvTS::FAIL_MISMATCH);
                return;
            }

--- a/tests/gpu/src/filters.cpp
+++ b/tests/gpu/src/filters.cpp
@@ -80,7 +80,8 @@ protected:
        double res = norm(m1ROI, m2ROI, NORM_INF);
-        if (res <= 1)
+        // Max difference (2.0) in GaussianBlur
+        if (res <= 2)
            return CvTS::OK;
        ts->printf(CvTS::LOG, "Norm: %f\n", res);
@@ -166,8 +167,6 @@ struct CV_GpuNppImageSobelTest : public CV_GpuNppFilterTest
    int test(const Mat& img)
    {
-        if (img.type() != CV_8UC1)
-            return CvTS::OK;
        int ksizes[] = {3, 5, 7};
        int ksizes_num = sizeof(ksizes) / sizeof(int);
@@ -183,10 +182,8 @@ struct CV_GpuNppImageSobelTest : public CV_GpuNppFilterTest
            cv::Sobel(img, cpudst, -1, dx, dy, ksizes[i]);
            GpuMat gpu1(img);
-            gpu1.convertTo(gpu1, CV_32S);
            GpuMat gpudst;
            cv::gpu::Sobel(gpu1, gpudst, -1, dx, dy, ksizes[i]);
-            gpudst.convertTo(gpudst, CV_8U);
            if (CheckNorm(cpudst, gpudst, Size(ksizes[i], ksizes[i])) != CvTS::OK)
                test_res = CvTS::FAIL_GENERIC;
@@ -204,20 +201,15 @@ struct CV_GpuNppImageScharrTest : public CV_GpuNppFilterTest
    int test(const Mat& img)
    {
-        if (img.type() != CV_8UC1)
-            return CvTS::OK;
        int dx = 1, dy = 0;
        Mat cpudst;
        cv::Scharr(img, cpudst, -1, dx, dy);
        GpuMat gpu1(img);
-        gpu1.convertTo(gpu1, CV_32S);
        GpuMat gpudst;
        cv::gpu::Scharr(gpu1, gpudst, -1, dx, dy);
-        gpudst.convertTo(gpudst, CV_8U);
        return CheckNorm(cpudst, gpudst, Size(3, 3));
    }
 };
@@ -244,7 +236,7 @@ struct CV_GpuNppImageGaussianBlurTest : public CV_GpuNppFilterTest
            {
                cv::Size ksize(ksizes[i], ksizes[j]);
-                ts->printf(CvTS::LOG, "ksize = (%dx%d)\t", ksizes[i], ksizes[j]);
+                ts->printf(CvTS::LOG, "ksize = (%dx%d)\t\n", ksizes[i], ksizes[j]);
                Mat cpudst;
                cv::GaussianBlur(img, cpudst, ksize, sigma1);