added Sobel, GaussianBlur, Canny to gpu module.

minor fix of matrix_operations.cpp.

added Sobel, GaussianBlur, Canny to gpu module.
minor fix of matrix_operations.cpp.
49fa536c · Vladislav Vinogradov · 12656df1 · 49fa536c · 49fa536c · 49fa536c
Commit 49fa536c authored Oct 04, 2010 by Vladislav Vinogradov
6 changed files
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -533,9 +533,18 @@ namespace cv
        //! applies an advanced morphological operation to the image
        CV_EXPORTS void morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor, int iterations);

+        //! 1D mask window sum for 8-bit images
        CV_EXPORTS void sumWindowColumn(const GpuMat& src, GpuMat& dst, int ksize, int anchor = -1);
        CV_EXPORTS void sumWindowRow(const GpuMat& src, GpuMat& dst, int ksize, int anchor = -1);

+        //! applies the generalized Sobel operator to the image (CV_8UC1 and CV_8UC4 sources only; ddepth is currently not used)
+        CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1);
+
+        //! smooths the image using a Gaussian filter; the filtering path accepts only CV_8UC1 and CV_8UC4 sources.
+        CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0);
+
+        //! applies the Canny edge detector to a CV_8UC1 image and produces a CV_8UC1 edge map.
+        CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double threshold1, double threshold2, int apertureSize = 3);

        //////////////////////////////// Image Labeling ////////////////////////////////


--- a/modules/gpu/src/filtering_npp.cpp
+++ b/modules/gpu/src/filtering_npp.cpp
@@ -54,6 +54,8 @@ void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int)
 void cv::gpu::boxFilter(const GpuMat&, GpuMat&, Size, Point) { throw_nogpu(); }
 void cv::gpu::sumWindowColumn(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }
 void cv::gpu::sumWindowRow(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }
+void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double) { throw_nogpu(); }
+void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double) { throw_nogpu(); }

 #else

@@ -237,4 +239,186 @@ void cv::gpu::sumWindowRow(const GpuMat& src, GpuMat& dst, int ksize, int anchor
    sumWindowCaller(nppiSumWindowRow_8u32f_C1R, src, dst, ksize, anchor);
 }

+////////////////////////////////////////////////////////////////////////
+// Filter Engine
+
+namespace
+{
+    // Signature of the NPP 8-bit 1D filter primitives stored in the row/column caller tables.
+    typedef NppStatus (*nppFilter1D_t)(
+        const Npp8u * pSrc, Npp32s nSrcStep,
+        Npp8u * pDst, Npp32s nDstStep,
+        NppiSize oROI,
+        const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);
+
+    // Signature of the NPP 8-bit 2D filter primitives stored in the 2D caller table.
+    typedef NppStatus (*nppFilter2D_t)(
+        const Npp8u * pSrc, Npp32s nSrcStep,
+        Npp8u * pDst, Npp32s nDstStep,
+        NppiSize oSizeROI,
+        const Npp32s * pKernel, NppiSize oKernelSize, NppiPoint oAnchor, Npp32s nDivisor);
+
+    // Runs the NPP row (1D) filter on a CV_8UC1 or CV_8UC4 image.
+    //
+    //   src       - source image; any other type fails the assertion below
+    //   dst       - re-created with the size and type of src and filled with Scalar()
+    //               before the filter is run
+    //   rowKernel - device-side kernel; its column count is used as the mask size and
+    //               its data is read as Npp32s
+    //   anchor    - kernel anchor; a negative value selects kRowSize / 2
+    //   nDivisor  - divisor forwarded unchanged to NPP
+    //
+    // The NppStatus returned by the primitive is checked with nppSafeCall so that a
+    // failed NPP call is reported instead of being silently ignored.
+    void applyRowFilter(const GpuMat& src, GpuMat& dst, const GpuMat& rowKernel, Npp32s anchor = -1, Npp32s nDivisor = 1)
+    {
+        static const nppFilter1D_t nppFilter1D_callers[] = {nppiFilterRow_8u_C1R, nppiFilterRow_8u_C4R};
+
+        CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
+
+        int kRowSize = rowKernel.cols;
+
+        dst.create(src.size(), src.type());
+        dst = Scalar();
+
+        NppiSize oROI;
+        oROI.width = src.cols - kRowSize + 1;
+        oROI.height = src.rows;
+
+        if (anchor < 0)
+            anchor = kRowSize >> 1;
+
+        GpuMat srcROI = src.colRange(kRowSize-1, oROI.width);
+        GpuMat dstROI = dst.colRange(kRowSize-1, oROI.width);
+
+        // channels() is 1 or 4 here (see the assertion), so ">> 2" yields table index 0 or 1
+        nppSafeCall( nppFilter1D_callers[src.channels() >> 2](srcROI.ptr<Npp8u>(), srcROI.step, dstROI.ptr<Npp8u>(), dstROI.step, oROI, 
+                rowKernel.ptr<Npp32s>(), kRowSize, anchor, nDivisor) );
+    }
+
+    // Runs the NPP column (1D) filter on a CV_8UC1 or CV_8UC4 image.
+    //
+    //   src          - source image; any other type fails the assertion below
+    //   dst          - re-created with the size and type of src and filled with Scalar()
+    //                  before the filter is run
+    //   columnKernel - device-side kernel; its column count is used as the mask size and
+    //                  its data is read as Npp32s
+    //   anchor       - kernel anchor; a negative value selects kColSize / 2
+    //   nDivisor     - divisor forwarded unchanged to NPP
+    //
+    // The NppStatus returned by the primitive is checked with nppSafeCall so that a
+    // failed NPP call is reported instead of being silently ignored.
+    void applyColumnFilter(const GpuMat& src, GpuMat& dst, const GpuMat& columnKernel, Npp32s anchor = -1, Npp32s nDivisor = 1)
+    {
+        static const nppFilter1D_t nppFilter1D_callers[] = {nppiFilterColumn_8u_C1R, nppiFilterColumn_8u_C4R};
+
+        CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
+
+        int kColSize = columnKernel.cols;
+
+        dst.create(src.size(), src.type());
+        dst = Scalar();
+
+        NppiSize oROI;
+        oROI.width = src.cols;
+        oROI.height = src.rows - kColSize + 1;
+
+        if (anchor < 0)
+            anchor = kColSize >> 1;
+
+        GpuMat srcROI = src.rowRange(kColSize-1, oROI.height);
+        GpuMat dstROI = dst.rowRange(kColSize-1, oROI.height);
+
+        // channels() is 1 or 4 here (see the assertion), so ">> 2" yields table index 0 or 1
+        nppSafeCall( nppFilter1D_callers[src.channels() >> 2](srcROI.ptr<Npp8u>(), srcROI.step, dstROI.ptr<Npp8u>(), dstROI.step, oROI, 
+                columnKernel.ptr<Npp32s>(), kColSize, anchor, nDivisor) );
+    }
+
+    // Separable filtering in two passes: the row filter writes into a temporary image,
+    // then the column filter reads that temporary and writes dst.
+    // anchor.x goes to the row pass, anchor.y to the column pass; both passes receive
+    // the same nDivisor.
+    inline void applySeparableFilter(const GpuMat& src, GpuMat& dst, const GpuMat& rowKernel, const GpuMat& columnKernel, 
+        const cv::Point& anchor = cv::Point(-1, -1), Npp32s nDivisor = 1)
+    {
+        GpuMat rowFiltered;
+
+        applyRowFilter(src, rowFiltered, rowKernel, anchor.x, nDivisor);
+        applyColumnFilter(rowFiltered, dst, columnKernel, anchor.y, nDivisor);
+    }
+
+    // Prepares a host kernel for the NPP filter calls: the elements are converted to
+    // 32-bit integers, the matrix is transposed, row 0 of the result is mirrored, and
+    // the outcome is uploaded into dst.
+    // kernel is taken by value, so the caller's Mat header is left untouched.
+    void makeNppKernel(Mat kernel, GpuMat& dst)
+    {
+        kernel.convertTo(kernel, CV_32S);
+        kernel = kernel.t();
+
+        // mirror row 0 in place by swapping elements from both ends towards the middle
+        for (int lo = 0, hi = kernel.cols - 1; lo < hi; ++lo, --hi)
+            std::swap(kernel.at<int>(0, lo), kernel.at<int>(0, hi));
+
+        dst.upload(kernel);
+    }
+
+    // Runs the NPP 2D filter on a CV_8UC1 or CV_8UC4 image.
+    //
+    //   src      - source image; any other type fails the assertion below
+    //   dst      - re-created with the size and type of src and filled with Scalar()
+    //              before the filter is run
+    //   kernel   - device-side kernel; its rows/cols give the mask size and its data is
+    //              read as Npp32s
+    //   anchor   - kernel anchor; a negative coordinate selects half of the kernel extent
+    //              along that axis
+    //   nDivisor - divisor forwarded unchanged to NPP
+    //
+    // The NppStatus returned by the primitive is checked with nppSafeCall so that a
+    // failed NPP call is reported instead of being silently ignored.
+    void applyFilter2D(const GpuMat& src, GpuMat& dst, const GpuMat& kernel, cv::Point anchor = cv::Point(-1, -1), Npp32s nDivisor = 1)
+    {
+        static const nppFilter2D_t nppFilter2D_callers[] = {nppiFilter_8u_C1R, nppiFilter_8u_C4R};
+
+        CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
+
+        dst.create(src.size(), src.type());
+        dst = Scalar();
+
+        NppiSize oROI;
+        oROI.width = src.cols - kernel.cols + 1;
+        oROI.height = src.rows - kernel.rows + 1;
+
+        if (anchor.x < 0)
+            anchor.x = kernel.cols >> 1;
+        if (anchor.y < 0)
+            anchor.y = kernel.rows >> 1;
+
+        GpuMat srcROI = src(Range(kernel.rows-1, oROI.height), Range(kernel.cols-1, oROI.width));
+        GpuMat dstROI = dst(Range(kernel.rows-1, oROI.height), Range(kernel.cols-1, oROI.width));
+
+        NppiSize oKernelSize;
+        oKernelSize.height = kernel.rows;
+        oKernelSize.width = kernel.cols;
+        NppiPoint oAnchor;
+        oAnchor.x = anchor.x;
+        oAnchor.y = anchor.y;
+
+        // channels() is 1 or 4 here (see the assertion), so ">> 2" yields table index 0 or 1
+        nppSafeCall( nppFilter2D_callers[src.channels() >> 2](srcROI.ptr<Npp8u>(), srcROI.step, dstROI.ptr<Npp8u>(), dstROI.step, oROI, 
+                kernel.ptr<Npp32s>(), oKernelSize, oAnchor, nDivisor) );
+    }
+}
+
+////////////////////////////////////////////////////////////////////////
+// Sobel
+
+// Builds the derivative kernels with getDerivKernels, optionally folds `scale` into one
+// of them, converts both with makeNppKernel and runs the separable filter.
+// NOTE: ddepth is not read by this implementation; the output type is whatever
+// applySeparableFilter produces.
+void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale)
+{
+    Mat rowCoeffs, colCoeffs;
+    getDerivKernels(rowCoeffs, colCoeffs, dx, dy, ksize, false, CV_32F);
+
+    if (scale != 1)
+    {
+        // usually the smoothing part is the slowest to compute,
+        // so scale it instead of the faster differentiating part
+        Mat& scaled = (dx == 0) ? rowCoeffs : colCoeffs;
+        scaled *= scale;
+    }
+
+    GpuMat rowKernel;
+    makeNppKernel(rowCoeffs, rowKernel);
+
+    GpuMat columnKernel;
+    makeNppKernel(colCoeffs, columnKernel);
+
+    applySeparableFilter(src, dst, rowKernel, columnKernel);
+}
+
+////////////////////////////////////////////////////////////////////////
+// GaussianBlur
+
+// Gaussian smoothing implemented as a separable filter with integer kernels.
+//   ksize  - kernel size; a non-positive dimension is derived from the matching sigma
+//   sigma1 - sigma used for the row kernel
+//   sigma2 - sigma used for the column kernel; a non-positive value means "use sigma1"
+// Both kernel dimensions must end up positive and odd, otherwise the assertion fails.
+void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2)
+{
+    // a 1x1 kernel is handled as a plain copy
+    if (ksize.width == 1 && ksize.height == 1)
+    {
+        src.copyTo(dst);
+        return;
+    }
+
+    const int depth = src.depth();
+    const int sigmaFactor = (depth == CV_8U) ? 3 : 4;
+    const int kernelType = std::max(depth, CV_32F);
+
+    if (sigma2 <= 0)
+        sigma2 = sigma1;
+
+    // automatic detection of kernel size from sigma ("| 1" makes the result odd)
+    if (ksize.width <= 0 && sigma1 > 0)
+        ksize.width = cvRound(sigma1 * sigmaFactor * 2 + 1) | 1;
+    if (ksize.height <= 0 && sigma2 > 0)
+        ksize.height = cvRound(sigma2 * sigmaFactor * 2 + 1) | 1;
+
+    CV_Assert(ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1);
+
+    sigma1 = std::max(sigma1, 0.0);
+    sigma2 = std::max(sigma2, 0.0);
+
+    // the kernels are multiplied by scaleFactor before makeNppKernel converts them to
+    // integers; the same value is passed to the filter as its divisor
+    const int scaleFactor = 256;
+
+    Mat kx = getGaussianKernel(ksize.width, sigma1, kernelType);
+    kx.convertTo(kx, kx.depth(), scaleFactor);
+
+    const bool sameKernel = ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON;
+
+    Mat ky;
+    if (!sameKernel)
+    {
+        ky = getGaussianKernel(ksize.height, sigma2, kernelType);
+        ky.convertTo(ky, ky.depth(), scaleFactor);
+    }
+    else
+    {
+        // identical size and sigma: reuse the row kernel
+        ky = kx;
+    }
+
+    GpuMat rowKernel;
+    makeNppKernel(kx, rowKernel);
+
+    GpuMat columnKernel;
+    makeNppKernel(ky, columnKernel);
+
+    applySeparableFilter(src, dst, rowKernel, columnKernel, cv::Point(-1, -1), scaleFactor);
+}
+
 #endif
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -62,6 +62,7 @@ void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_
 void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
 void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); }
 void cv::gpu::integral(GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int) { throw_nogpu(); }

 #else /* !defined (HAVE_CUDA) */

@@ -986,4 +987,33 @@ void cv::gpu::integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum)
        sum.step, sqsum.ptr<Npp32f>(), sqsum.step, sz, 0, 0.0f, h) );
 }

+////////////////////////////////////////////////////////////////////////
+// Canny
+
+// Canny edge detection for CV_8UC1 images.
+// The x and y derivatives are computed with Sobel (aperture = apertureSize), converted
+// to CV_32F and handed to nppiCanny_32f8u_C1R together with both thresholds; `edges`
+// is (re)created as a CV_8UC1 image of the input size.
+void cv::gpu::Canny(const GpuMat& image, GpuMat& edges, double threshold1, double threshold2, int apertureSize)
+{
+    CV_Assert(image.type() == CV_8UC1);
+
+    // derivative along x, converted in place to 32-bit float
+    GpuMat gradX;
+    Sobel(image, gradX, -1, 1, 0, apertureSize);
+    gradX.convertTo(gradX, CV_32F);
+
+    // derivative along y, converted in place to 32-bit float
+    GpuMat gradY;
+    Sobel(image, gradY, -1, 0, 1, apertureSize);
+    gradY.convertTo(gradY, CV_32F);
+
+    edges.create(image.size(), CV_8UC1);
+
+    NppiSize roi;
+    roi.width = image.cols;
+    roi.height = image.rows;
+
+    // scratch buffer of the size NPP reports for this ROI
+    int scratchBytes = 0;
+    nppSafeCall( nppiCannyGetBufferSize(roi, &scratchBytes) );
+    GpuMat scratch(1, scratchBytes, CV_8UC1);
+
+    nppSafeCall( nppiCanny_32f8u_C1R(gradX.ptr<Npp32f>(), gradX.step, gradY.ptr<Npp32f>(), gradY.step, 
+        edges.ptr<Npp8u>(), edges.step, roi, (Npp32f)threshold1, (Npp32f)threshold2, scratch.ptr<Npp8u>()) );
+}
+
 #endif /* !defined (HAVE_CUDA) */
--- a/modules/gpu/src/matrix_operations.cpp
+++ b/modules/gpu/src/matrix_operations.cpp
--- a/tests/gpu/src/gputest_main.cpp
+++ b/tests/gpu/src/gputest_main.cpp
@@ -61,6 +61,9 @@ const char* blacklist[] =
    //"GPU-NppImageLog",              // different precision
    //"GPU-NppImageMagnitude",        // different precision
    //"GPU-NppImageSumWindow",        // different border interpolation
+    //"GPU-NppImageSobel",            // ???
+    //"GPU-NppImageGaussianBlur",     // different border interpolation
+    "GPU-NppImageCanny",            // NPP_TEXTURE_BIND_ERROR
    0
 };


--- a/tests/gpu/src/imgproc_gpu.cpp
+++ b/tests/gpu/src/imgproc_gpu.cpp
@@ -492,6 +492,115 @@ struct CV_GpuNppImageSumWindowTest : public CV_GpuImageProcTest
    }
 };

+////////////////////////////////////////////////////////////////////////////////
+// Sobel
+struct CV_GpuNppImageSobelTest : public CV_GpuImageProcTest
+{
+    CV_GpuNppImageSobelTest() : CV_GpuImageProcTest( "GPU-NppImageSobel", "Sobel" ) {}
+
+    // Compares cv::gpu::Sobel against cv::Sobel (dx = 1, dy = 0) for kernel sizes 3, 5 and 7.
+    // Images that are neither CV_8UC1 nor CV_8UC4 are logged as unsupported and reported as OK.
+    int test(const Mat& img)
+    {
+        const bool supported = img.type() == CV_8UC1 || img.type() == CV_8UC4;
+        if (!supported)
+        {
+            ts->printf(CvTS::LOG, "\nUnsupported type\n");
+            return CvTS::OK;
+        }
+
+        const int apertures[] = {3, 5, 7};
+        const int apertureCount = sizeof(apertures) / sizeof(apertures[0]);
+
+        const int dx = 1;
+        const int dy = 0;
+
+        int status = CvTS::OK;
+
+        for (int k = 0; k < apertureCount; ++k)
+        {
+            const int ksize = apertures[k];
+            ts->printf(CvTS::LOG, "\nksize = %d\n", ksize);
+
+            // reference result on the CPU
+            Mat expected;
+            cv::Sobel(img, expected, -1, dx, dy, ksize);
+
+            // result under test on the GPU
+            GpuMat devSrc(img);
+            GpuMat devDst;
+            cv::gpu::Sobel(devSrc, devDst, -1, dx, dy, ksize);
+
+            // keep going after a mismatch so every kernel size gets logged
+            if (CheckNorm(expected, devDst) != CvTS::OK)
+                status = CvTS::FAIL_GENERIC;
+        }
+
+        return status;
+    }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// GaussianBlur
+struct CV_GpuNppImageGaussianBlurTest : public CV_GpuImageProcTest
+{
+    CV_GpuNppImageGaussianBlurTest() : CV_GpuImageProcTest( "GPU-NppImageGaussianBlur", "GaussianBlur" ) {}
+
+    // Compares cv::gpu::GaussianBlur against cv::GaussianBlur (sigma1 = 3.0) for every
+    // width/height combination of the kernel sizes 3, 5 and 7.
+    // Images that are neither CV_8UC1 nor CV_8UC4 are logged as unsupported and reported as OK.
+    int test(const Mat& img)
+    {
+        const bool supported = img.type() == CV_8UC1 || img.type() == CV_8UC4;
+        if (!supported)
+        {
+            ts->printf(CvTS::LOG, "\nUnsupported type\n");
+            return CvTS::OK;
+        }
+
+        const int extents[] = {3, 5, 7};
+        const int extentCount = sizeof(extents) / sizeof(extents[0]);
+
+        const double sigma1 = 3.0;
+
+        int status = CvTS::OK;
+
+        for (int w = 0; w < extentCount; ++w)
+        {
+            for (int h = 0; h < extentCount; ++h)
+            {
+                const cv::Size kernelSize(extents[w], extents[h]);
+                ts->printf(CvTS::LOG, "\nksize = (%dx%d)\n", extents[w], extents[h]);
+
+                // reference result on the CPU
+                Mat expected;
+                cv::GaussianBlur(img, expected, kernelSize, sigma1);
+
+                // result under test on the GPU
+                GpuMat devSrc(img);
+                GpuMat devDst;
+                cv::gpu::GaussianBlur(devSrc, devDst, kernelSize, sigma1);
+
+                // keep going after a mismatch so every kernel size gets logged
+                if (CheckNorm(expected, devDst) != CvTS::OK)
+                    status = CvTS::FAIL_GENERIC;
+            }
+        }
+
+        return status;
+    }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Canny
+struct CV_GpuNppImageCannyTest : public CV_GpuImageProcTest
+{
+    CV_GpuNppImageCannyTest() : CV_GpuImageProcTest( "GPU-NppImageCanny", "Canny" ) {}
+
+    // Compares cv::gpu::Canny against cv::Canny with thresholds 1.0 and 10.0.
+    // Images that are not CV_8UC1 are logged as unsupported and reported as OK.
+    int test(const Mat& img)
+    {
+        if (img.type() != CV_8UC1)
+        {
+            ts->printf(CvTS::LOG, "\nUnsupported type\n");
+            return CvTS::OK;
+        }
+
+        const double threshold1 = 1.0;
+        const double threshold2 = 10.0;
+
+        // reference result on the CPU
+        Mat expected;
+        cv::Canny(img, expected, threshold1, threshold2);
+
+        // result under test on the GPU
+        GpuMat devSrc(img);
+        GpuMat devDst;
+        cv::gpu::Canny(devSrc, devDst, threshold1, threshold2);
+
+        return CheckNorm(expected, devDst);
+    }
+};
+
 ////////////////////////////////////////////////////////////////////////////////
 // cvtColor
 class CV_GpuCvtColorTest : public CvTest
@@ -598,4 +707,7 @@ CV_GpuNppImageWarpPerspectiveTest CV_GpuNppImageWarpPerspective_test;
 CV_GpuNppImageIntegralTest CV_GpuNppImageIntegral_test;
 CV_GpuNppImageBlurTest CV_GpuNppImageBlur_test;
 CV_GpuNppImageSumWindowTest CV_GpuNppImageSumWindow_test;
+CV_GpuNppImageSobelTest CV_GpuNppImageSobel_test;
+CV_GpuNppImageGaussianBlurTest CV_GpuNppImageGaussianBlur_test;
+CV_GpuNppImageCannyTest CV_GpuNppImageCanny_test;
 CV_GpuCvtColorTest CV_GpuCvtColor_test;
\ No newline at end of file