added sipport of BORDER_REPLICATE mode into gpu::corner* functions

6ef4d9b1 · Alexey Spizhevoy · bb9bf53e · 6ef4d9b1 · 6ef4d9b1 · 6ef4d9b1
Commit 6ef4d9b1 authored Dec 06, 2010 by Alexey Spizhevoy
6 changed files
--- a/modules/gpu/src/cuda/border_interpolate.hpp
+++ b/modules/gpu/src/cuda/border_interpolate.hpp
@@ -44,7 +44,7 @@ namespace cv { namespace gpu {
    struct BrdReflect101 
    {
-        BrdReflect101(int len) : last(len - 1) {}
+        BrdReflect101(int len): last(len - 1) {}
        __device__ int idx_low(int i) const
        {
@@ -73,7 +73,7 @@ namespace cv { namespace gpu {
    template <typename T>
    struct BrdRowReflect101: BrdReflect101
    {
-        BrdRowReflect101(int len) : BrdReflect101(len) {}
+        BrdRowReflect101(int len): BrdReflect101(len) {}
        __device__ float at_low(int i, const T* data) const 
        {
@@ -90,7 +90,7 @@ namespace cv { namespace gpu {
    template <typename T>
    struct BrdColReflect101: BrdReflect101
    {
-        BrdColReflect101(int len, int step) : BrdReflect101(len), step(step) {}
+        BrdColReflect101(int len, int step): BrdReflect101(len), step(step) {}
        __device__ float at_low(int i, const T* data) const 
        {
@@ -108,7 +108,7 @@ namespace cv { namespace gpu {
    struct BrdReplicate
    {
-        BrdReplicate(int len) : last(len - 1) {}
+        BrdReplicate(int len): last(len - 1) {}
        __device__ int idx_low(int i) const
        {
@@ -122,7 +122,7 @@ namespace cv { namespace gpu {
        __device__ int idx(int i) const
        {
-            return min(max(i, last), 0);
+            return max(min(i, last), 0);
        }
        bool is_range_safe(int mini, int maxi) const 
@@ -137,7 +137,7 @@ namespace cv { namespace gpu {
    template <typename T>
    struct BrdRowReplicate: BrdReplicate
    {
-        BrdRowReplicate(int len) : BrdReplicate(len) {}
+        BrdRowReplicate(int len): BrdReplicate(len) {}
        __device__ float at_low(int i, const T* data) const 
        {
@@ -154,7 +154,7 @@ namespace cv { namespace gpu {
    template <typename T>
    struct BrdColReplicate: BrdReplicate
    {
-        BrdColReplicate(int len, int step) : BrdReplicate(len), step(step) {}
+        BrdColReplicate(int len, int step): BrdReplicate(len), step(step) {}
        __device__ float at_low(int i, const T* data) const 
        {

--- a/modules/gpu/src/cuda/cuda_shared.hpp
+++ b/modules/gpu/src/cuda/cuda_shared.hpp
@@ -99,9 +99,9 @@ namespace cv
        // border interpolation modes)
        enum 
        {
-            BORDER_REFLECT101 = 0
+            BORDER_REFLECT101 = 0,
+            BORDER_REPLICATE
        };
    }
 }

--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@@ -42,6 +42,7 @@
 #include "cuda_shared.hpp"
 #include "border_interpolate.hpp"
+#include <stdio.h>
 using namespace cv::gpu;
@@ -498,6 +499,39 @@ namespace cv { namespace gpu { namespace imgproc
    texture<float, 2> harrisDxTex;
    texture<float, 2> harrisDyTex;
+    __global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,
+                                        PtrStep dst)
+    {
+        const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
+        if (x < cols && y < rows)
+        {
+            float a = 0.f;
+            float b = 0.f;
+            float c = 0.f;
+            const int ibegin = y - (block_size / 2);
+            const int jbegin = x - (block_size / 2);
+            const int iend = ibegin + block_size;
+            const int jend = jbegin + block_size;
+            for (int i = ibegin; i < iend; ++i)
+            {
+                for (int j = jbegin; j < jend; ++j)
+                {
+                    float dx = tex2D(harrisDxTex, j, i);
+                    float dy = tex2D(harrisDyTex, j, i);
+                    a += dx * dx;
+                    b += dx * dy;
+                    c += dy * dy;
+                }
+            }
+            ((float*)dst.ptr(y))[x] = a * c - b * b - k * (a + c) * (a + c);
+        }
+    }
    template <typename B>
    __global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,
                                        PtrStep dst, B border_row, B border_col)
@@ -555,6 +589,13 @@ namespace cv { namespace gpu { namespace imgproc
            cornerHarris_kernel<<<grid, threads>>>(
                    cols, rows, block_size, k, dst, BrdReflect101(cols), BrdReflect101(rows));
            break;
+        case BORDER_REPLICATE:
+            harrisDxTex.addressMode[0] = cudaAddressModeClamp;
+            harrisDxTex.addressMode[1] = cudaAddressModeClamp;
+            harrisDyTex.addressMode[0] = cudaAddressModeClamp;
+            harrisDyTex.addressMode[1] = cudaAddressModeClamp;
+            cornerHarris_kernel<<<grid, threads>>>(cols, rows, block_size, k, dst);
+            break;
        }
        cudaSafeCall(cudaThreadSynchronize());
@@ -567,6 +608,42 @@ namespace cv { namespace gpu { namespace imgproc
    texture<float, 2> minEigenValDxTex;
    texture<float, 2> minEigenValDyTex;
+    __global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size, 
+                                             PtrStep dst)
+    {
+        const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
+        if (x < cols && y < rows)
+        {
+            float a = 0.f;
+            float b = 0.f;
+            float c = 0.f;
+            const int ibegin = y - (block_size / 2);
+            const int jbegin = x - (block_size / 2);
+            const int iend = ibegin + block_size;
+            const int jend = jbegin + block_size;
+            for (int i = ibegin; i < iend; ++i)
+            {
+                for (int j = jbegin; j < jend; ++j)
+                {
+                    float dx = tex2D(minEigenValDxTex, j, i);
+                    float dy = tex2D(minEigenValDyTex, j, i);
+                    a += dx * dx;
+                    b += dx * dy;
+                    c += dy * dy;
+                }
+            }
+            a *= 0.5f;
+            c *= 0.5f;
+            ((float*)dst.ptr(y))[x] = (a + c) - sqrtf((a - c) * (a - c) + b * b);
+        }
+    }
    template <typename B>
    __global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size, 
                                             PtrStep dst, B border_row, B border_col)
@@ -624,8 +701,14 @@ namespace cv { namespace gpu { namespace imgproc
        {
        case BORDER_REFLECT101:
            cornerMinEigenVal_kernel<<<grid, threads>>>(
-                    cols, rows, block_size, dst, 
+                    cols, rows, block_size, dst, BrdReflect101(cols), BrdReflect101(rows));
-                    BrdReflect101(cols), BrdReflect101(rows));
+            break;
+        case BORDER_REPLICATE:
+            minEigenValDxTex.addressMode[0] = cudaAddressModeClamp;
+            minEigenValDxTex.addressMode[1] = cudaAddressModeClamp;
+            minEigenValDyTex.addressMode[0] = cudaAddressModeClamp;
+            minEigenValDyTex.addressMode[1] = cudaAddressModeClamp;
+            cornerMinEigenVal_kernel<<<grid, threads>>>(cols, rows, block_size, dst);
            break;
        }

--- a/modules/gpu/src/cuda/linear_filters_beta.cu
+++ b/modules/gpu/src/cuda/linear_filters_beta.cu
@@ -150,7 +150,8 @@ void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
    static const Caller callers[] = 
    { 
-        rowFilterCaller<T, BrdRowReflect101<T> >
+        rowFilterCaller<T, BrdRowReflect101<T> >,
+        rowFilterCaller<T, BrdRowReplicate<T> >
    };
    callers[brd_interp](src, dst, anchor, kernel, ksize);
@@ -251,7 +252,8 @@ void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
    static const Caller callers[] = 
    { 
-        colFilterCaller<T, BrdColReflect101<T> >
+        colFilterCaller<T, BrdColReflect101<T> >,
+        colFilterCaller<T, BrdColReplicate<T> >
    };
    callers[brd_interp](src, dst, anchor, kernel, ksize);
@@ -261,4 +263,4 @@ void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
 template void colFilterCaller<unsigned char>(const DevMem2D_<unsigned char>, PtrStepf, int, const float*, int, int);
 template void colFilterCaller<float>(const DevMem2D_<float>, PtrStepf, int, const float*, int, int);
 }}} 
\ No newline at end of file
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -944,10 +944,13 @@ void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ks
    switch (borderType)
    {
    case cv::BORDER_REFLECT101:
-        gpuBorderType = cv::gpu::BORDER_REFLECT101;
+        gpuBorderType = cv::gpu::BORDER_REFLECT101; 
+        break;
+    case cv::BORDER_REPLICATE:
+        gpuBorderType = cv::gpu::BORDER_REPLICATE; 
        break;
    default:
-        CV_Error(CV_StsBadArg, "cornerHarris: unsupported border type");
+        CV_Error(CV_StsBadArg, "cornerHarris: unsupported border extrapolation mode");
    }
    GpuMat Dx, Dy;
@@ -964,8 +967,11 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, i
    case cv::BORDER_REFLECT101:
        gpuBorderType = cv::gpu::BORDER_REFLECT101;
        break;
+    case cv::BORDER_REPLICATE:
+        gpuBorderType = cv::gpu::BORDER_REPLICATE; 
+        break;
    default:
-        CV_Error(CV_StsBadArg, "cornerMinEigenVal: unsupported border type");
+        CV_Error(CV_StsBadArg, "cornerMinEigenVal: unsupported border extrapolation mode");
    }
    GpuMat Dx, Dy;

--- a/tests/gpu/src/imgproc_gpu.cpp
+++ b/tests/gpu/src/imgproc_gpu.cpp
@@ -640,15 +640,37 @@ struct CV_GpuCornerHarrisTest: CvTest
            rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256));
        double k = 0.1;
-        int borderType = BORDER_REFLECT101;
        cv::Mat dst_gold;
+        cv::gpu::GpuMat dst;
+        cv::Mat dsth;
+        int borderType;
+        borderType = BORDER_REFLECT101;
        cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType); 
+        cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType);
-        cv::gpu::GpuMat dst;
+        dsth = dst;
+        for (int i = 0; i < dst.rows; ++i)
+        {
+            for (int j = 0; j < dst.cols; ++j)
+            {
+                float a = dst_gold.at<float>(i, j);
+                float b = dsth.at<float>(i, j);
+                if (fabs(a - b) > 1e-3f) 
+                {
+                    ts->printf(CvTS::CONSOLE, "%d %d %f %f %d\n", i, j, a, b, apertureSize);
+                    ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
+                    return false;
+                };
+            }
+        }
+        borderType = BORDER_REPLICATE;
+        cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType); 
        cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType);
-        cv::Mat dsth = dst;
+        dsth = dst;
        for (int i = 0; i < dst.rows; ++i)
        {
            for (int j = 0; j < dst.cols; ++j)
@@ -703,15 +725,37 @@ struct CV_GpuCornerMinEigenValTest: CvTest
        else if (depth == CV_8U)
            rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256));
-        int borderType = BORDER_REFLECT101;
        cv::Mat dst_gold;
+        cv::gpu::GpuMat dst;
+        cv::Mat dsth;
+        int borderType;
+        borderType = BORDER_REFLECT101;
        cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType); 
+        cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType);      
-        cv::gpu::GpuMat dst;
+        dsth = dst;
+        for (int i = 0; i < dst.rows; ++i)
+        {
+            for (int j = 0; j < dst.cols; ++j)
+            {
+                float a = dst_gold.at<float>(i, j);
+                float b = dsth.at<float>(i, j);
+                if (fabs(a - b) > 1e-2f) 
+                {
+                    ts->printf(CvTS::CONSOLE, "%d %d %f %f %d %d\n", i, j, a, b, apertureSize, blockSize);
+                    ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
+                    return false;
+                };
+            }
+        }
+        borderType = BORDER_REPLICATE;
+        cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType); 
        cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType);      
-        cv::Mat dsth = dst;
+        dsth = dst;
        for (int i = 0; i < dst.rows; ++i)
        {
            for (int j = 0; j < dst.cols; ++j)
@@ -726,6 +770,7 @@ struct CV_GpuCornerMinEigenValTest: CvTest
                };
            }
        }
        return true;
    }
 };