Commit 089a835c authored by Vladislav Vinogradov

fixed octave computation in SURF_GPU

used random images in gpu filter tests
parent 903c05db
@@ -1516,13 +1516,14 @@ class CV_EXPORTS SURF_GPU
 public:
     enum KeypointLayout
     {
-        SF_X = 0,
-        SF_Y,
-        SF_LAPLACIAN,
-        SF_SIZE,
-        SF_DIR,
-        SF_HESSIAN,
-        SF_FEATURE_STRIDE
+        X_ROW = 0,
+        Y_ROW,
+        LAPLACIAN_ROW,
+        OCTAVE_ROW,
+        SIZE_ROW,
+        ANGLE_ROW,
+        HESSIAN_ROW,
+        ROWS_COUNT
     };
     //! the default constructor
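With this layout each keypoint attribute occupies one row of the keypoints GpuMat, one column per keypoint, and the octave now has a row of its own. A minimal host-side sketch of reading those rows, assuming the 2.4-era cv::gpu API and an OpenCV build with CUDA support:

    #include <opencv2/gpu/gpu.hpp>
    #include <cstdio>

    void printKeypointRows(const cv::gpu::GpuMat& keypointsGPU)
    {
        cv::Mat kp(keypointsGPU); // download: one row per attribute, one column per keypoint
        const float* x      = kp.ptr<float>(cv::gpu::SURF_GPU::X_ROW);
        const float* y      = kp.ptr<float>(cv::gpu::SURF_GPU::Y_ROW);
        const int*   octave = kp.ptr<int>(cv::gpu::SURF_GPU::OCTAVE_ROW);
        const float* size   = kp.ptr<float>(cv::gpu::SURF_GPU::SIZE_ROW);

        for (int i = 0; i < kp.cols; ++i)
            std::printf("(%g, %g) octave=%d size=%g\n", x[i], y[i], octave[i], size[i]);
    }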
...
@@ -117,7 +117,7 @@ namespace cv { namespace gpu { namespace device
     template <int N> __device__ float icvCalcHaarPatternSum(const float src[][5], int oldSize, int newSize, int y, int x)
     {
     #if __CUDA_ARCH__ >= 200
         typedef double real_t;
     #else
         typedef float real_t;
     #endif
@@ -248,7 +248,7 @@ namespace cv { namespace gpu { namespace device
     template <typename Mask>
     __global__ void icvFindMaximaInLayer(const PtrStepf det, const PtrStepf trace, int4* maxPosBuffer, unsigned int* maxCounter)
     {
-    #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
+    #if __CUDA_ARCH__ >= 110
         extern __shared__ float N9[];
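The simplified guard works because __CUDA_ARCH__ is defined only during the device compilation pass, and the preprocessor evaluates an undefined identifier as 0 inside #if, so the extra defined() test is redundant. A standalone sketch of the pattern (the kernel name is hypothetical):

    __global__ void countHits(unsigned int* counter)
    {
    #if __CUDA_ARCH__ >= 110
        // Global-memory atomics need compute capability 1.1+; on the host pass
        // __CUDA_ARCH__ is undefined, evaluates to 0, and this body is compiled out.
        atomicAdd(counter, 1u);
    #endif
    }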
@@ -368,10 +368,10 @@ namespace cv { namespace gpu { namespace device
     // INTERPOLATION
     __global__ void icvInterpolateKeypoint(const PtrStepf det, const int4* maxPosBuffer,
-        float* featureX, float* featureY, int* featureLaplacian, float* featureSize, float* featureHessian,
+        float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian,
         unsigned int* featureCounter)
     {
-    #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
+    #if __CUDA_ARCH__ >= 110
         const int4 maxPos = maxPosBuffer[blockIdx.x];
@@ -459,6 +459,7 @@ namespace cv { namespace gpu { namespace device
                 featureX[ind] = px;
                 featureY[ind] = py;
                 featureLaplacian[ind] = maxPos.w;
+                featureOctave[ind] = c_octave;
                 featureSize[ind] = psize;
                 featureHessian[ind] = N9[1][1][1];
             }
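The new featureOctave output records c_octave, the octave currently being processed; the kernel reads it from device-side state rather than a parameter, presumably a __constant__ variable the host refreshes before each octave's launches. A generic sketch of that pattern (the setOctave helper name is illustrative; only c_octave itself appears in the kernel above):

    __constant__ int c_octave;     // octave visible to every thread of the SURF kernels

    void setOctave(int octave)     // host side, called once per octave before the launches
    {
        cudaMemcpyToSymbol(c_octave, &octave, sizeof(octave));
    }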
@@ -471,7 +472,7 @@ namespace cv { namespace gpu { namespace device
     }
     void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter,
-        float* featureX, float* featureY, int* featureLaplacian, float* featureSize, float* featureHessian,
+        float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian,
         unsigned int* featureCounter)
     {
         dim3 threads;
@@ -482,7 +483,7 @@ namespace cv { namespace gpu { namespace device
         dim3 grid;
         grid.x = maxCounter;
-        icvInterpolateKeypoint<<<grid, threads>>>(det, maxPosBuffer, featureX, featureY, featureLaplacian, featureSize, featureHessian, featureCounter);
+        icvInterpolateKeypoint<<<grid, threads>>>(det, maxPosBuffer, featureX, featureY, featureLaplacian, featureOctave, featureSize, featureHessian, featureCounter);
         cudaSafeCall( cudaGetLastError() );
         cudaSafeCall( cudaDeviceSynchronize() );
...
@@ -948,7 +948,9 @@ namespace
     {
         DeviceInfo devInfo;
         int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
-        CV_Assert(cc >= 20 || ksize <= 16);
+        if (ksize > 16 && cc < 20)
+            CV_Error(CV_StsNotImplemented, "column linear filter is not implemented for kernel sizes > 16 on devices with compute capability below 2.0");
         func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s));
     }
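Replacing the assertion with an explicit CV_Error turns an unsupported configuration (column kernels wider than 16 taps on devices below compute capability 2.0) into a reportable error instead of an assertion failure. Callers can make the same check up front; a hedged sketch using the cv::gpu::DeviceInfo API shown above:

    #include <opencv2/gpu/gpu.hpp>

    bool largeColumnKernelSupported(int ksize)
    {
        cv::gpu::DeviceInfo info;  // queries the current CUDA device
        int cc = info.majorVersion() * 10 + info.minorVersion();
        return ksize <= 16 || cc >= 20;
    }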
...
@@ -80,7 +80,7 @@ namespace cv { namespace gpu { namespace device
         int img_rows, int img_cols, int octave, bool use_mask, int nLayers);
     void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter,
-        float* featureX, float* featureY, int* featureLaplacian, float* featureSize, float* featureHessian,
+        float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian,
         unsigned int* featureCounter);
     void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures);
@@ -161,7 +161,7 @@ namespace
         ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.trace);
         ensureSizeIsEnough(1, maxCandidates, CV_32SC4, surf_.maxPosBuffer);
-        ensureSizeIsEnough(SURF_GPU::SF_FEATURE_STRIDE, maxFeatures, CV_32FC1, keypoints);
+        ensureSizeIsEnough(SURF_GPU::ROWS_COUNT, maxFeatures, CV_32FC1, keypoints);
         keypoints.setTo(Scalar::all(0));
         for (int octave = 0; octave < surf_.nOctaves; ++octave)
@@ -183,9 +183,10 @@ namespace
             if (maxCounter > 0)
             {
                 icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer.ptr<int4>(), maxCounter,
-                    keypoints.ptr<float>(SURF_GPU::SF_X), keypoints.ptr<float>(SURF_GPU::SF_Y),
-                    keypoints.ptr<int>(SURF_GPU::SF_LAPLACIAN), keypoints.ptr<float>(SURF_GPU::SF_SIZE),
-                    keypoints.ptr<float>(SURF_GPU::SF_HESSIAN), counters.ptr<unsigned int>());
+                    keypoints.ptr<float>(SURF_GPU::X_ROW), keypoints.ptr<float>(SURF_GPU::Y_ROW),
+                    keypoints.ptr<int>(SURF_GPU::LAPLACIAN_ROW), keypoints.ptr<int>(SURF_GPU::OCTAVE_ROW),
+                    keypoints.ptr<float>(SURF_GPU::SIZE_ROW), keypoints.ptr<float>(SURF_GPU::HESSIAN_ROW),
+                    counters.ptr<unsigned int>());
             }
         }
         unsigned int featureCounter;
@@ -195,7 +196,7 @@ namespace
         keypoints.cols = featureCounter;
         if (surf_.upright)
-            keypoints.row(SURF_GPU::SF_DIR).setTo(Scalar::all(90.0));
+            keypoints.row(SURF_GPU::ANGLE_ROW).setTo(Scalar::all(90.0));
         else
             findOrientation(keypoints);
     }
@@ -205,8 +206,8 @@ namespace
         const int nFeatures = keypoints.cols;
         if (nFeatures > 0)
         {
-            icvCalcOrientation_gpu(keypoints.ptr<float>(SURF_GPU::SF_X), keypoints.ptr<float>(SURF_GPU::SF_Y),
-                keypoints.ptr<float>(SURF_GPU::SF_SIZE), keypoints.ptr<float>(SURF_GPU::SF_DIR), nFeatures);
+            icvCalcOrientation_gpu(keypoints.ptr<float>(SURF_GPU::X_ROW), keypoints.ptr<float>(SURF_GPU::Y_ROW),
+                keypoints.ptr<float>(SURF_GPU::SIZE_ROW), keypoints.ptr<float>(SURF_GPU::ANGLE_ROW), nFeatures);
         }
     }
@@ -216,8 +217,8 @@ namespace
         if (nFeatures > 0)
         {
             ensureSizeIsEnough(nFeatures, descriptorSize, CV_32F, descriptors);
-            compute_descriptors_gpu(descriptors, keypoints.ptr<float>(SURF_GPU::SF_X), keypoints.ptr<float>(SURF_GPU::SF_Y),
-                keypoints.ptr<float>(SURF_GPU::SF_SIZE), keypoints.ptr<float>(SURF_GPU::SF_DIR), nFeatures);
+            compute_descriptors_gpu(descriptors, keypoints.ptr<float>(SURF_GPU::X_ROW), keypoints.ptr<float>(SURF_GPU::Y_ROW),
+                keypoints.ptr<float>(SURF_GPU::SIZE_ROW), keypoints.ptr<float>(SURF_GPU::ANGLE_ROW), nFeatures);
         }
     }
@@ -266,20 +267,22 @@ void cv::gpu::SURF_GPU::uploadKeypoints(const vector<KeyPoint>& keypoints, GpuMa
         keypointsGPU.release();
     else
     {
-        Mat keypointsCPU(SURF_GPU::SF_FEATURE_STRIDE, static_cast<int>(keypoints.size()), CV_32FC1);
+        Mat keypointsCPU(SURF_GPU::ROWS_COUNT, static_cast<int>(keypoints.size()), CV_32FC1);
-        float* kp_x = keypointsCPU.ptr<float>(SURF_GPU::SF_X);
-        float* kp_y = keypointsCPU.ptr<float>(SURF_GPU::SF_Y);
-        int* kp_laplacian = keypointsCPU.ptr<int>(SURF_GPU::SF_LAPLACIAN);
-        float* kp_size = keypointsCPU.ptr<float>(SURF_GPU::SF_SIZE);
-        float* kp_dir = keypointsCPU.ptr<float>(SURF_GPU::SF_DIR);
-        float* kp_hessian = keypointsCPU.ptr<float>(SURF_GPU::SF_HESSIAN);
+        float* kp_x = keypointsCPU.ptr<float>(SURF_GPU::X_ROW);
+        float* kp_y = keypointsCPU.ptr<float>(SURF_GPU::Y_ROW);
+        int* kp_laplacian = keypointsCPU.ptr<int>(SURF_GPU::LAPLACIAN_ROW);
+        int* kp_octave = keypointsCPU.ptr<int>(SURF_GPU::OCTAVE_ROW);
+        float* kp_size = keypointsCPU.ptr<float>(SURF_GPU::SIZE_ROW);
+        float* kp_dir = keypointsCPU.ptr<float>(SURF_GPU::ANGLE_ROW);
+        float* kp_hessian = keypointsCPU.ptr<float>(SURF_GPU::HESSIAN_ROW);
         for (size_t i = 0, size = keypoints.size(); i < size; ++i)
         {
             const KeyPoint& kp = keypoints[i];
             kp_x[i] = kp.pt.x;
             kp_y[i] = kp.pt.y;
+            kp_octave[i] = kp.octave;
             kp_size[i] = kp.size;
             kp_dir[i] = kp.angle;
             kp_hessian[i] = kp.response;
@@ -290,30 +293,6 @@ void cv::gpu::SURF_GPU::uploadKeypoints(const vector<KeyPoint>& keypoints, GpuMa
         }
     }
-namespace
-{
-    int getPointOctave(float size, const SURF_GPU& params)
-    {
-        int best_octave = 0;
-        float min_diff = numeric_limits<float>::max();
-        for (int octave = 1; octave < params.nOctaves; ++octave)
-        {
-            for (int layer = 0; layer < params.nOctaveLayers; ++layer)
-            {
-                float diff = std::abs(size - (float)calcSize(octave, layer));
-                if (min_diff > diff)
-                {
-                    min_diff = diff;
-                    best_octave = octave;
-                    if (min_diff == 0)
-                        return best_octave;
-                }
-            }
-        }
-        return best_octave;
-    }
-}
 void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, vector<KeyPoint>& keypoints)
 {
     const int nFeatures = keypointsGPU.cols;
@@ -322,18 +301,19 @@ void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, vector<Key
         keypoints.clear();
     else
     {
-        CV_Assert(keypointsGPU.type() == CV_32FC1 && keypointsGPU.rows == SF_FEATURE_STRIDE);
+        CV_Assert(keypointsGPU.type() == CV_32FC1 && keypointsGPU.rows == ROWS_COUNT);
         Mat keypointsCPU(keypointsGPU);
         keypoints.resize(nFeatures);
-        float* kp_x = keypointsCPU.ptr<float>(SF_X);
-        float* kp_y = keypointsCPU.ptr<float>(SF_Y);
-        int* kp_laplacian = keypointsCPU.ptr<int>(SF_LAPLACIAN);
-        float* kp_size = keypointsCPU.ptr<float>(SF_SIZE);
-        float* kp_dir = keypointsCPU.ptr<float>(SF_DIR);
-        float* kp_hessian = keypointsCPU.ptr<float>(SF_HESSIAN);
+        float* kp_x = keypointsCPU.ptr<float>(SURF_GPU::X_ROW);
+        float* kp_y = keypointsCPU.ptr<float>(SURF_GPU::Y_ROW);
+        int* kp_laplacian = keypointsCPU.ptr<int>(SURF_GPU::LAPLACIAN_ROW);
+        int* kp_octave = keypointsCPU.ptr<int>(SURF_GPU::OCTAVE_ROW);
+        float* kp_size = keypointsCPU.ptr<float>(SURF_GPU::SIZE_ROW);
+        float* kp_dir = keypointsCPU.ptr<float>(SURF_GPU::ANGLE_ROW);
+        float* kp_hessian = keypointsCPU.ptr<float>(SURF_GPU::HESSIAN_ROW);
         for (int i = 0; i < nFeatures; ++i)
         {
@@ -341,10 +321,10 @@ void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, vector<Key
             kp.pt.x = kp_x[i];
             kp.pt.y = kp_y[i];
             kp.class_id = kp_laplacian[i];
+            kp.octave = kp_octave[i];
             kp.size = kp_size[i];
             kp.angle = kp_dir[i];
             kp.response = kp_hessian[i];
-            kp.octave = getPointOctave(kp.size, *this);
         }
     }
 }
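Because the octave now travels in its own OCTAVE_ROW, uploadKeypoints followed by downloadKeypoints preserves KeyPoint::octave exactly instead of re-estimating it from the keypoint size via the removed getPointOctave helper. A small round-trip sketch, assuming an OpenCV build with the gpu module and a CUDA device present:

    #include <opencv2/gpu/gpu.hpp>
    #include <cassert>
    #include <vector>

    int main()
    {
        // x, y, size, angle, response, octave, class_id
        std::vector<cv::KeyPoint> cpu(1, cv::KeyPoint(10.f, 20.f, 34.f, 90.f, 50.f, 2, 1));

        cv::gpu::SURF_GPU surf;
        cv::gpu::GpuMat keypointsGPU;
        surf.uploadKeypoints(cpu, keypointsGPU);

        std::vector<cv::KeyPoint> back;
        surf.downloadKeypoints(keypointsGPU, back);

        assert(back.size() == 1 && back[0].octave == cpu[0].octave); // octave now round-trips
        return 0;
    }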
...
@@ -437,7 +437,7 @@ TEST_P(Multiply_Array, WithScale)
         cv::Mat dst_gold;
         cv::multiply(mat1, mat2, dst_gold, scale, depth.second);
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
     }
 }
@@ -2715,7 +2715,7 @@ TEST_P(Sum, Sqr)
     cv::Scalar val_gold = sqrSumGold(src);
-    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 10);
+    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
 }
 INSTANTIATE_TEST_CASE_P(GPU_Core, Sum, testing::Combine(
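These tolerance changes go with the switch to randomly generated test inputs mentioned in the commit message: results are compared within a small epsilon rather than exactly. A sketch of producing such an input with the core RNG (the helper name makeRandomMat is an assumption, not necessarily the test suite's own helper):

    #include <opencv2/core/core.hpp>

    cv::Mat makeRandomMat(cv::Size size, int type)
    {
        cv::Mat m(size, type);
        cv::randu(m, cv::Scalar::all(0), cv::Scalar::all(255)); // uniform random values
        return m;
    }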
...
@@ -85,7 +85,7 @@ testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char
     std::sort(actual.begin(), actual.end(), KeyPointLess());
     std::sort(gold.begin(), gold.end(), KeyPointLess());
-    for (size_t i; i < gold.size(); ++i)
+    for (size_t i = 0; i < gold.size(); ++i)
     {
         const cv::KeyPoint& p1 = gold[i];
         const cv::KeyPoint& p2 = actual[i];
...