Commit 089a835c authored by Vladislav Vinogradov

fixed octave computation in SURF_GPU

used random images in gpu filter tests
parent 903c05db
@@ -1516,13 +1516,14 @@ class CV_EXPORTS SURF_GPU
 public:
     enum KeypointLayout
     {
-        SF_X = 0,
-        SF_Y,
-        SF_LAPLACIAN,
-        SF_SIZE,
-        SF_DIR,
-        SF_HESSIAN,
-        SF_FEATURE_STRIDE
+        X_ROW = 0,
+        Y_ROW,
+        LAPLACIAN_ROW,
+        OCTAVE_ROW,
+        SIZE_ROW,
+        ANGLE_ROW,
+        HESSIAN_ROW,
+        ROWS_COUNT
     };
     //! the default constructor
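With this layout each keypoint attribute occupies one row of the keypoints GpuMat, one column per keypoint, and the octave now has a row of its own. A minimal host-side sketch of reading those rows, assuming the 2.4-era cv::gpu API and an OpenCV build with CUDA support:

    #include <opencv2/gpu/gpu.hpp>
    #include <cstdio>

    void printKeypointRows(const cv::gpu::GpuMat& keypointsGPU)
    {
        cv::Mat kp(keypointsGPU); // download: one row per attribute, one column per keypoint
        const float* x      = kp.ptr<float>(cv::gpu::SURF_GPU::X_ROW);
        const float* y      = kp.ptr<float>(cv::gpu::SURF_GPU::Y_ROW);
        const int*   octave = kp.ptr<int>(cv::gpu::SURF_GPU::OCTAVE_ROW);
        const float* size   = kp.ptr<float>(cv::gpu::SURF_GPU::SIZE_ROW);

        for (int i = 0; i < kp.cols; ++i)
            std::printf("(%g, %g) octave=%d size=%g\n", x[i], y[i], octave[i], size[i]);
    }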
...
@@ -117,7 +117,7 @@ namespace cv { namespace gpu { namespace device
     template <int N> __device__ float icvCalcHaarPatternSum(const float src[][5], int oldSize, int newSize, int y, int x)
     {
     #if __CUDA_ARCH__ >= 200
         typedef double real_t;
     #else
         typedef float real_t;
     #endif
@@ -248,7 +248,7 @@ namespace cv { namespace gpu { namespace device
     template <typename Mask>
     __global__ void icvFindMaximaInLayer(const PtrStepf det, const PtrStepf trace, int4* maxPosBuffer, unsigned int* maxCounter)
     {
-    #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
+    #if __CUDA_ARCH__ >= 110
         extern __shared__ float N9[];
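The simplified guard works because __CUDA_ARCH__ is defined only during the device compilation pass, and the preprocessor evaluates an undefined identifier as 0 inside #if, so the extra defined() test is redundant. A standalone sketch of the pattern (the kernel name is hypothetical):

    __global__ void countHits(unsigned int* counter)
    {
    #if __CUDA_ARCH__ >= 110
        // Global-memory atomics need compute capability 1.1+; on the host pass
        // __CUDA_ARCH__ is undefined, evaluates to 0, and this body is compiled out.
        atomicAdd(counter, 1u);
    #endif
    }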
@@ -368,10 +368,10 @@ namespace cv { namespace gpu { namespace device
     // INTERPOLATION
     __global__ void icvInterpolateKeypoint(const PtrStepf det, const int4* maxPosBuffer,
-        float* featureX, float* featureY, int* featureLaplacian, float* featureSize, float* featureHessian,
+        float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian,
         unsigned int* featureCounter)
     {
-    #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
+    #if __CUDA_ARCH__ >= 110
         const int4 maxPos = maxPosBuffer[blockIdx.x];
@@ -459,6 +459,7 @@ namespace cv { namespace gpu { namespace device
                 featureX[ind] = px;
                 featureY[ind] = py;
                 featureLaplacian[ind] = maxPos.w;
+                featureOctave[ind] = c_octave;
                 featureSize[ind] = psize;
                 featureHessian[ind] = N9[1][1][1];
             }
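The new featureOctave output records c_octave, the octave currently being processed; the kernel reads it from device-side state rather than a parameter, presumably a __constant__ variable the host refreshes before each octave's launches. A generic sketch of that pattern (the setOctave helper name is illustrative; only c_octave itself appears in the kernel above):

    __constant__ int c_octave;     // octave visible to every thread of the SURF kernels

    void setOctave(int octave)     // host side, called once per octave before the launches
    {
        cudaMemcpyToSymbol(c_octave, &octave, sizeof(octave));
    }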
@@ -471,7 +472,7 @@ namespace cv { namespace gpu { namespace device
     }
     void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter,
-        float* featureX, float* featureY, int* featureLaplacian, float* featureSize, float* featureHessian,
+        float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian,
         unsigned int* featureCounter)
     {
         dim3 threads;
@@ -482,7 +483,7 @@ namespace cv { namespace gpu { namespace device
         dim3 grid;
         grid.x = maxCounter;
-        icvInterpolateKeypoint<<<grid, threads>>>(det, maxPosBuffer, featureX, featureY, featureLaplacian, featureSize, featureHessian, featureCounter);
+        icvInterpolateKeypoint<<<grid, threads>>>(det, maxPosBuffer, featureX, featureY, featureLaplacian, featureOctave, featureSize, featureHessian, featureCounter);
         cudaSafeCall( cudaGetLastError() );
         cudaSafeCall( cudaDeviceSynchronize() );
...
@@ -948,7 +948,9 @@ namespace
     {
         DeviceInfo devInfo;
         int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
-        CV_Assert(cc >= 20 || ksize <= 16);
+        if (ksize > 16 && cc < 20)
+            CV_Error(CV_StsNotImplemented, "column linear filter is not implemented for kernel sizes > 16 on devices with compute capability below 2.0");
         func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s));
     }
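Replacing the assertion with an explicit CV_Error turns an unsupported configuration (column kernels wider than 16 taps on devices below compute capability 2.0) into a reportable error instead of an assertion failure. Callers can make the same check up front; a hedged sketch using the cv::gpu::DeviceInfo API shown above:

    #include <opencv2/gpu/gpu.hpp>

    bool largeColumnKernelSupported(int ksize)
    {
        cv::gpu::DeviceInfo info;  // queries the current CUDA device
        int cc = info.majorVersion() * 10 + info.minorVersion();
        return ksize <= 16 || cc >= 20;
    }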
...
@@ -80,7 +80,7 @@ namespace cv { namespace gpu { namespace device
         int img_rows, int img_cols, int octave, bool use_mask, int nLayers);
     void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter,
-        float* featureX, float* featureY, int* featureLaplacian, float* featureSize, float* featureHessian,
+        float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian,
         unsigned int* featureCounter);
     void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures);
@@ -161,7 +161,7 @@ namespace
         ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.trace);
         ensureSizeIsEnough(1, maxCandidates, CV_32SC4, surf_.maxPosBuffer);
-        ensureSizeIsEnough(SURF_GPU::SF_FEATURE_STRIDE, maxFeatures, CV_32FC1, keypoints);
+        ensureSizeIsEnough(SURF_GPU::ROWS_COUNT, maxFeatures, CV_32FC1, keypoints);
         keypoints.setTo(Scalar::all(0));
         for (int octave = 0; octave < surf_.nOctaves; ++octave)
@@ -183,9 +183,10 @@ namespace
             if (maxCounter > 0)
             {
                 icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer.ptr<int4>(), maxCounter,
-                    keypoints.ptr<float>(SURF_GPU::SF_X), keypoints.ptr<float>(SURF_GPU::SF_Y),
-                    keypoints.ptr<int>(SURF_GPU::SF_LAPLACIAN), keypoints.ptr<float>(SURF_GPU::SF_SIZE),
-                    keypoints.ptr<float>(SURF_GPU::SF_HESSIAN), counters.ptr<unsigned int>());
+                    keypoints.ptr<float>(SURF_GPU::X_ROW), keypoints.ptr<float>(SURF_GPU::Y_ROW),
+                    keypoints.ptr<int>(SURF_GPU::LAPLACIAN_ROW), keypoints.ptr<int>(SURF_GPU::OCTAVE_ROW),
+                    keypoints.ptr<float>(SURF_GPU::SIZE_ROW), keypoints.ptr<float>(SURF_GPU::HESSIAN_ROW),
+                    counters.ptr<unsigned int>());
             }
         }
         unsigned int featureCounter;
@@ -195,7 +196,7 @@ namespace
         keypoints.cols = featureCounter;
         if (surf_.upright)
-            keypoints.row(SURF_GPU::SF_DIR).setTo(Scalar::all(90.0));
+            keypoints.row(SURF_GPU::ANGLE_ROW).setTo(Scalar::all(90.0));
         else
             findOrientation(keypoints);
     }
@@ -205,8 +206,8 @@ namespace
         const int nFeatures = keypoints.cols;
         if (nFeatures > 0)
         {
-            icvCalcOrientation_gpu(keypoints.ptr<float>(SURF_GPU::SF_X), keypoints.ptr<float>(SURF_GPU::SF_Y),
-                keypoints.ptr<float>(SURF_GPU::SF_SIZE), keypoints.ptr<float>(SURF_GPU::SF_DIR), nFeatures);
+            icvCalcOrientation_gpu(keypoints.ptr<float>(SURF_GPU::X_ROW), keypoints.ptr<float>(SURF_GPU::Y_ROW),
+                keypoints.ptr<float>(SURF_GPU::SIZE_ROW), keypoints.ptr<float>(SURF_GPU::ANGLE_ROW), nFeatures);
         }
     }
@@ -216,8 +217,8 @@ namespace
         if (nFeatures > 0)
         {
             ensureSizeIsEnough(nFeatures, descriptorSize, CV_32F, descriptors);
-            compute_descriptors_gpu(descriptors, keypoints.ptr<float>(SURF_GPU::SF_X), keypoints.ptr<float>(SURF_GPU::SF_Y),
-                keypoints.ptr<float>(SURF_GPU::SF_SIZE), keypoints.ptr<float>(SURF_GPU::SF_DIR), nFeatures);
+            compute_descriptors_gpu(descriptors, keypoints.ptr<float>(SURF_GPU::X_ROW), keypoints.ptr<float>(SURF_GPU::Y_ROW),
+                keypoints.ptr<float>(SURF_GPU::SIZE_ROW), keypoints.ptr<float>(SURF_GPU::ANGLE_ROW), nFeatures);
         }
     }
@@ -266,20 +267,22 @@ void cv::gpu::SURF_GPU::uploadKeypoints(const vector<KeyPoint>& keypoints, GpuMa
         keypointsGPU.release();
     else
     {
-        Mat keypointsCPU(SURF_GPU::SF_FEATURE_STRIDE, static_cast<int>(keypoints.size()), CV_32FC1);
+        Mat keypointsCPU(SURF_GPU::ROWS_COUNT, static_cast<int>(keypoints.size()), CV_32FC1);
-        float* kp_x = keypointsCPU.ptr<float>(SURF_GPU::SF_X);
-        float* kp_y = keypointsCPU.ptr<float>(SURF_GPU::SF_Y);
-        int* kp_laplacian = keypointsCPU.ptr<int>(SURF_GPU::SF_LAPLACIAN);
-        float* kp_size = keypointsCPU.ptr<float>(SURF_GPU::SF_SIZE);
-        float* kp_dir = keypointsCPU.ptr<float>(SURF_GPU::SF_DIR);
-        float* kp_hessian = keypointsCPU.ptr<float>(SURF_GPU::SF_HESSIAN);
+        float* kp_x = keypointsCPU.ptr<float>(SURF_GPU::X_ROW);
+        float* kp_y = keypointsCPU.ptr<float>(SURF_GPU::Y_ROW);
+        int* kp_laplacian = keypointsCPU.ptr<int>(SURF_GPU::LAPLACIAN_ROW);
+        int* kp_octave = keypointsCPU.ptr<int>(SURF_GPU::OCTAVE_ROW);
+        float* kp_size = keypointsCPU.ptr<float>(SURF_GPU::SIZE_ROW);
+        float* kp_dir = keypointsCPU.ptr<float>(SURF_GPU::ANGLE_ROW);
+        float* kp_hessian = keypointsCPU.ptr<float>(SURF_GPU::HESSIAN_ROW);
         for (size_t i = 0, size = keypoints.size(); i < size; ++i)
         {
             const KeyPoint& kp = keypoints[i];
             kp_x[i] = kp.pt.x;
             kp_y[i] = kp.pt.y;
+            kp_octave[i] = kp.octave;
             kp_size[i] = kp.size;
             kp_dir[i] = kp.angle;
             kp_hessian[i] = kp.response;
@@ -290,30 +293,6 @@ void cv::gpu::SURF_GPU::uploadKeypoints(const vector<KeyPoint>& keypoints, GpuMa
         }
     }
-namespace
-{
-    int getPointOctave(float size, const SURF_GPU& params)
-    {
-        int best_octave = 0;
-        float min_diff = numeric_limits<float>::max();
-        for (int octave = 1; octave < params.nOctaves; ++octave)
-        {
-            for (int layer = 0; layer < params.nOctaveLayers; ++layer)
-            {
-                float diff = std::abs(size - (float)calcSize(octave, layer));
-                if (min_diff > diff)
-                {
-                    min_diff = diff;
-                    best_octave = octave;
-                    if (min_diff == 0)
-                        return best_octave;
-                }
-            }
-        }
-        return best_octave;
-    }
-}
 void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, vector<KeyPoint>& keypoints)
 {
     const int nFeatures = keypointsGPU.cols;
@@ -322,18 +301,19 @@ void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, vector<Key
         keypoints.clear();
     else
     {
-        CV_Assert(keypointsGPU.type() == CV_32FC1 && keypointsGPU.rows == SF_FEATURE_STRIDE);
+        CV_Assert(keypointsGPU.type() == CV_32FC1 && keypointsGPU.rows == ROWS_COUNT);
         Mat keypointsCPU(keypointsGPU);
         keypoints.resize(nFeatures);
-        float* kp_x = keypointsCPU.ptr<float>(SF_X);
-        float* kp_y = keypointsCPU.ptr<float>(SF_Y);
-        int* kp_laplacian = keypointsCPU.ptr<int>(SF_LAPLACIAN);
-        float* kp_size = keypointsCPU.ptr<float>(SF_SIZE);
-        float* kp_dir = keypointsCPU.ptr<float>(SF_DIR);
-        float* kp_hessian = keypointsCPU.ptr<float>(SF_HESSIAN);
+        float* kp_x = keypointsCPU.ptr<float>(SURF_GPU::X_ROW);
+        float* kp_y = keypointsCPU.ptr<float>(SURF_GPU::Y_ROW);
+        int* kp_laplacian = keypointsCPU.ptr<int>(SURF_GPU::LAPLACIAN_ROW);
+        int* kp_octave = keypointsCPU.ptr<int>(SURF_GPU::OCTAVE_ROW);
+        float* kp_size = keypointsCPU.ptr<float>(SURF_GPU::SIZE_ROW);
+        float* kp_dir = keypointsCPU.ptr<float>(SURF_GPU::ANGLE_ROW);
+        float* kp_hessian = keypointsCPU.ptr<float>(SURF_GPU::HESSIAN_ROW);
         for (int i = 0; i < nFeatures; ++i)
         {
@@ -341,10 +321,10 @@ void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, vector<Key
             kp.pt.x = kp_x[i];
             kp.pt.y = kp_y[i];
             kp.class_id = kp_laplacian[i];
+            kp.octave = kp_octave[i];
             kp.size = kp_size[i];
             kp.angle = kp_dir[i];
             kp.response = kp_hessian[i];
-            kp.octave = getPointOctave(kp.size, *this);
         }
     }
 }
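Because the octave now travels in its own OCTAVE_ROW, uploadKeypoints followed by downloadKeypoints preserves KeyPoint::octave exactly instead of re-estimating it from the keypoint size via the removed getPointOctave helper. A small round-trip sketch, assuming an OpenCV build with the gpu module and a CUDA device present:

    #include <opencv2/gpu/gpu.hpp>
    #include <cassert>
    #include <vector>

    int main()
    {
        // x, y, size, angle, response, octave, class_id
        std::vector<cv::KeyPoint> cpu(1, cv::KeyPoint(10.f, 20.f, 34.f, 90.f, 50.f, 2, 1));

        cv::gpu::SURF_GPU surf;
        cv::gpu::GpuMat keypointsGPU;
        surf.uploadKeypoints(cpu, keypointsGPU);

        std::vector<cv::KeyPoint> back;
        surf.downloadKeypoints(keypointsGPU, back);

        assert(back.size() == 1 && back[0].octave == cpu[0].octave); // octave now round-trips
        return 0;
    }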
...
@@ -437,7 +437,7 @@ TEST_P(Multiply_Array, WithScale)
         cv::Mat dst_gold;
         cv::multiply(mat1, mat2, dst_gold, scale, depth.second);
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
     }
 }
@@ -2715,7 +2715,7 @@ TEST_P(Sum, Sqr)
     cv::Scalar val_gold = sqrSumGold(src);
-    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 10);
+    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
 }
 INSTANTIATE_TEST_CASE_P(GPU_Core, Sum, testing::Combine(
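These tolerance changes go with the switch to randomly generated test inputs mentioned in the commit message: results are compared within a small epsilon rather than exactly. A sketch of producing such an input with the core RNG (the helper name makeRandomMat is an assumption, not necessarily the test suite's own helper):

    #include <opencv2/core/core.hpp>

    cv::Mat makeRandomMat(cv::Size size, int type)
    {
        cv::Mat m(size, type);
        cv::randu(m, cv::Scalar::all(0), cv::Scalar::all(255)); // uniform random values
        return m;
    }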
...
@@ -85,7 +85,7 @@ testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char
     std::sort(actual.begin(), actual.end(), KeyPointLess());
     std::sort(gold.begin(), gold.end(), KeyPointLess());
-    for (size_t i; i < gold.size(); ++i)
+    for (size_t i = 0; i < gold.size(); ++i)
     {
         const cv::KeyPoint& p1 = gold[i];
         const cv::KeyPoint& p2 = actual[i];
...