Commit efdfca7a authored by Vadim Pisarevsky

do not use the large "score" buffer; now without non-max suppression OpenCL FAST…

Do not use the large "score" buffer; now, without non-max suppression, OpenCL FAST is pretty efficient.
parent 06c138bd
...@@ -268,23 +268,13 @@ static bool ocl_FAST( InputArray _img, std::vector<KeyPoint>& keypoints, ...@@ -268,23 +268,13 @@ static bool ocl_FAST( InputArray _img, std::vector<KeyPoint>& keypoints,
if (fastKptKernel.empty()) if (fastKptKernel.empty())
return false; return false;
UMat kp1(1, maxKeypoints*2+1, CV_32S), score; UMat kp1(1, maxKeypoints*2+1, CV_32S);
UMat ucounter1(kp1, Rect(0,0,1,1)); UMat ucounter1(kp1, Rect(0,0,1,1));
ucounter1.setTo(Scalar::all(0)); ucounter1.setTo(Scalar::all(0));
if( nonmax_suppression )
{
score.create(img.size(), CV_8U);
score.setTo(Scalar::all(0));
}
else
score = img; // initialize score with some non-empty value
if( !fastKptKernel.args(ocl::KernelArg::ReadOnly(img), if( !fastKptKernel.args(ocl::KernelArg::ReadOnly(img),
ocl::KernelArg::WriteOnlyNoSize(score),
ocl::KernelArg::PtrReadWrite(kp1), ocl::KernelArg::PtrReadWrite(kp1),
nonmax_suppression ? 1 : 0,
maxKeypoints, threshold).run(2, globalsize, 0, true)) maxKeypoints, threshold).run(2, globalsize, 0, true))
return false; return false;
...@@ -319,7 +309,7 @@ static bool ocl_FAST( InputArray _img, std::vector<KeyPoint>& keypoints, ...@@ -319,7 +309,7 @@ static bool ocl_FAST( InputArray _img, std::vector<KeyPoint>& keypoints,
size_t globalsize_nms[] = { counter }; size_t globalsize_nms[] = { counter };
if( !fastNMSKernel.args(ocl::KernelArg::PtrReadOnly(kp1), if( !fastNMSKernel.args(ocl::KernelArg::PtrReadOnly(kp1),
ocl::KernelArg::PtrReadWrite(kp2), ocl::KernelArg::PtrReadWrite(kp2),
ocl::KernelArg::ReadOnlyNoSize(score), ocl::KernelArg::ReadOnly(img),
counter, counter).run(1, globalsize_nms, 0, true)) counter, counter).run(1, globalsize_nms, 0, true))
return false; return false;
...@@ -340,9 +330,10 @@ static bool ocl_FAST( InputArray _img, std::vector<KeyPoint>& keypoints, ...@@ -340,9 +330,10 @@ static bool ocl_FAST( InputArray _img, std::vector<KeyPoint>& keypoints,
void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression, int type) void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression, int type)
{ {
if( ocl::useOpenCL() && _img.isUMat() && type == FastFeatureDetector::TYPE_9_16 && double t = (double)getTickCount();
if( ocl::useOpenCL() && /*_img.isUMat() &&*/ type == FastFeatureDetector::TYPE_9_16 &&
ocl_FAST(_img, keypoints, threshold, nonmax_suppression, 10000)) ocl_FAST(_img, keypoints, threshold, nonmax_suppression, 10000))
return; ;
switch(type) { switch(type) {
case FastFeatureDetector::TYPE_5_8: case FastFeatureDetector::TYPE_5_8:
...@@ -359,6 +350,7 @@ void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool ...@@ -359,6 +350,7 @@ void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool
FAST_t<16>(_img, keypoints, threshold, nonmax_suppression); FAST_t<16>(_img, keypoints, threshold, nonmax_suppression);
break; break;
} }
printf("time=%.2fms\n", ((double)getTickCount() - t)*1000./getTickFrequency());
} }
......
// OpenCL port of the FAST corner detector. // OpenCL port of the FAST corner detector.
// Copyright (C) 2014, Itseez Inc. See the license at http://opencv.org // Copyright (C) 2014, Itseez Inc. See the license at http://opencv.org
inline int cornerScore(__global const uchar* img, int step, int threshold) inline int cornerScore(__global const uchar* img, int step)
{ {
int k, tofs, v = img[0], a0 = threshold, b0; int k, tofs, v = img[0], a0 = 0, b0;
int d[16]; int d[16];
#define LOAD2(idx, ofs) \ #define LOAD2(idx, ofs) \
tofs = ofs; d[idx] = (short)(v - img[tofs]); d[idx+8] = (short)(v - img[-tofs]) tofs = ofs; d[idx] = (short)(v - img[tofs]); d[idx+8] = (short)(v - img[-tofs])
...@@ -53,8 +53,7 @@ __kernel ...@@ -53,8 +53,7 @@ __kernel
void FAST_findKeypoints( void FAST_findKeypoints(
__global const uchar * _img, int step, int img_offset, __global const uchar * _img, int step, int img_offset,
int img_rows, int img_cols, int img_rows, int img_cols,
__global uchar * score, int score_step, int score_offset, volatile __global int* kp_loc,
volatile __global int* kp_loc, int calc_score,
int max_keypoints, int threshold ) int max_keypoints, int threshold )
{ {
int j = get_global_id(0) + 3; int j = get_global_id(0) + 3;
...@@ -118,8 +117,6 @@ void FAST_findKeypoints( ...@@ -118,8 +117,6 @@ void FAST_findKeypoints(
kp_loc[1 + 2*idx] = j; kp_loc[1 + 2*idx] = j;
kp_loc[2 + 2*idx] = i; kp_loc[2 + 2*idx] = i;
} }
if(calc_score)
score[mad24(i, score_step, score_offset+j)] = cornerScore(img, step, threshold);
} }
} }
} }
...@@ -130,8 +127,8 @@ void FAST_findKeypoints( ...@@ -130,8 +127,8 @@ void FAST_findKeypoints(
__kernel __kernel
void FAST_nonmaxSupression( void FAST_nonmaxSupression(
__global const int* kp_in, volatile __global int* kp_out, __global const int* kp_in, volatile __global int* kp_out,
__global const uchar * _score, int step, int score_offset, __global const uchar * _img, int step, int img_offset,
int counter, int max_keypoints) int rows, int cols, int counter, int max_keypoints)
{ {
const int idx = get_global_id(0); const int idx = get_global_id(0);
...@@ -139,14 +136,19 @@ void FAST_nonmaxSupression( ...@@ -139,14 +136,19 @@ void FAST_nonmaxSupression(
{ {
int x = kp_in[1 + 2*idx]; int x = kp_in[1 + 2*idx];
int y = kp_in[2 + 2*idx]; int y = kp_in[2 + 2*idx];
__global const uchar* img = _img + mad24(y, step, x + img_offset);
__global const uchar* score = _score + mad24(y, step, x + score_offset); int s = cornerScore(img, step);
int s = score[0];
if( (s > (int)score[1]) + (s > (int)score[-1]) + if( (x < 4 || s > cornerScore(img-1, step)) +
(s > (int)score[-step]) + (s > (int)score[step]) + (y < 4 || s > cornerScore(img-step, step)) != 2 )
(s > (int)score[-step-1]) + (s > (int)score[-step+1]) + return;
(s > (int)score[step-1]) + (s > (int)score[step+1]) == 8 ) if( (x >= cols - 4 || s > cornerScore(img+1, step)) +
(y >= rows - 4 || s > cornerScore(img+step, step)) +
(x < 4 || y < 4 || s > cornerScore(img-step-1, step)) +
(x >= cols - 4 || y < 4 || s > cornerScore(img-step+1, step)) +
(x < 4 || y >= rows - 4 || s > cornerScore(img+step-1, step)) +
(x >= cols - 4 || y >= rows - 4 || s > cornerScore(img+step+1, step)) == 6)
{ {
int new_idx = atomic_inc(kp_out); int new_idx = atomic_inc(kp_out);
if( new_idx < max_keypoints ) if( new_idx < max_keypoints )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment