Commit 4f051f04 authored by Alexander Alekhin

Merge pull request #1371 from sovrasov:ocl_surf

parents 1e354070 bac7c26c
......@@ -40,7 +40,7 @@ PERF_TEST_P(surf, extract, testing::Values(SURF_IMAGES))
Ptr<SURF> detector = SURF::create();
vector<KeyPoint> points;
vector<float> descriptors;
Mat descriptors;
detector->detect(frame, points, mask);
TEST_CYCLE() detector->compute(frame, points, descriptors);
......@@ -58,7 +58,7 @@ PERF_TEST_P(surf, full, testing::Values(SURF_IMAGES))
declare.in(frame).time(90);
Ptr<SURF> detector = SURF::create();
vector<KeyPoint> points;
vector<float> descriptors;
Mat descriptors;
TEST_CYCLE() detector->detectAndCompute(frame, mask, points, descriptors, false);
......
......@@ -875,9 +875,6 @@ inline float linearFilter(
float centerX, float centerY, float win_offset,
float cos_dir, float sin_dir, float y, float x )
{
x -= 0.5f;
y -= 0.5f;
float out = 0.0f;
const int x1 = round(x);
......@@ -900,6 +897,60 @@ inline float linearFilter(
return out;
}
/*
 * Area-weighted sampling of the rotated patch.
 *
 * Accumulates readerGet() samples over the window [x*s, x*s + s) by
 * [y*s, y*s + s). Cells lying fully inside the window are weighted by
 * 1/(s*s); cells on the border that are only partially covered are
 * weighted by their fractional coverage times 1/(s*s).
 *
 * x, y - cell indices of the output sample (scaled by s to get the window)
 * s    - side length of the window
 * The remaining arguments are forwarded unchanged to readerGet().
 */
inline float areaFilter( __PARAM_imgTex__, int img_rows, int img_cols,
                         float centerX, float centerY, float win_offset,
                         float cos_dir, float sin_dir, float x, float y, float s)
{
    // Horizontal extent of the window and the integer columns fully inside it.
    const float xBegin = x * s;
    const float xEnd = xBegin + s;
    const int colFirst = convert_int_rtp(xBegin);
    const int colLast = convert_int_rtn(xEnd);

    // Vertical extent of the window and the integer rows fully inside it.
    const float yBegin = y * s;
    const float yEnd = yBegin + s;
    const int rowFirst = convert_int_rtp(yBegin);
    const int rowLast = convert_int_rtn(yEnd);

    const float scale = 1.f / (s * s);

    // Which borders of the window cut through a cell.
    const bool partialLeft = colFirst > xBegin;
    const bool partialRight = colLast < xEnd;
    const bool partialTop = rowFirst > yBegin;
    const bool partialBottom = rowLast < yEnd;

    // Fractional coverage of the border cells (only used when the matching flag is set).
    const float leftFrac = colFirst - xBegin;
    const float rightFrac = xEnd - colLast;
    const float topFrac = rowFirst - yBegin;
    const float bottomFrac = yEnd - rowLast;

    // Indices of the partially covered column / row just outside the full range.
    const int colLeft = colFirst - 1;
    const int rowTop = rowFirst - 1;

    float out = 0.f;

    // Fully covered rows: interior cells first, then the left/right partial cells.
    for (int row = rowFirst; row < rowLast; ++row)
    {
        for (int col = colFirst; col < colLast; ++col)
        {
            out += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row, col) * scale;
        }

        if (partialLeft)
        {
            out += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row, colLeft) * (leftFrac * scale);
        }

        if (partialRight)
        {
            out += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, row, colLast) * (rightFrac * scale);
        }
    }

    // Partially covered row above the full range.
    if (partialTop)
    {
        for (int col = colFirst; col < colLast; ++col)
        {
            out += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, rowTop, col) * (topFrac * scale);
        }
    }

    // Partially covered row below the full range.
    if (partialBottom)
    {
        for (int col = colFirst; col < colLast; ++col)
        {
            out += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, rowLast, col) * (bottomFrac * scale);
        }
    }

    // The four corner cells, covered partially in both directions.
    if (partialTop && partialLeft)
    {
        out += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, rowTop, colLeft) * (topFrac * leftFrac * scale);
    }

    if (partialTop && partialRight)
    {
        out += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, rowTop, colLast) * (topFrac * rightFrac * scale);
    }

    if (partialBottom && partialRight)
    {
        out += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, rowLast, colLast) * (bottomFrac * rightFrac * scale);
    }

    if (partialBottom && partialLeft)
    {
        out += readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, rowLast, colLeft) * (bottomFrac * leftFrac * scale);
    }

    return out;
}
void calc_dx_dy(
__PARAM_imgTex__,
int img_rows, int img_cols,
......@@ -946,9 +997,18 @@ void calc_dx_dy(
const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size;
const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size;
s_PATCH[get_local_id(1) * 6 + get_local_id(0)] =
linearFilter(__PASS_imgTex__, img_rows, img_cols, centerX, centerY,
win_offset, cos_dir, sin_dir, icoo, jcoo);
if (s > 1)
{
s_PATCH[get_local_id(1) * 6 + get_local_id(0)] =
areaFilter(__PASS_imgTex__, img_rows, img_cols, centerX, centerY,
win_offset, cos_dir, sin_dir, xIndex, yIndex, s);
}
else
{
s_PATCH[get_local_id(1) * 6 + get_local_id(0)] =
linearFilter(__PASS_imgTex__, img_rows, img_cols, centerX, centerY,
win_offset, cos_dir, sin_dir, icoo, jcoo);
}
barrier(CLK_LOCAL_MEM_FENCE);
......@@ -1075,18 +1135,16 @@ void SURF_computeDescriptors64(
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25)
if (tid == 0)
{
__global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
// write dx, dy, |dx|, |dy|
if (tid == 0)
{
descriptors_block[0] = sdx[0];
descriptors_block[1] = sdy[0];
descriptors_block[2] = sdxabs[0];
descriptors_block[3] = sdyabs[0];
}
descriptors_block[0] = sdx[0];
descriptors_block[1] = sdy[0];
descriptors_block[2] = sdxabs[0];
descriptors_block[3] = sdyabs[0];
}
}
......@@ -1102,10 +1160,10 @@ void SURF_computeDescriptors128(
descriptors_step /= sizeof(*descriptors);
keypoints_step /= sizeof(*keypoints);
__global float * featureX = keypoints + X_ROW * keypoints_step;
__global float * featureY = keypoints + Y_ROW * keypoints_step;
__global float* featureSize = keypoints + SIZE_ROW * keypoints_step;
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
__global const float * featureX = keypoints + X_ROW * keypoints_step;
__global const float * featureY = keypoints + Y_ROW * keypoints_step;
__global const float* featureSize = keypoints + SIZE_ROW * keypoints_step;
__global const float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
// 2 floats (dx,dy) for each thread (5x5 sample points in each sub-region)
volatile __local float sdx[25];
......
......@@ -91,11 +91,13 @@ bool SURF_OCL::init(const SURF_Impl* p)
if(ocl::haveOpenCL())
{
const ocl::Device& dev = ocl::Device::getDefault();
if( dev.type() == ocl::Device::TYPE_CPU || dev.doubleFPConfig() == 0 )
if( dev.type() == ocl::Device::TYPE_CPU )
return false;
haveImageSupport = false;//dev.imageSupport();
kerOpts = haveImageSupport ? "-D HAVE_IMAGE2D -D DOUBLE_SUPPORT" : "";
// status = 1;
haveImageSupport = dev.imageSupport();
kerOpts = format("%s%s",
haveImageSupport ? "-D HAVE_IMAGE2D" : "",
dev.doubleFPConfig() > 0? " -D DOUBLE_SUPPORT": "");
status = 1;
}
}
return status > 0;
......@@ -243,7 +245,7 @@ bool SURF_OCL::computeDescriptors(const UMat &keypoints, OutputArray _descriptor
}
size_t localThreads[] = {6, 6};
size_t globalThreads[] = {nFeatures*localThreads[0], localThreads[1]};
size_t globalThreads[] = {nFeatures*localThreads[0], 16 * localThreads[1]};
if(haveImageSupport)
{
......@@ -420,7 +422,7 @@ bool SURF_OCL::findMaximaInLayer(int counterOffset, int octave,
ocl::KernelArg::PtrReadWrite(maxPosBuffer),
ocl::KernelArg::PtrReadWrite(counters),
counterOffset, img_rows, img_cols,
octave, nOctaveLayers,
nOctaveLayers, octave,
layer_rows, layer_cols,
maxCandidates,
(float)params->hessianThreshold).run(2, globalThreads, localThreads, true);
......
......@@ -357,9 +357,9 @@ protected:
}
if(imgLoadMode == IMREAD_GRAYSCALE)
image.create( 50, 50, CV_8UC1 );
image.create( 256, 256, CV_8UC1 );
else
image.create( 50, 50, CV_8UC3 );
image.create( 256, 256, CV_8UC3 );
try
{
dextractor->compute( image, keypoints, descriptors );
......@@ -1027,10 +1027,34 @@ TEST( Features2d_DescriptorExtractor_SIFT, regression )
// SURF descriptor regression test, run with OpenCL switched off.
// When built with HAVE_OPENCL, the global OpenCL flag is saved, cleared for
// the duration of the run, and put back afterwards.
TEST( Features2d_DescriptorExtractor_SURF, regression )
{
#ifdef HAVE_OPENCL
    // Remember the current setting so it can be restored once the test is done.
    const bool oclWasEnabled = ocl::useOpenCL();
    ocl::setUseOpenCL(false);
#endif

    CV_DescriptorExtractorTest<L2<float> > test( "descriptor-surf", 0.05f, SURF::create() );
    test.safe_run();

#ifdef HAVE_OPENCL
    ocl::setUseOpenCL(oclWasEnabled);
#endif
}
#ifdef HAVE_OPENCL
// SURF descriptor regression test, run with OpenCL requested.
// The global OpenCL flag is saved on entry and restored on exit.
TEST( Features2d_DescriptorExtractor_SURF_OCL, regression )
{
    const bool oclWasEnabled = ocl::useOpenCL();
    ocl::setUseOpenCL(true);

    // The flag is read back after the request; the regression body only runs
    // when it actually reports OpenCL as in use.
    const bool oclActive = ocl::useOpenCL();
    if( oclActive )
    {
        CV_DescriptorExtractorTest<L2<float> > test( "descriptor-surf_ocl", 0.05f, SURF::create() );
        test.safe_run();
    }

    ocl::setUseOpenCL(oclWasEnabled);
}
#endif
TEST( Features2d_DescriptorExtractor_DAISY, regression )
{
......@@ -1187,7 +1211,7 @@ TEST(Features2d_BruteForceDescriptorMatcher_knnMatch, regression)
Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce");
ASSERT_TRUE(matcher != NULL);
Mat imgT(sz, sz, CV_8U, Scalar(255));
Mat imgT(256, 256, CV_8U, Scalar(255));
line(imgT, Point(20, sz/2), Point(sz-21, sz/2), Scalar(100), 2);
line(imgT, Point(sz/2, 20), Point(sz/2, sz-21), Scalar(100), 2);
vector<KeyPoint> kpT;
......@@ -1196,7 +1220,7 @@ TEST(Features2d_BruteForceDescriptorMatcher_knnMatch, regression)
Mat descT;
ext->compute(imgT, kpT, descT);
Mat imgQ(sz, sz, CV_8U, Scalar(255));
Mat imgQ(256, 256, CV_8U, Scalar(255));
line(imgQ, Point(30, sz/2), Point(sz-31, sz/2), Scalar(100), 3);
line(imgQ, Point(sz/2, 30), Point(sz/2, sz-31), Scalar(100), 3);
vector<KeyPoint> kpQ;
......
......@@ -21,8 +21,8 @@
#include "opencv2/opencv_modules.hpp"
#include "cvconfig.h"
#ifdef HAVE_OPENCV_OCL
# include "opencv2/ocl.hpp"
#ifdef HAVE_OPENCL
# include "opencv2/core/ocl.hpp"
#endif
#ifdef HAVE_CUDA
......
......@@ -168,9 +168,6 @@ void matchKeyPoints(const vector<KeyPoint>& keypoints0, const Mat& H,
const float r0 = 0.5f * keypoints0[i0].size;
for(size_t i1 = 0; i1 < keypoints1.size(); i1++)
{
if(nearestPointIndex >= 0 && usedMask[i1])
continue;
float r1 = 0.5f * keypoints1[i1].size;
float intersectRatio = calcIntersectRatio(points0t.at<Point2f>(i0), r0,
keypoints1[i1].pt, r1);
......@@ -619,7 +616,7 @@ protected:
TEST(Features2d_RotationInvariance_Detector_SURF, regression)
{
DetectorRotationInvarianceTest test(SURF::create(),
0.44f,
0.65f,
0.76f);
test.safe_run();
}
......@@ -859,10 +856,21 @@ TEST(Features2d_RotationInvariance2_Detector_SURF, regression)
vector<KeyPoint> keypoints;
surf->detect(cross, keypoints);
// Expect 5 keypoints. One keypoint has coordinates (50.0, 50.0).
// The other 4 keypoints should have the same response.
// The order of the keypoints is indeterminate.
ASSERT_EQ(keypoints.size(), (vector<KeyPoint>::size_type) 5);
ASSERT_LT( fabs(keypoints[1].response - keypoints[2].response), 1e-6);
ASSERT_LT( fabs(keypoints[1].response - keypoints[3].response), 1e-6);
ASSERT_LT( fabs(keypoints[1].response - keypoints[4].response), 1e-6);
int i1 = -1;
for(int i = 0; i < 5; i++)
{
if(keypoints[i].pt.x == 50.0f)
;
else if(i1 == -1)
i1 = i;
else
ASSERT_LT(fabs(keypoints[i1].response - keypoints[i].response) / keypoints[i1].response, 1e-6);
}
}
TEST(DISABLED_Features2d_ScaleInvariance_Descriptor_DAISY, regression)
......@@ -942,7 +950,7 @@ TEST(Features2d_ScaleInvariance_Descriptor_BoostDesc_LBGM, regression)
DescriptorScaleInvarianceTest test(SURF::create(),
BoostDesc::create(BoostDesc::LBGM, true, 6.25f),
NORM_L1,
0.98f);
0.95f);
test.safe_run();
}
......
......@@ -45,11 +45,13 @@
#include "test_precomp.hpp"
#ifdef HAVE_OPENCV_OCL
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
using namespace std;
using std::tr1::get;
static bool keyPointsEquals(const cv::KeyPoint& p1, const cv::KeyPoint& p2)
{
const double maxPtDif = 0.1;
......@@ -117,6 +119,7 @@ IMPLEMENT_PARAM_CLASS(Upright, bool)
PARAM_TEST_CASE(SURF, HessianThreshold, Octaves, OctaveLayers, Extended, Upright)
{
bool useOpenCL;
double hessianThreshold;
int nOctaves;
int nOctaveLayers;
......@@ -125,39 +128,34 @@ PARAM_TEST_CASE(SURF, HessianThreshold, Octaves, OctaveLayers, Extended, Upright
virtual void SetUp()
{
useOpenCL = cv::ocl::useOpenCL();
hessianThreshold = get<0>(GetParam());
nOctaves = get<1>(GetParam());
nOctaveLayers = get<2>(GetParam());
extended = get<3>(GetParam());
upright = get<4>(GetParam());
}
virtual void TearDown()
{
cv::ocl::setUseOpenCL(useOpenCL);
}
};
TEST_P(SURF, DISABLED_Detector)
TEST_P(SURF, Detector)
{
cv::Mat image = cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "shared/fruits.png", cv::IMREAD_GRAYSCALE);
cv::UMat image;
cv::ocl::setUseOpenCL(true);
cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "shared/fruits.png", cv::IMREAD_GRAYSCALE).copyTo(image);
ASSERT_FALSE(image.empty());
cv::ocl::SURF_OCL surf;
surf.hessianThreshold = static_cast<float>(hessianThreshold);
surf.nOctaves = nOctaves;
surf.nOctaveLayers = nOctaveLayers;
surf.extended = extended;
surf.upright = upright;
surf.keypointsRatio = 0.05f;
cv::Ptr<cv::xfeatures2d::SURF> surf = cv::xfeatures2d::SURF::create(hessianThreshold, nOctaves, nOctaveLayers, extended, upright);
std::vector<cv::KeyPoint> keypoints;
surf(cv::ocl::oclMat(image), cv::ocl::oclMat(), keypoints);
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
surf->detect(image, keypoints, cv::noArray());
cv::ocl::setUseOpenCL(false);
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, cv::noArray(), keypoints_gold);
surf->detect(image, keypoints_gold, cv::noArray());
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
......@@ -166,38 +164,29 @@ TEST_P(SURF, DISABLED_Detector)
EXPECT_GT(matchedRatio, 0.99);
}
TEST_P(SURF, DISABLED_Descriptor)
TEST_P(SURF, Descriptor)
{
cv::Mat image = cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "shared/fruits.png", cv::IMREAD_GRAYSCALE);
cv::UMat image;
cv::ocl::setUseOpenCL(true);
cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "shared/fruits.png", cv::IMREAD_GRAYSCALE).copyTo(image);
ASSERT_FALSE(image.empty());
cv::ocl::SURF_OCL surf;
surf.hessianThreshold = static_cast<float>(hessianThreshold);
surf.nOctaves = nOctaves;
surf.nOctaveLayers = nOctaveLayers;
surf.extended = extended;
surf.upright = upright;
surf.keypointsRatio = 0.05f;
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
cv::Ptr<cv::xfeatures2d::SURF> surf = cv::xfeatures2d::SURF::create(hessianThreshold, nOctaves, nOctaveLayers, extended, upright);
std::vector<cv::KeyPoint> keypoints;
surf_gold(image, cv::noArray(), keypoints);
surf->detect(image, keypoints, cv::noArray());
cv::ocl::oclMat descriptors;
surf(cv::ocl::oclMat(image), cv::ocl::oclMat(), keypoints, descriptors, true);
cv::UMat descriptors;
surf->detectAndCompute(image, cv::noArray(), keypoints, descriptors, true);
cv::ocl::setUseOpenCL(false);
cv::Mat descriptors_gold;
surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);
surf->detectAndCompute(image, cv::noArray(), keypoints, descriptors_gold, true);
cv::BFMatcher matcher(surf.defaultNorm());
cv::BFMatcher matcher(surf->defaultNorm());
std::vector<cv::DMatch> matches;
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
matcher.match(descriptors_gold, descriptors, matches);
int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
......@@ -212,4 +201,6 @@ INSTANTIATE_TEST_CASE_P(OCL_Features2D, SURF, testing::Combine(
testing::Values(Extended(false), Extended(true)),
testing::Values(Upright(false), Upright(true))));
#endif // HAVE_OPENCV_OCL
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment