Commit 3a844444 authored by Vadim Pisarevsky

Merge pull request #3596 from jet47:cuda-features2d-refactoring

parents b6023eab 5f1282af
......@@ -64,15 +64,18 @@ PERF_TEST_P(Image_Threshold_NonMaxSuppression, FAST,
if (PERF_RUN_CUDA())
{
cv::cuda::FAST_CUDA d_fast(threshold, nonMaxSuppersion, 0.5);
cv::Ptr<cv::cuda::FastFeatureDetector> d_fast =
cv::cuda::FastFeatureDetector::create(threshold, nonMaxSuppersion,
cv::FastFeatureDetector::TYPE_9_16,
0.5 * img.size().area());
const cv::cuda::GpuMat d_img(img);
cv::cuda::GpuMat d_keypoints;
TEST_CYCLE() d_fast(d_img, cv::cuda::GpuMat(), d_keypoints);
TEST_CYCLE() d_fast->detectAsync(d_img, d_keypoints);
std::vector<cv::KeyPoint> gpu_keypoints;
d_fast.downloadKeypoints(d_keypoints, gpu_keypoints);
d_fast->convert(d_keypoints, gpu_keypoints);
sortKeyPoints(gpu_keypoints);
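As a side note on the API this hunk migrates to, here is a minimal, self-contained sketch of the two ways the refactored detector can be driven (blocking detect() versus detectAsync() plus convert()). The image path and variable names are illustrative, not part of this patch.

#include <opencv2/core/cuda.hpp>
#include <opencv2/cudafeatures2d.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    // Illustrative input; any 8-bit single-channel image works.
    cv::Mat img = cv::imread("scene.png", cv::IMREAD_GRAYSCALE);

    cv::Ptr<cv::cuda::FastFeatureDetector> fast =
        cv::cuda::FastFeatureDetector::create(20, true,
                                              cv::FastFeatureDetector::TYPE_9_16,
                                              5000 /* max keypoints */);

    cv::cuda::GpuMat d_img(img);
    std::vector<cv::KeyPoint> keypoints;

    // Blocking path: keypoints are returned directly on the host.
    fast->detect(d_img, keypoints);

    // Asynchronous path: keypoints stay on the GPU until convert() is called.
    cv::cuda::GpuMat d_keypoints;
    fast->detectAsync(d_img, d_keypoints);
    fast->convert(d_keypoints, keypoints);

    return 0;
}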
......@@ -106,15 +109,15 @@ PERF_TEST_P(Image_NFeatures, ORB,
if (PERF_RUN_CUDA())
{
cv::cuda::ORB_CUDA d_orb(nFeatures);
cv::Ptr<cv::cuda::ORB> d_orb = cv::cuda::ORB::create(nFeatures);
const cv::cuda::GpuMat d_img(img);
cv::cuda::GpuMat d_keypoints, d_descriptors;
TEST_CYCLE() d_orb(d_img, cv::cuda::GpuMat(), d_keypoints, d_descriptors);
TEST_CYCLE() d_orb->detectAndComputeAsync(d_img, cv::noArray(), d_keypoints, d_descriptors);
std::vector<cv::KeyPoint> gpu_keypoints;
d_orb.downloadKeyPoints(d_keypoints, gpu_keypoints);
d_orb->convert(d_keypoints, gpu_keypoints);
cv::Mat gpu_descriptors(d_descriptors);
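The ORB path follows the same pattern. A short sketch, assuming d_img is an 8-bit grayscale GpuMat that has already been uploaded (as in the FAST sketch above); the feature count is illustrative.

cv::Ptr<cv::cuda::ORB> orb = cv::cuda::ORB::create(4000 /* nFeatures, illustrative */);

cv::cuda::GpuMat d_keypoints, d_descriptors;
orb->detectAndComputeAsync(d_img, cv::noArray(), d_keypoints, d_descriptors);

std::vector<cv::KeyPoint> keypoints;
orb->convert(d_keypoints, keypoints);       // replaces the old downloadKeyPoints()

cv::Mat descriptors;
d_descriptors.download(descriptors);        // binary descriptors, one row per keypoint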
......@@ -164,16 +167,16 @@ PERF_TEST_P(DescSize_Norm, BFMatch,
if (PERF_RUN_CUDA())
{
cv::cuda::BFMatcher_CUDA d_matcher(normType);
cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);
const cv::cuda::GpuMat d_query(query);
const cv::cuda::GpuMat d_train(train);
cv::cuda::GpuMat d_trainIdx, d_distance;
cv::cuda::GpuMat d_matches;
TEST_CYCLE() d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
TEST_CYCLE() d_matcher->matchAsync(d_query, d_train, d_matches);
std::vector<cv::DMatch> gpu_matches;
d_matcher.matchDownload(d_trainIdx, d_distance, gpu_matches);
d_matcher->matchConvert(d_matches, gpu_matches);
SANITY_CHECK_MATCHES(gpu_matches);
}
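For reference, a sketch of the matcher interface this hunk moves to, assuming d_query and d_train are descriptor matrices already uploaded to the GPU.

cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
    cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_L2);

// Blocking call: results land directly in a host vector.
std::vector<cv::DMatch> matches;
matcher->match(d_query, d_train, matches);

// Asynchronous call: results stay in a single GpuMat until matchConvert().
cv::cuda::GpuMat d_matches;
matcher->matchAsync(d_query, d_train, d_matches);
matcher->matchConvert(d_matches, matches);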
......@@ -223,16 +226,16 @@ PERF_TEST_P(DescSize_K_Norm, BFKnnMatch,
if (PERF_RUN_CUDA())
{
cv::cuda::BFMatcher_CUDA d_matcher(normType);
cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);
const cv::cuda::GpuMat d_query(query);
const cv::cuda::GpuMat d_train(train);
cv::cuda::GpuMat d_trainIdx, d_distance, d_allDist;
cv::cuda::GpuMat d_matches;
TEST_CYCLE() d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
TEST_CYCLE() d_matcher->knnMatchAsync(d_query, d_train, d_matches, k);
std::vector< std::vector<cv::DMatch> > matchesTbl;
d_matcher.knnMatchDownload(d_trainIdx, d_distance, matchesTbl);
d_matcher->knnMatchConvert(d_matches, matchesTbl);
std::vector<cv::DMatch> gpu_matches;
toOneRowMatches(matchesTbl, gpu_matches);
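Continuing the sketch above, the k-NN variant yields all k candidates per query descriptor after knnMatchConvert(); the ratio-test threshold below is illustrative and not part of this patch.

cv::cuda::GpuMat d_knnMatches;
matcher->knnMatchAsync(d_query, d_train, d_knnMatches, 2);

std::vector< std::vector<cv::DMatch> > knnMatches;
matcher->knnMatchConvert(d_knnMatches, knnMatches);

std::vector<cv::DMatch> good;
for (size_t i = 0; i < knnMatches.size(); ++i)
{
    if (knnMatches[i].size() == 2 &&
        knnMatches[i][0].distance < 0.75f * knnMatches[i][1].distance)
    {
        good.push_back(knnMatches[i][0]);
    }
}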
......@@ -277,16 +280,16 @@ PERF_TEST_P(DescSize_Norm, BFRadiusMatch,
if (PERF_RUN_CUDA())
{
cv::cuda::BFMatcher_CUDA d_matcher(normType);
cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);
const cv::cuda::GpuMat d_query(query);
const cv::cuda::GpuMat d_train(train);
cv::cuda::GpuMat d_trainIdx, d_nMatches, d_distance;
cv::cuda::GpuMat d_matches;
TEST_CYCLE() d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, maxDistance);
TEST_CYCLE() d_matcher->radiusMatchAsync(d_query, d_train, d_matches, maxDistance);
std::vector< std::vector<cv::DMatch> > matchesTbl;
d_matcher.radiusMatchDownload(d_trainIdx, d_distance, d_nMatches, matchesTbl);
d_matcher->radiusMatchConvert(d_matches, matchesTbl);
std::vector<cv::DMatch> gpu_matches;
toOneRowMatches(matchesTbl, gpu_matches);
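And the radius variant, again as a sketch on top of the same matcher; maxDistance is in descriptor space and the value here is illustrative.

const float maxDistance = 0.5f;

cv::cuda::GpuMat d_radiusMatches;
matcher->radiusMatchAsync(d_query, d_train, d_radiusMatches, maxDistance);

std::vector< std::vector<cv::DMatch> > radiusMatches;   // one (possibly empty) list per query descriptor
matcher->radiusMatchConvert(d_radiusMatches, radiusMatches);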
......
......@@ -279,7 +279,7 @@ namespace cv { namespace cuda { namespace device
#endif
}
int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold)
int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
......@@ -290,29 +290,29 @@ namespace cv { namespace cuda { namespace device
grid.x = divUp(img.cols - 6, block.x);
grid.y = divUp(img.rows - 6, block.y);
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) );
if (score.data)
{
if (mask.data)
calcKeypoints<true><<<grid, block>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
calcKeypoints<true><<<grid, block, 0, stream>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
else
calcKeypoints<true><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
calcKeypoints<true><<<grid, block, 0, stream>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
}
else
{
if (mask.data)
calcKeypoints<false><<<grid, block>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
calcKeypoints<false><<<grid, block, 0, stream>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
else
calcKeypoints<false><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
calcKeypoints<false><<<grid, block, 0, stream>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
}
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
unsigned int count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) );
cudaSafeCall( cudaStreamSynchronize(stream) );
return count;
}
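The host-side pattern introduced by this hunk, reduced to a standalone sketch: every blocking runtime call becomes its *Async counterpart on the caller's stream, and the single synchronization point is the cudaStreamSynchronize needed before the keypoint count can be read back. The kernel launch itself is elided below; in the real code it simply gains the extra <<<grid, block, 0, stream>>> argument. Names are illustrative.

#include <cuda_runtime.h>

int countOnStream(cudaStream_t stream)
{
    unsigned int* d_counter = 0;
    cudaMalloc(&d_counter, sizeof(unsigned int));

    cudaMemsetAsync(d_counter, 0, sizeof(unsigned int), stream);   // was cudaMemset
    // kernel<<<grid, block, 0, stream>>>(..., d_counter);         // was <<<grid, block>>>

    unsigned int count = 0;
    cudaMemcpyAsync(&count, d_counter, sizeof(unsigned int),
                    cudaMemcpyDeviceToHost, stream);               // was cudaMemcpy
    cudaStreamSynchronize(stream);                                 // was cudaDeviceSynchronize

    cudaFree(d_counter);
    return static_cast<int>(count);
}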
......@@ -356,7 +356,7 @@ namespace cv { namespace cuda { namespace device
#endif
}
int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response)
int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
......@@ -366,15 +366,15 @@ namespace cv { namespace cuda { namespace device
dim3 grid;
grid.x = divUp(count, block.x);
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) );
nonmaxSuppression<<<grid, block>>>(kpLoc, count, score, loc, response);
nonmaxSuppression<<<grid, block, 0, stream>>>(kpLoc, count, score, loc, response);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
unsigned int new_count;
cudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpyAsync(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) );
cudaSafeCall( cudaStreamSynchronize(stream) );
return new_count;
}
......
......@@ -47,124 +47,162 @@ using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
cv::cuda::FAST_CUDA::FAST_CUDA(int, bool, double) { throw_no_cuda(); }
void cv::cuda::FAST_CUDA::operator ()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
void cv::cuda::FAST_CUDA::operator ()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
void cv::cuda::FAST_CUDA::downloadKeypoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
void cv::cuda::FAST_CUDA::convertKeypoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
void cv::cuda::FAST_CUDA::release() { throw_no_cuda(); }
int cv::cuda::FAST_CUDA::calcKeyPointsLocation(const GpuMat&, const GpuMat&) { throw_no_cuda(); return 0; }
int cv::cuda::FAST_CUDA::getKeyPoints(GpuMat&) { throw_no_cuda(); return 0; }
Ptr<cv::cuda::FastFeatureDetector> cv::cuda::FastFeatureDetector::create(int, bool, int, int) { throw_no_cuda(); return Ptr<cv::cuda::FastFeatureDetector>(); }
#else /* !defined (HAVE_CUDA) */
cv::cuda::FAST_CUDA::FAST_CUDA(int _threshold, bool _nonmaxSuppression, double _keypointsRatio) :
nonmaxSuppression(_nonmaxSuppression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0)
namespace cv { namespace cuda { namespace device
{
}
namespace fast
{
int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream);
int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream);
}
}}}
void cv::cuda::FAST_CUDA::operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
namespace
{
if (image.empty())
return;
class FAST_Impl : public cv::cuda::FastFeatureDetector
{
public:
FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints);
(*this)(image, mask, d_keypoints_);
downloadKeypoints(d_keypoints_, keypoints);
}
virtual void detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask);
virtual void detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream);
void cv::cuda::FAST_CUDA::downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints)
{
if (d_keypoints.empty())
return;
virtual void convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints);
Mat h_keypoints(d_keypoints);
convertKeypoints(h_keypoints, keypoints);
}
virtual void setThreshold(int threshold) { threshold_ = threshold; }
virtual int getThreshold() const { return threshold_; }
void cv::cuda::FAST_CUDA::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints)
{
if (h_keypoints.empty())
return;
CV_Assert(h_keypoints.rows == ROWS_COUNT && h_keypoints.elemSize() == 4);
virtual void setNonmaxSuppression(bool f) { nonmaxSuppression_ = f; }
virtual bool getNonmaxSuppression() const { return nonmaxSuppression_; }
int npoints = h_keypoints.cols;
virtual void setMaxNumPoints(int max_npoints) { max_npoints_ = max_npoints; }
virtual int getMaxNumPoints() const { return max_npoints_; }
keypoints.resize(npoints);
virtual void setType(int type) { CV_Assert( type == TYPE_9_16 ); }
virtual int getType() const { return TYPE_9_16; }
const short2* loc_row = h_keypoints.ptr<short2>(LOCATION_ROW);
const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);
private:
int threshold_;
bool nonmaxSuppression_;
int max_npoints_;
};
for (int i = 0; i < npoints; ++i)
FAST_Impl::FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints) :
threshold_(threshold), nonmaxSuppression_(nonmaxSuppression), max_npoints_(max_npoints)
{
KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
keypoints[i] = kp;
}
}
void cv::cuda::FAST_CUDA::operator ()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
{
calcKeyPointsLocation(img, mask);
keypoints.cols = getKeyPoints(keypoints);
}
namespace cv { namespace cuda { namespace device
{
namespace fast
void FAST_Impl::detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask)
{
int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold);
int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response);
}
}}}
int cv::cuda::FAST_CUDA::calcKeyPointsLocation(const GpuMat& img, const GpuMat& mask)
{
using namespace cv::cuda::device::fast;
CV_Assert(img.type() == CV_8UC1);
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));
if (_image.empty())
{
keypoints.clear();
return;
}
int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());
BufferPool pool(Stream::Null());
GpuMat d_keypoints = pool.getBuffer(ROWS_COUNT, max_npoints_, CV_16SC2);
ensureSizeIsEnough(1, maxKeypoints, CV_16SC2, kpLoc_);
detectAsync(_image, d_keypoints, _mask, Stream::Null());
convert(d_keypoints, keypoints);
}
if (nonmaxSuppression)
void FAST_Impl::detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream)
{
ensureSizeIsEnough(img.size(), CV_32SC1, score_);
score_.setTo(Scalar::all(0));
using namespace cv::cuda::device::fast;
const GpuMat img = _image.getGpuMat();
const GpuMat mask = _mask.getGpuMat();
CV_Assert( img.type() == CV_8UC1 );
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()) );
BufferPool pool(stream);
GpuMat kpLoc = pool.getBuffer(1, max_npoints_, CV_16SC2);
GpuMat score;
if (nonmaxSuppression_)
{
score = pool.getBuffer(img.size(), CV_32SC1);
score.setTo(Scalar::all(0), stream);
}
int count = calcKeypoints_gpu(img, mask, kpLoc.ptr<short2>(), max_npoints_, score, threshold_, StreamAccessor::getStream(stream));
count = std::min(count, max_npoints_);
if (count == 0)
{
_keypoints.release();
return;
}
ensureSizeIsEnough(ROWS_COUNT, count, CV_32FC1, _keypoints);
GpuMat& keypoints = _keypoints.getGpuMatRef();
if (nonmaxSuppression_)
{
count = nonmaxSuppression_gpu(kpLoc.ptr<short2>(), count, score, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW), StreamAccessor::getStream(stream));
if (count == 0)
{
keypoints.release();
}
else
{
keypoints.cols = count;
}
}
else
{
GpuMat locRow(1, count, kpLoc.type(), keypoints.ptr(0));
kpLoc.colRange(0, count).copyTo(locRow, stream);
keypoints.row(1).setTo(Scalar::all(0), stream);
}
}
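The temporaries above come from cv::cuda::BufferPool rather than ad-hoc allocations. A minimal sketch of that pattern, with sizes chosen purely for illustration; per the core module's documentation, setBufferPoolUsage is expected to be called before the first Stream is constructed for the pool to take effect.

cv::cuda::setBufferPoolUsage(true);            // enable the per-stream memory pool
cv::cuda::Stream stream;

cv::cuda::BufferPool pool(stream);
cv::cuda::GpuMat kpLoc = pool.getBuffer(1, 5000, CV_16SC2);     // like kpLoc above (max_npoints_ = 5000)
cv::cuda::GpuMat score = pool.getBuffer(480, 640, CV_32SC1);    // like score above, sized to the image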
count_ = calcKeypoints_gpu(img, mask, kpLoc_.ptr<short2>(), maxKeypoints, nonmaxSuppression ? score_ : PtrStepSzi(), threshold);
count_ = std::min(count_, maxKeypoints);
return count_;
}
int cv::cuda::FAST_CUDA::getKeyPoints(GpuMat& keypoints)
{
using namespace cv::cuda::device::fast;
if (count_ == 0)
return 0;
ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);
if (nonmaxSuppression)
return nonmaxSuppression_gpu(kpLoc_.ptr<short2>(), count_, score_, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW));
GpuMat locRow(1, count_, kpLoc_.type(), keypoints.ptr(0));
kpLoc_.colRange(0, count_).copyTo(locRow);
keypoints.row(1).setTo(Scalar::all(0));
return count_;
void FAST_Impl::convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints)
{
if (_gpu_keypoints.empty())
{
keypoints.clear();
return;
}
Mat h_keypoints;
if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT)
{
_gpu_keypoints.getGpuMat().download(h_keypoints);
}
else
{
h_keypoints = _gpu_keypoints.getMat();
}
CV_Assert( h_keypoints.rows == ROWS_COUNT );
CV_Assert( h_keypoints.elemSize() == 4 );
const int npoints = h_keypoints.cols;
keypoints.resize(npoints);
const short2* loc_row = h_keypoints.ptr<short2>(LOCATION_ROW);
const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);
for (int i = 0; i < npoints; ++i)
{
KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
keypoints[i] = kp;
}
}
}
void cv::cuda::FAST_CUDA::release()
Ptr<cv::cuda::FastFeatureDetector> cv::cuda::FastFeatureDetector::create(int threshold, bool nonmaxSuppression, int type, int max_npoints)
{
kpLoc_.release();
score_.release();
d_keypoints_.release();
CV_Assert( type == TYPE_9_16 );
return makePtr<FAST_Impl>(threshold, nonmaxSuppression, max_npoints);
}
#endif /* !defined (HAVE_CUDA) */
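End to end, the factory defined above is used like this. A sketch assuming img is a CV_8UC1 host image; the stream and variable names are illustrative.

cv::Ptr<cv::cuda::FastFeatureDetector> fast =
    cv::cuda::FastFeatureDetector::create(20, true, cv::FastFeatureDetector::TYPE_9_16, 5000);

cv::cuda::Stream stream;
cv::cuda::GpuMat d_img, d_keypoints;

d_img.upload(img, stream);                                    // asynchronous upload on the same stream
fast->detectAsync(d_img, d_keypoints, cv::noArray(), stream);
stream.waitForCompletion();

std::vector<cv::KeyPoint> keypoints;
fast->convert(d_keypoints, keypoints);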
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
cv::cuda::Feature2DAsync::~Feature2DAsync()
{
}
void cv::cuda::Feature2DAsync::detectAsync(InputArray image,
OutputArray keypoints,
InputArray mask,
Stream& stream)
{
if (image.empty())
{
keypoints.clear();
return;
}
detectAndComputeAsync(image, mask, keypoints, noArray(), false, stream);
}
void cv::cuda::Feature2DAsync::computeAsync(InputArray image,
OutputArray keypoints,
OutputArray descriptors,
Stream& stream)
{
if (image.empty())
{
descriptors.release();
return;
}
detectAndComputeAsync(image, noArray(), keypoints, descriptors, true, stream);
}
void cv::cuda::Feature2DAsync::detectAndComputeAsync(InputArray /*image*/,
InputArray /*mask*/,
OutputArray /*keypoints*/,
OutputArray /*descriptors*/,
bool /*useProvidedKeypoints*/,
Stream& /*stream*/)
{
CV_Error(Error::StsNotImplemented, "");
}
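A consequence of these defaults, shown as a sketch (orb, d_img and d_keypoints as in the earlier ORB sketch): a detector that overrides only detectAndComputeAsync still answers detectAsync(), because the base class forwards the call.

// Forwards to detectAndComputeAsync(d_img, noArray(), d_keypoints, noArray(), false, Stream::Null()).
orb->detectAsync(d_img, d_keypoints);
// computeAsync(image, keypoints, descriptors, stream) forwards the same way,
// with useProvidedKeypoints = true.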
......@@ -154,7 +154,7 @@ void CpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
matches_info.matches.clear();
Ptr<DescriptorMatcher> matcher;
Ptr<cv::DescriptorMatcher> matcher;
#if 0 // TODO check this
if (ocl::useOpenCL())
{
......@@ -220,13 +220,13 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
descriptors1_.upload(features1.descriptors);
descriptors2_.upload(features2.descriptors);
BFMatcher_CUDA matcher(NORM_L2);
Ptr<cuda::DescriptorMatcher> matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2);
MatchesSet matches;
// Find 1->2 matches
pair_matches.clear();
matcher.knnMatchSingle(descriptors1_, descriptors2_, train_idx_, distance_, all_dist_, 2);
matcher.knnMatchDownload(train_idx_, distance_, pair_matches);
matcher->knnMatch(descriptors1_, descriptors2_, pair_matches, 2);
for (size_t i = 0; i < pair_matches.size(); ++i)
{
if (pair_matches[i].size() < 2)
......@@ -242,8 +242,7 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
// Find 2->1 matches
pair_matches.clear();
matcher.knnMatchSingle(descriptors2_, descriptors1_, train_idx_, distance_, all_dist_, 2);
matcher.knnMatchDownload(train_idx_, distance_, pair_matches);
matcher->knnMatch(descriptors2_, descriptors1_, pair_matches, 2);
for (size_t i = 0; i < pair_matches.size(); ++i)
{
if (pair_matches[i].size() < 2)
......
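From user code, this GPU matcher is reached through the stitching module's high-level matcher. A sketch, assuming features1 and features2 are cv::detail::ImageFeatures computed beforehand and that a CUDA device is available.

#include <opencv2/stitching/detail/matchers.hpp>

// try_use_gpu = true selects GpuMatcher internally when a CUDA device is present;
// 0.3f is the default match confidence.
cv::detail::BestOf2NearestMatcher matcher(true, 0.3f);

cv::detail::MatchesInfo matches_info;
matcher(features1, features2, matches_info);
matcher.collectGarbage();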
......@@ -322,14 +322,14 @@ TEST(FAST)
FAST(src, keypoints, 20);
CPU_OFF;
cuda::FAST_CUDA d_FAST(20);
cv::Ptr<cv::cuda::FastFeatureDetector> d_FAST = cv::cuda::FastFeatureDetector::create(20);
cuda::GpuMat d_src(src);
cuda::GpuMat d_keypoints;
d_FAST(d_src, cuda::GpuMat(), d_keypoints);
d_FAST->detectAsync(d_src, d_keypoints);
CUDA_ON;
d_FAST(d_src, cuda::GpuMat(), d_keypoints);
d_FAST->detectAsync(d_src, d_keypoints);
CUDA_OFF;
}
......@@ -350,15 +350,15 @@ TEST(ORB)
orb->detectAndCompute(src, Mat(), keypoints, descriptors);
CPU_OFF;
cuda::ORB_CUDA d_orb;
Ptr<cuda::ORB> d_orb = cuda::ORB::create();
cuda::GpuMat d_src(src);
cuda::GpuMat d_keypoints;
cuda::GpuMat d_descriptors;
d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
CUDA_ON;
d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
CUDA_OFF;
}
......@@ -379,14 +379,14 @@ TEST(BruteForceMatcher)
// Init CUDA matcher
cuda::BFMatcher_CUDA d_matcher(NORM_L2);
Ptr<cuda::DescriptorMatcher> d_matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2);
cuda::GpuMat d_query(query);
cuda::GpuMat d_train(train);
// Output
vector< vector<DMatch> > matches(2);
cuda::GpuMat d_trainIdx, d_distance, d_allDist, d_nMatches;
cuda::GpuMat d_matches;
SUBTEST << "match";
......@@ -396,10 +396,10 @@ TEST(BruteForceMatcher)
matcher.match(query, train, matches[0]);
CPU_OFF;
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
d_matcher->matchAsync(d_query, d_train, d_matches);
CUDA_ON;
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
d_matcher->matchAsync(d_query, d_train, d_matches);
CUDA_OFF;
SUBTEST << "knnMatch";
......@@ -410,10 +410,10 @@ TEST(BruteForceMatcher)
matcher.knnMatch(query, train, matches, 2);
CPU_OFF;
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
CUDA_ON;
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
CUDA_OFF;
SUBTEST << "radiusMatch";
......@@ -426,12 +426,10 @@ TEST(BruteForceMatcher)
matcher.radiusMatch(query, train, matches, max_distance);
CPU_OFF;
d_trainIdx.release();
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
CUDA_ON;
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
CUDA_OFF;
}
......