Commit 8a178da1 authored by Vladislav Vinogradov

refactor CUDA BFMatcher algorithm:

use new abstract interface and hidden implementation
parent 764d55b8
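The public BFMatcher_CUDA class is replaced by an abstract cv::cuda::DescriptorMatcher obtained through a factory, with the brute-force matcher hidden behind it in the source file. A minimal before/after sketch of the blocking 1-to-1 match (variable names are illustrative; assumes the descriptors are already uploaded to GpuMat):

// before this commit
cv::cuda::BFMatcher_CUDA matcher(cv::NORM_HAMMING);
std::vector<cv::DMatch> matches;
matcher.match(d_query, d_train, matches);

// after this commit
cv::Ptr<cv::cuda::DescriptorMatcher> matcher = cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);
std::vector<cv::DMatch> matches;
matcher->match(d_query, d_train, matches);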
@@ -63,170 +63,315 @@ namespace cv { namespace cuda {

//! @addtogroup cudafeatures2d
//! @{

//
// DescriptorMatcher
//

/** @brief Abstract base class for matching keypoint descriptors.

It has two groups of match methods: for matching descriptors of an image with another image or with
an image set.
*/
class CV_EXPORTS DescriptorMatcher : public cv::Algorithm
{
public:
//
// Factories
//

/** @brief Brute-force descriptor matcher.

For each descriptor in the first set, this matcher finds the closest descriptor in the second set
by trying each one. This descriptor matcher supports masking permissible matches of descriptor
sets.

@param norm One of NORM_L1, NORM_L2, NORM_HAMMING. The L1 and L2 norms are the preferable choices
for SIFT and SURF descriptors; NORM_HAMMING should be used with ORB, BRISK and BRIEF.
*/
static Ptr<DescriptorMatcher> createBFMatcher(int norm = cv::NORM_L2);
//
// Utility
//

/** @brief Returns true if the descriptor matcher supports masking permissible matches.
*/
virtual bool isMaskSupported() const = 0;

//
// Descriptor collection
//

/** @brief Adds descriptors to train a descriptor collection.

If the collection is not empty, the new descriptors are added to existing train descriptors.

@param descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from the same
train image.
*/
virtual void add(const std::vector<GpuMat>& descriptors) = 0;

/** @brief Returns a constant link to the train descriptor collection.
*/
virtual const std::vector<GpuMat>& getTrainDescriptors() const = 0;

/** @brief Clears the train descriptor collection.
*/
virtual void clear() = 0;

/** @brief Returns true if there are no train descriptors in the collection.
*/
virtual bool empty() const = 0;

/** @brief Trains a descriptor matcher.

Trains a descriptor matcher (for example, the FLANN index). In all match methods, train() is run
every time before matching.
*/
virtual void train() = 0;
//
// 1 to 1 match
//

/** @brief Finds the best match for each descriptor from a query set (blocking version).

@param queryDescriptors Query set of descriptors.
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
collection stored in the class object.
@param matches Matches. If a query descriptor is masked out in mask, no match is added for this
descriptor. So, the matches size may be smaller than the query descriptors count.
@param mask Mask specifying permissible matches between an input query and train matrices of
descriptors.

In the first variant of this method, the train descriptors are passed as an input argument. In the
second variant, the train descriptor collection set by DescriptorMatcher::add is used. An optional
mask (or masks) can be passed to specify which query and training descriptors can be matched.
Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if
mask.at\<uchar\>(i,j) is non-zero.
*/
virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
std::vector<DMatch>& matches,
InputArray mask = noArray()) = 0;

/** @overload
*/
virtual void match(InputArray queryDescriptors,
std::vector<DMatch>& matches,
const std::vector<GpuMat>& masks = std::vector<GpuMat>()) = 0;

/** @brief Finds the best match for each descriptor from a query set (asynchronous version).

@param queryDescriptors Query set of descriptors.
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
collection stored in the class object.
@param matches Matches array stored in GPU memory. The internal representation is not defined.
Use the DescriptorMatcher::matchConvert method to retrieve results in the standard representation.
@param mask Mask specifying permissible matches between an input query and train matrices of
descriptors.
@param stream CUDA stream.

In the first variant of this method, the train descriptors are passed as an input argument. In the
second variant, the train descriptor collection set by DescriptorMatcher::add is used. An optional
mask (or masks) can be passed to specify which query and training descriptors can be matched.
Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if
mask.at\<uchar\>(i,j) is non-zero.
*/
virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
OutputArray matches,
InputArray mask = noArray(),
Stream& stream = Stream::Null()) = 0;

/** @overload
*/
virtual void matchAsync(InputArray queryDescriptors,
OutputArray matches,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
Stream& stream = Stream::Null()) = 0;

/** @brief Converts the matches array from the internal representation to a standard matches vector.

The method is supposed to be used with DescriptorMatcher::matchAsync to get the final result.
Call this method only after DescriptorMatcher::matchAsync has completed (i.e. after synchronization).

@param gpu_matches Matches, returned from DescriptorMatcher::matchAsync.
@param matches Vector of DMatch objects.
*/
virtual void matchConvert(InputArray gpu_matches,
std::vector<DMatch>& matches) = 0;
void radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(), Stream& stream = Stream::Null()); //
// knn match
//! Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch. //
//! matches will be sorted in increasing order of distances.
//! compactResult is used when mask is not empty. If compactResult is false matches /** @brief Finds the k best matches for each descriptor from a query set (blocking version).
//! vector will have the same size as queryDescriptors rows. If compactResult is true
//! matches vector will not contain matches for fully masked out query descriptors. @param queryDescriptors Query set of descriptors.
static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches, @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
std::vector< std::vector<DMatch> >& matches, bool compactResult = false); collection stored in the class object.
//! Convert trainIdx, nMatches and distance to vector with DMatch. @param matches Matches. Each matches[i] is k or less matches for the same query descriptor.
static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, @param k Count of best matches found per each query descriptor or less if a query descriptor has
std::vector< std::vector<DMatch> >& matches, bool compactResult = false); less than k possible matches in total.
@param mask Mask specifying permissible matches between an input query and train matrices of
//! Find best matches from train collection for each query descriptor which have distance less than descriptors.
//! maxDistance (in increasing order of distances). @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
void radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance, false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false); the matches vector does not contain matches for fully masked-out query descriptors.
int norm; These extended variants of DescriptorMatcher::match methods find several best matches for each query
descriptor. The matches are returned in the distance increasing order. See DescriptorMatcher::match
private: for the details about query and train descriptors.
std::vector<GpuMat> trainDescCollection; */
virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
std::vector<std::vector<DMatch> >& matches,
int k,
InputArray mask = noArray(),
bool compactResult = false) = 0;
/** @overload
*/
virtual void knnMatch(InputArray queryDescriptors,
std::vector<std::vector<DMatch> >& matches,
int k,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
bool compactResult = false) = 0;
/** @brief Finds the k best matches for each descriptor from a query set (asynchronous version).

@param queryDescriptors Query set of descriptors.
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
collection stored in the class object.
@param matches Matches array stored in GPU memory. The internal representation is not defined.
Use the DescriptorMatcher::knnMatchConvert method to retrieve results in the standard representation.
@param k Count of best matches found per each query descriptor (or fewer if a query descriptor has
less than k possible matches in total).
@param mask Mask specifying permissible matches between an input query and train matrices of
descriptors.
@param stream CUDA stream.

These extended variants of the DescriptorMatcher::matchAsync methods find several best matches for
each query descriptor. The matches are returned in order of increasing distance. See
DescriptorMatcher::matchAsync for details about query and train descriptors.
*/
virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
OutputArray matches,
int k,
InputArray mask = noArray(),
Stream& stream = Stream::Null()) = 0;
/** @overload
*/
virtual void knnMatchAsync(InputArray queryDescriptors,
OutputArray matches,
int k,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
Stream& stream = Stream::Null()) = 0;
/** @brief Converts the matches array from the internal representation to a standard matches vector.

The method is supposed to be used with DescriptorMatcher::knnMatchAsync to get the final result.
Call this method only after DescriptorMatcher::knnMatchAsync has completed (i.e. after synchronization).

@param gpu_matches Matches, returned from DescriptorMatcher::knnMatchAsync.
@param matches Vector of DMatch objects.
@param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
the matches vector does not contain matches for fully masked-out query descriptors.
*/
virtual void knnMatchConvert(InputArray gpu_matches,
std::vector< std::vector<DMatch> >& matches,
bool compactResult = false) = 0;
//
// radius match
//
/** @brief For each query descriptor, finds the training descriptors not farther than the specified distance (blocking version).

@param queryDescriptors Query set of descriptors.
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
collection stored in the class object.
@param matches Found matches.
@param maxDistance Threshold for the distance between matched descriptors. Distance here means
metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured
in pixels)!
@param mask Mask specifying permissible matches between an input query and train matrices of
descriptors.
@param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
the matches vector does not contain matches for fully masked-out query descriptors.

For each query descriptor, the methods find all training descriptors whose distance to the query
descriptor is equal to or smaller than maxDistance. Found matches are returned in order of
increasing distance.
*/
virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
std::vector<std::vector<DMatch> >& matches,
float maxDistance,
InputArray mask = noArray(),
bool compactResult = false) = 0;
/** @overload
*/
virtual void radiusMatch(InputArray queryDescriptors,
std::vector<std::vector<DMatch> >& matches,
float maxDistance,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
bool compactResult = false) = 0;
/** @brief For each query descriptor, finds the training descriptors not farther than the specified distance (asynchronous version).

@param queryDescriptors Query set of descriptors.
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
collection stored in the class object.
@param matches Matches array stored in GPU memory. The internal representation is not defined.
Use the DescriptorMatcher::radiusMatchConvert method to retrieve results in the standard representation.
@param maxDistance Threshold for the distance between matched descriptors. Distance here means
metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured
in pixels)!
@param mask Mask specifying permissible matches between an input query and train matrices of
descriptors.
@param stream CUDA stream.

For each query descriptor, the methods find all training descriptors whose distance to the query
descriptor is equal to or smaller than maxDistance. Found matches are returned in order of
increasing distance.
*/
virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
OutputArray matches,
float maxDistance,
InputArray mask = noArray(),
Stream& stream = Stream::Null()) = 0;
/** @overload
*/
virtual void radiusMatchAsync(InputArray queryDescriptors,
OutputArray matches,
float maxDistance,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
Stream& stream = Stream::Null()) = 0;
/** @brief Converts the matches array from the internal representation to a standard matches vector.

The method is supposed to be used with DescriptorMatcher::radiusMatchAsync to get the final result.
Call this method only after DescriptorMatcher::radiusMatchAsync has completed (i.e. after synchronization).

@param gpu_matches Matches, returned from DescriptorMatcher::radiusMatchAsync.
@param matches Vector of DMatch objects.
@param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
the matches vector does not contain matches for fully masked-out query descriptors.
*/
virtual void radiusMatchConvert(InputArray gpu_matches,
std::vector< std::vector<DMatch> >& matches,
bool compactResult = false) = 0;
};

//

...
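The asynchronous variants enqueue work on a CUDA stream and leave the result in GPU memory until it is explicitly converted. A minimal sketch of the intended pipeline (variable names are illustrative; assumes d_query and d_train are GpuMat descriptor sets and matcher was obtained from createBFMatcher):

cv::cuda::Stream stream;
cv::cuda::GpuMat d_matches;
matcher->matchAsync(d_query, d_train, d_matches, cv::noArray(), stream);
// ... enqueue unrelated work on the same stream ...
stream.waitForCompletion(); // matchAsync must have finished before converting
std::vector<cv::DMatch> matches;
matcher->matchConvert(d_matches, matches);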
@@ -167,16 +167,16 @@ PERF_TEST_P(DescSize_Norm, BFMatch,

if (PERF_RUN_CUDA())
{
cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);

const cv::cuda::GpuMat d_query(query);
const cv::cuda::GpuMat d_train(train);

cv::cuda::GpuMat d_matches;

TEST_CYCLE() d_matcher->matchAsync(d_query, d_train, d_matches);

std::vector<cv::DMatch> gpu_matches;
d_matcher->matchConvert(d_matches, gpu_matches);

SANITY_CHECK_MATCHES(gpu_matches);
}
@@ -226,16 +226,16 @@ PERF_TEST_P(DescSize_K_Norm, BFKnnMatch,

if (PERF_RUN_CUDA())
{
cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);

const cv::cuda::GpuMat d_query(query);
const cv::cuda::GpuMat d_train(train);

cv::cuda::GpuMat d_matches;

TEST_CYCLE() d_matcher->knnMatchAsync(d_query, d_train, d_matches, k);

std::vector< std::vector<cv::DMatch> > matchesTbl;
d_matcher->knnMatchConvert(d_matches, matchesTbl);

std::vector<cv::DMatch> gpu_matches;
toOneRowMatches(matchesTbl, gpu_matches);
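The k-nearest variant is what the usual Lowe ratio test is built on (and k = 2 is the only mode the train-collection overload of knnMatchAsync supports). A minimal sketch, with an illustrative 0.8 threshold:

std::vector< std::vector<cv::DMatch> > knn;
d_matcher->knnMatch(d_query, d_train, knn, 2);

std::vector<cv::DMatch> good;
for (size_t i = 0; i < knn.size(); ++i)
{
if (knn[i].size() == 2 && knn[i][0].distance < 0.8f * knn[i][1].distance)
good.push_back(knn[i][0]);
}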
@@ -280,16 +280,16 @@ PERF_TEST_P(DescSize_Norm, BFRadiusMatch,

if (PERF_RUN_CUDA())
{
cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);

const cv::cuda::GpuMat d_query(query);
const cv::cuda::GpuMat d_train(train);

cv::cuda::GpuMat d_matches;

TEST_CYCLE() d_matcher->radiusMatchAsync(d_query, d_train, d_matches, maxDistance);

std::vector< std::vector<cv::DMatch> > matchesTbl;
d_matcher->radiusMatchConvert(d_matches, matchesTbl);

std::vector<cv::DMatch> gpu_matches;
toOneRowMatches(matchesTbl, gpu_matches);

...
@@ -47,37 +47,7 @@ using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int) { throw_no_cuda(); return Ptr<cv::cuda::DescriptorMatcher>(); }

#else /* !defined (HAVE_CUDA) */
@@ -155,49 +125,212 @@ namespace cv { namespace cuda { namespace device

}
}}}
namespace
{
static void makeGpuCollection(const std::vector<GpuMat>& trainDescCollection,
const std::vector<GpuMat>& masks,
GpuMat& trainCollection,
GpuMat& maskCollection)
{
if (trainDescCollection.empty())
return;

if (masks.empty())
{
Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));

PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();

for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
*trainCollectionCPU_ptr = trainDescCollection[i];

trainCollection.upload(trainCollectionCPU);
maskCollection.release();
}
else
{
CV_Assert( masks.size() == trainDescCollection.size() );

Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb)));

PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr<PtrStepb>();

for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
{
const GpuMat& train = trainDescCollection[i];
const GpuMat& mask = masks[i];

CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows) );

*trainCollectionCPU_ptr = train;
*maskCollectionCPU_ptr = mask;
}

trainCollection.upload(trainCollectionCPU);
maskCollection.upload(maskCollectionCPU);
}
}
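// A note on the trick above: each element written into trainCollectionCPU is a PtrStepSzb
// header (device pointer, step and size) describing one train GpuMat, so uploading the
// 1 x N CV_8UC(sizeof(PtrStepSzb)) matrix gives the kernels an array of per-image descriptor
// matrices without copying the descriptor data itself.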
class BFMatcher_Impl : public cv::cuda::DescriptorMatcher
{
public:
explicit BFMatcher_Impl(int norm) : norm_(norm)
{
CV_Assert( norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING );
}
virtual bool isMaskSupported() const { return true; }
virtual void add(const std::vector<GpuMat>& descriptors)
{
trainDescCollection_.insert(trainDescCollection_.end(), descriptors.begin(), descriptors.end());
}
virtual const std::vector<GpuMat>& getTrainDescriptors() const
{
return trainDescCollection_;
}
virtual void clear()
{
trainDescCollection_.clear();
}
virtual bool empty() const
{
return trainDescCollection_.empty();
}
virtual void train()
{
}
virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
std::vector<DMatch>& matches,
InputArray mask = noArray());
virtual void match(InputArray queryDescriptors,
std::vector<DMatch>& matches,
const std::vector<GpuMat>& masks = std::vector<GpuMat>());
virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
OutputArray matches,
InputArray mask = noArray(),
Stream& stream = Stream::Null());
virtual void matchAsync(InputArray queryDescriptors,
OutputArray matches,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
Stream& stream = Stream::Null());
virtual void matchConvert(InputArray gpu_matches,
std::vector<DMatch>& matches);
virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
std::vector<std::vector<DMatch> >& matches,
int k,
InputArray mask = noArray(),
bool compactResult = false);
virtual void knnMatch(InputArray queryDescriptors,
std::vector<std::vector<DMatch> >& matches,
int k,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
bool compactResult = false);
virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
OutputArray matches,
int k,
InputArray mask = noArray(),
Stream& stream = Stream::Null());
virtual void knnMatchAsync(InputArray queryDescriptors,
OutputArray matches,
int k,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
Stream& stream = Stream::Null());
virtual void knnMatchConvert(InputArray gpu_matches,
std::vector< std::vector<DMatch> >& matches,
bool compactResult = false);
virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
std::vector<std::vector<DMatch> >& matches,
float maxDistance,
InputArray mask = noArray(),
bool compactResult = false);
virtual void radiusMatch(InputArray queryDescriptors,
std::vector<std::vector<DMatch> >& matches,
float maxDistance,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
bool compactResult = false);
virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
OutputArray matches,
float maxDistance,
InputArray mask = noArray(),
Stream& stream = Stream::Null());
virtual void radiusMatchAsync(InputArray queryDescriptors,
OutputArray matches,
float maxDistance,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
Stream& stream = Stream::Null());
virtual void radiusMatchConvert(InputArray gpu_matches,
std::vector< std::vector<DMatch> >& matches,
bool compactResult = false);
private:
int norm_;
std::vector<GpuMat> trainDescCollection_;
};
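// The createBFMatcher factory itself is outside the visible hunks; presumably it only wraps
// the hidden implementation, along the lines of:
//
// Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int norm)
// {
//     return makePtr<BFMatcher_Impl>(norm);
// }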
//
// 1 to 1 match
//
void BFMatcher_Impl::match(InputArray _queryDescriptors, InputArray _trainDescriptors,
std::vector<DMatch>& matches,
InputArray _mask)
{
GpuMat d_matches;
matchAsync(_queryDescriptors, _trainDescriptors, d_matches, _mask);
matchConvert(d_matches, matches);
}
void BFMatcher_Impl::match(InputArray _queryDescriptors,
std::vector<DMatch>& matches,
const std::vector<GpuMat>& masks)
{
GpuMat d_matches;
matchAsync(_queryDescriptors, d_matches, masks);
matchConvert(d_matches, matches);
}
void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
OutputArray _matches,
InputArray _mask,
Stream& stream)
{
using namespace cv::cuda::device::bf_match;
const GpuMat query = _queryDescriptors.getGpuMat();
const GpuMat train = _trainDescriptors.getGpuMat();
const GpuMat mask = _mask.getGpuMat();
if (query.empty() || train.empty())
{
_matches.release();
return;
}

CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
CV_Assert( train.cols == query.cols && train.type() == query.type() );
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,

@@ -215,7 +348,6 @@ void cv::cuda::BFMatcher_CUDA::matchSingle(const GpuMat& query, const GpuMat& tr

0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
0/*matchL2_gpu<int>*/, matchL2_gpu<float>
};

static const caller_t callersHamming[] =
{
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,

@@ -223,124 +355,44 @@ void cv::cuda::BFMatcher_CUDA::matchSingle(const GpuMat& query, const GpuMat& tr

matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
};

const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

const caller_t func = callers[query.depth()];
if (func == 0)
{
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
}

const int nQuery = query.rows;

_matches.create(2, nQuery, CV_32SC1);
GpuMat matches = _matches.getGpuMat();

GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(1));

func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
}
void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors,
OutputArray _matches,
const std::vector<GpuMat>& masks,
Stream& stream)
{
using namespace cv::cuda::device::bf_match;

const GpuMat query = _queryDescriptors.getGpuMat();

if (query.empty() || trainDescCollection_.empty())
{
_matches.release();
return;
}

CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

GpuMat trainCollection, maskCollection;
makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,

@@ -365,93 +417,180 @@ void cv::cuda::BFMatcher_CUDA::matchCollection(const GpuMat& query, const GpuMat

matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
};

const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

const caller_t func = callers[query.depth()];
if (func == 0)
{
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
}

const int nQuery = query.rows;

_matches.create(3, nQuery, CV_32SC1);
GpuMat matches = _matches.getGpuMat();

GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
GpuMat imgIdx(1, nQuery, CV_32SC1, matches.ptr(1));
GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(2));

func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
}
void BFMatcher_Impl::matchConvert(InputArray _gpu_matches,
std::vector<DMatch>& matches)
{
Mat gpu_matches;
if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
{
_gpu_matches.getGpuMat().download(gpu_matches);
}
else
{
gpu_matches = _gpu_matches.getMat();
}

if (gpu_matches.empty())
{
matches.clear();
return;
}

CV_Assert( (gpu_matches.type() == CV_32SC1) && (gpu_matches.rows == 2 || gpu_matches.rows == 3) );

const int nQuery = gpu_matches.cols;

matches.clear();
matches.reserve(nQuery);

const int* trainIdxPtr = NULL;
const int* imgIdxPtr = NULL;
const float* distancePtr = NULL;

if (gpu_matches.rows == 2)
{
trainIdxPtr = gpu_matches.ptr<int>(0);
distancePtr = gpu_matches.ptr<float>(1);
}
else
{
trainIdxPtr = gpu_matches.ptr<int>(0);
imgIdxPtr = gpu_matches.ptr<int>(1);
distancePtr = gpu_matches.ptr<float>(2);
}

for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
{
const int trainIdx = trainIdxPtr[queryIdx];
if (trainIdx == -1)
continue;

const int imgIdx = imgIdxPtr ? imgIdxPtr[queryIdx] : 0;
const float distance = distancePtr[queryIdx];

DMatch m(queryIdx, trainIdx, imgIdx, distance);

matches.push_back(m);
}
}
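// matchConvert accepts either the GpuMat produced by matchAsync or a CPU copy of it, so a
// caller that has already downloaded the result can reuse it. A sketch (d_matches as produced
// by matchAsync, matcher obtained from createBFMatcher):
//
// Mat h_matches;
// d_matches.download(h_matches);
// std::vector<DMatch> result;
// matcher->matchConvert(h_matches, result); // equivalent to passing d_matches directly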
//
// knn match
//

void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
std::vector<std::vector<DMatch> >& matches,
int k,
InputArray _mask,
bool compactResult)
{
GpuMat d_matches;
knnMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, k, _mask);
knnMatchConvert(d_matches, matches, compactResult);
}
void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors,
std::vector<std::vector<DMatch> >& matches,
int k,
const std::vector<GpuMat>& masks,
bool compactResult)
{
if (k == 2)
{
GpuMat d_matches;
knnMatchAsync(_queryDescriptors, d_matches, k, masks);
knnMatchConvert(d_matches, matches, compactResult);
}
else
{
const GpuMat query = _queryDescriptors.getGpuMat();
if (query.empty() || trainDescCollection_.empty())
{
matches.clear();
return;
}
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
std::vector< std::vector<DMatch> > curMatches;
std::vector<DMatch> temp;
temp.reserve(2 * k);
matches.resize(query.rows);
for (size_t i = 0; i < matches.size(); ++i)
matches[i].reserve(k);
for (size_t imgIdx = 0; imgIdx < trainDescCollection_.size(); ++imgIdx)
{
knnMatch(query, trainDescCollection_[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]);
for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
{
std::vector<DMatch>& localMatch = curMatches[queryIdx];
std::vector<DMatch>& globalMatch = matches[queryIdx];
for (size_t i = 0; i < localMatch.size(); ++i)
localMatch[i].imgIdx = imgIdx;
temp.clear();
std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), std::back_inserter(temp));
globalMatch.clear();
const size_t count = std::min(static_cast<size_t>(k), temp.size());
std::copy(temp.begin(), temp.begin() + count, std::back_inserter(globalMatch));
}
}
if (compactResult)
{
std::vector< std::vector<DMatch> >::iterator new_end = std::remove_if(matches.begin(), matches.end(), std::mem_fun_ref(&std::vector<DMatch>::empty));
matches.erase(new_end, matches.end());
}
}
}
void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
OutputArray _matches,
int k,
InputArray _mask,
Stream& stream)
{
using namespace cv::cuda::device::bf_knnmatch;
const GpuMat query = _queryDescriptors.getGpuMat();
const GpuMat train = _trainDescriptors.getGpuMat();
const GpuMat mask = _mask.getGpuMat();
if (query.empty() || train.empty())
{
_matches.release();
return;
}

CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
CV_Assert( train.cols == query.cols && train.type() == query.type() );
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,

@@ -476,108 +615,68 @@ void cv::cuda::BFMatcher_CUDA::knnMatchSingle(const GpuMat& query, const GpuMat&

matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
};
const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

const caller_t func = callers[query.depth()];
if (func == 0)
{
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
}

const int nQuery = query.rows;
const int nTrain = train.rows;

GpuMat trainIdx, distance, allDist;
if (k == 2)
{
_matches.create(2, nQuery, CV_32SC2);
GpuMat matches = _matches.getGpuMat();

trainIdx = GpuMat(1, nQuery, CV_32SC2, matches.ptr(0));
distance = GpuMat(1, nQuery, CV_32FC2, matches.ptr(1));
}
else
{
_matches.create(2 * nQuery, k, CV_32SC1);
GpuMat matches = _matches.getGpuMat();

trainIdx = GpuMat(nQuery, k, CV_32SC1, matches.ptr(0), matches.step);
distance = GpuMat(nQuery, k, CV_32FC1, matches.ptr(nQuery), matches.step);

BufferPool pool(stream);
allDist = pool.getBuffer(nQuery, nTrain, CV_32FC1);
}

trainIdx.setTo(Scalar::all(-1), stream);

func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
}
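// Layout produced above: for k == 2 the result is a 2 x nQuery CV_32SC2 matrix (row 0 holds
// pairs of train indices, row 1 pairs of distances reinterpreted as CV_32FC2); otherwise it is
// a (2 * nQuery) x k CV_32SC1 matrix whose top half holds indices and bottom half distances.
// knnMatchConvert below decodes both layouts.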
void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors,
OutputArray _matches,
int k,
const std::vector<GpuMat>& masks,
Stream& stream)
{
using namespace cv::cuda::device::bf_knnmatch;

if (k != 2)
{
CV_Error(Error::StsNotImplemented, "only k=2 mode is supported for now");
}

const GpuMat query = _queryDescriptors.getGpuMat();

if (query.empty() || trainDescCollection_.empty())
{
_matches.release();
return;
}

CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

GpuMat trainCollection, maskCollection;
makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
...@@ -602,160 +701,165 @@ void cv::cuda::BFMatcher_CUDA::knnMatch2Collection(const GpuMat& query, const Gp ...@@ -602,160 +701,165 @@ void cv::cuda::BFMatcher_CUDA::knnMatch2Collection(const GpuMat& query, const Gp
         match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
     };

-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
-
-    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
+    const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
+
+    const caller_t func = callers[query.depth()];
+    if (func == 0)
+    {
+        CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
+    }

     const int nQuery = query.rows;

-    ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
-    ensureSizeIsEnough(1, nQuery, CV_32SC2, imgIdx);
-    ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
+    _matches.create(3, nQuery, CV_32SC2);
+    GpuMat matches = _matches.getGpuMat();
+
+    GpuMat trainIdx(1, nQuery, CV_32SC2, matches.ptr(0));
+    GpuMat imgIdx(1, nQuery, CV_32SC2, matches.ptr(1));
+    GpuMat distance(1, nQuery, CV_32FC2, matches.ptr(2));

     trainIdx.setTo(Scalar::all(-1), stream);

-    caller_t func = callers[query.depth()];
-    CV_Assert(func != 0);
-
     func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
 }
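Note (sketch, not from the diff): in the k == 2 collection path the result is a 3 x nQuery CV_32SC2 buffer: row 0 holds the two train indices per query, row 1 the two image indices, row 2 the two distances. The distances are float bits stored in the int matrix; the GpuMat views above rely on int and float sharing 4-byte elements. A downloaded copy could be read like this:

    cv::Mat h;                                    // host copy of _matches (3 x nQuery, CV_32SC2)
    const int*   trainIdxRow = h.ptr<int>(0);     // 2 entries per query
    const float* distanceRow = h.ptr<float>(2);   // reinterpreted float bits
    // for some query index queryIdx:
    const int   bestTrain  = trainIdxRow[2 * queryIdx];
    const float bestDist   = distanceRow[2 * queryIdx];
    const float secondDist = distanceRow[2 * queryIdx + 1];  // runner-up, e.g. for a ratio test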
-void cv::cuda::BFMatcher_CUDA::knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat imgIdxCPU(imgIdx);
-    Mat distanceCPU(distance);
-
-    knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult);
-}
-
-void cv::cuda::BFMatcher_CUDA::knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC2);
-    CV_Assert(imgIdx.type() == CV_32SC2 && imgIdx.cols == trainIdx.cols);
-    CV_Assert(distance.type() == CV_32FC2 && distance.cols == trainIdx.cols);
-
-    const int nQuery = trainIdx.cols;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int* trainIdx_ptr = trainIdx.ptr<int>();
-    const int* imgIdx_ptr = imgIdx.ptr<int>();
-    const float* distance_ptr = distance.ptr<float>();
-
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
-    {
-        matches.push_back(std::vector<DMatch>());
-        std::vector<DMatch>& curMatches = matches.back();
-        curMatches.reserve(2);
-
-        for (int i = 0; i < 2; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
-        {
-            int _trainIdx = *trainIdx_ptr;
-
-            if (_trainIdx != -1)
-            {
-                int _imgIdx = *imgIdx_ptr;
-                float _distance = *distance_ptr;
-
-                DMatch m(queryIdx, _trainIdx, _imgIdx, _distance);
-
-                curMatches.push_back(m);
-            }
-        }
-
-        if (compactResult && curMatches.empty())
-            matches.pop_back();
-    }
-}
+void BFMatcher_Impl::knnMatchConvert(InputArray _gpu_matches,
+                                     std::vector< std::vector<DMatch> >& matches,
+                                     bool compactResult)
+{
+    Mat gpu_matches;
+    if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
+    {
+        _gpu_matches.getGpuMat().download(gpu_matches);
+    }
+    else
+    {
+        gpu_matches = _gpu_matches.getMat();
+    }
+
+    if (gpu_matches.empty())
+    {
+        matches.clear();
+        return;
+    }
+
+    CV_Assert( ((gpu_matches.type() == CV_32SC2) && (gpu_matches.rows == 2 || gpu_matches.rows == 3)) ||
+               (gpu_matches.type() == CV_32SC1) );
+
+    int nQuery = -1, k = -1;
+
+    const int* trainIdxPtr = NULL;
+    const int* imgIdxPtr = NULL;
+    const float* distancePtr = NULL;
+
+    if (gpu_matches.type() == CV_32SC2)
+    {
+        nQuery = gpu_matches.cols;
+        k = 2;
+
+        if (gpu_matches.rows == 2)
+        {
+            trainIdxPtr = gpu_matches.ptr<int>(0);
+            distancePtr = gpu_matches.ptr<float>(1);
+        }
+        else
+        {
+            trainIdxPtr = gpu_matches.ptr<int>(0);
+            imgIdxPtr = gpu_matches.ptr<int>(1);
+            distancePtr = gpu_matches.ptr<float>(2);
+        }
+    }
+    else
+    {
+        nQuery = gpu_matches.rows / 2;
+        k = gpu_matches.cols;
+
+        trainIdxPtr = gpu_matches.ptr<int>(0);
+        distancePtr = gpu_matches.ptr<float>(nQuery);
+    }
+
+    matches.clear();
+    matches.reserve(nQuery);
+
+    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
+    {
+        matches.push_back(std::vector<DMatch>());
+        std::vector<DMatch>& curMatches = matches.back();
+        curMatches.reserve(k);
+
+        for (int i = 0; i < k; ++i)
+        {
+            const int trainIdx = *trainIdxPtr;
+
+            if (trainIdx != -1)
+            {
+                const int imgIdx = imgIdxPtr ? *imgIdxPtr : 0;
+                const float distance = *distancePtr;
+
+                DMatch m(queryIdx, trainIdx, imgIdx, distance);
+
+                curMatches.push_back(m);
+            }
+
+            // pointers advance once per slot, even when the slot is empty
+            // (trainIdx == -1), so later reads stay aligned with the buffer
+            ++trainIdxPtr;
+            ++distancePtr;
+            if (imgIdxPtr)
+                ++imgIdxPtr;
+        }
+
+        if (compactResult && curMatches.empty())
+        {
+            matches.pop_back();
+        }
+    }
+}
-namespace
-{
-    struct ImgIdxSetter
-    {
-        explicit inline ImgIdxSetter(int imgIdx_) : imgIdx(imgIdx_) {}
-        inline void operator()(DMatch& m) const {m.imgIdx = imgIdx;}
-        int imgIdx;
-    };
-}
-
-void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, int k,
-    const std::vector<GpuMat>& masks, bool compactResult)
-{
-    if (k == 2)
-    {
-        GpuMat trainCollection;
-        GpuMat maskCollection;
-
-        makeGpuCollection(trainCollection, maskCollection, masks);
-
-        GpuMat trainIdx, imgIdx, distance;
-
-        knnMatch2Collection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection);
-        knnMatch2Download(trainIdx, imgIdx, distance, matches);
-    }
-    else
-    {
-        if (query.empty() || empty())
-            return;
-
-        std::vector< std::vector<DMatch> > curMatches;
-        std::vector<DMatch> temp;
-        temp.reserve(2 * k);
-
-        matches.resize(query.rows);
-        for_each(matches.begin(), matches.end(), bind2nd(mem_fun_ref(&std::vector<DMatch>::reserve), k));
-
-        for (size_t imgIdx = 0, size = trainDescCollection.size(); imgIdx < size; ++imgIdx)
-        {
-            knnMatch(query, trainDescCollection[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]);
-
-            for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
-            {
-                std::vector<DMatch>& localMatch = curMatches[queryIdx];
-                std::vector<DMatch>& globalMatch = matches[queryIdx];
-
-                for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast<int>(imgIdx)));
-
-                temp.clear();
-                merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp));
-
-                globalMatch.clear();
-                const size_t count = std::min((size_t)k, temp.size());
-                copy(temp.begin(), temp.begin() + count, back_inserter(globalMatch));
-            }
-        }
-
-        if (compactResult)
-        {
-            std::vector< std::vector<DMatch> >::iterator new_end = remove_if(matches.begin(), matches.end(), mem_fun_ref(&std::vector<DMatch>::empty));
-            matches.erase(new_end, matches.end());
-        }
-    }
-}
-
-////////////////////////////////////////////////////////////////////
-// RadiusMatch
-
-void cv::cuda::BFMatcher_CUDA::radiusMatchSingle(const GpuMat& query, const GpuMat& train,
-    GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
-    const GpuMat& mask, Stream& stream)
-{
-    if (query.empty() || train.empty())
-        return;
-
-    using namespace cv::cuda::device::bf_radius_match;
+//
+// radius match
+//
+
+void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
+                                 std::vector<std::vector<DMatch> >& matches,
+                                 float maxDistance,
+                                 InputArray _mask,
+                                 bool compactResult)
+{
+    GpuMat d_matches;
+    radiusMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, maxDistance, _mask);
+    radiusMatchConvert(d_matches, matches, compactResult);
+}
+
+void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors,
+                                 std::vector<std::vector<DMatch> >& matches,
+                                 float maxDistance,
+                                 const std::vector<GpuMat>& masks,
+                                 bool compactResult)
+{
+    GpuMat d_matches;
+    radiusMatchAsync(_queryDescriptors, d_matches, maxDistance, masks);
+    radiusMatchConvert(d_matches, matches, compactResult);
+}
+
+void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
+                                      OutputArray _matches,
+                                      float maxDistance,
+                                      InputArray _mask,
+                                      Stream& stream)
+{
+    using namespace cv::cuda::device::bf_radius_match;
+
+    const GpuMat query = _queryDescriptors.getGpuMat();
+    const GpuMat train = _trainDescriptors.getGpuMat();
+    const GpuMat mask = _mask.getGpuMat();
+
+    if (query.empty() || train.empty())
+    {
+        _matches.release();
+        return;
+    }
+
+    CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
+    CV_Assert( train.cols == query.cols && train.type() == query.type() );
+    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

     typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                              const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
@@ -780,108 +884,51 @@ void cv::cuda::BFMatcher_CUDA::radiusMatchSingle(const GpuMat& query, const GpuM
         matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
     };

-    const int nQuery = query.rows;
-    const int nTrain = train.rows;
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-    CV_Assert(train.type() == query.type() && train.cols == query.cols);
-    CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size()));
-    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
-
-    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
-
-    ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches);
-    if (trainIdx.empty())
-    {
-        ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32SC1, trainIdx);
-        ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance);
-    }
-
-    nMatches.setTo(Scalar::all(0), stream);
-
-    caller_t func = callers[query.depth()];
-    CV_Assert(func != 0);
-
-    func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));
-}
-
-void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || distance.empty() || nMatches.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat distanceCPU(distance);
-    Mat nMatchesCPU(nMatches);
-
-    radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
-}
-
-void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || distance.empty() || nMatches.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC1);
-    CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());
-    CV_Assert(nMatches.type() == CV_32SC1 && nMatches.cols == trainIdx.rows);
-
-    const int nQuery = trainIdx.rows;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int* nMatches_ptr = nMatches.ptr<int>();
-
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
-    {
-        const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
-        const float* distance_ptr = distance.ptr<float>(queryIdx);
-
-        const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
-
-        if (nMatched == 0)
-        {
-            if (!compactResult)
-                matches.push_back(std::vector<DMatch>());
-            continue;
-        }
-
-        matches.push_back(std::vector<DMatch>(nMatched));
-        std::vector<DMatch>& curMatches = matches.back();
-
-        for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++distance_ptr)
-        {
-            int _trainIdx = *trainIdx_ptr;
-            float _distance = *distance_ptr;
-
-            DMatch m(queryIdx, _trainIdx, 0, _distance);
-
-            curMatches[i] = m;
-        }
-
-        sort(curMatches.begin(), curMatches.end());
-    }
-}
-
-void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat& query, const GpuMat& train,
-    std::vector< std::vector<DMatch> >& matches, float maxDistance, const GpuMat& mask, bool compactResult)
-{
-    GpuMat trainIdx, distance, nMatches;
-    radiusMatchSingle(query, train, trainIdx, distance, nMatches, maxDistance, mask);
-    radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult);
-}
+    const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
+
+    const caller_t func = callers[query.depth()];
+    if (func == 0)
+    {
+        CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
+    }
+
+    const int nQuery = query.rows;
+    const int nTrain = train.rows;
+
+    const int cols = std::max((nTrain / 100), nQuery);
+
+    _matches.create(2 * nQuery + 1, cols, CV_32SC1);
+    GpuMat matches = _matches.getGpuMat();
+
+    GpuMat trainIdx(nQuery, cols, CV_32SC1, matches.ptr(0), matches.step);
+    GpuMat distance(nQuery, cols, CV_32FC1, matches.ptr(nQuery), matches.step);
+    GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(2 * nQuery));
+
+    nMatches.setTo(Scalar::all(0), stream);
+
+    func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));
+}
+
+void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors,
+                                      OutputArray _matches,
+                                      float maxDistance,
+                                      const std::vector<GpuMat>& masks,
+                                      Stream& stream)
+{
+    using namespace cv::cuda::device::bf_radius_match;
+
+    const GpuMat query = _queryDescriptors.getGpuMat();
+
+    if (query.empty() || trainDescCollection_.empty())
+    {
+        _matches.release();
+        return;
+    }
+
+    CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
-void cv::cuda::BFMatcher_CUDA::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches,
-    float maxDistance, const std::vector<GpuMat>& masks, Stream& stream)
-{
-    if (query.empty() || empty())
-        return;
-
-    using namespace cv::cuda::device::bf_radius_match;
+    GpuMat trainCollection, maskCollection;
+    makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

     typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                              const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
@@ -906,106 +953,125 @@ void cv::cuda::BFMatcher_CUDA::radiusMatchCollection(const GpuMat& query, GpuMat
         matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
     };

-    const int nQuery = query.rows;
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-    CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size() && trainIdx.size() == imgIdx.size()));
-    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
-
-    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
-
-    ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches);
-    if (trainIdx.empty())
-    {
-        ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32SC1, trainIdx);
-        ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32SC1, imgIdx);
-        ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32FC1, distance);
-    }
-
-    nMatches.setTo(Scalar::all(0), stream);
-
-    caller_t func = callers[query.depth()];
-    CV_Assert(func != 0);
-
-    std::vector<PtrStepSzb> trains_(trainDescCollection.begin(), trainDescCollection.end());
+    const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
+
+    const caller_t func = callers[query.depth()];
+    if (func == 0)
+    {
+        CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
+    }
+
+    const int nQuery = query.rows;
+
+    _matches.create(3 * nQuery + 1, nQuery, CV_32FC1);
+    GpuMat matches = _matches.getGpuMat();
+
+    GpuMat trainIdx(nQuery, nQuery, CV_32SC1, matches.ptr(0), matches.step);
+    GpuMat imgIdx(nQuery, nQuery, CV_32SC1, matches.ptr(nQuery), matches.step);
+    GpuMat distance(nQuery, nQuery, CV_32FC1, matches.ptr(2 * nQuery), matches.step);
+    GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(3 * nQuery));
+
+    nMatches.setTo(Scalar::all(0), stream);
+
+    std::vector<PtrStepSzb> trains_(trainDescCollection_.begin(), trainDescCollection_.end());
     std::vector<PtrStepSzb> masks_(masks.begin(), masks.end());

     func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
          trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream));
 }
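Note (sketch, mirroring the convert code below): the collection result is created as CV_32FC1 while the single-train result above is CV_32SC1. Both element types are 4 bytes wide, so viewing the index planes through int GpuMat headers is safe aliasing, and the element type doubles as a layout tag that radiusMatchConvert uses to tell the two packings apart:

    // CV_32SC1, 2*nQuery + 1 rows -> single-train layout (no imgIdx plane)
    // CV_32FC1, 3*nQuery + 1 rows -> collection layout  (with imgIdx plane)
    const bool isCollection = (gpu_matches.type() == CV_32FC1);
    const int nQuery = isCollection ? (gpu_matches.rows - 1) / 3
                                    : (gpu_matches.rows - 1) / 2;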
-void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat imgIdxCPU(imgIdx);
-    Mat distanceCPU(distance);
-    Mat nMatchesCPU(nMatches);
-
-    radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
-}
-
-void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC1);
-    CV_Assert(imgIdx.type() == CV_32SC1 && imgIdx.size() == trainIdx.size());
-    CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());
-    CV_Assert(nMatches.type() == CV_32SC1 && nMatches.cols == trainIdx.rows);
-
-    const int nQuery = trainIdx.rows;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int* nMatches_ptr = nMatches.ptr<int>();
-
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
-    {
-        const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
-        const int* imgIdx_ptr = imgIdx.ptr<int>(queryIdx);
-        const float* distance_ptr = distance.ptr<float>(queryIdx);
-
-        const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
-
-        if (nMatched == 0)
-        {
-            if (!compactResult)
-                matches.push_back(std::vector<DMatch>());
-            continue;
-        }
-
-        matches.push_back(std::vector<DMatch>());
-        std::vector<DMatch>& curMatches = matches.back();
-        curMatches.reserve(nMatched);
-
-        for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
-        {
-            int _trainIdx = *trainIdx_ptr;
-            int _imgIdx = *imgIdx_ptr;
-            float _distance = *distance_ptr;
-
-            DMatch m(queryIdx, _trainIdx, _imgIdx, _distance);
-
-            curMatches.push_back(m);
-        }
-
-        sort(curMatches.begin(), curMatches.end());
-    }
-}
+void BFMatcher_Impl::radiusMatchConvert(InputArray _gpu_matches,
+                                        std::vector< std::vector<DMatch> >& matches,
+                                        bool compactResult)
+{
+    Mat gpu_matches;
+    if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
+    {
+        _gpu_matches.getGpuMat().download(gpu_matches);
+    }
+    else
+    {
+        gpu_matches = _gpu_matches.getMat();
+    }
+
+    if (gpu_matches.empty())
+    {
+        matches.clear();
+        return;
+    }
+
+    CV_Assert( gpu_matches.type() == CV_32SC1 || gpu_matches.type() == CV_32FC1 );
+
+    int nQuery = -1;
+
+    const int* trainIdxPtr = NULL;
+    const int* imgIdxPtr = NULL;
+    const float* distancePtr = NULL;
+    const int* nMatchesPtr = NULL;
+
+    if (gpu_matches.type() == CV_32SC1)
+    {
+        nQuery = (gpu_matches.rows - 1) / 2;
+
+        trainIdxPtr = gpu_matches.ptr<int>(0);
+        distancePtr = gpu_matches.ptr<float>(nQuery);
+        nMatchesPtr = gpu_matches.ptr<int>(2 * nQuery);
+    }
+    else
+    {
+        nQuery = (gpu_matches.rows - 1) / 3;
+
+        trainIdxPtr = gpu_matches.ptr<int>(0);
+        imgIdxPtr = gpu_matches.ptr<int>(nQuery);
+        distancePtr = gpu_matches.ptr<float>(2 * nQuery);
+        nMatchesPtr = gpu_matches.ptr<int>(3 * nQuery);
+    }
+
+    matches.clear();
+    matches.reserve(nQuery);
+
+    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
+    {
+        const int nMatched = std::min(nMatchesPtr[queryIdx], gpu_matches.cols);
+
+        if (nMatched == 0)
+        {
+            if (!compactResult)
+            {
+                matches.push_back(std::vector<DMatch>());
+            }
+        }
+        else
+        {
+            matches.push_back(std::vector<DMatch>(nMatched));
+            std::vector<DMatch>& curMatches = matches.back();
+
+            for (int i = 0; i < nMatched; ++i)
+            {
+                const int trainIdx = trainIdxPtr[i];
+
+                const int imgIdx = imgIdxPtr ? imgIdxPtr[i] : 0;
+                const float distance = distancePtr[i];
+
+                DMatch m(queryIdx, trainIdx, imgIdx, distance);
+
+                curMatches[i] = m;
+            }
+
+            std::sort(curMatches.begin(), curMatches.end());
+        }
+
+        trainIdxPtr += gpu_matches.cols;
+        distancePtr += gpu_matches.cols;
+        if (imgIdxPtr)
+            imgIdxPtr += gpu_matches.cols;
+    }
+}
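Note (usage sketch, not part of the commit): every blocking match call now reduces to the same Async-then-Convert pair, so callers that want to overlap matching with other work can use the pieces directly. Assuming matcher, d_query and d_train already exist, and that the mask and stream parameters default to cv::noArray() and Stream::Null() as in the rest of the CUDA API:

    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_matches;
    matcher->radiusMatchAsync(d_query, d_train, d_matches, 0.5f, cv::noArray(), stream);
    // ... enqueue independent work here while matching runs ...
    stream.waitForCompletion();                   // results must be ready before convert
    std::vector< std::vector<cv::DMatch> > matches;
    matcher->radiusMatchConvert(d_matches, matches, false);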
-void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches,
-    float maxDistance, const std::vector<GpuMat>& masks, bool compactResult)
-{
-    GpuMat trainIdx, imgIdx, distance, nMatches;
-    radiusMatchCollection(query, trainIdx, imgIdx, distance, nMatches, maxDistance, masks);
-    radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult);
-}
+Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int norm)
+{
+    return makePtr<BFMatcher_Impl>(norm);
+}
#endif /* !defined (HAVE_CUDA) */
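Note (usage sketch, not part of the commit): with the factory in place, client code matches through the abstract interface. Assuming d_query and d_train are single-channel float GpuMat descriptors:

    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
        cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_L2);

    std::vector< std::vector<cv::DMatch> > knn;
    matcher->knnMatch(d_query, d_train, knn, 2);   // blocking convenience overload

    std::vector<cv::DMatch> good;                  // Lowe-style ratio test
    for (size_t i = 0; i < knn.size(); ++i)
    {
        if (knn[i].size() == 2 && knn[i][0].distance < 0.8f * knn[i][1].distance)
            good.push_back(knn[i][0]);
    }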
@@ -285,7 +285,8 @@ PARAM_TEST_CASE(BruteForceMatcher, cv::cuda::DeviceInfo, NormCode, DescriptorSiz
 CUDA_TEST_P(BruteForceMatcher, Match_Single)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+        cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

     cv::cuda::GpuMat mask;
     if (useMask)
@@ -295,7 +296,7 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single)
     }

     std::vector<cv::DMatch> matches;
-    matcher.match(loadMat(query), loadMat(train), matches, mask);
+    matcher->match(loadMat(query), loadMat(train), matches, mask);

     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
@@ -312,13 +313,14 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single)
 CUDA_TEST_P(BruteForceMatcher, Match_Collection)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+        cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

     cv::cuda::GpuMat d_train(train);

     // make add() twice to test such case
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));

     // prepare masks (make first nearest match illegal)
     std::vector<cv::cuda::GpuMat> masks(2);
@@ -331,9 +333,9 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection)
     std::vector<cv::DMatch> matches;

     if (useMask)
-        matcher.match(cv::cuda::GpuMat(query), matches, masks);
+        matcher->match(cv::cuda::GpuMat(query), matches, masks);
     else
-        matcher.match(cv::cuda::GpuMat(query), matches);
+        matcher->match(cv::cuda::GpuMat(query), matches);

     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
@@ -366,7 +368,8 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection)
 CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+        cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

     const int knn = 2;
@@ -378,7 +381,7 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
     }

     std::vector< std::vector<cv::DMatch> > matches;
-    matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
+    matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask);

     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
@@ -405,7 +408,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
 CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+        cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

     const int knn = 3;
@@ -417,7 +421,7 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
     }

     std::vector< std::vector<cv::DMatch> > matches;
-    matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
+    matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask);

     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
@@ -444,15 +448,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
 CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+        cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

     const int knn = 2;

     cv::cuda::GpuMat d_train(train);

     // make add() twice to test such case
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));

     // prepare masks (make first nearest match illegal)
     std::vector<cv::cuda::GpuMat> masks(2);
@@ -466,9 +471,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
     std::vector< std::vector<cv::DMatch> > matches;

     if (useMask)
-        matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
+        matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
     else
-        matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn);
+        matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn);

     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
@@ -506,15 +511,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
 CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+        cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

     const int knn = 3;

     cv::cuda::GpuMat d_train(train);

     // make add() twice to test such case
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));

     // prepare masks (make first nearest match illegal)
     std::vector<cv::cuda::GpuMat> masks(2);
@@ -528,9 +534,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
     std::vector< std::vector<cv::DMatch> > matches;

     if (useMask)
-        matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
+        matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
     else
-        matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn);
+        matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn);

     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
@@ -568,7 +574,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
 CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+        cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

     const float radius = 1.f / countFactor;
@@ -577,7 +584,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
     try
     {
         std::vector< std::vector<cv::DMatch> > matches;
-        matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
+        matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius);
     }
     catch (const cv::Exception& e)
     {
@@ -594,7 +601,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
     }

     std::vector< std::vector<cv::DMatch> > matches;
-    matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius, mask);
+    matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius, mask);

     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
@@ -617,7 +624,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
 CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+        cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

     const int n = 3;
     const float radius = 1.f / countFactor * n;
@@ -625,8 +633,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
     cv::cuda::GpuMat d_train(train);

     // make add() twice to test such case
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));

     // prepare masks (make first nearest match illegal)
     std::vector<cv::cuda::GpuMat> masks(2);
@@ -642,7 +650,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
     try
     {
        std::vector< std::vector<cv::DMatch> > matches;
-        matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
+        matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
     }
     catch (const cv::Exception& e)
     {
@@ -654,9 +662,9 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
     std::vector< std::vector<cv::DMatch> > matches;

     if (useMask)
-        matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
+        matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
     else
-        matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius);
+        matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius);

     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
......
@@ -154,7 +154,7 @@ void CpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
     matches_info.matches.clear();

-    Ptr<DescriptorMatcher> matcher;
+    Ptr<cv::DescriptorMatcher> matcher;
 #if 0 // TODO check this
     if (ocl::useOpenCL())
     {
@@ -220,13 +220,13 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
     descriptors1_.upload(features1.descriptors);
     descriptors2_.upload(features2.descriptors);

-    BFMatcher_CUDA matcher(NORM_L2);
+    Ptr<cuda::DescriptorMatcher> matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2);

     MatchesSet matches;

     // Find 1->2 matches
     pair_matches.clear();
-    matcher.knnMatchSingle(descriptors1_, descriptors2_, train_idx_, distance_, all_dist_, 2);
-    matcher.knnMatchDownload(train_idx_, distance_, pair_matches);
+    matcher->knnMatch(descriptors1_, descriptors2_, pair_matches, 2);
     for (size_t i = 0; i < pair_matches.size(); ++i)
     {
         if (pair_matches[i].size() < 2)
@@ -242,8 +242,7 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
     // Find 2->1 matches
     pair_matches.clear();
-    matcher.knnMatchSingle(descriptors2_, descriptors1_, train_idx_, distance_, all_dist_, 2);
-    matcher.knnMatchDownload(train_idx_, distance_, pair_matches);
+    matcher->knnMatch(descriptors2_, descriptors1_, pair_matches, 2);
     for (size_t i = 0; i < pair_matches.size(); ++i)
     {
         if (pair_matches[i].size() < 2)
......
@@ -379,14 +379,14 @@ TEST(BruteForceMatcher)
     // Init CUDA matcher

-    cuda::BFMatcher_CUDA d_matcher(NORM_L2);
+    Ptr<cuda::DescriptorMatcher> d_matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2);

     cuda::GpuMat d_query(query);
     cuda::GpuMat d_train(train);

     // Output
     vector< vector<DMatch> > matches(2);
-    cuda::GpuMat d_trainIdx, d_distance, d_allDist, d_nMatches;
+    cuda::GpuMat d_matches;

     SUBTEST << "match";
@@ -396,10 +396,10 @@ TEST(BruteForceMatcher)
     matcher.match(query, train, matches[0]);
     CPU_OFF;

-    d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
+    d_matcher->matchAsync(d_query, d_train, d_matches);
     CUDA_ON;
-    d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
+    d_matcher->matchAsync(d_query, d_train, d_matches);
     CUDA_OFF;

     SUBTEST << "knnMatch";
@@ -410,10 +410,10 @@ TEST(BruteForceMatcher)
     matcher.knnMatch(query, train, matches, 2);
     CPU_OFF;

-    d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
+    d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
     CUDA_ON;
-    d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
+    d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
     CUDA_OFF;

     SUBTEST << "radiusMatch";
@@ -426,12 +426,10 @@ TEST(BruteForceMatcher)
     matcher.radiusMatch(query, train, matches, max_distance);
     CPU_OFF;

-    d_trainIdx.release();
-
-    d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
+    d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
     CUDA_ON;
-    d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
+    d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
     CUDA_OFF;
 }
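Note (assumption, not in the benchmark): the timed loops above measure only the device-side call; as with the old *Single/*Download pairs, bringing results back for inspection is a separate convert step, e.g. after the radiusMatch subtest:

    std::vector< std::vector<DMatch> > h_matches;
    d_matcher->radiusMatchConvert(d_matches, h_matches);   // implicit download + unpack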
......