opencv / Commits / 3a844444

Commit 3a844444 authored Jan 19, 2015 by Vadim Pisarevsky

Merge pull request #3596 from jet47:cuda-features2d-refactoring

Parents: b6023eab 5f1282af

Showing 10 changed files with 1817 additions and 1485 deletions.
modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp   +370 -328
modules/cudafeatures2d/perf/perf_features2d.cpp             +21  -18
modules/cudafeatures2d/src/brute_force_matcher.cpp          +754 -688
modules/cudafeatures2d/src/cuda/fast.cu                     +15  -15
modules/cudafeatures2d/src/fast.cpp                         +129 -91
modules/cudafeatures2d/src/feature2d_async.cpp              +85  -0
modules/cudafeatures2d/src/orb.cpp                          +376 -283
modules/cudafeatures2d/test/test_features2d.cpp             +48  -40
modules/stitching/src/matchers.cpp                          +5   -6
samples/gpu/performance/tests.cpp                           +14  -16
modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp

@@ -48,6 +48,7 @@
 #endif

 #include "opencv2/core/cuda.hpp"
 #include "opencv2/features2d.hpp"
+#include "opencv2/cudafilters.hpp"

 /**
...
@@ -62,382 +63,423 @@ namespace cv { namespace cuda {
 //! @addtogroup cudafeatures2d
 //! @{

-/** @brief Brute-force descriptor matcher.
-
-For each descriptor in the first set, this matcher finds the closest descriptor in the second set
-by trying each one. This descriptor matcher supports masking permissible matches between descriptor
-sets.
-
-The class BFMatcher_CUDA has an interface similar to the class DescriptorMatcher. It has two groups
-of match methods: for matching descriptors of one image with another image or with an image set.
-Also, all functions have an alternative to save results either to the GPU memory or to the CPU
-memory.
-
-@sa DescriptorMatcher, BFMatcher
- */
-class CV_EXPORTS BFMatcher_CUDA
-{
-public:
-    explicit BFMatcher_CUDA(int norm = cv::NORM_L2);
-
-    //! Add descriptors to train descriptor collection
-    void add(const std::vector<GpuMat>& descCollection);
-
-    //! Get train descriptors collection
-    const std::vector<GpuMat>& getTrainDescriptors() const;
-
-    //! Clear train descriptors collection
-    void clear();
-
-    //! Return true if there are no train descriptors in the collection
-    bool empty() const;
-
-    //! Return true if the matcher supports mask in match methods
-    bool isMaskSupported() const;
-
-    //! Find one best match for each query descriptor
-    void matchSingle(const GpuMat& query, const GpuMat& train,
-                     GpuMat& trainIdx, GpuMat& distance,
-                     const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
-
-    //! Download trainIdx and distance and convert it to CPU vector with DMatch
-    static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches);
-    //! Convert trainIdx and distance to vector with DMatch
-    static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches);
-
-    //! Find one best match for each query descriptor
-    void match(const GpuMat& query, const GpuMat& train, std::vector<DMatch>& matches,
-               const GpuMat& mask = GpuMat());
-
-    //! Make gpu collection of trains and masks in suitable format for matchCollection function
-    void makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection,
-                           const std::vector<GpuMat>& masks = std::vector<GpuMat>());
-
-    //! Find one best match from train collection for each query descriptor
-    void matchCollection(const GpuMat& query, const GpuMat& trainCollection,
-                         GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
-                         const GpuMat& masks = GpuMat(), Stream& stream = Stream::Null());
-
-    //! Download trainIdx, imgIdx and distance and convert it to vector with DMatch
-    static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
-                              std::vector<DMatch>& matches);
-    //! Convert trainIdx, imgIdx and distance to vector with DMatch
-    static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
-                             std::vector<DMatch>& matches);
-
-    //! Find one best match from train collection for each query descriptor.
-    void match(const GpuMat& query, std::vector<DMatch>& matches,
-               const std::vector<GpuMat>& masks = std::vector<GpuMat>());
-
-    //! Find k best matches for each query descriptor (in increasing order of distances)
-    void knnMatchSingle(const GpuMat& query, const GpuMat& train,
-                        GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
-                        const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
-
-    //! Download trainIdx and distance and convert it to vector with DMatch.
-    //! compactResult is used when mask is not empty. If compactResult is false the matches
-    //! vector has the same size as queryDescriptors rows; if true, it does not contain
-    //! matches for fully masked-out query descriptors.
-    static void knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
-                                 std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-    //! Convert trainIdx and distance to vector with DMatch
-    static void knnMatchConvert(const Mat& trainIdx, const Mat& distance,
-                                std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-
-    //! Find k best matches for each query descriptor (in increasing order of distances).
-    void knnMatch(const GpuMat& query, const GpuMat& train,
-                  std::vector< std::vector<DMatch> >& matches, int k,
-                  const GpuMat& mask = GpuMat(), bool compactResult = false);
-
-    //! Find k best matches from train collection for each query descriptor (in increasing order of distances)
-    void knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
-                             GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
-                             const GpuMat& maskCollection = GpuMat(), Stream& stream = Stream::Null());
-
-    //! @see BFMatcher_CUDA::knnMatchDownload
-    static void knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
-                                  std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-    //! @see BFMatcher_CUDA::knnMatchConvert
-    static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
-                                 std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-
-    //! Find k best matches for each query descriptor (in increasing order of distances).
-    void knnMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, int k,
-                  const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
-
-    //! Find best matches for each query descriptor which have distance less than maxDistance.
-    //! nMatches.at<int>(0, queryIdx) will contain the match count for queryIdx. Note that nMatches
-    //! can be greater than trainIdx.cols, which means the matcher did not find all matches because
-    //! it did not have enough memory. If trainIdx is empty, trainIdx and distance are created with
-    //! size nQuery x max((nTrain / 100), 10); otherwise the user can pass own allocated trainIdx
-    //! and distance of size nQuery x nMaxMatches. Matches are not sorted.
-    void radiusMatchSingle(const GpuMat& query, const GpuMat& train,
-                           GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
-                           const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
-
-    //! Download trainIdx, nMatches and distance and convert it to vector with DMatch.
-    //! Matches will be sorted in increasing order of distances.
-    static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
-                                    std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-    //! Convert trainIdx, nMatches and distance to vector with DMatch.
-    static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
-                                   std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-
-    //! Find best matches for each query descriptor which have distance less than maxDistance
-    //! (in increasing order of distances).
-    void radiusMatch(const GpuMat& query, const GpuMat& train,
-                     std::vector< std::vector<DMatch> >& matches, float maxDistance,
-                     const GpuMat& mask = GpuMat(), bool compactResult = false);
-
-    //! Find best matches for each query descriptor which have distance less than maxDistance.
-    //! If trainIdx is empty, trainIdx and distance are created with size
-    //! nQuery x max((nQuery / 100), 10). Matches are not sorted.
-    void radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
-                               GpuMat& nMatches, float maxDistance,
-                               const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
-                               Stream& stream = Stream::Null());
-
-    //! Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
-    static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
-                                    const GpuMat& nMatches, std::vector< std::vector<DMatch> >& matches,
-                                    bool compactResult = false);
-    //! Convert trainIdx, imgIdx, nMatches and distance to vector with DMatch.
-    static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
-                                   const Mat& nMatches, std::vector< std::vector<DMatch> >& matches,
-                                   bool compactResult = false);
-
-    //! Find best matches from train collection for each query descriptor which have distance less than
-    //! maxDistance (in increasing order of distances).
-    void radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance,
-                     const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
-
-    int norm;
-
-private:
-    std::vector<GpuMat> trainDescCollection;
-};
-
-/** @brief Class used for corner detection using the FAST algorithm.
- */
-class CV_EXPORTS FAST_CUDA
-{
-public:
-    enum
-    {
-        LOCATION_ROW = 0,
-        RESPONSE_ROW,
-        ROWS_COUNT
-    };
-
-    //! all features have same size
-    static const int FEATURE_SIZE = 7;
-
-    /** @brief Constructor.
-    @param threshold Threshold on difference between intensity of the central pixel and pixels on a
-    circle around this pixel.
-    @param nonmaxSuppression If it is true, non-maximum suppression is applied to detected corners
-    (keypoints).
-    @param keypointsRatio Inner buffer size for keypoints store is determined as (keypointsRatio \*
-    image_width \* image_height).
-     */
-    explicit FAST_CUDA(int threshold, bool nonmaxSuppression = true, double keypointsRatio = 0.05);
-
-    /** @brief Finds the keypoints using FAST detector.
-    @param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are
-    supported.
-    @param mask Optional input mask that marks the regions where we should detect features.
-    @param keypoints The output vector of keypoints. Can be stored both in CPU and GPU memory. For GPU
-    memory:
-    -   keypoints.ptr\<Vec2s\>(LOCATION_ROW)[i] will contain location of i'th point
-    -   keypoints.ptr\<float\>(RESPONSE_ROW)[i] will contain response of i'th point (if non-maximum
-        suppression is applied)
-     */
-    void operator ()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
-    /** @overload */
-    void operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
-
-    /** @brief Download keypoints from GPU to CPU memory.
-     */
-    static void downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-    /** @brief Converts keypoints from CUDA representation to vector of KeyPoint.
-     */
-    static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);
-
-    /** @brief Releases inner buffer memory.
-     */
-    void release();
-
-    bool nonmaxSuppression;
-    int threshold;
-
-    //! max keypoints = keypointsRatio * img.size().area()
-    double keypointsRatio;
-
-    /** @brief Find keypoints and compute their response if nonmaxSuppression is true.
-    @param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are
-    supported.
-    @param mask Optional input mask that marks the regions where we should detect features.
-    The function returns the count of detected keypoints.
-     */
-    int calcKeyPointsLocation(const GpuMat& image, const GpuMat& mask);
-
-    /** @brief Gets final array of keypoints.
-    @param keypoints The output vector of keypoints.
-    The function performs non-max suppression if needed and returns the final count of keypoints.
-     */
-    int getKeyPoints(GpuMat& keypoints);
-
-private:
-    GpuMat kpLoc_;
-    int count_;
-    GpuMat score_;
-    GpuMat d_keypoints_;
-};
-
-/** @brief Class for extracting ORB features and descriptors from an image.
- */
-class CV_EXPORTS ORB_CUDA
-{
-public:
-    enum
-    {
-        X_ROW = 0,
-        Y_ROW,
-        RESPONSE_ROW,
-        ANGLE_ROW,
-        OCTAVE_ROW,
-        SIZE_ROW,
-        ROWS_COUNT
-    };
-
-    enum
-    {
-        DEFAULT_FAST_THRESHOLD = 20
-    };
-
-    /** @brief Constructor.
-    @param nFeatures The number of desired features.
-    @param scaleFactor Coefficient by which we divide the dimensions from one scale pyramid level to
-    the next.
-    @param nLevels The number of levels in the scale pyramid.
-    @param edgeThreshold How far from the boundary the points should be.
-    @param firstLevel The level at which the image is given. If 1, that means we will also look at
-    the image scaleFactor times bigger.
-     */
-    explicit ORB_CUDA(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8,
-                      int edgeThreshold = 31, int firstLevel = 0, int WTA_K = 2,
-                      int scoreType = 0, int patchSize = 31);
-
-    //! Detects keypoints.
-    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
-    /** @overload */
-    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
-
-    /** @brief Detects keypoints and computes descriptors for them.
-    @param image Input 8-bit grayscale image.
-    @param mask Optional input mask that marks the regions where we should detect features.
-    @param keypoints The input/output vector of keypoints. Can be stored both in CPU and GPU memory.
-    For GPU memory:
-    -   keypoints.ptr\<float\>(X_ROW)[i] contains x coordinate of the i'th feature.
-    -   keypoints.ptr\<float\>(Y_ROW)[i] contains y coordinate of the i'th feature.
-    -   keypoints.ptr\<float\>(RESPONSE_ROW)[i] contains the response of the i'th feature.
-    -   keypoints.ptr\<float\>(ANGLE_ROW)[i] contains orientation of the i'th feature.
-    -   keypoints.ptr\<float\>(OCTAVE_ROW)[i] contains the octave of the i'th feature.
-    -   keypoints.ptr\<float\>(SIZE_ROW)[i] contains the size of the i'th feature.
-    @param descriptors Computed descriptors. If blurForDescriptor is true, image will be blurred
-    before descriptors calculation.
-     */
-    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
-                    GpuMat& descriptors);
-    /** @overload */
-    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors);
-
-    /** @brief Download keypoints from GPU to CPU memory.
-     */
-    static void downloadKeyPoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
-    /** @brief Converts keypoints from CUDA representation to vector of KeyPoint.
-     */
-    static void convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-    //! returns the descriptor size in bytes
-    inline int descriptorSize() const { return kBytes; }
-
-    inline void setFastParams(int threshold, bool nonmaxSuppression = true)
-    {
-        fastDetector_.threshold = threshold;
-        fastDetector_.nonmaxSuppression = nonmaxSuppression;
-    }
-
-    /** @brief Releases inner buffer memory.
-     */
-    void release();
-
-    //! if true, image will be blurred before descriptors calculation
-    bool blurForDescriptor;
-
-private:
-    enum { kBytes = 32 };
-
-    void buildScalePyramids(const GpuMat& image, const GpuMat& mask);
-    void computeKeyPointsPyramid();
-    void computeDescriptors(GpuMat& descriptors);
-    void mergeKeyPoints(GpuMat& keypoints);
-
-    int nFeatures_;
-    float scaleFactor_;
-    int nLevels_;
-    int edgeThreshold_;
-    int firstLevel_;
-    int WTA_K_;
-    int scoreType_;
-    int patchSize_;
-
-    //! The number of desired features per scale
-    std::vector<size_t> n_features_per_level_;
-
-    //! Points to compute BRIEF descriptors from
-    GpuMat pattern_;
-
-    std::vector<GpuMat> imagePyr_;
-    std::vector<GpuMat> maskPyr_;
-
-    GpuMat buf_;
-
-    std::vector<GpuMat> keyPointsPyr_;
-    std::vector<int> keyPointsCount_;
-
-    FAST_CUDA fastDetector_;
-
-    Ptr<cuda::Filter> blurFilter;
-
-    GpuMat d_keypoints_;
-};
+//
+// DescriptorMatcher
+//
+
+/** @brief Abstract base class for matching keypoint descriptors.
+
+It has two groups of match methods: for matching descriptors of an image with another image or with
+an image set.
+ */
+class CV_EXPORTS DescriptorMatcher : public cv::Algorithm
+{
+public:
+    //
+    // Factories
+    //
+
+    /** @brief Brute-force descriptor matcher.
+
+    For each descriptor in the first set, this matcher finds the closest descriptor in the second
+    set by trying each one. This descriptor matcher supports masking permissible matches of
+    descriptor sets.
+
+    @param normType One of NORM_L1, NORM_L2, NORM_HAMMING. L1 and L2 norms are preferable choices
+    for SIFT and SURF descriptors; NORM_HAMMING should be used with ORB, BRISK and BRIEF.
+     */
+    static Ptr<DescriptorMatcher> createBFMatcher(int normType = cv::NORM_L2);
+
+    //
+    // Utility
+    //
+
+    /** @brief Returns true if the descriptor matcher supports masking permissible matches.
+     */
+    virtual bool isMaskSupported() const = 0;
+
+    //
+    // Descriptor collection
+    //
+
+    /** @brief Adds descriptors to train a descriptor collection.
+
+    If the collection is not empty, the new descriptors are added to existing train descriptors.
+
+    @param descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from the
+    same train image.
+     */
+    virtual void add(const std::vector<GpuMat>& descriptors) = 0;
+
+    /** @brief Returns a constant link to the train descriptor collection.
+     */
+    virtual const std::vector<GpuMat>& getTrainDescriptors() const = 0;
+
+    /** @brief Clears the train descriptor collection.
+     */
+    virtual void clear() = 0;
+
+    /** @brief Returns true if there are no train descriptors in the collection.
+     */
+    virtual bool empty() const = 0;
+
+    /** @brief Trains a descriptor matcher (for example, the flann index). In all methods to match,
+    the method train() is run every time before matching.
+     */
+    virtual void train() = 0;
+
+    //
+    // 1 to 1 match
+    //
+
+    /** @brief Finds the best match for each descriptor from a query set (blocking version).
+
+    @param queryDescriptors Query set of descriptors.
+    @param trainDescriptors Train set of descriptors. This set is not added to the train
+    descriptors collection stored in the class object.
+    @param matches Matches. If a query descriptor is masked out in mask, no match is added for this
+    descriptor. So, matches size may be smaller than the query descriptors count.
+    @param mask Mask specifying permissible matches between an input query and train matrices of
+    descriptors.
+
+    In the first variant of this method, the train descriptors are passed as an input argument. In
+    the second variant, the train descriptors collection set by DescriptorMatcher::add is used.
+    Optional mask (or masks) can be passed to specify which query and training descriptors can be
+    matched: queryDescriptors[i] can be matched with trainDescriptors[j] only if
+    mask.at\<uchar\>(i,j) is non-zero.
+     */
+    virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
+                       std::vector<DMatch>& matches,
+                       InputArray mask = noArray()) = 0;
+    /** @overload */
+    virtual void match(InputArray queryDescriptors,
+                       std::vector<DMatch>& matches,
+                       const std::vector<GpuMat>& masks = std::vector<GpuMat>()) = 0;
+
+    /** @brief Finds the best match for each descriptor from a query set (asynchronous version).
+
+    @param matches Matches array stored in GPU memory. Internal representation is not defined.
+    Use DescriptorMatcher::matchConvert to retrieve results in standard representation.
+    @param stream CUDA stream.
+     */
+    virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
+                            OutputArray matches,
+                            InputArray mask = noArray(),
+                            Stream& stream = Stream::Null()) = 0;
+    /** @overload */
+    virtual void matchAsync(InputArray queryDescriptors,
+                            OutputArray matches,
+                            const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                            Stream& stream = Stream::Null()) = 0;
+
+    /** @brief Converts matches array from internal representation to standard matches vector.
+
+    The method is supposed to be used with DescriptorMatcher::matchAsync to get the final result.
+    Call this method only after DescriptorMatcher::matchAsync is completed (i.e. after
+    synchronization).
+
+    @param gpu_matches Matches, returned from DescriptorMatcher::matchAsync.
+    @param matches Vector of DMatch objects.
+     */
+    virtual void matchConvert(InputArray gpu_matches, std::vector<DMatch>& matches) = 0;
+
+    //
+    // knn match
+    //
+
+    /** @brief Finds the k best matches for each descriptor from a query set (blocking version).
+
+    @param matches Matches. Each matches[i] is k or less matches for the same query descriptor.
+    @param k Count of best matches found per each query descriptor, or less if a query descriptor
+    has less than k possible matches in total.
+    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
+    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
+    the matches vector does not contain matches for fully masked-out query descriptors.
+
+    The matches are returned in the distance increasing order.
+     */
+    virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
+                          std::vector< std::vector<DMatch> >& matches,
+                          int k,
+                          InputArray mask = noArray(),
+                          bool compactResult = false) = 0;
+    /** @overload */
+    virtual void knnMatch(InputArray queryDescriptors,
+                          std::vector< std::vector<DMatch> >& matches,
+                          int k,
+                          const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                          bool compactResult = false) = 0;
+
+    /** @brief Finds the k best matches for each descriptor from a query set (asynchronous version).
+
+    @param matches Matches array stored in GPU memory. Internal representation is not defined.
+    Use DescriptorMatcher::knnMatchConvert to retrieve results in standard representation.
+     */
+    virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
+                               OutputArray matches,
+                               int k,
+                               InputArray mask = noArray(),
+                               Stream& stream = Stream::Null()) = 0;
+    /** @overload */
+    virtual void knnMatchAsync(InputArray queryDescriptors,
+                               OutputArray matches,
+                               int k,
+                               const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                               Stream& stream = Stream::Null()) = 0;
+
+    /** @brief Converts matches array from internal representation to standard matches vector.
+
+    Call this method only after DescriptorMatcher::knnMatchAsync is completed (i.e. after
+    synchronization).
+     */
+    virtual void knnMatchConvert(InputArray gpu_matches,
+                                 std::vector< std::vector<DMatch> >& matches,
+                                 bool compactResult = false) = 0;
+
+    //
+    // radius match
+    //
+
+    /** @brief For each query descriptor, finds the training descriptors not farther than the
+    specified distance (blocking version).
+
+    @param maxDistance Threshold for the distance between matched descriptors. Distance means here
+    metric distance (e.g. Hamming distance), not the distance between coordinates (which is
+    measured in pixels)!
+
+    Found matches are returned in the distance increasing order.
+     */
+    virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
+                             std::vector< std::vector<DMatch> >& matches,
+                             float maxDistance,
+                             InputArray mask = noArray(),
+                             bool compactResult = false) = 0;
+    /** @overload */
+    virtual void radiusMatch(InputArray queryDescriptors,
+                             std::vector< std::vector<DMatch> >& matches,
+                             float maxDistance,
+                             const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                             bool compactResult = false) = 0;
+
+    /** @brief For each query descriptor, finds the training descriptors not farther than the
+    specified distance (asynchronous version).
+
+    @param matches Matches array stored in GPU memory. Internal representation is not defined.
+    Use DescriptorMatcher::radiusMatchConvert to retrieve results in standard representation.
+     */
+    virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
+                                  OutputArray matches,
+                                  float maxDistance,
+                                  InputArray mask = noArray(),
+                                  Stream& stream = Stream::Null()) = 0;
+    /** @overload */
+    virtual void radiusMatchAsync(InputArray queryDescriptors,
+                                  OutputArray matches,
+                                  float maxDistance,
+                                  const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                                  Stream& stream = Stream::Null()) = 0;
+
+    /** @brief Converts matches array from internal representation to standard matches vector.
+
+    Call this method only after DescriptorMatcher::radiusMatchAsync is completed (i.e. after
+    synchronization).
+     */
+    virtual void radiusMatchConvert(InputArray gpu_matches,
+                                    std::vector< std::vector<DMatch> >& matches,
+                                    bool compactResult = false) = 0;
+};
+
+//
+// Feature2DAsync
+//
+
+/** @brief Abstract base class for CUDA asynchronous 2D image feature detectors and descriptor
+extractors.
+ */
+class CV_EXPORTS Feature2DAsync
+{
+public:
+    virtual ~Feature2DAsync();
+
+    /** @brief Detects keypoints in an image.
+
+    @param image Image.
+    @param keypoints The detected keypoints.
+    @param mask Mask specifying where to look for keypoints (optional). It must be a 8-bit integer
+    matrix with non-zero values in the region of interest.
+    @param stream CUDA stream.
+     */
+    virtual void detectAsync(InputArray image,
+                             OutputArray keypoints,
+                             InputArray mask = noArray(),
+                             Stream& stream = Stream::Null());
+
+    /** @brief Computes the descriptors for a set of keypoints detected in an image.
+
+    @param image Image.
+    @param keypoints Input collection of keypoints.
+    @param descriptors Computed descriptors. Row j is the descriptor for j-th keypoint.
+    @param stream CUDA stream.
+     */
+    virtual void computeAsync(InputArray image,
+                              OutputArray keypoints,
+                              OutputArray descriptors,
+                              Stream& stream = Stream::Null());
+
+    /** Detects keypoints and computes the descriptors. */
+    virtual void detectAndComputeAsync(InputArray image,
+                                       InputArray mask,
+                                       OutputArray keypoints,
+                                       OutputArray descriptors,
+                                       bool useProvidedKeypoints = false,
+                                       Stream& stream = Stream::Null());
+
+    /** Converts keypoints array from internal representation to standard vector. */
+    virtual void convert(InputArray gpu_keypoints,
+                         std::vector<KeyPoint>& keypoints) = 0;
+};
+
+//
+// FastFeatureDetector
+//
+
+/** @brief Wrapping class for feature detection using the FAST method.
+ */
+class CV_EXPORTS FastFeatureDetector : public cv::FastFeatureDetector, public Feature2DAsync
+{
+public:
+    enum
+    {
+        LOCATION_ROW = 0,
+        RESPONSE_ROW,
+        ROWS_COUNT,
+
+        FEATURE_SIZE = 7
+    };
+
+    static Ptr<FastFeatureDetector> create(int threshold = 10,
+                                           bool nonmaxSuppression = true,
+                                           int type = FastFeatureDetector::TYPE_9_16,
+                                           int max_npoints = 5000);
+
+    virtual void setMaxNumPoints(int max_npoints) = 0;
+    virtual int getMaxNumPoints() const = 0;
+};
+
+//
+// ORB
+//
+
+/** @brief Class implementing the ORB (*oriented BRIEF*) keypoint detector and descriptor extractor
+ *
+ * @sa cv::ORB
+ */
+class CV_EXPORTS ORB : public cv::ORB, public Feature2DAsync
+{
+public:
+    enum
+    {
+        X_ROW = 0,
+        Y_ROW,
+        RESPONSE_ROW,
+        ANGLE_ROW,
+        OCTAVE_ROW,
+        SIZE_ROW,
+        ROWS_COUNT
+    };
+
+    static Ptr<ORB> create(int nfeatures = 500,
+                           float scaleFactor = 1.2f,
+                           int nlevels = 8,
+                           int edgeThreshold = 31,
+                           int firstLevel = 0,
+                           int WTA_K = 2,
+                           int scoreType = ORB::HARRIS_SCORE,
+                           int patchSize = 31,
+                           int fastThreshold = 20,
+                           bool blurForDescriptor = false);
+
+    //! if true, image will be blurred before descriptors calculation
+    virtual void setBlurForDescriptor(bool blurForDescriptor) = 0;
+    virtual bool getBlurForDescriptor() const = 0;
+};

 //! @}
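With this refactoring, the asynchronous matchers keep their results on the GPU in an internal layout until explicitly converted. A minimal usage sketch of the new interface (not part of the commit; it assumes the query and train descriptors were already computed on the GPU, e.g. by cuda::ORB):

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudafeatures2d.hpp>

#include <vector>

int main()
{
    cv::cuda::GpuMat d_query, d_train;   // descriptor matrices, assumed filled elsewhere

    // Brute-force matcher for binary (Hamming) descriptors.
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
            cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);

    // Asynchronous path: results land in d_matches in an internal layout.
    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_matches;
    matcher->matchAsync(d_query, d_train, d_matches, cv::noArray(), stream);

    stream.waitForCompletion();          // synchronize before converting

    // Decode the internal layout into the standard DMatch vector.
    std::vector<cv::DMatch> matches;
    matcher->matchConvert(d_matches, matches);
    return 0;
}

The blocking match() overloads wrap exactly this matchAsync-then-matchConvert sequence, as the brute_force_matcher.cpp diff below shows.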
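The detectors follow the same pattern through Feature2DAsync: detect into a GPU-side keypoint array, then convert it to std::vector<KeyPoint>. A sketch under the same assumptions ("scene.png" is a hypothetical input file):

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudafeatures2d.hpp>
#include <opencv2/imgcodecs.hpp>

#include <vector>

int main()
{
    // 8-bit grayscale input uploaded to the GPU ("scene.png" is a placeholder).
    cv::Mat img = cv::imread("scene.png", cv::IMREAD_GRAYSCALE);
    cv::cuda::GpuMat d_img(img);

    // FAST: detection only.
    cv::Ptr<cv::cuda::FastFeatureDetector> fast =
            cv::cuda::FastFeatureDetector::create(20, true);
    cv::cuda::GpuMat d_fastKpts;
    fast->detectAsync(d_img, d_fastKpts);

    std::vector<cv::KeyPoint> kpts;
    fast->convert(d_fastKpts, kpts);     // internal GPU layout -> vector<KeyPoint>

    // ORB: detection and description in one call.
    cv::Ptr<cv::cuda::ORB> orb = cv::cuda::ORB::create(500);
    cv::cuda::GpuMat d_orbKpts, d_desc;
    orb->detectAndComputeAsync(d_img, cv::noArray(), d_orbKpts, d_desc);
    orb->convert(d_orbKpts, kpts);
    return 0;
}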
modules/cudafeatures2d/perf/perf_features2d.cpp

@@ -64,15 +64,18 @@ PERF_TEST_P(Image_Threshold_NonMaxSuppression, FAST,
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::FAST_CUDA d_fast(threshold, nonMaxSuppersion, 0.5);
+        cv::Ptr<cv::cuda::FastFeatureDetector> d_fast =
+                cv::cuda::FastFeatureDetector::create(threshold, nonMaxSuppersion,
+                                                      cv::FastFeatureDetector::TYPE_9_16,
+                                                      0.5 * img.size().area());

         const cv::cuda::GpuMat d_img(img);
         cv::cuda::GpuMat d_keypoints;

-        TEST_CYCLE() d_fast(d_img, cv::cuda::GpuMat(), d_keypoints);
+        TEST_CYCLE() d_fast->detectAsync(d_img, d_keypoints);

         std::vector<cv::KeyPoint> gpu_keypoints;
-        d_fast.downloadKeypoints(d_keypoints, gpu_keypoints);
+        d_fast->convert(d_keypoints, gpu_keypoints);

         sortKeyPoints(gpu_keypoints);
...
@@ -106,15 +109,15 @@ PERF_TEST_P(Image_NFeatures, ORB,
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::ORB_CUDA d_orb(nFeatures);
+        cv::Ptr<cv::cuda::ORB> d_orb = cv::cuda::ORB::create(nFeatures);

         const cv::cuda::GpuMat d_img(img);
         cv::cuda::GpuMat d_keypoints, d_descriptors;

-        TEST_CYCLE() d_orb(d_img, cv::cuda::GpuMat(), d_keypoints, d_descriptors);
+        TEST_CYCLE() d_orb->detectAndComputeAsync(d_img, cv::noArray(), d_keypoints, d_descriptors);

         std::vector<cv::KeyPoint> gpu_keypoints;
-        d_orb.downloadKeyPoints(d_keypoints, gpu_keypoints);
+        d_orb->convert(d_keypoints, gpu_keypoints);

         cv::Mat gpu_descriptors(d_descriptors);
...
@@ -164,16 +167,16 @@ PERF_TEST_P(DescSize_Norm, BFMatch,
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::BFMatcher_CUDA d_matcher(normType);
+        cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher =
+                cv::cuda::DescriptorMatcher::createBFMatcher(normType);

         const cv::cuda::GpuMat d_query(query);
         const cv::cuda::GpuMat d_train(train);
-        cv::cuda::GpuMat d_trainIdx, d_distance;
+        cv::cuda::GpuMat d_matches;

-        TEST_CYCLE() d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
+        TEST_CYCLE() d_matcher->matchAsync(d_query, d_train, d_matches);

         std::vector<cv::DMatch> gpu_matches;
-        d_matcher.matchDownload(d_trainIdx, d_distance, gpu_matches);
+        d_matcher->matchConvert(d_matches, gpu_matches);

         SANITY_CHECK_MATCHES(gpu_matches);
     }
...
@@ -223,16 +226,16 @@ PERF_TEST_P(DescSize_K_Norm, BFKnnMatch,
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::BFMatcher_CUDA d_matcher(normType);
+        cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher =
+                cv::cuda::DescriptorMatcher::createBFMatcher(normType);

         const cv::cuda::GpuMat d_query(query);
         const cv::cuda::GpuMat d_train(train);
-        cv::cuda::GpuMat d_trainIdx, d_distance, d_allDist;
+        cv::cuda::GpuMat d_matches;

-        TEST_CYCLE() d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
+        TEST_CYCLE() d_matcher->knnMatchAsync(d_query, d_train, d_matches, k);

         std::vector< std::vector<cv::DMatch> > matchesTbl;
-        d_matcher.knnMatchDownload(d_trainIdx, d_distance, matchesTbl);
+        d_matcher->knnMatchConvert(d_matches, matchesTbl);

         std::vector<cv::DMatch> gpu_matches;
         toOneRowMatches(matchesTbl, gpu_matches);
...
@@ -277,16 +280,16 @@ PERF_TEST_P(DescSize_Norm, BFRadiusMatch,
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::BFMatcher_CUDA d_matcher(normType);
+        cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher =
+                cv::cuda::DescriptorMatcher::createBFMatcher(normType);

         const cv::cuda::GpuMat d_query(query);
         const cv::cuda::GpuMat d_train(train);
-        cv::cuda::GpuMat d_trainIdx, d_nMatches, d_distance;
+        cv::cuda::GpuMat d_matches;

-        TEST_CYCLE() d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, maxDistance);
+        TEST_CYCLE() d_matcher->radiusMatchAsync(d_query, d_train, d_matches, maxDistance);

         std::vector< std::vector<cv::DMatch> > matchesTbl;
-        d_matcher.radiusMatchDownload(d_trainIdx, d_distance, d_nMatches, matchesTbl);
+        d_matcher->radiusMatchConvert(d_matches, matchesTbl);

         std::vector<cv::DMatch> gpu_matches;
         toOneRowMatches(matchesTbl, gpu_matches);
...
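These perf tests exercise the asynchronous paths; the same matcher also exposes the blocking variants used in ordinary application code. A sketch of the blocking knnMatch with a Lowe-style ratio test (again assuming descriptors already on the GPU; the 0.8 threshold is a conventional choice, not from the commit):

#include <opencv2/core/cuda.hpp>
#include <opencv2/cudafeatures2d.hpp>

#include <vector>

int main()
{
    cv::cuda::GpuMat d_query, d_train;   // ORB descriptors, assumed filled elsewhere

    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
            cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);

    // Blocking k-NN: each knn[i] holds up to k matches, best first.
    std::vector< std::vector<cv::DMatch> > knn;
    matcher->knnMatch(d_query, d_train, knn, 2);

    // Keep a match only when it is clearly better than the runner-up.
    std::vector<cv::DMatch> good;
    for (size_t i = 0; i < knn.size(); ++i)
        if (knn[i].size() == 2 && knn[i][0].distance < 0.8f * knn[i][1].distance)
            good.push_back(knn[i][0]);

    return 0;
}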
modules/cudafeatures2d/src/brute_force_matcher.cpp

@@ -47,37 +47,7 @@ using namespace cv::cuda;

 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

-cv::cuda::BFMatcher_CUDA::BFMatcher_CUDA(int) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::add(const std::vector<GpuMat>&) { throw_no_cuda(); }
-const std::vector<GpuMat>& cv::cuda::BFMatcher_CUDA::getTrainDescriptors() const { throw_no_cuda(); return trainDescCollection; }
-void cv::cuda::BFMatcher_CUDA::clear() { throw_no_cuda(); }
-bool cv::cuda::BFMatcher_CUDA::empty() const { throw_no_cuda(); return true; }
-bool cv::cuda::BFMatcher_CUDA::isMaskSupported() const { throw_no_cuda(); return true; }
-void cv::cuda::BFMatcher_CUDA::matchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat&, const GpuMat&, std::vector<DMatch>&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat&, const Mat&, std::vector<DMatch>&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::match(const GpuMat&, const GpuMat&, std::vector<DMatch>&, const GpuMat&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::makeGpuCollection(GpuMat&, GpuMat&, const std::vector<GpuMat>&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::matchCollection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector<DMatch>&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat&, const Mat&, const Mat&, std::vector<DMatch>&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::match(const GpuMat&, std::vector<DMatch>&, const std::vector<GpuMat>&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatchDownload(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatchConvert(const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, int, const GpuMat&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatch2Collection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatch2Download(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatch2Convert(const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, int, const std::vector<GpuMat>&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, float, const GpuMat&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatchCollection(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const std::vector<GpuMat>&, Stream&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat&, const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, float, const std::vector<GpuMat>&, bool) { throw_no_cuda(); }
+Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int) { throw_no_cuda(); return Ptr<cv::cuda::DescriptorMatcher>(); }

 #else /* !defined (HAVE_CUDA) */
...
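When OpenCV is built without CUDA, the factory above still links but throws at run time. A hedged sketch of how a caller can avoid the throw (cv::cuda::getCudaEnabledDeviceCount() returns 0 both when no device is present and when the library was built without CUDA support):

#include <opencv2/core/cuda.hpp>
#include <opencv2/cudafeatures2d.hpp>

#include <iostream>

int main()
{
    if (cv::cuda::getCudaEnabledDeviceCount() == 0)
    {
        std::cout << "No CUDA support/device; fall back to the CPU cv::BFMatcher.\n";
        return 0;
    }

    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
            cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);
    std::cout << "mask support: " << matcher->isMaskSupported() << "\n";
    return 0;
}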
@@ -155,857 +125,953 @@ namespace cv { namespace cuda { namespace device
}
}}}
////////////////////////////////////////////////////////////////////
// Train collection
cv
::
cuda
::
BFMatcher_CUDA
::
BFMatcher_CUDA
(
int
norm_
)
:
norm
(
norm_
)
namespace
{
}
static
void
makeGpuCollection
(
const
std
::
vector
<
GpuMat
>&
trainDescCollection
,
const
std
::
vector
<
GpuMat
>&
masks
,
GpuMat
&
trainCollection
,
GpuMat
&
maskCollection
)
{
if
(
trainDescCollection
.
empty
())
return
;
void
cv
::
cuda
::
BFMatcher_CUDA
::
add
(
const
std
::
vector
<
GpuMat
>&
descCollection
)
{
trainDescCollection
.
insert
(
trainDescCollection
.
end
(),
descCollection
.
begin
(),
descCollection
.
end
());
}
if
(
masks
.
empty
())
{
Mat
trainCollectionCPU
(
1
,
static_cast
<
int
>
(
trainDescCollection
.
size
()),
CV_8UC
(
sizeof
(
PtrStepSzb
)));
const
std
::
vector
<
GpuMat
>&
cv
::
cuda
::
BFMatcher_CUDA
::
getTrainDescriptors
()
const
{
return
trainDescCollection
;
}
PtrStepSzb
*
trainCollectionCPU_ptr
=
trainCollectionCPU
.
ptr
<
PtrStepSzb
>
();
void
cv
::
cuda
::
BFMatcher_CUDA
::
clear
()
{
trainDescCollection
.
clear
();
}
for
(
size_t
i
=
0
,
size
=
trainDescCollection
.
size
();
i
<
size
;
++
i
,
++
trainCollectionCPU_ptr
)
*
trainCollectionCPU_ptr
=
trainDescCollection
[
i
];
bool
cv
::
cuda
::
BFMatcher_CUDA
::
empty
()
const
{
return
trainDescCollection
.
empty
();
}
trainCollection
.
upload
(
trainCollectionCPU
);
maskCollection
.
release
();
}
else
{
CV_Assert
(
masks
.
size
()
==
trainDescCollection
.
size
()
);
bool
cv
::
cuda
::
BFMatcher_CUDA
::
isMaskSupported
()
const
{
return
true
;
}
Mat
trainCollectionCPU
(
1
,
static_cast
<
int
>
(
trainDescCollection
.
size
()),
CV_8UC
(
sizeof
(
PtrStepSzb
)));
Mat
maskCollectionCPU
(
1
,
static_cast
<
int
>
(
trainDescCollection
.
size
()),
CV_8UC
(
sizeof
(
PtrStepb
)));
////////////////////////////////////////////////////////////////////
// Match
PtrStepSzb
*
trainCollectionCPU_ptr
=
trainCollectionCPU
.
ptr
<
PtrStepSzb
>
();
PtrStepb
*
maskCollectionCPU_ptr
=
maskCollectionCPU
.
ptr
<
PtrStepb
>
();
void
cv
::
cuda
::
BFMatcher_CUDA
::
matchSingle
(
const
GpuMat
&
query
,
const
GpuMat
&
train
,
GpuMat
&
trainIdx
,
GpuMat
&
distance
,
const
GpuMat
&
mask
,
Stream
&
stream
)
{
if
(
query
.
empty
()
||
train
.
empty
())
return
;
for
(
size_t
i
=
0
,
size
=
trainDescCollection
.
size
();
i
<
size
;
++
i
,
++
trainCollectionCPU_ptr
,
++
maskCollectionCPU_ptr
)
{
const
GpuMat
&
train
=
trainDescCollection
[
i
];
const
GpuMat
&
mask
=
masks
[
i
];
using
namespace
cv
::
cuda
::
device
::
bf_match
;
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8UC1
&&
mask
.
cols
==
train
.
rows
)
)
;
typedef
void
(
*
caller_t
)(
const
PtrStepSzb
&
query
,
const
PtrStepSzb
&
train
,
const
PtrStepSzb
&
mask
,
const
PtrStepSzi
&
trainIdx
,
const
PtrStepSzf
&
distance
,
cudaStream_t
stream
);
*
trainCollectionCPU_ptr
=
train
;
*
maskCollectionCPU_ptr
=
mask
;
}
static
const
caller_t
callersL1
[]
=
{
matchL1_gpu
<
unsigned
char
>
,
0
/*matchL1_gpu<signed char>*/
,
matchL1_gpu
<
unsigned
short
>
,
matchL1_gpu
<
short
>
,
matchL1_gpu
<
int
>
,
matchL1_gpu
<
float
>
};
static
const
caller_t
callersL2
[]
=
{
0
/*matchL2_gpu<unsigned char>*/
,
0
/*matchL2_gpu<signed char>*/
,
0
/*matchL2_gpu<unsigned short>*/
,
0
/*matchL2_gpu<short>*/
,
0
/*matchL2_gpu<int>*/
,
matchL2_gpu
<
float
>
};
trainCollection
.
upload
(
trainCollectionCPU
);
maskCollection
.
upload
(
maskCollectionCPU
);
}
}
static
const
caller_t
callersHamming
[]
=
class
BFMatcher_Impl
:
public
cv
::
cuda
::
DescriptorMatcher
{
matchHamming_gpu
<
unsigned
char
>
,
0
/*matchHamming_gpu<signed char>*/
,
matchHamming_gpu
<
unsigned
short
>
,
0
/*matchHamming_gpu<short>*/
,
matchHamming_gpu
<
int
>
,
0
/*matchHamming_gpu<float>*/
};
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
train
.
cols
==
query
.
cols
&&
train
.
type
()
==
query
.
type
());
CV_Assert
(
norm
==
NORM_L1
||
norm
==
NORM_L2
||
norm
==
NORM_HAMMING
);
const
caller_t
*
callers
=
norm
==
NORM_L1
?
callersL1
:
norm
==
NORM_L2
?
callersL2
:
callersHamming
;
const
int
nQuery
=
query
.
rows
;
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32S
,
trainIdx
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32F
,
distance
);
caller_t
func
=
callers
[
query
.
depth
()];
CV_Assert
(
func
!=
0
);
public
:
explicit
BFMatcher_Impl
(
int
norm
)
:
norm_
(
norm
)
{
CV_Assert
(
norm
==
NORM_L1
||
norm
==
NORM_L2
||
norm
==
NORM_HAMMING
);
}
func
(
query
,
train
,
mask
,
trainIdx
,
distance
,
StreamAccessor
::
getStream
(
stream
));
}
virtual
bool
isMaskSupported
()
const
{
return
true
;
}
void
cv
::
cuda
::
BFMatcher_CUDA
::
matchDownload
(
const
GpuMat
&
trainIdx
,
const
GpuMat
&
distance
,
std
::
vector
<
DMatch
>&
matche
s
)
{
if
(
trainIdx
.
empty
()
||
distance
.
empty
())
return
;
virtual
void
add
(
const
std
::
vector
<
GpuMat
>&
descriptor
s
)
{
trainDescCollection_
.
insert
(
trainDescCollection_
.
end
(),
descriptors
.
begin
(),
descriptors
.
end
());
}
Mat
trainIdxCPU
(
trainIdx
);
Mat
distanceCPU
(
distance
);
virtual
const
std
::
vector
<
GpuMat
>&
getTrainDescriptors
()
const
{
return
trainDescCollection_
;
}
matchConvert
(
trainIdxCPU
,
distanceCPU
,
matches
);
}
virtual
void
clear
()
{
trainDescCollection_
.
clear
();
}
void
cv
::
cuda
::
BFMatcher_CUDA
::
matchConvert
(
const
Mat
&
trainIdx
,
const
Mat
&
distance
,
std
::
vector
<
DMatch
>&
matches
)
{
if
(
trainIdx
.
empty
()
||
distance
.
empty
())
return
;
virtual
bool
empty
()
const
{
return
trainDescCollection_
.
empty
();
}
CV_Assert
(
trainIdx
.
type
()
==
CV_32SC1
);
CV_Assert
(
distance
.
type
()
==
CV_32FC1
&&
distance
.
cols
==
trainIdx
.
cols
);
virtual
void
train
()
{
}
const
int
nQuery
=
trainIdx
.
cols
;
virtual
void
match
(
InputArray
queryDescriptors
,
InputArray
trainDescriptors
,
std
::
vector
<
DMatch
>&
matches
,
InputArray
mask
=
noArray
());
virtual
void
match
(
InputArray
queryDescriptors
,
std
::
vector
<
DMatch
>&
matches
,
const
std
::
vector
<
GpuMat
>&
masks
=
std
::
vector
<
GpuMat
>
());
virtual
void
matchAsync
(
InputArray
queryDescriptors
,
InputArray
trainDescriptors
,
OutputArray
matches
,
InputArray
mask
=
noArray
(),
Stream
&
stream
=
Stream
::
Null
());
virtual
void
matchAsync
(
InputArray
queryDescriptors
,
OutputArray
matches
,
const
std
::
vector
<
GpuMat
>&
masks
=
std
::
vector
<
GpuMat
>
(),
Stream
&
stream
=
Stream
::
Null
());
virtual
void
matchConvert
(
InputArray
gpu_matches
,
std
::
vector
<
DMatch
>&
matches
);
virtual
void
knnMatch
(
InputArray
queryDescriptors
,
InputArray
trainDescriptors
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
int
k
,
InputArray
mask
=
noArray
(),
bool
compactResult
=
false
);
virtual
void
knnMatch
(
InputArray
queryDescriptors
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
int
k
,
const
std
::
vector
<
GpuMat
>&
masks
=
std
::
vector
<
GpuMat
>
(),
bool
compactResult
=
false
);
virtual
void
knnMatchAsync
(
InputArray
queryDescriptors
,
InputArray
trainDescriptors
,
OutputArray
matches
,
int
k
,
InputArray
mask
=
noArray
(),
Stream
&
stream
=
Stream
::
Null
());
virtual
void
knnMatchAsync
(
InputArray
queryDescriptors
,
OutputArray
matches
,
int
k
,
const
std
::
vector
<
GpuMat
>&
masks
=
std
::
vector
<
GpuMat
>
(),
Stream
&
stream
=
Stream
::
Null
());
virtual
void
knnMatchConvert
(
InputArray
gpu_matches
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
bool
compactResult
=
false
);
virtual
void
radiusMatch
(
InputArray
queryDescriptors
,
InputArray
trainDescriptors
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
float
maxDistance
,
InputArray
mask
=
noArray
(),
bool
compactResult
=
false
);
virtual
void
radiusMatch
(
InputArray
queryDescriptors
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
float
maxDistance
,
const
std
::
vector
<
GpuMat
>&
masks
=
std
::
vector
<
GpuMat
>
(),
bool
compactResult
=
false
);
virtual
void
radiusMatchAsync
(
InputArray
queryDescriptors
,
InputArray
trainDescriptors
,
OutputArray
matches
,
float
maxDistance
,
InputArray
mask
=
noArray
(),
Stream
&
stream
=
Stream
::
Null
());
virtual
void
radiusMatchAsync
(
InputArray
queryDescriptors
,
OutputArray
matches
,
float
maxDistance
,
const
std
::
vector
<
GpuMat
>&
masks
=
std
::
vector
<
GpuMat
>
(),
Stream
&
stream
=
Stream
::
Null
());
virtual
void
radiusMatchConvert
(
InputArray
gpu_matches
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
bool
compactResult
=
false
);
private
:
int
norm_
;
std
::
vector
<
GpuMat
>
trainDescCollection_
;
};
matches
.
clear
();
matches
.
reserve
(
nQuery
);
//
// 1 to 1 match
//
const
int
*
trainIdx_ptr
=
trainIdx
.
ptr
<
int
>
();
const
float
*
distance_ptr
=
distance
.
ptr
<
float
>
();
for
(
int
queryIdx
=
0
;
queryIdx
<
nQuery
;
++
queryIdx
,
++
trainIdx_ptr
,
++
distance_ptr
)
void
BFMatcher_Impl
::
match
(
InputArray
_queryDescriptors
,
InputArray
_trainDescriptors
,
std
::
vector
<
DMatch
>&
matches
,
InputArray
_mask
)
{
int
train_idx
=
*
trainIdx_ptr
;
if
(
train_idx
==
-
1
)
continue
;
float
distance_local
=
*
distance_ptr
;
DMatch
m
(
queryIdx
,
train_idx
,
0
,
distance_local
);
matches
.
push_back
(
m
);
GpuMat
d_matches
;
matchAsync
(
_queryDescriptors
,
_trainDescriptors
,
d_matches
,
_mask
);
matchConvert
(
d_matches
,
matches
);
}
}
void
cv
::
cuda
::
BFMatcher_CUDA
::
match
(
const
GpuMat
&
query
,
const
GpuMat
&
train
,
std
::
vector
<
DMatch
>&
matches
,
const
GpuMat
&
mask
)
{
GpuMat
trainIdx
,
distance
;
matchSingle
(
query
,
train
,
trainIdx
,
distance
,
mask
);
matchDownload
(
trainIdx
,
distance
,
matches
);
}
void
cv
::
cuda
::
BFMatcher_CUDA
::
makeGpuCollection
(
GpuMat
&
trainCollection
,
GpuMat
&
maskCollection
,
const
std
::
vector
<
GpuMat
>&
masks
)
{
if
(
empty
())
return
;
if
(
masks
.
empty
())
void
BFMatcher_Impl
::
match
(
InputArray
_queryDescriptors
,
std
::
vector
<
DMatch
>&
matches
,
const
std
::
vector
<
GpuMat
>&
masks
)
{
Mat
trainCollectionCPU
(
1
,
static_cast
<
int
>
(
trainDescCollection
.
size
()),
CV_8UC
(
sizeof
(
PtrStepSzb
)));
GpuMat
d_matches
;
matchAsync
(
_queryDescriptors
,
d_matches
,
masks
);
matchConvert
(
d_matches
,
matches
);
}
void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                OutputArray _matches,
                                InputArray _mask, Stream& stream)
{
    using namespace cv::cuda::device::bf_match;

    const GpuMat query = _queryDescriptors.getGpuMat();
    const GpuMat train = _trainDescriptors.getGpuMat();
    const GpuMat mask = _mask.getGpuMat();

    if (query.empty() || train.empty())
    {
        _matches.release();
        return;
    }

    CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
    CV_Assert( train.cols == query.cols && train.type() == query.type() );
    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                             const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                             cudaStream_t stream);

    static const caller_t callersL1[] =
    {
        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
        matchL1_gpu<int>, matchL1_gpu<float>
    };
    static const caller_t callersL2[] =
    {
        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
    };
    static const caller_t callersHamming[] =
    {
        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
    };

    const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

    const caller_t func = callers[query.depth()];
    if (func == 0)
    {
        CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
    }

    const int nQuery = query.rows;

    _matches.create(2, nQuery, CV_32SC1);
    GpuMat matches = _matches.getGpuMat();

    GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
    GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(1));

    func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
}
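The mask accepted by matchAsync is an optional CV_8UC1 matrix of size query.rows x train.rows: a non-zero entry at (i, j) permits matching query descriptor i against train descriptor j. A small sketch of building such a mask on the host (hypothetical restriction, illustrative names):

    // permit matches only against the first half of the train set
    cv::Mat h_mask = cv::Mat::zeros(d_query.rows, d_train.rows, CV_8UC1);
    h_mask(cv::Range::all(), cv::Range(0, d_train.rows / 2)).setTo(255);

    cv::cuda::GpuMat d_mask;
    d_mask.upload(h_mask);

    cv::cuda::GpuMat d_matches;
    matcher->matchAsync(d_query, d_train, d_matches, d_mask);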
void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors,
                                OutputArray _matches,
                                const std::vector<GpuMat>& masks, Stream& stream)
{
    using namespace cv::cuda::device::bf_match;

    const GpuMat query = _queryDescriptors.getGpuMat();

    if (query.empty() || trainDescCollection_.empty())
    {
        _matches.release();
        return;
    }

    CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

    GpuMat trainCollection, maskCollection;
    makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                             const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
                             cudaStream_t stream);

    static const caller_t callersL1[] =
    {
        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
        matchL1_gpu<int>, matchL1_gpu<float>
    };
    static const caller_t callersL2[] =
    {
        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
    };
    static const caller_t callersHamming[] =
    {
        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
    };

    const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

    const caller_t func = callers[query.depth()];
    if (func == 0)
    {
        CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
    }

    const int nQuery = query.rows;

    _matches.create(3, nQuery, CV_32SC1);
    GpuMat matches = _matches.getGpuMat();

    GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
    GpuMat imgIdx  (1, nQuery, CV_32SC1, matches.ptr(1));
    GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(2));

    func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
}
void BFMatcher_Impl::matchConvert(InputArray _gpu_matches,
                                  std::vector<DMatch>& matches)
{
    Mat gpu_matches;
    if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
    {
        _gpu_matches.getGpuMat().download(gpu_matches);
    }
    else
    {
        gpu_matches = _gpu_matches.getMat();
    }

    if (gpu_matches.empty())
    {
        matches.clear();
        return;
    }

    CV_Assert( (gpu_matches.type() == CV_32SC1) && (gpu_matches.rows == 2 || gpu_matches.rows == 3) );

    const int nQuery = gpu_matches.cols;

    matches.clear();
    matches.reserve(nQuery);

    const int* trainIdxPtr = NULL;
    const int* imgIdxPtr = NULL;
    const float* distancePtr = NULL;

    if (gpu_matches.rows == 2)
    {
        trainIdxPtr = gpu_matches.ptr<int>(0);
        distancePtr = gpu_matches.ptr<float>(1);
    }
    else
    {
        trainIdxPtr = gpu_matches.ptr<int>(0);
        imgIdxPtr   = gpu_matches.ptr<int>(1);
        distancePtr = gpu_matches.ptr<float>(2);
    }

    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
    {
        const int trainIdx = trainIdxPtr[queryIdx];
        if (trainIdx == -1)
            continue;

        const int imgIdx = imgIdxPtr ? imgIdxPtr[queryIdx] : 0;
        const float distance = distancePtr[queryIdx];

        DMatch m(queryIdx, trainIdx, imgIdx, distance);

        matches.push_back(m);
    }
}
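matchAsync and matchConvert together replace the old matchSingle/matchDownload pair, so the caller now decides when the device-to-host copy happens. A minimal sketch of the intended pattern, reusing matcher, d_query and d_train from the earlier sketches:

    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_matches;

    matcher->matchAsync(d_query, d_train, d_matches, cv::noArray(), stream);
    // ... enqueue other GPU work on `stream` here ...
    stream.waitForCompletion();                 // make d_matches safe to read

    std::vector<cv::DMatch> matches;
    matcher->matchConvert(d_matches, matches);  // decode the packed 2 x nQuery buffer on the CPU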
//
// knn match
//

// (removed: cv::cuda::BFMatcher_CUDA::knnMatchSingle and its per-type dispatch tables)

void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
                              std::vector<std::vector<DMatch> >& matches,
                              int k, InputArray _mask, bool compactResult)
{
    GpuMat d_matches;
    knnMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, k, _mask);
    knnMatchConvert(d_matches, matches, compactResult);
}
// (removed: cv::cuda::BFMatcher_CUDA::knnMatchDownload; knnMatchConvert below now
//  decodes the packed GPU buffer directly)

void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors,
                              std::vector<std::vector<DMatch> >& matches,
                              int k,
                              const std::vector<GpuMat>& masks,
                              bool compactResult)
{
    if (k == 2)
    {
        GpuMat d_matches;
        knnMatchAsync(_queryDescriptors, d_matches, k, masks);
        knnMatchConvert(d_matches, matches, compactResult);
    }
    else
    {
        const GpuMat query = _queryDescriptors.getGpuMat();

        if (query.empty() || trainDescCollection_.empty())
        {
            matches.clear();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        std::vector<std::vector<DMatch> > curMatches;
        std::vector<DMatch> temp;
        temp.reserve(2 * k);

        matches.resize(query.rows);
        for (size_t i = 0; i < matches.size(); ++i)
            matches[i].reserve(k);

        for (size_t imgIdx = 0; imgIdx < trainDescCollection_.size(); ++imgIdx)
        {
            knnMatch(query, trainDescCollection_[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]);

            for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
            {
                std::vector<DMatch>& localMatch = curMatches[queryIdx];
                std::vector<DMatch>& globalMatch = matches[queryIdx];

                for (size_t i = 0; i < localMatch.size(); ++i)
                    localMatch[i].imgIdx = imgIdx;

                temp.clear();
                std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), std::back_inserter(temp));

                globalMatch.clear();
                const size_t count = std::min(static_cast<size_t>(k), temp.size());
                std::copy(temp.begin(), temp.begin() + count, std::back_inserter(globalMatch));
            }
        }

        if (compactResult)
        {
            std::vector<std::vector<DMatch> >::iterator new_end = std::remove_if(matches.begin(), matches.end(),
                                                                                 std::mem_fun_ref(&std::vector<DMatch>::empty));
            matches.erase(new_end, matches.end());
        }
    }
}
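For k other than 2, the collection overload above matches against each train image separately and folds the per-image result into one global top-k list per query. This works because both lists are sorted by distance (DMatch::operator< compares distance). A standalone sketch of the merge-and-truncate step, with hypothetical names and data:

    #include <algorithm>
    #include <vector>
    #include <opencv2/core.hpp>

    void mergeTopK(std::vector<cv::DMatch>& global, const std::vector<cv::DMatch>& local, int k)
    {
        std::vector<cv::DMatch> temp;
        temp.reserve(global.size() + local.size());
        // both inputs are assumed sorted by ascending distance
        std::merge(global.begin(), global.end(), local.begin(), local.end(), std::back_inserter(temp));
        temp.resize(std::min<size_t>(k, temp.size()));   // keep the k best overall
        global.swap(temp);
    }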
// (removed: cv::cuda::BFMatcher_CUDA::knnMatchConvert(const Mat&, const Mat&, ...) and
//  the knnMatch(query, train, ...) wrapper built on knnMatchSingle/knnMatchDownload)

void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                   OutputArray _matches,
                                   int k, InputArray _mask, Stream& stream)
{
    using namespace cv::cuda::device::bf_knnmatch;

    const GpuMat query = _queryDescriptors.getGpuMat();
    const GpuMat train = _trainDescriptors.getGpuMat();
    const GpuMat mask = _mask.getGpuMat();

    if (query.empty() || train.empty())
    {
        _matches.release();
        return;
    }

    CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
    CV_Assert( train.cols == query.cols && train.type() == query.type() );
    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                             const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
                             cudaStream_t stream);

    static const caller_t callersL1[] =
    {
        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
        matchL1_gpu<int>, matchL1_gpu<float>
    };
    static const caller_t callersL2[] =
    {
        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
    };
    static const caller_t callersHamming[] =
    {
        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
    };

    const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

    const caller_t func = callers[query.depth()];
    if (func == 0)
    {
        CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
    }

    const int nQuery = query.rows;
    const int nTrain = train.rows;

    GpuMat trainIdx, distance, allDist;
    if (k == 2)
    {
        _matches.create(2, nQuery, CV_32SC2);
        GpuMat matches = _matches.getGpuMat();

        trainIdx = GpuMat(1, nQuery, CV_32SC2, matches.ptr(0));
        distance = GpuMat(1, nQuery, CV_32FC2, matches.ptr(1));
    }
    else
    {
        _matches.create(2 * nQuery, k, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        trainIdx = GpuMat(nQuery, k, CV_32SC1, matches.ptr(0), matches.step);
        distance = GpuMat(nQuery, k, CV_32FC1, matches.ptr(nQuery), matches.step);

        BufferPool pool(stream);
        allDist = pool.getBuffer(nQuery, nTrain, CV_32FC1);
    }

    trainIdx.setTo(Scalar::all(-1), stream);

    func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
}
// (removed: cv::cuda::BFMatcher_CUDA::knnMatch2Collection, knnMatch2Download and
//  knnMatch2Convert; the k = 2 collection path is now knnMatchAsync below)

void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors,
                                   OutputArray _matches,
                                   int k,
                                   const std::vector<GpuMat>& masks, Stream& stream)
{
    using namespace cv::cuda::device::bf_knnmatch;

    if (k != 2)
    {
        CV_Error(Error::StsNotImplemented, "only k=2 mode is supported for now");
    }

    const GpuMat query = _queryDescriptors.getGpuMat();

    if (query.empty() || trainDescCollection_.empty())
    {
        _matches.release();
        return;
    }

    CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

    GpuMat trainCollection, maskCollection;
    makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                             const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
                             cudaStream_t stream);

    static const caller_t callersL1[] =
    {
        match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
        match2L1_gpu<unsigned short>, match2L1_gpu<short>,
        match2L1_gpu<int>, match2L1_gpu<float>
    };
    static const caller_t callersL2[] =
    {
        0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
        0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
        0/*match2L2_gpu<int>*/, match2L2_gpu<float>
    };
    static const caller_t callersHamming[] =
    {
        match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
        match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
        match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
    };

    const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

    const caller_t func = callers[query.depth()];
    if (func == 0)
    {
        CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
    }

    const int nQuery = query.rows;

    _matches.create(3, nQuery, CV_32SC2);
    GpuMat matches = _matches.getGpuMat();

    GpuMat trainIdx(1, nQuery, CV_32SC2, matches.ptr(0));
    GpuMat imgIdx  (1, nQuery, CV_32SC2, matches.ptr(1));
    GpuMat distance(1, nQuery, CV_32FC2, matches.ptr(2));

    trainIdx.setTo(Scalar::all(-1), stream);

    func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
}
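In the k = 2 collection form the result buffer is 3 x nQuery with two-channel rows: row 0 holds the best and second-best trainIdx per query (CV_32SC2), row 1 the corresponding imgIdx pair, and row 2 the two distances (CV_32FC2). A sketch of decoding one query by hand, for the case where knnMatchConvert is bypassed (illustrative only; -1 in trainIdx marks a missing candidate):

    cv::Mat m;                             // 3 x nQuery packed buffer
    d_matches.download(m);

    int q = 0;                             // query index to inspect
    cv::Vec2i ti = m.ptr<cv::Vec2i>(0)[q]; // best / second-best trainIdx
    cv::Vec2i ii = m.ptr<cv::Vec2i>(1)[q]; // corresponding imgIdx pair
    cv::Vec2f di = m.ptr<cv::Vec2f>(2)[q]; // corresponding distances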
// (removed: the old cv::cuda::BFMatcher_CUDA::knnMatch(query, matches, k, masks)
//  with its ImgIdxSetter functor and std::merge-based fold; that logic now lives
//  in BFMatcher_Impl::knnMatch above)

void BFMatcher_Impl::knnMatchConvert(InputArray _gpu_matches,
                                     std::vector<std::vector<DMatch> >& matches,
                                     bool compactResult)
{
    Mat gpu_matches;
    if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
    {
        _gpu_matches.getGpuMat().download(gpu_matches);
    }
    else
    {
        gpu_matches = _gpu_matches.getMat();
    }

    if (gpu_matches.empty())
    {
        matches.clear();
        return;
    }

    CV_Assert( ((gpu_matches.type() == CV_32SC2) && (gpu_matches.rows == 2 || gpu_matches.rows == 3)) ||
               (gpu_matches.type() == CV_32SC1) );

    int nQuery = -1, k = -1;

    const int* trainIdxPtr = NULL;
    const int* imgIdxPtr = NULL;
    const float* distancePtr = NULL;

    if (gpu_matches.type() == CV_32SC2)
    {
        nQuery = gpu_matches.cols;
        k = 2;

        if (gpu_matches.rows == 2)
        {
            trainIdxPtr = gpu_matches.ptr<int>(0);
            distancePtr = gpu_matches.ptr<float>(1);
        }
        else
        {
            trainIdxPtr = gpu_matches.ptr<int>(0);
            imgIdxPtr   = gpu_matches.ptr<int>(1);
            distancePtr = gpu_matches.ptr<float>(2);
        }
    }
    else
    {
        nQuery = gpu_matches.rows / 2;
        k = gpu_matches.cols;

        trainIdxPtr = gpu_matches.ptr<int>(0);
        distancePtr = gpu_matches.ptr<float>(nQuery);
    }

    matches.clear();
    matches.reserve(nQuery);

    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
    {
        matches.push_back(std::vector<DMatch>());
        std::vector<DMatch>& curMatches = matches.back();
        curMatches.reserve(k);

        for (int i = 0; i < k; ++i)
        {
            const int trainIdx = *trainIdxPtr;

            if (trainIdx != -1)
            {
                const int imgIdx = imgIdxPtr ? *imgIdxPtr : 0;
                const float distance = *distancePtr;

                DMatch m(queryIdx, trainIdx, imgIdx, distance);

                curMatches.push_back(m);
            }

            ++trainIdxPtr;
            ++distancePtr;
            if (imgIdxPtr)
                ++imgIdxPtr;
        }

        if (compactResult && curMatches.empty())
        {
            matches.pop_back();
        }
    }
}
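knnMatch with k = 2 is the usual building block for Lowe's ratio test. A minimal sketch on top of the API above (the 0.8f threshold is an illustrative choice, not mandated by the library):

    std::vector<std::vector<cv::DMatch> > knn;
    matcher->knnMatch(d_query, d_train, knn, 2);

    std::vector<cv::DMatch> good;
    for (size_t i = 0; i < knn.size(); ++i)
    {
        // keep a match only when the best candidate is clearly better than the runner-up
        if (knn[i].size() == 2 && knn[i][0].distance < 0.8f * knn[i][1].distance)
            good.push_back(knn[i][0]);
    }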
//
// radius match
//

// (removed: cv::cuda::BFMatcher_CUDA::radiusMatchSingle and its per-type dispatch tables)

void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                 std::vector<std::vector<DMatch> >& matches,
                                 float maxDistance, InputArray _mask, bool compactResult)
{
    GpuMat d_matches;
    radiusMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, maxDistance, _mask);
    radiusMatchConvert(d_matches, matches, compactResult);
}
void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors,
                                 std::vector<std::vector<DMatch> >& matches,
                                 float maxDistance,
                                 const std::vector<GpuMat>& masks,
                                 bool compactResult)
{
    GpuMat d_matches;
    radiusMatchAsync(_queryDescriptors, d_matches, maxDistance, masks);
    radiusMatchConvert(d_matches, matches, compactResult);
}
void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                      OutputArray _matches,
                                      float maxDistance, InputArray _mask, Stream& stream)
{
    using namespace cv::cuda::device::bf_radius_match;

    const GpuMat query = _queryDescriptors.getGpuMat();
    const GpuMat train = _trainDescriptors.getGpuMat();
    const GpuMat mask = _mask.getGpuMat();

    if (query.empty() || train.empty())
    {
        _matches.release();
        return;
    }

    CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
    CV_Assert( train.cols == query.cols && train.type() == query.type() );
    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                             const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                             cudaStream_t stream);

    static const caller_t callersL1[] =
    {
        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
        matchL1_gpu<int>, matchL1_gpu<float>
    };
    static const caller_t callersL2[] =
    {
        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
    };
    static const caller_t callersHamming[] =
    {
        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
    };

    const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

    const caller_t func = callers[query.depth()];
    if (func == 0)
    {
        CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
    }

    const int nQuery = query.rows;
    const int nTrain = train.rows;

    const int cols = std::max((nTrain / 100), nQuery);

    _matches.create(2 * nQuery + 1, cols, CV_32SC1);
    GpuMat matches = _matches.getGpuMat();

    GpuMat trainIdx(nQuery, cols, CV_32SC1, matches.ptr(0), matches.step);
    GpuMat distance(nQuery, cols, CV_32FC1, matches.ptr(nQuery), matches.step);
    GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(2 * nQuery));

    nMatches.setTo(Scalar::all(0), stream);

    func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));
}
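radiusMatch returns, for every query descriptor, all train descriptors within maxDistance rather than a fixed count. A minimal usage sketch (the maxDistance value is illustrative and depends on the descriptor type and norm in use):

    std::vector<std::vector<cv::DMatch> > radius;
    matcher->radiusMatch(d_query, d_train, radius, /*maxDistance=*/0.25f);

    // each radius[q] holds every neighbour of query q within the budget,
    // sorted by distance; it may legitimately be empty
    size_t total = 0;
    for (size_t q = 0; q < radius.size(); ++q)
        total += radius[q].size();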
// (removed: cv::cuda::BFMatcher_CUDA::radiusMatchDownload and radiusMatchCollection;
//  the collection path is now the radiusMatchAsync overload below)

void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors,
                                      OutputArray _matches,
                                      float maxDistance,
                                      const std::vector<GpuMat>& masks, Stream& stream)
{
    using namespace cv::cuda::device::bf_radius_match;

    const GpuMat query = _queryDescriptors.getGpuMat();

    if (query.empty() || trainDescCollection_.empty())
    {
        _matches.release();
        return;
    }

    CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                             const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                             cudaStream_t stream);

    static const caller_t callersL1[] =
    {
        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
        matchL1_gpu<int>, matchL1_gpu<float>
    };
    static const caller_t callersL2[] =
    {
        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
    };
    static const caller_t callersHamming[] =
    {
        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
    };

    const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

    const caller_t func = callers[query.depth()];
    if (func == 0)
    {
        CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
    }

    const int nQuery = query.rows;

    _matches.create(3 * nQuery + 1, nQuery, CV_32FC1);
    GpuMat matches = _matches.getGpuMat();

    GpuMat trainIdx(nQuery, nQuery, CV_32SC1, matches.ptr(0), matches.step);
    GpuMat imgIdx  (nQuery, nQuery, CV_32SC1, matches.ptr(nQuery), matches.step);
    GpuMat distance(nQuery, nQuery, CV_32FC1, matches.ptr(2 * nQuery), matches.step);
    GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(3 * nQuery));

    nMatches.setTo(Scalar::all(0), stream);

    std::vector<PtrStepSzb> trains_(trainDescCollection_.begin(), trainDescCollection_.end());
    std::vector<PtrStepSzb> masks_(masks.begin(), masks.end());

    func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
         trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream));
}
// (removed: cv::cuda::BFMatcher_CUDA::radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, ...)
//  and radiusMatchConvert(const Mat&, ...); the replacement decodes the packed buffer directly)

void BFMatcher_Impl::radiusMatchConvert(InputArray _gpu_matches,
                                        std::vector<std::vector<DMatch> >& matches,
                                        bool compactResult)
{
    Mat gpu_matches;
    if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
    {
        _gpu_matches.getGpuMat().download(gpu_matches);
    }
    else
    {
        gpu_matches = _gpu_matches.getMat();
    }

    if (gpu_matches.empty())
    {
        matches.clear();
        return;
    }

    CV_Assert( gpu_matches.type() == CV_32SC1 || gpu_matches.type() == CV_32FC1 );

    int nQuery = -1;

    const int* trainIdxPtr = NULL;
    const int* imgIdxPtr = NULL;
    const float* distancePtr = NULL;
    const int* nMatchesPtr = NULL;

    if (gpu_matches.type() == CV_32SC1)
    {
        nQuery = (gpu_matches.rows - 1) / 2;

        trainIdxPtr = gpu_matches.ptr<int>(0);
        distancePtr = gpu_matches.ptr<float>(nQuery);
        nMatchesPtr = gpu_matches.ptr<int>(2 * nQuery);
    }
    else
    {
        nQuery = (gpu_matches.rows - 1) / 3;

        trainIdxPtr = gpu_matches.ptr<int>(0);
        imgIdxPtr   = gpu_matches.ptr<int>(nQuery);
        distancePtr = gpu_matches.ptr<float>(2 * nQuery);
        nMatchesPtr = gpu_matches.ptr<int>(3 * nQuery);
    }

    matches.clear();
    matches.reserve(nQuery);

    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
    {
        const int nMatched = std::min(nMatchesPtr[queryIdx], gpu_matches.cols);

        if (nMatched == 0)
        {
            if (!compactResult)
            {
                matches.push_back(std::vector<DMatch>());
            }
        }
        else
        {
            matches.push_back(std::vector<DMatch>(nMatched));
            std::vector<DMatch>& curMatches = matches.back();

            for (int i = 0; i < nMatched; ++i)
            {
                const int trainIdx = trainIdxPtr[i];
                const int imgIdx = imgIdxPtr ? imgIdxPtr[i] : 0;
                const float distance = distancePtr[i];

                DMatch m(queryIdx, trainIdx, imgIdx, distance);

                curMatches[i] = m;
            }

            std::sort(curMatches.begin(), curMatches.end());
        }

        trainIdxPtr += gpu_matches.cols;
        distancePtr += gpu_matches.cols;
        if (imgIdxPtr)
            imgIdxPtr += gpu_matches.cols;
    }
}
// (removed: the old cv::cuda::BFMatcher_CUDA::radiusMatch(query, matches, maxDistance, masks, compactResult)
//  built on radiusMatchCollection/radiusMatchDownload)

Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int norm)
{
    return makePtr<BFMatcher_Impl>(norm);
}
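A hedged end-to-end sketch of the refactored module: ORB descriptors are binary, so the factory is given NORM_HAMMING. img1 and img2 are assumed to be pre-uploaded CV_8UC1 GpuMats; all names are illustrative:

    Ptr<cv::cuda::ORB> orb = cv::cuda::ORB::create();
    Ptr<cv::cuda::DescriptorMatcher> bf =
        cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);

    cv::cuda::GpuMat kps1, desc1, kps2, desc2;
    orb->detectAndComputeAsync(img1, cv::noArray(), kps1, desc1);
    orb->detectAndComputeAsync(img2, cv::noArray(), kps2, desc2);

    std::vector<cv::DMatch> matches;
    bf->match(desc1, desc2, matches);   // blocking convenience overload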
#endif
/* !defined (HAVE_CUDA) */
modules/cudafeatures2d/src/cuda/fast.cu
@@ -279,7 +279,7 @@ namespace cv { namespace cuda { namespace device
         #endif
     }

-    int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold)
+    int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream)
     {
         void* counter_ptr;
         cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
@@ -290,29 +290,29 @@ namespace cv { namespace cuda { namespace device
         grid.x = divUp(img.cols - 6, block.x);
         grid.y = divUp(img.rows - 6, block.y);

-        cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
+        cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) );

         if (score.data)
         {
             if (mask.data)
-                calcKeypoints<true><<<grid, block>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
+                calcKeypoints<true><<<grid, block, 0, stream>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
             else
-                calcKeypoints<true><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
+                calcKeypoints<true><<<grid, block, 0, stream>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
         }
         else
         {
             if (mask.data)
-                calcKeypoints<false><<<grid, block>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
+                calcKeypoints<false><<<grid, block, 0, stream>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
             else
-                calcKeypoints<false><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
+                calcKeypoints<false><<<grid, block, 0, stream>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
         }

         cudaSafeCall( cudaGetLastError() );
-        cudaSafeCall( cudaDeviceSynchronize() );

         unsigned int count;
-        cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+        cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) );
+        cudaSafeCall( cudaStreamSynchronize(stream) );

         return count;
     }
...
@@ -356,7 +356,7 @@ namespace cv { namespace cuda { namespace device
         #endif
     }

-    int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response)
+    int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream)
     {
         void* counter_ptr;
         cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
@@ -366,15 +366,15 @@ namespace cv { namespace cuda { namespace device
         dim3 grid;
         grid.x = divUp(count, block.x);

-        cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
+        cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) );

-        nonmaxSuppression<<<grid, block>>>(kpLoc, count, score, loc, response);
+        nonmaxSuppression<<<grid, block, 0, stream>>>(kpLoc, count, score, loc, response);

         cudaSafeCall( cudaGetLastError() );
-        cudaSafeCall( cudaDeviceSynchronize() );

         unsigned int new_count;
-        cudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+        cudaSafeCall( cudaMemcpyAsync(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) );
+        cudaSafeCall( cudaStreamSynchronize(stream) );

         return new_count;
     }
...
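This change moves the FAST kernels from the default stream onto a caller-supplied stream: the device counter is cleared with cudaMemsetAsync, the kernels launch with an explicit stream argument, and the device-wide cudaDeviceSynchronize is replaced by a cudaMemcpyAsync followed by cudaStreamSynchronize, so only the one stream blocks. A self-contained sketch of the same pattern with a hypothetical kernel and counter (error checking omitted for brevity):

    #include <cuda_runtime.h>

    __device__ unsigned int g_counter;

    __global__ void countHits(const unsigned char* data, int n)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n && data[i] > 128)
            atomicAdd(&g_counter, 1);       // kernel-side counter update
    }

    unsigned int countHitsOnStream(const unsigned char* d_data, int n, cudaStream_t stream)
    {
        void* counter_ptr;
        cudaGetSymbolAddress(&counter_ptr, g_counter);

        // zero the counter in stream order instead of blocking the whole device
        cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream);
        countHits<<<(n + 255) / 256, 256, 0, stream>>>(d_data, n);

        unsigned int count = 0;
        // stream-ordered readback; only this stream is synchronized,
        // so work queued on other streams keeps running
        cudaMemcpyAsync(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream);
        cudaStreamSynchronize(stream);
        return count;
    }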
modules/cudafeatures2d/src/fast.cpp
...
@@ -47,124 +47,162 @@ using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

// (removed: the no-CUDA stubs for cv::cuda::FAST_CUDA - the constructor, both
//  operator() overloads, downloadKeypoints, convertKeypoints, release,
//  calcKeyPointsLocation and getKeyPoints, each of which only called throw_no_cuda())

Ptr<cv::cuda::FastFeatureDetector> cv::cuda::FastFeatureDetector::create(int, bool, int, int)
{
    throw_no_cuda();
    return Ptr<cv::cuda::FastFeatureDetector>();
}
#else
/* !defined (HAVE_CUDA) */
// (removed: the cv::cuda::FAST_CUDA constructor with its
//  nonmaxSuppression/threshold/keypointsRatio/count_ members; the device-side
//  declarations below now carry an explicit cudaStream_t parameter)

namespace cv { namespace cuda { namespace device
{
    namespace fast
    {
        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream);
        int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream);
    }
}}}
// (removed: cv::cuda::FAST_CUDA::operator()(image, mask, keypoints), which ran the
//  GpuMat overload and then downloadKeypoints, together with downloadKeypoints and
//  convertKeypoints; FAST_Impl below covers the same ground via detect/detectAsync/convert)

namespace
{
    class FAST_Impl : public cv::cuda::FastFeatureDetector
    {
    public:
        FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints);

        virtual void detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask);
        virtual void detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream);

        virtual void convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints);

        virtual void setThreshold(int threshold) { threshold_ = threshold; }
        virtual int getThreshold() const { return threshold_; }

        virtual void setNonmaxSuppression(bool f) { nonmaxSuppression_ = f; }
        virtual bool getNonmaxSuppression() const { return nonmaxSuppression_; }

        virtual void setMaxNumPoints(int max_npoints) { max_npoints_ = max_npoints; }
        virtual int getMaxNumPoints() const { return max_npoints_; }

        virtual void setType(int type) { CV_Assert( type == TYPE_9_16 ); }
        virtual int getType() const { return TYPE_9_16; }

    private:
        int threshold_;
        bool nonmaxSuppression_;
        int max_npoints_;
    };
    FAST_Impl::FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints) :
        threshold_(threshold), nonmaxSuppression_(nonmaxSuppression), max_npoints_(max_npoints)
    {
    }
    // (removed: cv::cuda::FAST_CUDA::operator()(img, mask, GpuMat& keypoints), the old
    //  stream-less device::fast declarations, and the start of calcKeyPointsLocation)

    void FAST_Impl::detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask)
    {
        if (_image.empty())
        {
            keypoints.clear();
            return;
        }

        BufferPool pool(Stream::Null());
        GpuMat d_keypoints = pool.getBuffer(ROWS_COUNT, max_npoints_, CV_16SC2);

        detectAsync(_image, d_keypoints, _mask, Stream::Null());
        convert(d_keypoints, keypoints);
    }
    void FAST_Impl::detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream)
    {
        using namespace cv::cuda::device::fast;

        const GpuMat img = _image.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        CV_Assert( img.type() == CV_8UC1 );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()) );

        BufferPool pool(stream);

        GpuMat kpLoc = pool.getBuffer(1, max_npoints_, CV_16SC2);

        GpuMat score;
        if (nonmaxSuppression_)
        {
            score = pool.getBuffer(img.size(), CV_32SC1);
            score.setTo(Scalar::all(0), stream);
        }

        int count = calcKeypoints_gpu(img, mask, kpLoc.ptr<short2>(), max_npoints_, score, threshold_, StreamAccessor::getStream(stream));
        count = std::min(count, max_npoints_);

        if (count == 0)
        {
            _keypoints.release();
            return;
        }

        ensureSizeIsEnough(ROWS_COUNT, count, CV_32FC1, _keypoints);
        GpuMat& keypoints = _keypoints.getGpuMatRef();

        if (nonmaxSuppression_)
        {
            count = nonmaxSuppression_gpu(kpLoc.ptr<short2>(), count, score,
                                          keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW),
                                          StreamAccessor::getStream(stream));
            if (count == 0)
            {
                keypoints.release();
            }
            else
            {
                keypoints.cols = count;
            }
        }
        else
        {
            GpuMat locRow(1, count, kpLoc.type(), keypoints.ptr(0));
            kpLoc.colRange(0, count).copyTo(locRow, stream);
            keypoints.row(1).setTo(Scalar::all(0), stream);
        }
    }
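A minimal usage sketch of the new detector interface (d_img is assumed to be a pre-uploaded CV_8UC1 GpuMat; names illustrative):

    Ptr<cv::cuda::FastFeatureDetector> fast =
        cv::cuda::FastFeatureDetector::create(/*threshold=*/20, /*nonmaxSuppression=*/true);

    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_keypoints;
    fast->detectAsync(d_img, d_keypoints, cv::noArray(), stream);
    stream.waitForCompletion();

    std::vector<cv::KeyPoint> keypoints;
    fast->convert(d_keypoints, keypoints);   // decode the 2-row GPU layout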
    // (removed: the remainder of cv::cuda::FAST_CUDA::calcKeyPointsLocation, which
    //  cached count_ = min(calcKeypoints_gpu(...), maxKeypoints), and
    //  FAST_CUDA::getKeyPoints, which ran nonmaxSuppression_gpu or copied kpLoc_ into
    //  the keypoint rows; detectAsync above now performs both steps on the stream)
    void FAST_Impl::convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints)
    {
        if (_gpu_keypoints.empty())
        {
            keypoints.clear();
            return;
        }

        Mat h_keypoints;
        if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_keypoints.getGpuMat().download(h_keypoints);
        }
        else
        {
            h_keypoints = _gpu_keypoints.getMat();
        }

        CV_Assert( h_keypoints.rows == ROWS_COUNT );
        CV_Assert( h_keypoints.elemSize() == 4 );

        const int npoints = h_keypoints.cols;

        keypoints.resize(npoints);

        const short2* loc_row = h_keypoints.ptr<short2>(LOCATION_ROW);
        const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);

        for (int i = 0; i < npoints; ++i)
        {
            KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
            keypoints[i] = kp;
        }
    }
}
void
cv
::
cuda
::
FAST_CUDA
::
release
(
)
Ptr
<
cv
::
cuda
::
FastFeatureDetector
>
cv
::
cuda
::
FastFeatureDetector
::
create
(
int
threshold
,
bool
nonmaxSuppression
,
int
type
,
int
max_npoints
)
{
kpLoc_
.
release
();
score_
.
release
();
d_keypoints_
.
release
();
CV_Assert
(
type
==
TYPE_9_16
);
return
makePtr
<
FAST_Impl
>
(
threshold
,
nonmaxSuppression
,
max_npoints
);
}
#endif
/* !defined (HAVE_CUDA) */
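For orientation, a minimal usage sketch of the refactored FAST interface follows. It is not part of the commit; the function name, image variable, and parameter values are illustrative.

#include <opencv2/cudafeatures2d.hpp>

// Sketch only: the new factory-based FAST API introduced by this commit.
// "h_gray" is an assumed CV_8UC1 host image.
void detectFastKeypoints(const cv::Mat& h_gray)
{
    cv::cuda::GpuMat d_gray(h_gray);

    cv::Ptr<cv::cuda::FastFeatureDetector> fast =
        cv::cuda::FastFeatureDetector::create(20 /*threshold*/, true /*nonmaxSuppression*/);

    // Synchronous path: detect() runs detectAsync() on the null stream and converts.
    std::vector<cv::KeyPoint> keypoints;
    fast->detect(d_gray, keypoints);

    // Asynchronous path: results stay on the GPU until convert() is called.
    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_keypoints;
    fast->detectAsync(d_gray, d_keypoints, cv::noArray(), stream);
    stream.waitForCompletion();
    fast->convert(d_keypoints, keypoints);
}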
modules/cudafeatures2d/src/feature2d_async.cpp 0 → 100644
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
cv::cuda::Feature2DAsync::~Feature2DAsync()
{
}

void cv::cuda::Feature2DAsync::detectAsync(InputArray image, OutputArray keypoints, InputArray mask, Stream& stream)
{
    if (image.empty())
    {
        keypoints.clear();
        return;
    }

    detectAndComputeAsync(image, mask, keypoints, noArray(), false, stream);
}

void cv::cuda::Feature2DAsync::computeAsync(InputArray image, OutputArray keypoints, OutputArray descriptors, Stream& stream)
{
    if (image.empty())
    {
        descriptors.release();
        return;
    }

    detectAndComputeAsync(image, noArray(), keypoints, descriptors, true, stream);
}

void cv::cuda::Feature2DAsync::detectAndComputeAsync(InputArray /*image*/,
                                                     InputArray /*mask*/,
                                                     OutputArray /*keypoints*/,
                                                     OutputArray /*descriptors*/,
                                                     bool /*useProvidedKeypoints*/,
                                                     Stream& /*stream*/)
{
    CV_Error(Error::StsNotImplemented, "");
}
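The new base class is a thin forwarding layer: detectAsync() and computeAsync() both funnel into detectAndComputeAsync(), which concrete detectors override. A small sketch of that equivalence, not part of the commit; the function and variable names are illustrative.

// Sketch only: the two calls below do the same work for any detector derived
// from cv::cuda::Feature2DAsync, per the forwarding code above.
void detectTwoWays(const cv::Ptr<cv::cuda::Feature2DAsync>& det,
                   const cv::cuda::GpuMat& image, cv::cuda::Stream& stream)
{
    cv::cuda::GpuMat kp1, kp2;

    // Convenience entry point...
    det->detectAsync(image, kp1, cv::noArray(), stream);

    // ...equivalent to the full call with no descriptor output and
    // useProvidedKeypoints == false.
    det->detectAndComputeAsync(image, cv::noArray(), kp2, cv::noArray(), false, stream);
}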
modules/cudafeatures2d/src/orb.cpp
...
@@ -47,18 +47,7 @@ using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

// ---- removed: the old ORB_CUDA no-CUDA stubs ----

cv::cuda::ORB_CUDA::ORB_CUDA(int, float, int, int, int, int, int, int) : fastDetector_(20) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::operator ()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::operator ()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::operator ()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, GpuMat&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::release() { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat&, const GpuMat&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::computeKeyPointsPyramid() { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat&) { throw_no_cuda(); }

// ---- added: the single stub needed by the new factory interface ----

Ptr<cv::cuda::ORB> cv::cuda::ORB::create(int, float, int, int, int, int, int, int, int, bool) { throw_no_cuda(); return Ptr<cv::cuda::ORB>(); }

#else /* !defined (HAVE_CUDA) */
...
@@ -346,7 +335,100 @@ namespace
        -1,-6, 0,-11/*mean (0.127148), correlation (0.547401)*/
    };

    // old: void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize)

    class ORB_Impl : public cv::cuda::ORB
    {
    public:
        ORB_Impl(int nfeatures, float scaleFactor, int nlevels, int edgeThreshold,
                 int firstLevel, int WTA_K, int scoreType, int patchSize,
                 int fastThreshold, bool blurForDescriptor);

        virtual void detectAndCompute(InputArray _image, InputArray _mask, std::vector<KeyPoint>& keypoints, OutputArray _descriptors, bool useProvidedKeypoints);
        virtual void detectAndComputeAsync(InputArray _image, InputArray _mask, OutputArray _keypoints, OutputArray _descriptors, bool useProvidedKeypoints, Stream& stream);

        virtual void convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints);

        virtual int descriptorSize() const { return kBytes; }
        virtual int descriptorType() const { return CV_8U; }
        virtual int defaultNorm() const { return NORM_HAMMING; }

        virtual void setMaxFeatures(int maxFeatures) { nFeatures_ = maxFeatures; }
        virtual int getMaxFeatures() const { return nFeatures_; }

        virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
        virtual double getScaleFactor() const { return scaleFactor_; }

        virtual void setNLevels(int nlevels) { nLevels_ = nlevels; }
        virtual int getNLevels() const { return nLevels_; }

        virtual void setEdgeThreshold(int edgeThreshold) { edgeThreshold_ = edgeThreshold; }
        virtual int getEdgeThreshold() const { return edgeThreshold_; }

        virtual void setFirstLevel(int firstLevel) { firstLevel_ = firstLevel; }
        virtual int getFirstLevel() const { return firstLevel_; }

        virtual void setWTA_K(int wta_k) { WTA_K_ = wta_k; }
        virtual int getWTA_K() const { return WTA_K_; }

        virtual void setScoreType(int scoreType) { scoreType_ = scoreType; }
        virtual int getScoreType() const { return scoreType_; }

        virtual void setPatchSize(int patchSize) { patchSize_ = patchSize; }
        virtual int getPatchSize() const { return patchSize_; }

        virtual void setFastThreshold(int fastThreshold) { fastThreshold_ = fastThreshold; }
        virtual int getFastThreshold() const { return fastThreshold_; }

        virtual void setBlurForDescriptor(bool blurForDescriptor) { blurForDescriptor_ = blurForDescriptor; }
        virtual bool getBlurForDescriptor() const { return blurForDescriptor_; }

    private:
        int nFeatures_;
        float scaleFactor_;
        int nLevels_;
        int edgeThreshold_;
        int firstLevel_;
        int WTA_K_;
        int scoreType_;
        int patchSize_;
        int fastThreshold_;
        bool blurForDescriptor_;

    private:
        void buildScalePyramids(InputArray _image, InputArray _mask);
        void computeKeyPointsPyramid();
        void computeDescriptors(OutputArray _descriptors);
        void mergeKeyPoints(OutputArray _keypoints);

    private:
        Ptr<cv::cuda::FastFeatureDetector> fastDetector_;

        //! The number of desired features per scale
        std::vector<size_t> n_features_per_level_;

        //! Points to compute BRIEF descriptors from
        GpuMat pattern_;

        std::vector<GpuMat> imagePyr_;
        std::vector<GpuMat> maskPyr_;

        GpuMat buf_;

        std::vector<GpuMat> keyPointsPyr_;
        std::vector<int> keyPointsCount_;

        Ptr<cuda::Filter> blurFilter_;

        GpuMat d_keypoints_;
    };

    static void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize)
    {
        RNG rng(0x12345678);
...
@@ -381,7 +463,7 @@ namespace
        }
    }

    // old: void makeRandomPattern(int patchSize, Point* pattern, int npoints)
    static void makeRandomPattern(int patchSize, Point* pattern, int npoints)
    {
        // we always start with a fixed seed,
        // to make patterns the same on each run
...
@@ -393,155 +475,189 @@ namespace
            pattern[i].y = rng.uniform(-patchSize / 2, patchSize / 2 + 1);
        }
    }
}

// ---- removed: old ORB_CUDA constructor ----

cv::cuda::ORB_CUDA::ORB_CUDA(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) :
    nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold),
    firstLevel_(firstLevel), WTA_K_(WTA_K), scoreType_(scoreType), patchSize_(patchSize),
    fastDetector_(DEFAULT_FAST_THRESHOLD)
{
    CV_Assert(patchSize_ >= 2);

    // fill the extractors and descriptors for the corresponding scales
    float factor = 1.0f / scaleFactor_;
    float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_));

    n_features_per_level_.resize(nLevels_);
    size_t sum_n_features = 0;
    for (int level = 0; level < nLevels_ - 1; ++level)
    {
        n_features_per_level_[level] = cvRound(n_desired_features_per_scale);
        sum_n_features += n_features_per_level_[level];
        n_desired_features_per_scale *= factor;
    }
    n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features;

    // pre-compute the end of a row in a circular patch
    int half_patch_size = patchSize_ / 2;
    std::vector<int> u_max(half_patch_size + 2);
    for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
        u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));

    // Make sure we are symmetric
    for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
    {
        while (u_max[v_0] == u_max[v_0 + 1])
            ++v_0;
        u_max[v] = v_0;
        ++v_0;
    }
    CV_Assert(u_max.size() < 32);
    cv::cuda::device::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));

    // Calc pattern
    const int npoints = 512;
    Point pattern_buf[npoints];
    const Point* pattern0 = (const Point*)bit_pattern_31_;
    if (patchSize_ != 31)
    {
        pattern0 = pattern_buf;
        makeRandomPattern(patchSize_, pattern_buf, npoints);
    }

    CV_Assert(WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4);

    Mat h_pattern;

    if (WTA_K_ == 2)
    {
        h_pattern.create(2, npoints, CV_32SC1);

        int* pattern_x_ptr = h_pattern.ptr<int>(0);
        int* pattern_y_ptr = h_pattern.ptr<int>(1);

        for (int i = 0; i < npoints; ++i)
        {
            pattern_x_ptr[i] = pattern0[i].x;
            pattern_y_ptr[i] = pattern0[i].y;
        }
    }
    else
    {
        int ntuples = descriptorSize() * 4;
        initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints);
    }

    pattern_.upload(h_pattern);

    blurFilter = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101);

    blurForDescriptor = false;
}

// ---- added: ORB_Impl implementation ----

ORB_Impl::ORB_Impl(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize, int fastThreshold, bool blurForDescriptor) :
    nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold),
    firstLevel_(firstLevel), WTA_K_(WTA_K), scoreType_(scoreType), patchSize_(patchSize),
    fastThreshold_(fastThreshold), blurForDescriptor_(blurForDescriptor)
{
    CV_Assert(patchSize_ >= 2);
    CV_Assert(WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4);

    fastDetector_ = cuda::FastFeatureDetector::create(fastThreshold_);

    // fill the extractors and descriptors for the corresponding scales
    float factor = 1.0f / scaleFactor_;
    float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_));

    n_features_per_level_.resize(nLevels_);
    size_t sum_n_features = 0;
    for (int level = 0; level < nLevels_ - 1; ++level)
    {
        n_features_per_level_[level] = cvRound(n_desired_features_per_scale);
        sum_n_features += n_features_per_level_[level];
        n_desired_features_per_scale *= factor;
    }
    n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features;

    // pre-compute the end of a row in a circular patch
    int half_patch_size = patchSize_ / 2;
    std::vector<int> u_max(half_patch_size + 2);
    for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
    {
        u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
    }

    // Make sure we are symmetric
    for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
    {
        while (u_max[v_0] == u_max[v_0 + 1])
            ++v_0;
        u_max[v] = v_0;
        ++v_0;
    }
    CV_Assert(u_max.size() < 32);
    cv::cuda::device::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));

    // Calc pattern
    const int npoints = 512;
    Point pattern_buf[npoints];
    const Point* pattern0 = (const Point*)bit_pattern_31_;
    if (patchSize_ != 31)
    {
        pattern0 = pattern_buf;
        makeRandomPattern(patchSize_, pattern_buf, npoints);
    }

    Mat h_pattern;
    if (WTA_K_ == 2)
    {
        h_pattern.create(2, npoints, CV_32SC1);

        int* pattern_x_ptr = h_pattern.ptr<int>(0);
        int* pattern_y_ptr = h_pattern.ptr<int>(1);

        for (int i = 0; i < npoints; ++i)
        {
            pattern_x_ptr[i] = pattern0[i].x;
            pattern_y_ptr[i] = pattern0[i].y;
        }
    }
    else
    {
        int ntuples = descriptorSize() * 4;
        initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints);
    }

    pattern_.upload(h_pattern);

    blurFilter_ = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101);
}

void ORB_Impl::detectAndCompute(InputArray _image, InputArray _mask, std::vector<KeyPoint>& keypoints, OutputArray _descriptors, bool useProvidedKeypoints)
{
    CV_Assert(useProvidedKeypoints == false);

    detectAndComputeAsync(_image, _mask, d_keypoints_, _descriptors, false, Stream::Null());
    convert(d_keypoints_, keypoints);
}

void ORB_Impl::detectAndComputeAsync(InputArray _image, InputArray _mask, OutputArray _keypoints, OutputArray _descriptors, bool useProvidedKeypoints, Stream& stream)
{
    CV_Assert(useProvidedKeypoints == false);

    buildScalePyramids(_image, _mask);
    computeKeyPointsPyramid();
    if (_descriptors.needed())
    {
        computeDescriptors(_descriptors);
    }
    mergeKeyPoints(_keypoints);
}

namespace
{
    // old: inline float getScale(float scaleFactor, int firstLevel, int level)
    static float getScale(float scaleFactor, int firstLevel, int level)
    {
        return pow(scaleFactor, level - firstLevel);
    }
}

// ---- removed: old buildScalePyramids ----

void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat& image, const GpuMat& mask)
{
    CV_Assert(image.type() == CV_8UC1);
    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));

    imagePyr_.resize(nLevels_);
    maskPyr_.resize(nLevels_);

    for (int level = 0; level < nLevels_; ++level)
    {
        float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level);

        Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale));

        ensureSizeIsEnough(sz, image.type(), imagePyr_[level]);
        ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]);
        maskPyr_[level].setTo(Scalar::all(255));

        // Compute the resized image
        if (level != firstLevel_)
        {
            if (level < firstLevel_)
            {
                cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);

                if (!mask.empty())
                    cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
            }
            else
            {
                cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);

                if (!mask.empty())
                {
                    cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
                    cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO);
                }
            }
        }
        else
        {
            image.copyTo(imagePyr_[level]);

            if (!mask.empty())
                mask.copyTo(maskPyr_[level]);
        }

        // Filter keypoints by image border
        ensureSizeIsEnough(sz, CV_8UC1, buf_);
        buf_.setTo(Scalar::all(0));
        Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_);
        buf_(inner).setTo(Scalar::all(255));

        cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]);
    }
}

// ---- added: new buildScalePyramids ----

void ORB_Impl::buildScalePyramids(InputArray _image, InputArray _mask)
{
    const GpuMat image = _image.getGpuMat();
    const GpuMat mask = _mask.getGpuMat();

    CV_Assert( image.type() == CV_8UC1 );
    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) );

    imagePyr_.resize(nLevels_);
    maskPyr_.resize(nLevels_);

    for (int level = 0; level < nLevels_; ++level)
    {
        float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level);

        Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale));

        ensureSizeIsEnough(sz, image.type(), imagePyr_[level]);
        ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]);
        maskPyr_[level].setTo(Scalar::all(255));

        // Compute the resized image
        if (level != firstLevel_)
        {
            if (level < firstLevel_)
            {
                cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);

                if (!mask.empty())
                    cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
            }
            else
            {
                cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);

                if (!mask.empty())
                {
                    cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
                    cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO);
                }
            }
        }
        else
        {
            image.copyTo(imagePyr_[level]);

            if (!mask.empty())
                mask.copyTo(maskPyr_[level]);
        }

        // Filter keypoints by image border
        ensureSizeIsEnough(sz, CV_8UC1, buf_);
        buf_.setTo(Scalar::all(0));
        Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_);
        buf_(inner).setTo(Scalar::all(255));

        cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]);
    }
}

namespace
{
    // takes keypoints and culls them by the response
    // old: void cull(GpuMat& keypoints, int& count, int n_points)
    static void cull(GpuMat& keypoints, int& count, int n_points)
    {
        using namespace cv::cuda::device::orb;
...
@@ -554,222 +670,199 @@ namespace
            return;
        }

        // old: count = cull_gpu(keypoints.ptr<int>(FAST_CUDA::LOCATION_ROW), keypoints.ptr<float>(FAST_CUDA::RESPONSE_ROW), count, n_points);
        count = cull_gpu(keypoints.ptr<int>(cuda::FastFeatureDetector::LOCATION_ROW), keypoints.ptr<float>(cuda::FastFeatureDetector::RESPONSE_ROW), count, n_points);
    }
}

// ---- removed: old computeKeyPointsPyramid ----

void cv::cuda::ORB_CUDA::computeKeyPointsPyramid()
{
    using namespace cv::cuda::device::orb;

    int half_patch_size = patchSize_ / 2;

    keyPointsPyr_.resize(nLevels_);
    keyPointsCount_.resize(nLevels_);

    for (int level = 0; level < nLevels_; ++level)
    {
        keyPointsCount_[level] = fastDetector_.calcKeyPointsLocation(imagePyr_[level], maskPyr_[level]);

        if (keyPointsCount_[level] == 0)
            continue;

        ensureSizeIsEnough(3, keyPointsCount_[level], CV_32FC1, keyPointsPyr_[level]);

        GpuMat fastKpRange = keyPointsPyr_[level].rowRange(0, 2);
        keyPointsCount_[level] = fastDetector_.getKeyPoints(fastKpRange);

        if (keyPointsCount_[level] == 0)
            continue;

        int n_features = static_cast<int>(n_features_per_level_[level]);

        if (scoreType_ == ORB::HARRIS_SCORE)
        {
            // Keep more points than necessary as FAST does not give amazing corners
            cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features);

            // Compute the Harris cornerness (better scoring than FAST)
            HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(1), keyPointsCount_[level], 7, HARRIS_K, 0);
        }

        //cull to the final desired level, using the new Harris scores or the original FAST scores.
        cull(keyPointsPyr_[level], keyPointsCount_[level], n_features);

        // Compute orientation
        IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2), keyPointsCount_[level], half_patch_size, 0);
    }
}

// ---- added: new computeKeyPointsPyramid ----

void ORB_Impl::computeKeyPointsPyramid()
{
    using namespace cv::cuda::device::orb;

    int half_patch_size = patchSize_ / 2;

    keyPointsPyr_.resize(nLevels_);
    keyPointsCount_.resize(nLevels_);

    fastDetector_->setThreshold(fastThreshold_);

    for (int level = 0; level < nLevels_; ++level)
    {
        fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area());

        GpuMat fastKpRange;
        fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null());

        keyPointsCount_[level] = fastKpRange.cols;

        if (keyPointsCount_[level] == 0)
            continue;

        ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]);
        fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2));

        const int n_features = static_cast<int>(n_features_per_level_[level]);

        if (scoreType_ == ORB::HARRIS_SCORE)
        {
            // Keep more points than necessary as FAST does not give amazing corners
            cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features);

            // Compute the Harris cornerness (better scoring than FAST)
            HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(1), keyPointsCount_[level], 7, HARRIS_K, 0);
        }

        //cull to the final desired level, using the new Harris scores or the original FAST scores.
        cull(keyPointsPyr_[level], keyPointsCount_[level], n_features);

        // Compute orientation
        IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2), keyPointsCount_[level], half_patch_size, 0);
    }
}

// ---- removed: old computeDescriptors ----

void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat& descriptors)
{
    using namespace cv::cuda::device::orb;

    int nAllkeypoints = 0;

    for (int level = 0; level < nLevels_; ++level)
        nAllkeypoints += keyPointsCount_[level];

    if (nAllkeypoints == 0)
    {
        descriptors.release();
        return;
    }

    ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, descriptors);

    int offset = 0;

    for (int level = 0; level < nLevels_; ++level)
    {
        if (keyPointsCount_[level] == 0)
            continue;

        GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]);

        if (blurForDescriptor)
        {
            // preprocess the resized image
            ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_);
            blurFilter->apply(imagePyr_[level], buf_);
        }

        computeOrbDescriptor_gpu(blurForDescriptor ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2),
            keyPointsCount_[level], pattern_.ptr<int>(0), pattern_.ptr<int>(1), descRange, descriptorSize(), WTA_K_, 0);

        offset += keyPointsCount_[level];
    }
}

// ---- added: new computeDescriptors ----

void ORB_Impl::computeDescriptors(OutputArray _descriptors)
{
    using namespace cv::cuda::device::orb;

    int nAllkeypoints = 0;

    for (int level = 0; level < nLevels_; ++level)
        nAllkeypoints += keyPointsCount_[level];

    if (nAllkeypoints == 0)
    {
        _descriptors.release();
        return;
    }

    ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, _descriptors);
    GpuMat descriptors = _descriptors.getGpuMat();

    int offset = 0;

    for (int level = 0; level < nLevels_; ++level)
    {
        if (keyPointsCount_[level] == 0)
            continue;

        GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]);

        if (blurForDescriptor_)
        {
            // preprocess the resized image
            ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_);
            blurFilter_->apply(imagePyr_[level], buf_);
        }

        computeOrbDescriptor_gpu(blurForDescriptor_ ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2),
            keyPointsCount_[level], pattern_.ptr<int>(0), pattern_.ptr<int>(1), descRange, descriptorSize(), WTA_K_, 0);

        offset += keyPointsCount_[level];
    }
}

// ---- removed: old mergeKeyPoints ----

void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat& keypoints)
{
    using namespace cv::cuda::device::orb;

    int nAllkeypoints = 0;

    for (int level = 0; level < nLevels_; ++level)
        nAllkeypoints += keyPointsCount_[level];

    if (nAllkeypoints == 0)
    {
        keypoints.release();
        return;
    }

    ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, keypoints);

    int offset = 0;

    for (int level = 0; level < nLevels_; ++level)
    {
        if (keyPointsCount_[level] == 0)
            continue;

        float sf = getScale(scaleFactor_, firstLevel_, level);

        GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]);

        float locScale = level != firstLevel_ ? sf : 1.0f;

        mergeLocation_gpu(keyPointsPyr_[level].ptr<short2>(0), keyPointsRange.ptr<float>(0), keyPointsRange.ptr<float>(1), keyPointsCount_[level], locScale, 0);

        GpuMat range = keyPointsRange.rowRange(2, 4);
        keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range);

        keyPointsRange.row(4).setTo(Scalar::all(level));
        keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf));

        offset += keyPointsCount_[level];
    }
}

// ---- added: new mergeKeyPoints ----

void ORB_Impl::mergeKeyPoints(OutputArray _keypoints)
{
    using namespace cv::cuda::device::orb;

    int nAllkeypoints = 0;

    for (int level = 0; level < nLevels_; ++level)
        nAllkeypoints += keyPointsCount_[level];

    if (nAllkeypoints == 0)
    {
        _keypoints.release();
        return;
    }

    ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, _keypoints);
    GpuMat& keypoints = _keypoints.getGpuMatRef();

    int offset = 0;

    for (int level = 0; level < nLevels_; ++level)
    {
        if (keyPointsCount_[level] == 0)
            continue;

        float sf = getScale(scaleFactor_, firstLevel_, level);

        GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]);

        float locScale = level != firstLevel_ ? sf : 1.0f;

        mergeLocation_gpu(keyPointsPyr_[level].ptr<short2>(0), keyPointsRange.ptr<float>(0), keyPointsRange.ptr<float>(1), keyPointsCount_[level], locScale, 0);

        GpuMat range = keyPointsRange.rowRange(2, 4);
        keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range);

        keyPointsRange.row(4).setTo(Scalar::all(level));
        keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf));

        offset += keyPointsCount_[level];
    }
}

// ---- removed: old keypoint download/convert helpers ----

void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints)
{
    if (d_keypoints.empty())
    {
        keypoints.clear();
        return;
    }

    Mat h_keypoints(d_keypoints);

    convertKeyPoints(h_keypoints, keypoints);
}

void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints)
{
    if (d_keypoints.empty())
    {
        keypoints.clear();
        return;
    }

    CV_Assert(d_keypoints.type() == CV_32FC1 && d_keypoints.rows == ROWS_COUNT);

    const float* x_ptr = d_keypoints.ptr<float>(X_ROW);
    const float* y_ptr = d_keypoints.ptr<float>(Y_ROW);
    const float* response_ptr = d_keypoints.ptr<float>(RESPONSE_ROW);
    const float* angle_ptr = d_keypoints.ptr<float>(ANGLE_ROW);
    const float* octave_ptr = d_keypoints.ptr<float>(OCTAVE_ROW);
    const float* size_ptr = d_keypoints.ptr<float>(SIZE_ROW);

    keypoints.resize(d_keypoints.cols);

    for (int i = 0; i < d_keypoints.cols; ++i)
    {
        KeyPoint kp;

        kp.pt.x = x_ptr[i];
        kp.pt.y = y_ptr[i];
        kp.response = response_ptr[i];
        kp.angle = angle_ptr[i];
        kp.octave = static_cast<int>(octave_ptr[i]);
        kp.size = size_ptr[i];

        keypoints[i] = kp;
    }
}

// ---- added: new convert ----

void ORB_Impl::convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints)
{
    if (_gpu_keypoints.empty())
    {
        keypoints.clear();
        return;
    }

    Mat h_keypoints;
    if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT)
    {
        _gpu_keypoints.getGpuMat().download(h_keypoints);
    }
    else
    {
        h_keypoints = _gpu_keypoints.getMat();
    }

    CV_Assert( h_keypoints.rows == ROWS_COUNT );
    CV_Assert( h_keypoints.type() == CV_32FC1 );

    const int npoints = h_keypoints.cols;

    keypoints.resize(npoints);

    const float* x_ptr = h_keypoints.ptr<float>(X_ROW);
    const float* y_ptr = h_keypoints.ptr<float>(Y_ROW);
    const float* response_ptr = h_keypoints.ptr<float>(RESPONSE_ROW);
    const float* angle_ptr = h_keypoints.ptr<float>(ANGLE_ROW);
    const float* octave_ptr = h_keypoints.ptr<float>(OCTAVE_ROW);
    const float* size_ptr = h_keypoints.ptr<float>(SIZE_ROW);

    for (int i = 0; i < npoints; ++i)
    {
        KeyPoint kp;

        kp.pt.x = x_ptr[i];
        kp.pt.y = y_ptr[i];
        kp.response = response_ptr[i];
        kp.angle = angle_ptr[i];
        kp.octave = static_cast<int>(octave_ptr[i]);
        kp.size = size_ptr[i];

        keypoints[i] = kp;
    }
}

// ---- removed: old operator() overloads and release() ----

void cv::cuda::ORB_CUDA::operator ()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints)
{
    buildScalePyramids(image, mask);
    computeKeyPointsPyramid();
    mergeKeyPoints(keypoints);
}

void cv::cuda::ORB_CUDA::operator ()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors)
{
    buildScalePyramids(image, mask);
    computeKeyPointsPyramid();
    computeDescriptors(descriptors);
    mergeKeyPoints(keypoints);
}

void cv::cuda::ORB_CUDA::operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
{
    (*this)(image, mask, d_keypoints_);
    downloadKeyPoints(d_keypoints_, keypoints);
}

void cv::cuda::ORB_CUDA::operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors)
{
    (*this)(image, mask, d_keypoints_, descriptors);
    downloadKeyPoints(d_keypoints_, keypoints);
}

void cv::cuda::ORB_CUDA::release()
{
    imagePyr_.clear();
    maskPyr_.clear();

    buf_.release();

    keyPointsPyr_.clear();

    fastDetector_.release();

    d_keypoints_.release();
}

// ---- added: factory for the new interface ----

Ptr<cv::cuda::ORB> cv::cuda::ORB::create(int nfeatures, float scaleFactor, int nlevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize, int fastThreshold, bool blurForDescriptor)
{
    return makePtr<ORB_Impl>(nfeatures, scaleFactor, nlevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize, fastThreshold, blurForDescriptor);
}

#endif /* !defined (HAVE_CUDA) */
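To see how the new entry points fit together, here is a minimal usage sketch. It is not part of the commit; the function name, the GpuMat variable, and the parameter values are illustrative.

// Sketch only: the new factory-based CUDA ORB API defined above.
// "d_gray" is an assumed CV_8UC1 GpuMat already on the device.
void runOrb(const cv::cuda::GpuMat& d_gray)
{
    cv::Ptr<cv::cuda::ORB> orb = cv::cuda::ORB::create(
        500,                    // nfeatures
        1.2f,                   // scaleFactor
        8,                      // nlevels
        31,                     // edgeThreshold
        0,                      // firstLevel
        2,                      // WTA_K
        cv::ORB::HARRIS_SCORE,  // scoreType
        31,                     // patchSize
        20,                     // fastThreshold
        false);                 // blurForDescriptor

    // Synchronous wrapper: internally calls detectAndComputeAsync() + convert().
    std::vector<cv::KeyPoint> keypoints;
    cv::cuda::GpuMat descriptors;
    orb->detectAndCompute(d_gray, cv::noArray(), keypoints, descriptors);
}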
modules/cudafeatures2d/test/test_features2d.cpp
...
@@ -76,15 +76,14 @@ CUDA_TEST_P(FAST, Accuracy)
    cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());

    // old: cv::cuda::FAST_CUDA fast(threshold);
    //      fast.nonmaxSuppression = nonmaxSuppression;
    cv::Ptr<cv::cuda::FastFeatureDetector> fast = cv::cuda::FastFeatureDetector::create(threshold, nonmaxSuppression);

    if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
    {
        try
        {
            std::vector<cv::KeyPoint> keypoints;
            // old: fast(loadMat(image), cv::cuda::GpuMat(), keypoints);
            fast->detect(loadMat(image), keypoints);
        }
        catch (const cv::Exception& e)
        {
...
@@ -94,7 +93,7 @@ CUDA_TEST_P(FAST, Accuracy)
    else
    {
        std::vector<cv::KeyPoint> keypoints;
        // old: fast(loadMat(image), cv::cuda::GpuMat(), keypoints);
        fast->detect(loadMat(image), keypoints);

        std::vector<cv::KeyPoint> keypoints_gold;
        cv::FAST(image, keypoints_gold, threshold, nonmaxSuppression);
...
@@ -123,7 +122,7 @@ namespace
    IMPLEMENT_PARAM_CLASS(ORB_BlurForDescriptor, bool)
}

// old: CV_ENUM(ORB_ScoreType, ORB::HARRIS_SCORE, ORB::FAST_SCORE)
CV_ENUM(ORB_ScoreType, cv::ORB::HARRIS_SCORE, cv::ORB::FAST_SCORE)

PARAM_TEST_CASE(ORB, cv::cuda::DeviceInfo, ORB_FeaturesCount, ORB_ScaleFactor, ORB_LevelsCount, ORB_EdgeThreshold, ORB_firstLevel, ORB_WTA_K, ORB_ScoreType, ORB_PatchSize, ORB_BlurForDescriptor)
{
...
@@ -163,8 +162,9 @@ CUDA_TEST_P(ORB, Accuracy)
    cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
    mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));

    // old: cv::cuda::ORB_CUDA orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
    //      orb.blurForDescriptor = blurForDescriptor;
    cv::Ptr<cv::cuda::ORB> orb = cv::cuda::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize, 20, blurForDescriptor);

    if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
    {
...
@@ -172,7 +172,7 @@ CUDA_TEST_P(ORB, Accuracy)
        {
            std::vector<cv::KeyPoint> keypoints;
            cv::cuda::GpuMat descriptors;
            // old: orb(loadMat(image), loadMat(mask), keypoints, descriptors);
            orb->detectAndComputeAsync(loadMat(image), loadMat(mask), keypoints, descriptors);
        }
        catch (const cv::Exception& e)
        {
...
@@ -183,7 +183,7 @@ CUDA_TEST_P(ORB, Accuracy)
    {
        std::vector<cv::KeyPoint> keypoints;
        cv::cuda::GpuMat descriptors;
        // old: orb(loadMat(image), loadMat(mask), keypoints, descriptors);
        orb->detectAndCompute(loadMat(image), loadMat(mask), keypoints, descriptors);

        cv::Ptr<cv::ORB> orb_gold = cv::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
...
@@ -208,7 +208,7 @@ INSTANTIATE_TEST_CASE_P(CUDA_Features2D, ORB, testing::Combine(
    testing::Values(ORB_ScaleFactor(1.2f)),
    testing::Values(ORB_LevelsCount(4), ORB_LevelsCount(8)),
    testing::Values(ORB_EdgeThreshold(31)),
    // old: testing::Values(ORB_firstLevel(0), ORB_firstLevel(2)),
    testing::Values(ORB_firstLevel(0)),
    testing::Values(ORB_WTA_K(2), ORB_WTA_K(3), ORB_WTA_K(4)),
    testing::Values(ORB_ScoreType(cv::ORB::HARRIS_SCORE)),
    testing::Values(ORB_PatchSize(31), ORB_PatchSize(29)),
...
@@ -285,7 +285,8 @@ PARAM_TEST_CASE(BruteForceMatcher, cv::cuda::DeviceInfo, NormCode, DescriptorSiz
CUDA_TEST_P(BruteForceMatcher, Match_Single)
{
    // old: cv::cuda::BFMatcher_CUDA matcher(normCode);
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

    cv::cuda::GpuMat mask;
    if (useMask)
...
@@ -295,7 +296,7 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single)
    }

    std::vector<cv::DMatch> matches;
    // old: matcher.match(loadMat(query), loadMat(train), matches, mask);
    matcher->match(loadMat(query), loadMat(train), matches, mask);

    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
...
@@ -312,13 +313,14 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single)
CUDA_TEST_P(BruteForceMatcher, Match_Collection)
{
    // old: cv::cuda::BFMatcher_CUDA matcher(normCode);
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

    cv::cuda::GpuMat d_train(train);

    // make add() twice to test such case
    // old: matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
    // old: matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));

    // prepare masks (make first nearest match illegal)
    std::vector<cv::cuda::GpuMat> masks(2);
...
@@ -331,9 +333,9 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection)
    std::vector<cv::DMatch> matches;

    if (useMask)
        // old: matcher.match(cv::cuda::GpuMat(query), matches, masks);
        matcher->match(cv::cuda::GpuMat(query), matches, masks);
    else
        // old: matcher.match(cv::cuda::GpuMat(query), matches);
        matcher->match(cv::cuda::GpuMat(query), matches);

    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
...
@@ -366,7 +368,8 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection)
CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
{
    // old: cv::cuda::BFMatcher_CUDA matcher(normCode);
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

    const int knn = 2;
...
@@ -378,7 +381,7 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
    }

    std::vector<std::vector<cv::DMatch> > matches;
    // old: matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
    matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask);

    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
...
@@ -405,7 +408,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
{
    // old: cv::cuda::BFMatcher_CUDA matcher(normCode);
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

    const int knn = 3;
...
@@ -417,7 +421,7 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
    }

    std::vector<std::vector<cv::DMatch> > matches;
    // old: matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
    matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask);

    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
...
@@ -444,15 +448,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
{
    // old: cv::cuda::BFMatcher_CUDA matcher(normCode);
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

    const int knn = 2;

    cv::cuda::GpuMat d_train(train);

    // make add() twice to test such case
    // old: matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
    // old: matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));

    // prepare masks (make first nearest match illegal)
    std::vector<cv::cuda::GpuMat> masks(2);
...
@@ -466,9 +471,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
    std::vector<std::vector<cv::DMatch> > matches;

    if (useMask)
        // old: matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
        matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
    else
        // old: matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn);
        matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn);

    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
...
@@ -506,15 +511,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
{
    // old: cv::cuda::BFMatcher_CUDA matcher(normCode);
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

    const int knn = 3;

    cv::cuda::GpuMat d_train(train);

    // make add() twice to test such case
    // old: matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
    // old: matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));

    // prepare masks (make first nearest match illegal)
    std::vector<cv::cuda::GpuMat> masks(2);
...
@@ -528,9 +534,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
    std::vector<std::vector<cv::DMatch> > matches;

    if (useMask)
        // old: matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
        matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
    else
        // old: matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn);
        matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn);

    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
...
@@ -568,7 +574,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
{
    // old: cv::cuda::BFMatcher_CUDA matcher(normCode);
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

    const float radius = 1.f / countFactor;
...
@@ -577,7 +584,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
        try
        {
            std::vector<std::vector<cv::DMatch> > matches;
            // old: matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
            matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius);
        }
        catch (const cv::Exception& e)
        {
...
@@ -594,7 +601,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
    }

    std::vector<std::vector<cv::DMatch> > matches;
    // old: matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius, mask);
    matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius, mask);

    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
...
@@ -617,7 +624,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
{
    // old: cv::cuda::BFMatcher_CUDA matcher(normCode);
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

    const int n = 3;
    const float radius = 1.f / countFactor * n;
...
@@ -625,8 +633,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
    cv::cuda::GpuMat d_train(train);

    // make add() twice to test such case
    // old: matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
    // old: matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));

    // prepare masks (make first nearest match illegal)
    std::vector<cv::cuda::GpuMat> masks(2);
...
@@ -642,7 +650,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
        try
        {
            std::vector<std::vector<cv::DMatch> > matches;
            // old: matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
            matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
        }
        catch (const cv::Exception& e)
        {
...
@@ -654,9 +662,9 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
    std::vector<std::vector<cv::DMatch> > matches;

    if (useMask)
        // old: matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
        matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
    else
        // old: matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius);
        matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius);

    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
...
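Before moving to the call-site updates, here is a condensed sketch of the matcher API these tests exercise. It is not part of the commit; the descriptor matrices and the radius value are assumed.

// Sketch only: the three matching modes of the new cv::cuda::DescriptorMatcher.
// "d_query"/"d_train" are assumed GpuMat descriptor matrices of the same type.
void matchThreeWays(const cv::cuda::GpuMat& d_query, const cv::cuda::GpuMat& d_train)
{
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
        cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_L2);

    std::vector<cv::DMatch> best;
    matcher->match(d_query, d_train, best);                // one best match per query row

    std::vector<std::vector<cv::DMatch> > knn;
    matcher->knnMatch(d_query, d_train, knn, 2);           // two nearest neighbours

    std::vector<std::vector<cv::DMatch> > within;
    matcher->radiusMatch(d_query, d_train, within, 0.5f);  // all matches under a distance
}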
modules/stitching/src/matchers.cpp
...
@@ -154,7 +154,7 @@ void CpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
    matches_info.matches.clear();

    // old: Ptr<DescriptorMatcher> matcher;
    Ptr<cv::DescriptorMatcher> matcher;

#if 0 // TODO check this
    if (ocl::useOpenCL())
    {
...
@@ -220,13 +220,13 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
    descriptors1_.upload(features1.descriptors);
    descriptors2_.upload(features2.descriptors);

    // old: BFMatcher_CUDA matcher(NORM_L2);
    Ptr<cuda::DescriptorMatcher> matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2);

    MatchesSet matches;

    // Find 1->2 matches
    pair_matches.clear();
    // old: matcher.knnMatchSingle(descriptors1_, descriptors2_, train_idx_, distance_, all_dist_, 2);
    // old: matcher.knnMatchDownload(train_idx_, distance_, pair_matches);
    matcher->knnMatch(descriptors1_, descriptors2_, pair_matches, 2);
    for (size_t i = 0; i < pair_matches.size(); ++i)
    {
        if (pair_matches[i].size() < 2)
...
@@ -242,8 +242,7 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
    // Find 2->1 matches
    pair_matches.clear();
    // old: matcher.knnMatchSingle(descriptors2_, descriptors1_, train_idx_, distance_, all_dist_, 2);
    // old: matcher.knnMatchDownload(train_idx_, distance_, pair_matches);
    matcher->knnMatch(descriptors2_, descriptors1_, pair_matches, 2);
    for (size_t i = 0; i < pair_matches.size(); ++i)
    {
        if (pair_matches[i].size() < 2)
...
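The stitching change collapses the old two-step knnMatchSingle() + knnMatchDownload() into a single knnMatch() call. A condensed sketch of the ratio filter it feeds, not part of the commit; the 0.65 threshold is illustrative (the stitching code derives its own threshold from its match confidence parameter).

// Sketch only: knnMatch-based ratio filtering in the style of GpuMatcher::match.
void ratioFilter(cv::cuda::DescriptorMatcher& matcher,
                 const cv::cuda::GpuMat& d1, const cv::cuda::GpuMat& d2,
                 std::vector<cv::DMatch>& good)
{
    std::vector<std::vector<cv::DMatch> > pair_matches;
    matcher.knnMatch(d1, d2, pair_matches, 2);

    for (size_t i = 0; i < pair_matches.size(); ++i)
    {
        if (pair_matches[i].size() < 2)
            continue;

        const cv::DMatch& m0 = pair_matches[i][0];
        const cv::DMatch& m1 = pair_matches[i][1];

        // keep a match only if it is clearly better than the runner-up
        if (m0.distance < 0.65f * m1.distance)
            good.push_back(m0);
    }
}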
samples/gpu/performance/tests.cpp
...
@@ -322,14 +322,14 @@ TEST(FAST)
    FAST(src, keypoints, 20);
    CPU_OFF;

    // old: cuda::FAST_CUDA d_FAST(20);
    cv::Ptr<cv::cuda::FastFeatureDetector> d_FAST = cv::cuda::FastFeatureDetector::create(20);

    cuda::GpuMat d_src(src);
    cuda::GpuMat d_keypoints;

    // old: d_FAST(d_src, cuda::GpuMat(), d_keypoints);
    d_FAST->detectAsync(d_src, d_keypoints);

    CUDA_ON;
    d_FAST->detectAsync(d_src, d_keypoints);
    CUDA_OFF;
}
...
@@ -350,15 +350,15 @@ TEST(ORB)
    orb->detectAndCompute(src, Mat(), keypoints, descriptors);
    CPU_OFF;

    // old: cuda::ORB_CUDA d_orb;
    Ptr<cuda::ORB> d_orb = cuda::ORB::create();

    cuda::GpuMat d_src(src);
    cuda::GpuMat d_keypoints;
    cuda::GpuMat d_descriptors;

    // old: d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
    d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);

    CUDA_ON;
    d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
    CUDA_OFF;
}
...
@@ -379,14 +379,14 @@ TEST(BruteForceMatcher)
    // Init CUDA matcher
    // old: cuda::BFMatcher_CUDA d_matcher(NORM_L2);
    Ptr<cuda::DescriptorMatcher> d_matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2);

    cuda::GpuMat d_query(query);
    cuda::GpuMat d_train(train);

    // Output
    vector< vector<DMatch> > matches(2);
    // old: cuda::GpuMat d_trainIdx, d_distance, d_allDist, d_nMatches;
    cuda::GpuMat d_matches;

    SUBTEST << "match";
...
@@ -396,10 +396,10 @@ TEST(BruteForceMatcher)
    matcher.match(query, train, matches[0]);
    CPU_OFF;

    // old: d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
    d_matcher->matchAsync(d_query, d_train, d_matches);

    CUDA_ON;
    d_matcher->matchAsync(d_query, d_train, d_matches);
    CUDA_OFF;

    SUBTEST << "knnMatch";
...
@@ -410,10 +410,10 @@ TEST(BruteForceMatcher)
    matcher.knnMatch(query, train, matches, 2);
    CPU_OFF;

    // old: d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
    d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);

    CUDA_ON;
    d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
    CUDA_OFF;

    SUBTEST << "radiusMatch";
...
@@ -426,12 +426,10 @@ TEST(BruteForceMatcher)
    matcher.radiusMatch(query, train, matches, max_distance);
    CPU_OFF;

    // old: d_trainIdx.release();
    // old: d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
    d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);

    CUDA_ON;
    d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
    CUDA_OFF;
}
...
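The benchmarks above stop at the *Async calls, which leave their results in a single GpuMat. A sketch of how a caller would complete the pattern; this is an assumption about the matcher's conversion helpers (matchConvert and its knnMatchConvert/radiusMatchConvert counterparts) rather than code from this commit.

// Sketch only: converting an async match result back to host DMatch vectors.
void asyncMatchThenConvert(cv::Ptr<cv::cuda::DescriptorMatcher>& d_matcher,
                           const cv::cuda::GpuMat& d_query, const cv::cuda::GpuMat& d_train)
{
    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_matches;

    // enqueue the match on a user stream; nothing is downloaded yet
    d_matcher->matchAsync(d_query, d_train, d_matches, cv::noArray(), stream);
    stream.waitForCompletion();

    // assumed conversion helper: unpacks the GpuMat result on the host
    std::vector<cv::DMatch> matches;
    d_matcher->matchConvert(d_matches, matches);
}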