Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ea4f65b5
Commit
ea4f65b5
authored
Sep 27, 2011
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
implemented optimized version of bf_radius_match for train collection
parent
e99b0908
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
507 additions
and
187 deletions
+507
-187
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+25
-8
perf_features2d.cpp
modules/gpu/perf/perf_features2d.cpp
+1
-1
brute_force_matcher.cpp
modules/gpu/src/brute_force_matcher.cpp
+173
-72
bf_radius_match.cu
modules/gpu/src/cuda/bf_radius_match.cu
+308
-106
No files found.
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
ea4f65b5
...
...
@@ -1288,16 +1288,16 @@ namespace cv
const
std
::
vector
<
GpuMat
>&
masks
=
std
::
vector
<
GpuMat
>
(),
bool
compactResult
=
false
);
// Find best matches for each query descriptor which have distance less than maxDistance.
// nMatches.at<
unsigned int>(0, queru
Idx) will contain matches count for queryIdx.
// nMatches.at<
int>(0, query
Idx) will contain matches count for queryIdx.
// Be careful: nMatches can be greater than trainIdx.cols - it means that the matcher didn't store all matches,
// because it didn't have enough memory.
// trainIdx.at<int>(queruIdx, i) will contain ith train index (i < min(nMatches.at<
unsigned
int>(0, queruIdx), trainIdx.cols))
// distance.at<int>(queruIdx, i) will contain ith distance (i < min(nMatches.at<
unsigned
int>(0, queruIdx), trainIdx.cols))
// If trainIdx is empty, then trainIdx and distance will be created with size nQuery x
nTrain
,
// trainIdx.at<int>(queryIdx, i) will contain ith train index (i < min(nMatches.at<int>(0, queryIdx), trainIdx.cols))
// distance.at<float>(queryIdx, i) will contain ith distance (i < min(nMatches.at<int>(0, queryIdx), trainIdx.cols))
// If trainIdx is empty, then trainIdx and distance will be created with size nQuery x
(nTrain / 2)
,
// otherwise the user can pass their own preallocated trainIdx and distance with size nQuery x nMaxMatches
// Matches are not sorted.
void
radiusMatch
(
const
GpuMat
&
queryDescs
,
const
GpuMat
&
trainDescs
,
GpuMat
&
trainIdx
,
GpuMat
&
nMatches
,
GpuMat
&
distance
,
float
maxDistance
,
void
radiusMatch
Single
(
const
GpuMat
&
queryDescs
,
const
GpuMat
&
trainDescs
,
GpuMat
&
trainIdx
,
GpuMat
&
distance
,
GpuMat
&
nMatches
,
float
maxDistance
,
const
GpuMat
&
mask
=
GpuMat
(),
Stream
&
stream
=
Stream
::
Null
());
// Download trainIdx, nMatches and distance and convert it to vector with DMatch.
...
...
@@ -1305,10 +1305,10 @@ namespace cv
// compactResult is used when mask is not empty. If compactResult is false matches
// vector will have the same size as queryDescriptors rows. If compactResult is true
// matches vector will not contain matches for fully masked out query descriptors.
static
void
radiusMatchDownload
(
const
GpuMat
&
trainIdx
,
const
GpuMat
&
nMatches
,
const
GpuMat
&
distance
,
static
void
radiusMatchDownload
(
const
GpuMat
&
trainIdx
,
const
GpuMat
&
distance
,
const
GpuMat
&
nMatches
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
bool
compactResult
=
false
);
// Convert trainIdx, nMatches and distance to vector with DMatch.
static
void
radiusMatchConvert
(
const
Mat
&
trainIdx
,
const
Mat
&
nMatches
,
const
Mat
&
distance
,
static
void
radiusMatchConvert
(
const
Mat
&
trainIdx
,
const
Mat
&
distance
,
const
Mat
&
nMatches
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
bool
compactResult
=
false
);
// Find best matches for each query descriptor which have distance less than maxDistance
...
...
@@ -1317,6 +1317,23 @@ namespace cv
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
float
maxDistance
,
const
GpuMat
&
mask
=
GpuMat
(),
bool
compactResult
=
false
);
// Find best matches for each query descriptor which have distance less than maxDistance.
// Matches are not sorted.
void
radiusMatchCollection
(
const
GpuMat
&
queryDescs
,
const
GpuMat
&
trainCollection
,
GpuMat
&
trainIdx
,
GpuMat
&
imgIdx
,
GpuMat
&
distance
,
GpuMat
&
nMatches
,
float
maxDistance
,
const
GpuMat
&
maskCollection
,
Stream
&
stream
=
Stream
::
Null
());
// Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
// matches will be sorted in increasing order of distances.
// compactResult is used when mask is not empty. If compactResult is false matches
// vector will have the same size as queryDescriptors rows. If compactResult is true
// matches vector will not contain matches for fully masked out query descriptors.
static
void
radiusMatchDownload
(
const
GpuMat
&
trainIdx
,
const
GpuMat
&
imgIdx
,
const
GpuMat
&
distance
,
const
GpuMat
&
nMatches
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
bool
compactResult
=
false
);
// Convert trainIdx, imgIdx, nMatches and distance to vector with DMatch.
static
void
radiusMatchConvert
(
const
Mat
&
trainIdx
,
const
Mat
&
imgIdx
,
const
Mat
&
distance
,
const
Mat
&
nMatches
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
bool
compactResult
=
false
);
// Find best matches from train collection for each query descriptor which have distance less than
// maxDistance (in increasing order of distances).
void
radiusMatch
(
const
GpuMat
&
queryDescs
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
float
maxDistance
,
...
...
modules/gpu/perf/perf_features2d.cpp
View file @
ea4f65b5
...
...
@@ -89,7 +89,7 @@ PERF_TEST_P(DevInfo_DescSize, BruteForceMatcher_radiusMatch, testing::Combine(te
SIMPLE_TEST_CYCLE
()
{
matcher
.
radiusMatch
(
query
,
train
,
trainIdx
,
nMatches
,
distance
,
2.0
);
matcher
.
radiusMatch
Single
(
query
,
train
,
trainIdx
,
distance
,
nMatches
,
2.0
);
}
Mat
trainIdx_host
(
trainIdx
);
...
...
modules/gpu/src/brute_force_matcher.cpp
View file @
ea4f65b5
...
...
@@ -68,10 +68,13 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat&, const
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
knnMatchConvert
(
const
Mat
&
,
const
Mat
&
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
knnMatch
(
const
GpuMat
&
,
const
GpuMat
&
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
,
int
,
const
GpuMat
&
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
knnMatch
(
const
GpuMat
&
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
,
int
,
const
std
::
vector
<
GpuMat
>&
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatch
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
float
,
const
GpuMat
&
,
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatch
Single
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
float
,
const
GpuMat
&
,
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchDownload
(
const
GpuMat
&
,
const
GpuMat
&
,
const
GpuMat
&
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchConvert
(
const
Mat
&
,
const
Mat
&
,
const
Mat
&
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatch
(
const
GpuMat
&
,
const
GpuMat
&
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
,
float
,
const
GpuMat
&
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchCollection
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
float
,
const
GpuMat
&
,
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchDownload
(
const
GpuMat
&
,
const
GpuMat
&
,
const
GpuMat
&
,
const
GpuMat
&
,
vector
<
vector
<
DMatch
>
>&
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchConvert
(
const
Mat
&
,
const
Mat
&
,
const
Mat
&
,
const
Mat
&
,
vector
<
vector
<
DMatch
>
>&
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatch
(
const
GpuMat
&
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
,
float
,
const
std
::
vector
<
GpuMat
>&
,
bool
)
{
throw_nogpu
();
}
#else
/* !defined (HAVE_CUDA) */
...
...
@@ -114,26 +117,26 @@ namespace cv { namespace gpu { namespace bf_knnmatch
namespace
cv
{
namespace
gpu
{
namespace
bf_radius_match
{
template
<
typename
T
>
void
radiusMatchL1_gpu
(
const
DevMem2D
&
query
,
const
DevMem2D
&
train
,
float
maxDistance
,
const
DevMem2D
&
mask
,
const
DevMem2D
&
trainIdx
,
const
DevMem2D
&
nMatches
,
const
DevMem2D
&
distance
,
template
<
typename
T
>
void
radiusMatch
Single
L1_gpu
(
const
DevMem2D
&
query
,
const
DevMem2D
&
train
,
float
maxDistance
,
const
DevMem2D
&
mask
,
const
DevMem2D
&
trainIdx
,
const
DevMem2D
&
distance
,
const
DevMem2D
&
nMatches
,
cudaStream_t
stream
);
template
<
typename
T
>
void
radiusMatchL2_gpu
(
const
DevMem2D
&
query
,
const
DevMem2D
&
train
,
float
maxDistance
,
const
DevMem2D
&
mask
,
const
DevMem2D
&
trainIdx
,
const
DevMem2D
&
nMatches
,
const
DevMem2D
&
distance
,
template
<
typename
T
>
void
radiusMatch
Single
L2_gpu
(
const
DevMem2D
&
query
,
const
DevMem2D
&
train
,
float
maxDistance
,
const
DevMem2D
&
mask
,
const
DevMem2D
&
trainIdx
,
const
DevMem2D
&
distance
,
const
DevMem2D
&
nMatches
,
cudaStream_t
stream
);
template
<
typename
T
>
void
radiusMatchHamming_gpu
(
const
DevMem2D
&
query
,
const
DevMem2D
&
train
,
float
maxDistance
,
const
DevMem2D
&
mask
,
const
DevMem2D
&
trainIdx
,
const
DevMem2D
&
nMatches
,
const
DevMem2D
&
distance
,
template
<
typename
T
>
void
radiusMatch
Single
Hamming_gpu
(
const
DevMem2D
&
query
,
const
DevMem2D
&
train
,
float
maxDistance
,
const
DevMem2D
&
mask
,
const
DevMem2D
&
trainIdx
,
const
DevMem2D
&
distance
,
const
DevMem2D
&
nMatches
,
cudaStream_t
stream
);
}}}
namespace
{
struct
ImgIdxSetter
{
explicit
inline
ImgIdxSetter
(
int
imgIdx_
)
:
imgIdx
(
imgIdx_
)
{}
inline
void
operator
()(
DMatch
&
m
)
const
{
m
.
imgIdx
=
imgIdx
;}
int
imgIdx
;
};
}
template
<
typename
T
>
void
radiusMatchCollectionL1_gpu
(
const
DevMem2D
&
query
,
const
DevMem2D
&
trainCollection
,
float
maxDistance
,
const
DevMem2D_
<
PtrStep
>&
maskCollection
,
const
DevMem2D
&
trainIdx
,
const
DevMem2D
&
imgIdx
,
const
DevMem2D
&
distance
,
const
DevMem2D
&
nMatches
,
cudaStream_t
stream
);
template
<
typename
T
>
void
radiusMatchCollectionL2_gpu
(
const
DevMem2D
&
query
,
const
DevMem2D
&
trainCollection
,
float
maxDistance
,
const
DevMem2D_
<
PtrStep
>&
maskCollection
,
const
DevMem2D
&
trainIdx
,
const
DevMem2D
&
imgIdx
,
const
DevMem2D
&
distance
,
const
DevMem2D
&
nMatches
,
cudaStream_t
stream
);
template
<
typename
T
>
void
radiusMatchCollectionHamming_gpu
(
const
DevMem2D
&
query
,
const
DevMem2D
&
trainCollection
,
float
maxDistance
,
const
DevMem2D_
<
PtrStep
>&
maskCollection
,
const
DevMem2D
&
trainIdx
,
const
DevMem2D
&
imgIdx
,
const
DevMem2D
&
distance
,
const
DevMem2D
&
nMatches
,
cudaStream_t
stream
);
}}}
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
BruteForceMatcher_GPU_base
(
DistType
distType_
)
:
distType
(
distType_
)
{
...
...
@@ -551,6 +554,16 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
knnMatchDownload
(
trainIdx
,
distance
,
matches
,
compactResult
);
}
namespace
{
struct
ImgIdxSetter
{
explicit
inline
ImgIdxSetter
(
int
imgIdx_
)
:
imgIdx
(
imgIdx_
)
{}
inline
void
operator
()(
DMatch
&
m
)
const
{
m
.
imgIdx
=
imgIdx
;}
int
imgIdx
;
};
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
knnMatch
(
const
GpuMat
&
queryDescs
,
vector
<
vector
<
DMatch
>
>&
matches
,
int
knn
,
const
vector
<
GpuMat
>&
masks
,
bool
compactResult
)
{
...
...
@@ -596,8 +609,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs,
////////////////////////////////////////////////////////////////////
// RadiusMatch
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatch
(
const
GpuMat
&
queryDescs
,
const
GpuMat
&
trainDescs
,
GpuMat
&
trainIdx
,
GpuMat
&
nMatches
,
GpuMat
&
distance
,
float
maxDistance
,
const
GpuMat
&
mask
,
Stream
&
stream
)
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatch
Single
(
const
GpuMat
&
queryDescs
,
const
GpuMat
&
trainDescs
,
GpuMat
&
trainIdx
,
GpuMat
&
distance
,
GpuMat
&
nMatches
,
float
maxDistance
,
const
GpuMat
&
mask
,
Stream
&
stream
)
{
if
(
queryDescs
.
empty
()
||
trainDescs
.
empty
())
return
;
...
...
@@ -605,26 +618,26 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs,
using
namespace
cv
::
gpu
::
bf_radius_match
;
typedef
void
(
*
radiusMatch_caller_t
)(
const
DevMem2D
&
query
,
const
DevMem2D
&
train
,
float
maxDistance
,
const
DevMem2D
&
mask
,
const
DevMem2D
&
trainIdx
,
const
DevMem2D
&
nMatches
,
const
DevMem2D
&
distance
,
const
DevMem2D
&
trainIdx
,
const
DevMem2D
&
distance
,
const
DevMem2D
&
nMatches
,
cudaStream_t
stream
);
static
const
radiusMatch_caller_t
radiusMatch_callers
[
3
][
8
]
=
{
{
radiusMatch
L1_gpu
<
unsigned
char
>
,
0
/*radiusMatchL1_gpu<signed char>*/
,
radiusMatch
L1_gpu
<
unsigned
short
>
,
radiusMatch
L1_gpu
<
short
>
,
radiusMatchL1_gpu
<
int
>
,
radiusMatch
L1_gpu
<
float
>
,
0
,
0
radiusMatch
SingleL1_gpu
<
unsigned
char
>
,
0
/*radiusMatchSingleL1_gpu<signed char>*/
,
radiusMatchSingle
L1_gpu
<
unsigned
short
>
,
radiusMatch
SingleL1_gpu
<
short
>
,
radiusMatchSingleL1_gpu
<
int
>
,
radiusMatchSingle
L1_gpu
<
float
>
,
0
,
0
},
{
0
/*radiusMatch
L2_gpu<unsigned char>*/
,
0
/*radiusMatchL2_gpu<signed char>*/
,
0
/*radiusMatch
L2_gpu<unsigned short>*/
,
0
/*radiusMatch
L2_gpu<short>*/
,
0
/*radiusMatchL2_gpu<int>*/
,
radiusMatch
L2_gpu
<
float
>
,
0
,
0
0
/*radiusMatch
SingleL2_gpu<unsigned char>*/
,
0
/*radiusMatchSingleL2_gpu<signed char>*/
,
0
/*radiusMatchSingle
L2_gpu<unsigned short>*/
,
0
/*radiusMatch
SingleL2_gpu<short>*/
,
0
/*radiusMatchSingleL2_gpu<int>*/
,
radiusMatchSingle
L2_gpu
<
float
>
,
0
,
0
},
{
radiusMatch
Hamming_gpu
<
unsigned
char
>
,
0
/*radiusMatchHamming_gpu<signed char>*/
,
radiusMatch
Hamming_gpu
<
unsigned
short
>
,
0
/*radiusMatch
Hamming_gpu<short>*/
,
radiusMatch
Hamming_gpu
<
int
>
,
0
,
0
,
0
radiusMatch
SingleHamming_gpu
<
unsigned
char
>
,
0
/*radiusMatchSingleHamming_gpu<signed char>*/
,
radiusMatchSingle
Hamming_gpu
<
unsigned
short
>
,
0
/*radiusMatch
SingleHamming_gpu<short>*/
,
radiusMatchSingle
Hamming_gpu
<
int
>
,
0
,
0
,
0
}
};
CV_Assert
(
TargetArchs
::
builtWith
(
SHARED_ATOMICS
)
&&
DeviceInfo
().
supports
(
GLOBAL
_ATOMICS
));
CV_Assert
(
TargetArchs
::
builtWith
(
SHARED_ATOMICS
)
&&
DeviceInfo
().
supports
(
SHARED
_ATOMICS
));
const
int
nQuery
=
queryDescs
.
rows
;
const
int
nTrain
=
trainDescs
.
rows
;
...
...
@@ -636,38 +649,33 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs,
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32SC1
,
nMatches
);
if
(
trainIdx
.
empty
())
{
ensureSizeIsEnough
(
nQuery
,
nTrain
,
CV_32SC1
,
trainIdx
);
ensureSizeIsEnough
(
nQuery
,
nTrain
,
CV_32FC1
,
distance
);
ensureSizeIsEnough
(
nQuery
,
nTrain
/
2
,
CV_32SC1
,
trainIdx
);
ensureSizeIsEnough
(
nQuery
,
nTrain
/
2
,
CV_32FC1
,
distance
);
}
if
(
stream
)
stream
.
enqueueMemSet
(
nMatches
,
Scalar
::
all
(
0
));
else
nMatches
.
setTo
(
Scalar
::
all
(
0
));
radiusMatch_caller_t
func
=
radiusMatch_callers
[
distType
][
queryDescs
.
depth
()];
CV_Assert
(
func
!=
0
);
func
(
queryDescs
,
trainDescs
,
maxDistance
,
mask
,
trainIdx
,
nMatches
,
distance
,
StreamAccessor
::
getStream
(
stream
));
func
(
queryDescs
,
trainDescs
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
StreamAccessor
::
getStream
(
stream
));
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchDownload
(
const
GpuMat
&
trainIdx
,
const
GpuMat
&
nMatches
,
const
GpuMat
&
distance
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
bool
compactResult
)
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchDownload
(
const
GpuMat
&
trainIdx
,
const
GpuMat
&
distance
,
const
GpuMat
&
nMatches
,
vector
<
vector
<
DMatch
>
>&
matches
,
bool
compactResult
)
{
if
(
trainIdx
.
empty
()
||
nMatches
.
empty
()
||
distance
.
empty
())
if
(
trainIdx
.
empty
()
||
distance
.
empty
()
||
nMatches
.
empty
())
return
;
Mat
trainIdxCPU
=
trainIdx
;
Mat
nMatchesCPU
=
nMatches
;
Mat
distanceCPU
=
distance
;
Mat
nMatchesCPU
=
nMatches
;
radiusMatchConvert
(
trainIdxCPU
,
nMatchesCPU
,
distance
CPU
,
matches
,
compactResult
);
radiusMatchConvert
(
trainIdxCPU
,
distanceCPU
,
nMatches
CPU
,
matches
,
compactResult
);
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchConvert
(
const
Mat
&
trainIdx
,
const
Mat
&
nMatches
,
const
Mat
&
distance
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
matches
,
bool
compactResult
)
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchConvert
(
const
Mat
&
trainIdx
,
const
Mat
&
distance
,
const
Mat
&
nMatches
,
vector
<
vector
<
DMatch
>
>&
matches
,
bool
compactResult
)
{
if
(
trainIdx
.
empty
()
||
nMatches
.
empty
()
||
distance
.
empty
())
if
(
trainIdx
.
empty
()
||
distance
.
empty
()
||
nMatches
.
empty
())
return
;
CV_Assert
(
trainIdx
.
type
()
==
CV_32SC1
);
...
...
@@ -679,13 +687,13 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx
matches
.
clear
();
matches
.
reserve
(
nQuery
);
const
unsigned
int
*
nMatches_ptr
=
nMatches
.
ptr
<
unsigned
int
>
();
const
int
*
nMatches_ptr
=
nMatches
.
ptr
<
int
>
();
for
(
int
queryIdx
=
0
;
queryIdx
<
nQuery
;
++
queryIdx
)
{
const
int
*
trainIdx_ptr
=
trainIdx
.
ptr
<
int
>
(
queryIdx
);
const
float
*
distance_ptr
=
distance
.
ptr
<
float
>
(
queryIdx
);
const
int
nMatches
=
std
::
min
(
static_cast
<
int
>
(
nMatches_ptr
[
queryIdx
])
,
trainIdx
.
cols
);
const
int
nMatches
=
std
::
min
(
nMatches_ptr
[
queryIdx
]
,
trainIdx
.
cols
);
if
(
nMatches
==
0
)
{
...
...
@@ -694,9 +702,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx
continue
;
}
matches
.
push_back
(
vector
<
DMatch
>
());
matches
.
push_back
(
vector
<
DMatch
>
(
nMatches
));
vector
<
DMatch
>&
curMatches
=
matches
.
back
();
curMatches
.
reserve
(
nMatches
);
for
(
int
i
=
0
;
i
<
nMatches
;
++
i
,
++
trainIdx_ptr
,
++
distance_ptr
)
{
...
...
@@ -706,8 +713,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx
DMatch
m
(
queryIdx
,
trainIdx
,
0
,
distance
);
curMatches
.
push_back
(
m
)
;
curMatches
[
i
]
=
m
;
}
sort
(
curMatches
.
begin
(),
curMatches
.
end
());
}
}
...
...
@@ -715,46 +723,139 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatch
(
const
GpuMat
&
queryDescs
,
const
GpuMat
&
trainDescs
,
vector
<
vector
<
DMatch
>
>&
matches
,
float
maxDistance
,
const
GpuMat
&
mask
,
bool
compactResult
)
{
GpuMat
trainIdx
,
nMatches
,
distance
;
radiusMatch
(
queryDescs
,
trainDescs
,
trainIdx
,
nMatches
,
distance
,
maxDistance
,
mask
);
radiusMatchDownload
(
trainIdx
,
nMatches
,
distance
,
matches
,
compactResult
);
GpuMat
trainIdx
,
distance
,
nMatches
;
radiusMatch
Single
(
queryDescs
,
trainDescs
,
trainIdx
,
distance
,
nMatches
,
maxDistance
,
mask
);
radiusMatchDownload
(
trainIdx
,
distance
,
nMatches
,
matches
,
compactResult
);
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatch
(
const
GpuMat
&
queryDescs
,
vector
<
vector
<
DMatch
>
>&
matches
,
float
maxDistance
,
const
vector
<
GpuMat
>&
masks
,
bool
compactResult
)
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchCollection
(
const
GpuMat
&
queryDescs
,
const
GpuMat
&
trainCollection
,
GpuMat
&
trainIdx
,
GpuMat
&
imgIdx
,
GpuMat
&
distance
,
GpuMat
&
nMatches
,
float
maxDistance
,
const
GpuMat
&
maskCollection
,
Stream
&
stream
)
{
if
(
queryDescs
.
empty
()
||
empty
())
if
(
queryDescs
.
empty
()
||
trainCollection
.
empty
())
return
;
matches
.
resize
(
queryDescs
.
rows
)
;
using
namespace
cv
::
gpu
::
bf_radius_match
;
vector
<
vector
<
DMatch
>
>
curMatches
;
typedef
void
(
*
radiusMatch_caller_t
)(
const
DevMem2D
&
query
,
const
DevMem2D
&
trainCollection
,
float
maxDistance
,
const
DevMem2D_
<
PtrStep
>&
maskCollection
,
const
DevMem2D
&
trainIdx
,
const
DevMem2D
&
imgIdx
,
const
DevMem2D
&
distance
,
const
DevMem2D
&
nMatches
,
cudaStream_t
stream
);
for
(
size_t
imgIdx
=
0
;
imgIdx
<
trainDescCollection
.
size
();
++
imgIdx
)
static
const
radiusMatch_caller_t
radiusMatch_callers
[
3
][
8
]
=
{
radiusMatch
(
queryDescs
,
trainDescCollection
[
imgIdx
],
curMatches
,
maxDistance
,
masks
.
empty
()
?
GpuMat
()
:
masks
[
imgIdx
]);
for
(
int
queryIdx
=
0
;
queryIdx
<
queryDescs
.
rows
;
++
queryIdx
)
{
vector
<
DMatch
>&
localMatch
=
curMatches
[
queryIdx
];
vector
<
DMatch
>&
globalMatch
=
matches
[
queryIdx
];
radiusMatchCollectionL1_gpu
<
unsigned
char
>
,
0
/*radiusMatchCollectionL1_gpu<signed char>*/
,
radiusMatchCollectionL1_gpu
<
unsigned
short
>
,
radiusMatchCollectionL1_gpu
<
short
>
,
radiusMatchCollectionL1_gpu
<
int
>
,
radiusMatchCollectionL1_gpu
<
float
>
,
0
,
0
},
{
0
/*radiusMatchCollectionL2_gpu<unsigned char>*/
,
0
/*radiusMatchCollectionL2_gpu<signed char>*/
,
0
/*radiusMatchCollectionL2_gpu<unsigned short>*/
,
0
/*radiusMatchCollectionL2_gpu<short>*/
,
0
/*radiusMatchCollectionL2_gpu<int>*/
,
radiusMatchCollectionL2_gpu
<
float
>
,
0
,
0
},
{
radiusMatchCollectionHamming_gpu
<
unsigned
char
>
,
0
/*radiusMatchCollectionHamming_gpu<signed char>*/
,
radiusMatchCollectionHamming_gpu
<
unsigned
short
>
,
0
/*radiusMatchCollectionHamming_gpu<short>*/
,
radiusMatchCollectionHamming_gpu
<
int
>
,
0
,
0
,
0
}
};
for_each
(
localMatch
.
begin
(),
localMatch
.
end
(),
ImgIdxSetter
(
static_cast
<
int
>
(
imgIdx
)
));
CV_Assert
(
TargetArchs
::
builtWith
(
SHARED_ATOMICS
)
&&
DeviceInfo
().
supports
(
SHARED_ATOMICS
));
const
size_t
oldSize
=
globalMatch
.
size
()
;
const
int
nQuery
=
queryDescs
.
rows
;
copy
(
localMatch
.
begin
(),
localMatch
.
end
(),
back_inserter
(
globalMatch
));
inplace_merge
(
globalMatch
.
begin
(),
globalMatch
.
begin
()
+
oldSize
,
globalMatch
.
end
());
}
CV_Assert
(
queryDescs
.
channels
()
==
1
&&
queryDescs
.
depth
()
<
CV_64F
);
CV_Assert
(
trainIdx
.
empty
()
||
(
trainIdx
.
rows
==
nQuery
&&
trainIdx
.
size
()
==
distance
.
size
()
&&
trainIdx
.
size
()
==
imgIdx
.
size
()));
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32SC1
,
nMatches
);
if
(
trainIdx
.
empty
())
{
ensureSizeIsEnough
(
nQuery
,
nQuery
/
2
,
CV_32SC1
,
trainIdx
);
ensureSizeIsEnough
(
nQuery
,
nQuery
/
2
,
CV_32SC1
,
imgIdx
);
ensureSizeIsEnough
(
nQuery
,
nQuery
/
2
,
CV_32FC1
,
distance
);
}
if
(
compactResult
)
radiusMatch_caller_t
func
=
radiusMatch_callers
[
distType
][
queryDescs
.
depth
()];
CV_Assert
(
func
!=
0
);
func
(
queryDescs
,
trainCollection
,
maxDistance
,
maskCollection
,
trainIdx
,
imgIdx
,
distance
,
nMatches
,
StreamAccessor
::
getStream
(
stream
));
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchDownload
(
const
GpuMat
&
trainIdx
,
const
GpuMat
&
imgIdx
,
const
GpuMat
&
distance
,
const
GpuMat
&
nMatches
,
vector
<
vector
<
DMatch
>
>&
matches
,
bool
compactResult
)
{
if
(
trainIdx
.
empty
()
||
imgIdx
.
empty
()
||
distance
.
empty
()
||
nMatches
.
empty
())
return
;
Mat
trainIdxCPU
=
trainIdx
;
Mat
imgIdxCPU
=
imgIdx
;
Mat
distanceCPU
=
distance
;
Mat
nMatchesCPU
=
nMatches
;
radiusMatchConvert
(
trainIdxCPU
,
imgIdxCPU
,
distanceCPU
,
nMatchesCPU
,
matches
,
compactResult
);
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchConvert
(
const
Mat
&
trainIdx
,
const
Mat
&
imgIdx
,
const
Mat
&
distance
,
const
Mat
&
nMatches
,
vector
<
vector
<
DMatch
>
>&
matches
,
bool
compactResult
)
{
if
(
trainIdx
.
empty
()
||
imgIdx
.
empty
()
||
distance
.
empty
()
||
nMatches
.
empty
())
return
;
CV_Assert
(
trainIdx
.
type
()
==
CV_32SC1
);
CV_Assert
(
imgIdx
.
type
()
==
CV_32SC1
&&
imgIdx
.
size
()
==
trainIdx
.
size
());
CV_Assert
(
distance
.
type
()
==
CV_32FC1
&&
distance
.
size
()
==
trainIdx
.
size
());
CV_Assert
(
nMatches
.
type
()
==
CV_32SC1
&&
nMatches
.
isContinuous
()
&&
nMatches
.
cols
>=
trainIdx
.
rows
);
const
int
nQuery
=
trainIdx
.
rows
;
matches
.
clear
();
matches
.
reserve
(
nQuery
);
const
int
*
nMatches_ptr
=
nMatches
.
ptr
<
int
>
();
for
(
int
queryIdx
=
0
;
queryIdx
<
nQuery
;
++
queryIdx
)
{
vector
<
vector
<
DMatch
>
>::
iterator
new_end
=
remove_if
(
matches
.
begin
(),
matches
.
end
(),
mem_fun_ref
(
&
vector
<
DMatch
>::
empty
));
matches
.
erase
(
new_end
,
matches
.
end
());
const
int
*
trainIdx_ptr
=
trainIdx
.
ptr
<
int
>
(
queryIdx
);
const
int
*
imgIdx_ptr
=
imgIdx
.
ptr
<
int
>
(
queryIdx
);
const
float
*
distance_ptr
=
distance
.
ptr
<
float
>
(
queryIdx
);
const
int
nMatches
=
std
::
min
(
nMatches_ptr
[
queryIdx
],
trainIdx
.
cols
);
if
(
nMatches
==
0
)
{
if
(
!
compactResult
)
matches
.
push_back
(
vector
<
DMatch
>
());
continue
;
}
matches
.
push_back
(
vector
<
DMatch
>
());
vector
<
DMatch
>&
curMatches
=
matches
.
back
();
curMatches
.
reserve
(
nMatches
);
for
(
int
i
=
0
;
i
<
nMatches
;
++
i
,
++
trainIdx_ptr
,
++
imgIdx_ptr
,
++
distance_ptr
)
{
int
trainIdx
=
*
trainIdx_ptr
;
int
imgIdx
=
*
imgIdx_ptr
;
float
distance
=
*
distance_ptr
;
DMatch
m
(
queryIdx
,
trainIdx
,
imgIdx
,
distance
);
curMatches
.
push_back
(
m
);
}
sort
(
curMatches
.
begin
(),
curMatches
.
end
());
}
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatch
(
const
GpuMat
&
queryDescs
,
vector
<
vector
<
DMatch
>
>&
matches
,
float
maxDistance
,
const
vector
<
GpuMat
>&
masks
,
bool
compactResult
)
{
GpuMat
trainCollection
;
GpuMat
maskCollection
;
makeGpuCollection
(
trainCollection
,
maskCollection
,
masks
);
GpuMat
trainIdx
,
imgIdx
,
distance
,
nMatches
;
radiusMatchCollection
(
queryDescs
,
trainCollection
,
trainIdx
,
imgIdx
,
distance
,
nMatches
,
maxDistance
,
maskCollection
);
radiusMatchDownload
(
trainIdx
,
imgIdx
,
distance
,
nMatches
,
matches
,
compactResult
);
}
#endif
/* !defined (HAVE_CUDA) */
modules/gpu/src/cuda/bf_radius_match.cu
View file @
ea4f65b5
...
...
@@ -49,94 +49,210 @@ using namespace cv::gpu::device;
namespace cv { namespace gpu { namespace bf_radius_match
{
__device__ __forceinline__ void store(const int* sidx, const float* sdist, const unsigned int scount, int* trainIdx, float* distance, int& sglob_ind, const int tid)
template <typename T> struct SingleTrain
{
if (tid < scount)
enum {USE_IMG_IDX = 0};
explicit SingleTrain(const DevMem2D_<T>& train_) : train(train_)
{
trainIdx[sglob_ind + tid] = sidx[tid];
distance[sglob_ind + tid] = sdist[tid];
}
if (tid == 0)
sglob_ind += scount;
}
static __device__ __forceinline__ void store(const int* s_trainIdx, const int* s_imgIdx, const float* s_dist, unsigned int& s_count, int& s_globInd,
int* trainIdx, int* imgIdx, float* distance, int maxCount)
{
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_STACK, typename VecDiff, typename Dist, typename T, typename Mask>
__global__ void radiusMatch(const PtrStep_<T> query, const DevMem2D_<T> train, const float maxDistance, const Mask mask,
DevMem2Di trainIdx_, PtrStepf distance, unsigned int* nMatches)
{
#if __CUDA_ARCH__ >= 120
if (tid < s_count && s_globInd + tid < maxCount)
{
trainIdx[s_globInd + tid] = s_trainIdx[tid];
distance[s_globInd + tid] = s_dist[tid];
}
typedef typename Dist::result_type result_type;
typedef typename Dist::value_type value_type;
if (tid == 0)
{
s_globInd += s_count;
s_count = 0;
}
}
__shared__ result_type smem[BLOCK_DIM_X * BLOCK_DIM_Y];
__shared__ int sidx[BLOCK_STACK];
__shared__ float sdist[BLOCK_STACK];
__shared__ unsigned int scount;
__shared__ int sglob_ind;
template <int BLOCK_STACK, typename Dist, typename VecDiff, typename Mask>
__device__ __forceinline__ void loop(float maxDistance, Mask& mask, const VecDiff& vecDiff,
int* s_trainIdx, int* s_imgIdx, float* s_dist, unsigned int& s_count, int& s_globInd,
int* trainIdxRow, int* imgIdxRow, float* distanceRow, int maxCount,
typename Dist::result_type* s_diffRow) const
{
#if __CUDA_ARCH__ >= 120
const int queryIdx = blockIdx.x;
const int tid = threadIdx.y * BLOCK_DIM_X + threadIdx.x;
for (int i = 0; i < train.rows; i += blockDim.y)
{
int trainIdx = i + threadIdx.y;
if (tid == 0)
if (trainIdx < train.rows && mask(blockIdx.x, trainIdx))
{
Dist dist;
vecDiff.calc(train.ptr(trainIdx), train.cols, dist, s_diffRow, threadIdx.x);
const typename Dist::result_type val = dist;
if (threadIdx.x == 0 && val < maxDistance)
{
unsigned int ind = atomicInc(&s_count, (unsigned int) -1);
s_trainIdx[ind] = trainIdx;
s_dist[ind] = val;
}
}
__syncthreads();
if (s_count >= BLOCK_STACK - blockDim.y)
store(s_trainIdx, s_imgIdx, s_dist, s_count, s_globInd, trainIdxRow, imgIdxRow, distanceRow, maxCount);
__syncthreads();
}
store(s_trainIdx, s_imgIdx, s_dist, s_count, s_globInd, trainIdxRow, imgIdxRow, distanceRow, maxCount);
#endif
}
__device__ __forceinline__ int descLen() const
{
scount = 0;
sglob_ind = 0;
return train.cols;
}
__syncthreads();
int* trainIdx_row = trainIdx_.ptr(queryIdx)
;
float* distance_row = distance.ptr(queryIdx)
;
const DevMem2D_<T> train
;
}
;
const VecDiff vecDiff(query.ptr(queryIdx), train.cols, (typename Dist::value_type*)smem, tid, threadIdx.x);
typename Dist::result_type* sdiffRow = smem + BLOCK_DIM_X * threadIdx.y;
template <typename T> struct TrainCollection
{
enum {USE_IMG_IDX = 1};
TrainCollection(const DevMem2D_<T>* trainCollection_, int nImg_, int desclen_) :
trainCollection(trainCollection_), nImg(nImg_), desclen(desclen_)
{
}
for (int trainIdx = threadIdx.y; trainIdx < train.rows; trainIdx += BLOCK_DIM_Y)
static __device__ __forceinline__ void store(const int* s_trainIdx, const int* s_imgIdx, const float* s_dist, unsigned int& s_count, int& s_globInd,
int* trainIdx, int* imgIdx, float* distance, int maxCount)
{
if (mask(queryIdx, trainIdx))
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
if (tid < s_count && s_globInd + tid < maxCount)
{
Dist dist;
trainIdx[s_globInd + tid] = s_trainIdx[tid];
imgIdx[s_globInd + tid] = s_imgIdx[tid];
distance[s_globInd + tid] = s_dist[tid];
}
if (tid == 0)
{
s_globInd += s_count;
s_count = 0;
}
}
const T* trainRow = train.ptr(trainIdx);
vecDiff.calc(trainRow, train.cols, dist, sdiffRow, threadIdx.x);
template <int BLOCK_STACK, typename Dist, typename VecDiff, typename Mask>
__device__ void loop(float maxDistance, Mask& mask, const VecDiff& vecDiff,
int* s_trainIdx, int* s_imgIdx, float* s_dist, unsigned int& s_count, int& s_globInd,
int* trainIdxRow, int* imgIdxRow, float* distanceRow, int maxCount,
typename Dist::result_type* s_diffRow) const
{
#if __CUDA_ARCH__ >= 120
for (int imgIdx = 0; imgIdx < nImg; ++imgIdx)
{
const DevMem2D_<T> train = trainCollection[imgIdx];
const typename Dist::result_type val = dist
;
mask.next()
;
if (threadIdx.x == 0 && val < maxDistance
)
for (int i = 0; i < train.rows; i += blockDim.y
)
{
unsigned int i = atomicInc(&scount, (unsigned int) -1);
sidx[i] = trainIdx;
sdist[i] = val;
int trainIdx = i + threadIdx.y;
if (trainIdx < train.rows && mask(blockIdx.x, trainIdx))
{
Dist dist;
vecDiff.calc(train.ptr(trainIdx), desclen, dist, s_diffRow, threadIdx.x);
const typename Dist::result_type val = dist;
if (threadIdx.x == 0 && val < maxDistance)
{
unsigned int ind = atomicInc(&s_count, (unsigned int) -1);
s_trainIdx[ind] = trainIdx;
s_imgIdx[ind] = imgIdx;
s_dist[ind] = val;
}
}
__syncthreads();
if (s_count >= BLOCK_STACK - blockDim.y)
store(s_trainIdx, s_imgIdx, s_dist, s_count, s_globInd, trainIdxRow, imgIdxRow, distanceRow, maxCount);
__syncthreads();
}
}
__syncthreads();
if (scount > BLOCK_STACK - BLOCK_DIM_Y)
{
store(sidx, sdist, scount, trainIdx_row, distance_row, sglob_ind, tid);
if (tid == 0)
scount = 0;
}
__syncthreads();
store(s_trainIdx, s_imgIdx, s_dist, s_count, s_globInd, trainIdxRow, imgIdxRow, distanceRow, maxCount);
#endif
}
// Device-side accessor: returns the value of the desclen member unchanged.
// The radiusMatch kernel passes train.descLen() to the VecDiff constructor.
__device__ __forceinline__ int descLen() const
{
return desclen;
}
const DevMem2D_<T>* trainCollection;
const int nImg;
const int desclen;
};
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_STACK, typename VecDiff, typename Dist, typename T, typename Train, typename Mask>
__global__ void radiusMatch(const PtrStep_<T> query, const Train train, float maxDistance, const Mask mask,
PtrStepi trainIdx, PtrStepi imgIdx, PtrStepf distance, int* nMatches, int maxCount)
{
typedef typename Dist::result_type result_type;
typedef typename Dist::value_type value_type;
__shared__ result_type s_mem[BLOCK_DIM_X * BLOCK_DIM_Y];
__shared__ int s_trainIdx[BLOCK_STACK];
__shared__ int s_imgIdx[Train::USE_IMG_IDX ? BLOCK_STACK : 1];
__shared__ float s_dist[BLOCK_STACK];
__shared__ unsigned int s_count;
__shared__ int s_globInd;
if (threadIdx.x == 0 && threadIdx.y == 0)
{
s_count = 0;
s_globInd = 0;
}
__syncthreads();
store(sidx, sdist, scount, trainIdx_row, distance_row, sglob_ind, tid
);
const VecDiff vecDiff(query.ptr(blockIdx.x), train.descLen(), (typename Dist::value_type*)s_mem, threadIdx.y * BLOCK_DIM_X + threadIdx.x, threadIdx.x
);
if (tid == 0)
nMatches[queryIdx] = sglob_ind;
Mask m = mask;
#endif
train.template loop<BLOCK_STACK, Dist>(maxDistance, m, vecDiff,
s_trainIdx, s_imgIdx, s_dist, s_count, s_globInd,
trainIdx.ptr(blockIdx.x), imgIdx.ptr(blockIdx.x), distance.ptr(blockIdx.x), maxCount,
s_mem + BLOCK_DIM_X * threadIdx.y);
if (threadIdx.x == 0 && threadIdx.y == 0)
nMatches[blockIdx.x] = s_globInd;
}
///////////////////////////////////////////////////////////////////////////////
// Radius Match kernel caller
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_STACK, typename Dist, typename T, typename Mask>
void radiusMatchSimple_caller(const DevMem2D_<T>& query, const
DevMem2D_<T>
& train, float maxDistance, const Mask& mask,
const DevMem2Di& trainIdx, const DevMem2D
f& distance, unsigned
int* nMatches,
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_STACK, typename Dist, typename T, typename
Train, typename
Mask>
void radiusMatchSimple_caller(const DevMem2D_<T>& query, const
Train
& train, float maxDistance, const Mask& mask,
const DevMem2Di& trainIdx, const DevMem2D
i& imgIdx, const DevMem2Df& distance,
int* nMatches,
cudaStream_t stream)
{
StaticAssert<BLOCK_STACK >= BLOCK_DIM_Y>::check();
...
...
@@ -146,16 +262,16 @@ namespace cv { namespace gpu { namespace bf_radius_match
const dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
radiusMatch<BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_STACK, VecDiffGlobal<BLOCK_DIM_X, T>, Dist, T>
<<<grid, threads, 0, stream>>>(query, train, maxDistance, mask, trainIdx,
distance, nMatche
s);
<<<grid, threads, 0, stream>>>(query, train, maxDistance, mask, trainIdx,
imgIdx, distance, nMatches, trainIdx.col
s);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_STACK, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T, typename Mask>
void radiusMatchCached_caller(const DevMem2D_<T>& query, const
DevMem2D_<T>
& train, float maxDistance, const Mask& mask,
const DevMem2Di& trainIdx, const DevMem2D
f& distance, unsigned
int* nMatches,
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_STACK, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T, typename
Train, typename
Mask>
void radiusMatchCached_caller(const DevMem2D_<T>& query, const
Train
& train, float maxDistance, const Mask& mask,
const DevMem2Di& trainIdx, const DevMem2D
i& imgIdx, const DevMem2Df& distance,
int* nMatches,
cudaStream_t stream)
{
StaticAssert<BLOCK_STACK >= BLOCK_DIM_Y>::check();
...
...
@@ -167,7 +283,7 @@ namespace cv { namespace gpu { namespace bf_radius_match
const dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
radiusMatch<BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_STACK, VecDiffCachedRegister<BLOCK_DIM_X, MAX_LEN, LEN_EQ_MAX_LEN, typename Dist::value_type>, Dist, T>
<<<grid, threads, 0, stream>>>(query, train, maxDistance, mask, trainIdx,
distance, nMatche
s);
<<<grid, threads, 0, stream>>>(query, train, maxDistance, mask, trainIdx,
imgIdx, distance, nMatches, trainIdx.col
s);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
...
...
@@ -177,58 +293,58 @@ namespace cv { namespace gpu { namespace bf_radius_match
///////////////////////////////////////////////////////////////////////////////
// Radius Match Dispatcher
template <typename Dist, typename T, typename Mask>
void radiusMatchDispatcher(const DevMem2D_<T>& query, const
DevMem2D_<T>
& train, float maxDistance, const Mask& mask,
const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches,
template <typename Dist, typename T, typename
Train, typename
Mask>
void radiusMatchDispatcher(const DevMem2D_<T>& query, const
Train
& train, float maxDistance, const Mask& mask,
const DevMem2D& trainIdx, const DevMem2D&
imgIdx, const DevMem2D&
distance, const DevMem2D& nMatches,
cudaStream_t stream)
{
if (query.cols < 64)
{
radiusMatchCached_caller<16, 16, 64, 64, false, Dist>(
query, train, maxDistance, mask,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
f>(distance), (unsigned
int*)nMatches.data,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
i>(imgIdx), static_cast<DevMem2Df>(distance), (
int*)nMatches.data,
stream);
}
else if (query.cols == 64)
{
radiusMatchCached_caller<16, 16, 64, 64, true, Dist>(
query, train, maxDistance, mask,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
f>(distance), (unsigned
int*)nMatches.data,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
i>(imgIdx), static_cast<DevMem2Df>(distance), (
int*)nMatches.data,
stream);
}
else if (query.cols < 128)
{
radiusMatchCached_caller<16, 16, 64, 128, false, Dist>(
query, train, maxDistance, mask,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
f>(distance), (unsigned
int*)nMatches.data,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
i>(imgIdx), static_cast<DevMem2Df>(distance), (
int*)nMatches.data,
stream);
}
else if (query.cols == 128)
{
radiusMatchCached_caller<16, 16, 64, 128, true, Dist>(
query, train, maxDistance, mask,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
f>(distance), (unsigned
int*)nMatches.data,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
i>(imgIdx), static_cast<DevMem2Df>(distance), (
int*)nMatches.data,
stream);
}
else if (query.cols < 256)
{
radiusMatchCached_caller<16, 16, 64, 256, false, Dist>(
query, train, maxDistance, mask,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
f>(distance), (unsigned
int*)nMatches.data,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
i>(imgIdx), static_cast<DevMem2Df>(distance), (
int*)nMatches.data,
stream);
}
else if (query.cols == 256)
{
radiusMatchCached_caller<16, 16, 64, 256, true, Dist>(
query, train, maxDistance, mask,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
f>(distance), (unsigned
int*)nMatches.data,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
i>(imgIdx), static_cast<DevMem2Df>(distance), (
int*)nMatches.data,
stream);
}
else
{
radiusMatchSimple_caller<16, 16, 64, Dist>(
query, train, maxDistance, mask,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
f>(distance), (unsigned
int*)nMatches.data,
static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2D
i>(imgIdx), static_cast<DevMem2Df>(distance), (
int*)nMatches.data,
stream);
}
}
...
...
@@ -236,77 +352,163 @@ namespace cv { namespace gpu { namespace bf_radius_match
///////////////////////////////////////////////////////////////////////////////
// Radius Match caller
template <typename T> void radiusMatch
L1_gpu(const DevMem2D& query, const DevMem2D& train
, float maxDistance, const DevMem2D& mask,
const DevMem2D& trainIdx, const DevMem2D&
nMatches, const DevMem2D& distance
,
template <typename T> void radiusMatch
SingleL1_gpu(const DevMem2D& query, const DevMem2D& train_
, float maxDistance, const DevMem2D& mask,
const DevMem2D& trainIdx, const DevMem2D&
distance, const DevMem2D& nMatches
,
cudaStream_t stream)
{
SingleTrain<T> train(static_cast< DevMem2D_<T> >(train_));
if (mask.data)
{
radiusMatchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query),
static_cast< DevMem2D_<T> >(train)
, maxDistance, SingleMask(mask),
trainIdx, distance, nMatches,
radiusMatchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query),
train
, maxDistance, SingleMask(mask),
trainIdx,
DevMem2D(),
distance, nMatches,
stream);
}
else
{
radiusMatchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query),
static_cast< DevMem2D_<T> >(train)
, maxDistance, WithOutMask(),
trainIdx, distance, nMatches,
radiusMatchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query),
train
, maxDistance, WithOutMask(),
trainIdx,
DevMem2D(),
distance, nMatches,
stream);
}
}
template void radiusMatch
L1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
//template void radiusMatch
L1_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
template void radiusMatch
L1_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
template void radiusMatch
L1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
template void radiusMatch
L1_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
template void radiusMatch
L1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
template void radiusMatch
SingleL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
//template void radiusMatch
SingleL1_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
template void radiusMatch
SingleL1_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
template void radiusMatch
SingleL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
template void radiusMatch
SingleL1_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
template void radiusMatch
SingleL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
template <typename T> void radiusMatch
L2_gpu(const DevMem2D& query, const DevMem2D& train
, float maxDistance, const DevMem2D& mask,
const DevMem2D& trainIdx, const DevMem2D&
nMatches, const DevMem2D& distance
,
template <typename T> void radiusMatch
SingleL2_gpu(const DevMem2D& query, const DevMem2D& train_
, float maxDistance, const DevMem2D& mask,
const DevMem2D& trainIdx, const DevMem2D&
distance, const DevMem2D& nMatches
,
cudaStream_t stream)
{
SingleTrain<T> train(static_cast< DevMem2D_<T> >(train_));
if (mask.data)
{
radiusMatchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query),
static_cast< DevMem2D_<T> >(train)
, maxDistance, SingleMask(mask),
trainIdx, distance, nMatches,
radiusMatchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query),
train
, maxDistance, SingleMask(mask),
trainIdx,
DevMem2D(),
distance, nMatches,
stream);
}
else
{
radiusMatchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query),
static_cast< DevMem2D_<T> >(train)
, maxDistance, WithOutMask(),
trainIdx, distance, nMatches,
radiusMatchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query),
train
, maxDistance, WithOutMask(),
trainIdx,
DevMem2D(),
distance, nMatches,
stream);
}
}
//template void radiusMatch
L2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
//template void radiusMatch
L2_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
//template void radiusMatch
L2_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
//template void radiusMatch
L2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
//template void radiusMatch
L2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
template void radiusMatch
L2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
//template void radiusMatch
SingleL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
//template void radiusMatch
SingleL2_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
//template void radiusMatch
SingleL2_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
//template void radiusMatch
SingleL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
//template void radiusMatch
SingleL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
template void radiusMatch
SingleL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
template <typename T> void radiusMatch
Hamming_gpu(const DevMem2D& query, const DevMem2D& train
, float maxDistance, const DevMem2D& mask,
const DevMem2D& trainIdx, const DevMem2D&
nMatches, const DevMem2D& distance
,
template <typename T> void radiusMatch
SingleHamming_gpu(const DevMem2D& query, const DevMem2D& train_
, float maxDistance, const DevMem2D& mask,
const DevMem2D& trainIdx, const DevMem2D&
distance, const DevMem2D& nMatches
,
cudaStream_t stream)
{
SingleTrain<T> train(static_cast< DevMem2D_<T> >(train_));
if (mask.data)
{
radiusMatchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask),
trainIdx, distance, nMatches,
radiusMatchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), train, maxDistance, SingleMask(mask),
trainIdx, DevMem2D(), distance, nMatches,
stream);
}
else
{
radiusMatchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), train, maxDistance, WithOutMask(),
trainIdx, DevMem2D(), distance, nMatches,
stream);
}
}
// Explicit instantiations of radiusMatchSingleHamming_gpu.
// Only the uchar, ushort and int element types are instantiated; the schar and
// short variants are kept below as commented-out lines.
template void radiusMatchSingleHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
//template void radiusMatchSingleHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
template void radiusMatchSingleHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
//template void radiusMatchSingleHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
template void radiusMatchSingleHamming_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
template <typename T> void radiusMatchCollectionL1_gpu(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection,
const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches,
cudaStream_t stream)
{
TrainCollection<T> train((DevMem2D_<T>*)trainCollection.ptr(), trainCollection.cols, query.cols);
if (maskCollection.data)
{
radiusMatchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), train, maxDistance, MaskCollection(maskCollection.data),
trainIdx, imgIdx, distance, nMatches,
stream);
}
else
{
radiusMatchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), train, maxDistance, WithOutMask(),
trainIdx, imgIdx, distance, nMatches,
stream);
}
}
// Explicit instantiations of radiusMatchCollectionL1_gpu.
// Instantiated for uchar, ushort, short, int and float; the schar variant is
// kept below as a commented-out line.
template void radiusMatchCollectionL1_gpu<uchar >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
//template void radiusMatchCollectionL1_gpu<schar >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
template void radiusMatchCollectionL1_gpu<ushort>(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
template void radiusMatchCollectionL1_gpu<short >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
template void radiusMatchCollectionL1_gpu<int >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
template void radiusMatchCollectionL1_gpu<float >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
template <typename T> void radiusMatchCollectionL2_gpu(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection,
const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches,
cudaStream_t stream)
{
TrainCollection<T> train((DevMem2D_<T>*)trainCollection.ptr(), trainCollection.cols, query.cols);
if (maskCollection.data)
{
radiusMatchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), train, maxDistance, MaskCollection(maskCollection.data),
trainIdx, imgIdx, distance, nMatches,
stream);
}
else
{
radiusMatchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), train, maxDistance, WithOutMask(),
trainIdx, imgIdx, distance, nMatches,
stream);
}
}
// Explicit instantiations of radiusMatchCollectionL2_gpu.
// Only the float element type is instantiated; the uchar, schar, ushort, short
// and int variants are kept below as commented-out lines.
//template void radiusMatchCollectionL2_gpu<uchar >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
//template void radiusMatchCollectionL2_gpu<schar >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
//template void radiusMatchCollectionL2_gpu<ushort>(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
//template void radiusMatchCollectionL2_gpu<short >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
//template void radiusMatchCollectionL2_gpu<int >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
template void radiusMatchCollectionL2_gpu<float >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);
template <typename T> void radiusMatchCollectionHamming_gpu(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection,
const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches,
cudaStream_t stream)
{
TrainCollection<T> train((DevMem2D_<T>*)trainCollection.ptr(), trainCollection.cols, query.cols);
if (maskCollection.data)
{
radiusMatchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), train, maxDistance, MaskCollection(maskCollection.data),
trainIdx, imgIdx, distance, nMatches,
stream);
}
else
{
radiusMatchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query),
static_cast< DevMem2D_<T> >(train)
, maxDistance, WithOutMask(),
trainIdx, distance, nMatches,
radiusMatchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query),
train
, maxDistance, WithOutMask(),
trainIdx,
imgIdx,
distance, nMatches,
stream);
}
}
template void radiusMatch
Hamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
//template void radiusMatch
Hamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
template void radiusMatch
Hamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
//template void radiusMatch
Hamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
template void radiusMatch
Hamming_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance
, cudaStream_t stream);
template void radiusMatch
CollectionHamming_gpu<uchar >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
//template void radiusMatch
CollectionHamming_gpu<schar >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
template void radiusMatch
CollectionHamming_gpu<ushort>(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
//template void radiusMatch
CollectionHamming_gpu<short >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
template void radiusMatch
CollectionHamming_gpu<int >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches
, cudaStream_t stream);
}}}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment