opencv / Commits / 6e3a1f7b

Commit 6e3a1f7b, authored Aug 15, 2011 by Vladislav Vinogradov
implement optimized version of gpu::BruteForceMatcher::knnMatch when k == 2
parent c92b040c
Showing 4 changed files with 239 additions and 72 deletions
modules/gpu/src/brute_force_matcher.cpp        +13  -8
modules/gpu/src/cuda/brute_force_matcher.cu    +222 -60
modules/gpu/test/test_features2d.cpp           +1   -1
samples/gpu/performance/tests.cpp              +3   -3
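For context: k == 2 is the usual configuration for Lowe's ratio test, which is how knnMatch is most often called in matching pipelines, and it is exactly the case this commit routes to a dedicated kernel. The usage sketch below is illustrative only (it is not part of the commit) and assumes the OpenCV 2.3-era gpu API; the descriptor sizes and the 0.8 ratio threshold are arbitrary.

#include <cstdio>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    // Random 64-dimensional float descriptors stand in for real SURF output.
    cv::Mat query(1000, 64, CV_32F), train(5000, 64, CV_32F);
    cv::randu(query, cv::Scalar::all(0), cv::Scalar::all(1));
    cv::randu(train, cv::Scalar::all(0), cv::Scalar::all(1));

    cv::gpu::GpuMat d_query(query), d_train(train);
    cv::gpu::BruteForceMatcher_GPU< cv::L2<float> > matcher;

    // k == 2: the case this commit handles with the new knnMatch2 path.
    std::vector< std::vector<cv::DMatch> > knnMatches;
    matcher.knnMatch(d_query, d_train, knnMatches, 2);

    // Lowe's ratio test on the two nearest neighbours.
    std::vector<cv::DMatch> good;
    for (size_t i = 0; i < knnMatches.size(); ++i)
    {
        if (knnMatches[i].size() == 2 &&
            knnMatches[i][0].distance < 0.8f * knnMatches[i][1].distance)
            good.push_back(knnMatches[i][0]);
    }

    printf("%d good matches\n", (int)good.size());
    return 0;
}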
modules/gpu/src/brute_force_matcher.cpp
@@ -105,13 +105,13 @@ namespace cv { namespace gpu { namespace bfmatcher
     template <typename T>
     void knnMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
 
     template <typename T>
     void knnMatchL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
 
     template <typename T>
     void knnMatchHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
 
     template <typename T>
     void radiusMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
@@ -428,7 +428,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
     using namespace cv::gpu::bfmatcher;
 
     typedef void (*match_caller_t)(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
 
     static const match_caller_t match_callers[3][8] =
     {
@@ -454,23 +454,28 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
     ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);
     ensureSizeIsEnough(nQuery, k, CV_32F, distance);
-    ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
+    if (k != 2)
+        ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
 
     if (stream)
     {
         stream.enqueueMemSet(trainIdx, Scalar::all(-1));
-        stream.enqueueMemSet(allDist, Scalar::all(numeric_limits<float>::max()));
+        if (k != 2)
+            stream.enqueueMemSet(allDist, Scalar::all(numeric_limits<float>::max()));
     }
     else
     {
         trainIdx.setTo(Scalar::all(-1));
-        allDist.setTo(Scalar::all(numeric_limits<float>::max()));
+        if (k != 2)
+            allDist.setTo(Scalar::all(numeric_limits<float>::max()));
     }
 
     match_caller_t func = match_callers[distType][queryDescs.depth()];
     CV_Assert(func != 0);
 
-    func(queryDescs, trainDescs, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
+    bool cc_12 = TargetArchs::builtWith(FEATURE_SET_COMPUTE_12) && DeviceInfo().supports(FEATURE_SET_COMPUTE_12);
+
+    func(queryDescs, trainDescs, k, mask, trainIdx, distance, allDist, cc_12, StreamAccessor::getStream(stream));
 }
 
 void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
modules/gpu/src/cuda/brute_force_matcher.cu
@@ -87,9 +87,8 @@ namespace cv { namespace gpu { namespace bfmatcher
         PtrStep curMask;
     };
 
-    class WithOutMask
+    struct WithOutMask
     {
-    public:
         __device__ __forceinline__ void nextMask() const
         {
         }
@@ -102,21 +101,19 @@ namespace cv { namespace gpu { namespace bfmatcher
     ///////////////////////////////////////////////////////////////////////////////
     // Reduce Sum
 
     template <int BLOCK_DIM_X> struct SumReductor;
     template <> struct SumReductor<16>
     {
-        template <typename T> static __device__ void reduce(T* sdiff_row, T& mySum)
+        template <typename T> static __device__ void reduce(volatile T* sdiff_row, T& mySum)
         {
-            volatile T* smem = sdiff_row;
-            smem[threadIdx.x] = mySum;
+            sdiff_row[threadIdx.x] = mySum;
 
             if (threadIdx.x < 8)
             {
-                smem[threadIdx.x] = mySum += smem[threadIdx.x + 8];
-                smem[threadIdx.x] = mySum += smem[threadIdx.x + 4];
-                smem[threadIdx.x] = mySum += smem[threadIdx.x + 2];
-                smem[threadIdx.x] = mySum += smem[threadIdx.x + 1];
+                sdiff_row[threadIdx.x] = mySum += sdiff_row[threadIdx.x + 8];
+                sdiff_row[threadIdx.x] = mySum += sdiff_row[threadIdx.x + 4];
+                sdiff_row[threadIdx.x] = mySum += sdiff_row[threadIdx.x + 2];
+                sdiff_row[threadIdx.x] = mySum += sdiff_row[threadIdx.x + 1];
            }
        }
    };
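Side note on the reduce() change above: the unrolled steps run without __syncthreads() because all 16 participating threads belong to a single warp, so correctness depends on the compiler actually re-reading the partial sums from shared memory at every step; marking the shared row volatile (now on the parameter itself rather than through a local alias) is what forces that. The standalone sketch below shows the same warp-synchronous pattern; it is written for this note only, is not taken from the commit, and relies on the lock-step warp execution of the GPUs this module targeted.

#include <cstdio>
#include <cuda_runtime.h>

// 16-thread warp-synchronous sum reduction, the same pattern SumReductor<16>
// uses. The volatile qualifier keeps the compiler from caching the partial
// sums in registers between the unrolled steps.
__global__ void rowSum16(const float* data, float* result)
{
    __shared__ float sdata[16];

    float mySum = data[threadIdx.x];
    volatile float* smem = sdata;
    smem[threadIdx.x] = mySum;

    // All 16 threads are in one warp, so (on the hardware this code assumes)
    // no __syncthreads() is needed between the steps.
    if (threadIdx.x < 8)
    {
        smem[threadIdx.x] = mySum += smem[threadIdx.x + 8];
        smem[threadIdx.x] = mySum += smem[threadIdx.x + 4];
        smem[threadIdx.x] = mySum += smem[threadIdx.x + 2];
        smem[threadIdx.x] = mySum += smem[threadIdx.x + 1];
    }

    if (threadIdx.x == 0)
        *result = mySum;
}

int main()
{
    float h_data[16], h_sum = 0.f;
    for (int i = 0; i < 16; ++i) h_data[i] = 1.f;

    float *d_data, *d_sum;
    cudaMalloc(&d_data, 16 * sizeof(float));
    cudaMalloc(&d_sum, sizeof(float));
    cudaMemcpy(d_data, h_data, 16 * sizeof(float), cudaMemcpyHostToDevice);

    rowSum16<<<1, 16>>>(d_data, d_sum);
    cudaMemcpy(&h_sum, d_sum, sizeof(float), cudaMemcpyDeviceToHost);

    printf("sum = %f\n", h_sum);  // expected: 16.0
    cudaFree(d_data);
    cudaFree(d_sum);
    return 0;
}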
@@ -344,7 +341,7 @@ namespace cv { namespace gpu { namespace bfmatcher
     ///////////////////////////////////////////////////////////////////////////////
     // warpReduceMinIdxIdx
 
     template <int BLOCK_DIM_Y> struct MinIdxIdxWarpReductor;
     template <> struct MinIdxIdxWarpReductor<16>
     {
         template <typename T>
@@ -435,6 +432,7 @@ namespace cv { namespace gpu { namespace bfmatcher
         __device__ __forceinline__ void prepare(const T* queryDescs, int desc_len, U* smem)
         {
             loadDescsVals<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN>(queryDescs, desc_len, queryVals, smem);
+            __syncthreads();
         }
 
         template <typename Dist>
@@ -778,6 +776,173 @@ namespace cv { namespace gpu { namespace bfmatcher
    ///////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////// Knn Match ////////////////////////////////////
    ///////////////////////////////////////////////////////////////////////////////////

    template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename ReduceDescCalculator, typename T, typename Mask>
    __device__ void distanceCalcLoop(const PtrStep_<T>& query, const DevMem2D_<T>& train, const Mask& m, int queryIdx,
        typename Dist::ResultType& distMin1, typename Dist::ResultType& distMin2, int& bestTrainIdx1, int& bestTrainIdx2,
        typename Dist::ResultType* smem)
    {
        ReduceDescCalculator reduceDescCalc;

        reduceDescCalc.prepare(query.ptr(queryIdx), train.cols, (typename Dist::ValueType*)smem);

        typename Dist::ResultType* sdiffRow = smem + BLOCK_DIM_X * threadIdx.y;

        for (int trainIdx = threadIdx.y; trainIdx < train.rows; trainIdx += BLOCK_DIM_Y)
        {
            if (m(queryIdx, trainIdx))
            {
                Dist dist;

                const T* trainRow = train.ptr(trainIdx);

                reduceDescCalc.calc(trainRow, train.cols, dist, sdiffRow);

                if (threadIdx.x == 0)
                {
                    typename Dist::ResultType val = dist;

                    if (val < distMin1)
                    {
                        distMin1 = val;
                        bestTrainIdx1 = trainIdx;
                    }
                    else if (val < distMin2)
                    {
                        distMin2 = val;
                        bestTrainIdx2 = trainIdx;
                    }
                }
            }
        }
    }

    template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename ReduceDescCalculator, typename T, typename Mask>
    __global__ void knnMatch2(const PtrStep_<T> query, const DevMem2D_<T> train, const Mask m, PtrStep_<int2> trainIdx, PtrStep_<float2> distance)
    {
        typedef typename Dist::ResultType ResultType;
        typedef typename Dist::ValueType ValueType;

        __shared__ ResultType smem[BLOCK_DIM_X * BLOCK_DIM_Y];

        const int queryIdx = blockIdx.x;

        ResultType distMin1 = numeric_limits<ResultType>::max();
        ResultType distMin2 = numeric_limits<ResultType>::max();

        int bestTrainIdx1 = -1;
        int bestTrainIdx2 = -1;

        distanceCalcLoop<BLOCK_DIM_X, BLOCK_DIM_Y, Dist, ReduceDescCalculator>(query, train, m, queryIdx,
            distMin1, distMin2, bestTrainIdx1, bestTrainIdx2, smem);
        __syncthreads();

        volatile ResultType* sdistMinRow = smem;
        volatile int* sbestTrainIdxRow = (int*)(sdistMinRow + 2 * BLOCK_DIM_Y);

        if (threadIdx.x == 0)
        {
            sdistMinRow[threadIdx.y] = distMin1;
            sdistMinRow[threadIdx.y + BLOCK_DIM_Y] = distMin2;

            sbestTrainIdxRow[threadIdx.y] = bestTrainIdx1;
            sbestTrainIdxRow[threadIdx.y + BLOCK_DIM_Y] = bestTrainIdx2;
        }
        __syncthreads();

        if (threadIdx.x == 0 && threadIdx.y == 0)
        {
            distMin1 = numeric_limits<ResultType>::max();
            distMin2 = numeric_limits<ResultType>::max();

            bestTrainIdx1 = -1;
            bestTrainIdx2 = -1;

            #pragma unroll
            for (int i = 0; i < BLOCK_DIM_Y; ++i)
            {
                ResultType val = sdistMinRow[i];

                if (val < distMin1)
                {
                    distMin1 = val;
                    bestTrainIdx1 = sbestTrainIdxRow[i];
                }
                else if (val < distMin2)
                {
                    distMin2 = val;
                    bestTrainIdx2 = sbestTrainIdxRow[i];
                }
            }

            #pragma unroll
            for (int i = BLOCK_DIM_Y; i < 2 * BLOCK_DIM_Y; ++i)
            {
                ResultType val = sdistMinRow[i];

                if (val < distMin2)
                {
                    distMin2 = val;
                    bestTrainIdx2 = sbestTrainIdxRow[i];
                }
            }

            trainIdx.ptr(queryIdx)[0] = make_int2(bestTrainIdx1, bestTrainIdx2);
            distance.ptr(queryIdx)[0] = make_float2(distMin1, distMin2);
        }
    }

    template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T, typename Mask>
    void knnMatch2Simple_caller(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs, const Mask& mask,
        const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance, cudaStream_t stream)
    {
        dim3 grid(queryDescs.rows, 1, 1);
        dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);

        knnMatch2<BLOCK_DIM_X, BLOCK_DIM_Y, Dist, ReduceDescCalculatorSimple<BLOCK_DIM_X, T>, T>
            <<<grid, threads, 0, stream>>>(queryDescs, trainDescs, mask, trainIdx, distance);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN, typename Dist, typename T, typename Mask>
    void knnMatch2Cached_caller(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs, const Mask& mask,
        const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance, cudaStream_t stream)
    {
        StaticAssert<BLOCK_DIM_X * BLOCK_DIM_Y >= MAX_DESCRIPTORS_LEN>::check(); // block size must be at least the max descriptors length
        StaticAssert<MAX_DESCRIPTORS_LEN % BLOCK_DIM_X == 0>::check();           // max descriptors length must be a multiple of BLOCK_DIM_X

        dim3 grid(queryDescs.rows, 1, 1);
        dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);

        knnMatch2<BLOCK_DIM_X, BLOCK_DIM_Y, Dist, ReduceDescCalculatorCached<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN, T, typename Dist::ValueType>, T>
            <<<grid, threads, 0, stream>>>(queryDescs, trainDescs, mask, trainIdx, distance);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    template <typename Dist, typename T, typename Mask>
    void knnMatch2Dispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
        const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance, bool cc_12, cudaStream_t stream)
    {
        if (query.cols < 64)
            knnMatch2Cached_caller<16, 16, 64, false, Dist>(query, train, mask, trainIdx, distance, stream);
        else if (query.cols == 64)
            knnMatch2Cached_caller<16, 16, 64, true, Dist>(query, train, mask, trainIdx, distance, stream);
        else if (query.cols < 128)
            knnMatch2Cached_caller<16, 16, 128, false, Dist>(query, train, mask, trainIdx, distance, stream);
        else if (query.cols == 128 && cc_12)
            knnMatch2Cached_caller<16, 16, 128, true, Dist>(query, train, mask, trainIdx, distance, stream);
        else if (query.cols < 256 && cc_12)
            knnMatch2Cached_caller<16, 16, 256, false, Dist>(query, train, mask, trainIdx, distance, stream);
        else if (query.cols == 256 && cc_12)
            knnMatch2Cached_caller<16, 16, 256, true, Dist>(query, train, mask, trainIdx, distance, stream);
        else
            knnMatch2Simple_caller<16, 16, Dist>(query, train, mask, trainIdx, distance, stream);
    }
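The point of the new kernels: when k == 2 the full nQuery x nTrain distance buffer (allDist) is never materialized and the separate findKnnMatch pass is skipped; one thread block per query streams over the train descriptors and keeps only the two best candidates. Stripped of the thread cooperation, the per-query idea is a single best-two pass. The sketch below is a plain CPU illustration written for this note (dist stands in for whichever Dist functor is instantiated); it is not a transcription of the kernel, which distributes the candidate loop over a thread block and merges per-thread results at the end.

#include <limits>
#include <utility>

// Single-pass selection of the two smallest distances over nTrain candidates.
// This sequential version keeps the strict invariant by demoting the previous
// best whenever a new minimum is found.
template <typename DistFn>
std::pair<int, int> bestTwo(int nTrain, DistFn dist)
{
    float distMin1 = std::numeric_limits<float>::max();
    float distMin2 = std::numeric_limits<float>::max();
    int bestTrainIdx1 = -1, bestTrainIdx2 = -1;

    for (int trainIdx = 0; trainIdx < nTrain; ++trainIdx)
    {
        const float val = dist(trainIdx);
        if (val < distMin1)
        {
            distMin2 = distMin1; bestTrainIdx2 = bestTrainIdx1; // demote previous best
            distMin1 = val;      bestTrainIdx1 = trainIdx;
        }
        else if (val < distMin2)
        {
            distMin2 = val;      bestTrainIdx2 = trainIdx;
        }
    }
    return std::make_pair(bestTrainIdx1, bestTrainIdx2);
}

Skipping allDist also removes the O(nQuery x nTrain) intermediate buffer on the host side, which is why brute_force_matcher.cpp above only allocates and clears allDist when k != 2.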
    ///////////////////////////////////////////////////////////////////////////////
    // Calc distance kernel
@@ -1026,77 +1191,74 @@ namespace cv { namespace gpu { namespace bfmatcher
         findKnnMatch_caller<256>(knn, trainIdx, distance, allDist, stream);
     }
 
-    template <typename T>
-    void knnMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream)
+    template <typename Dist, typename T>
+    void knnMatchDispatcher(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs, int knn,
+        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream)
     {
         if (mask.data)
         {
-            calcDistanceDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, SingleMask(mask), allDist, stream);
+            if (knn == 2)
+            {
+                knnMatch2Dispatcher<Dist>(queryDescs, trainDescs, SingleMask(mask), (DevMem2D_<int2>)trainIdx, (DevMem2D_<float2>)distance, cc_12, stream);
+                return;
+            }
+
+            calcDistanceDispatcher<Dist>(queryDescs, trainDescs, SingleMask(mask), allDist, stream);
         }
         else
         {
-            calcDistanceDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, WithOutMask(), allDist, stream);
+            if (knn == 2)
+            {
+                knnMatch2Dispatcher<Dist>(queryDescs, trainDescs, WithOutMask(), (DevMem2D_<int2>)trainIdx, (DevMem2D_<float2>)distance, cc_12, stream);
+                return;
+            }
+
+            calcDistanceDispatcher<Dist>(queryDescs, trainDescs, WithOutMask(), allDist, stream);
         }
 
         findKnnMatchDispatcher(knn, trainIdx, distance, allDist, stream);
     }
 
-    template void knnMatchL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchL1_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchL1_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchL1_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+    template <typename T>
+    void knnMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
+        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream)
+    {
+        knnMatchDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, knn, mask, trainIdx, distance, allDist, cc_12, stream);
+    }
+
+    template void knnMatchL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchL1_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchL1_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchL1_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
 
     template <typename T>
     void knnMatchL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream)
+        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream)
     {
-        if (mask.data)
-        {
-            calcDistanceDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-                SingleMask(mask), allDist, stream);
-        }
-        else
-        {
-            calcDistanceDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-                WithOutMask(), allDist, stream);
-        }
-
-        findKnnMatchDispatcher(knn, trainIdx, distance, allDist, stream);
+        knnMatchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, knn, mask, trainIdx, distance, allDist, cc_12, stream);
     }
 
-    template void knnMatchL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchL2_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchL2_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchL2_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+    template void knnMatchL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchL2_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchL2_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchL2_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
 
     template <typename T>
     void knnMatchHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
-        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream)
+        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream)
     {
-        if (mask.data)
-        {
-            calcDistanceDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-                SingleMask(mask), allDist, stream);
-        }
-        else
-        {
-            calcDistanceDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
-                WithOutMask(), allDist, stream);
-        }
-
-        findKnnMatchDispatcher(knn, trainIdx, distance, allDist, stream);
+        knnMatchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, knn, mask, trainIdx, distance, allDist, cc_12, stream);
     }
 
-    template void knnMatchHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
-    template void knnMatchHamming_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream);
+    template void knnMatchHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
+    template void knnMatchHamming_gpu<int   >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
    ///////////////////////////////////////////////////////////////////////////////////
    /////////////////////////////////// Radius Match //////////////////////////////////
modules/gpu/test/test_features2d.cpp
@@ -320,7 +320,7 @@ TEST_P(BruteForceMatcher, KnnMatch)
     PRINT_PARAM(distStr);
     PRINT_PARAM(dim);
 
-    const int knn = 3;
+    const int knn = 2;
 
     std::vector< std::vector<cv::DMatch> > matches;
samples/gpu/performance/tests.cpp
@@ -286,7 +286,7 @@ TEST(BruteForceMatcher)
 {
     // Init CPU matcher
 
-    int desc_len = 128;
+    int desc_len = 64;
 
     BruteForceMatcher< L2<float> > matcher;
@@ -328,7 +328,7 @@ TEST(BruteForceMatcher)
     d_matcher.knnMatch(d_query, d_train, d_matches, knn);
     GPU_OFF;
 
-    SUBTEST << "radiusMatch";
+    /*
+    SUBTEST << "radiusMatch";
     float max_distance = 3.8f;
 
     CPU_ON;

@@ -337,7 +337,7 @@ TEST(BruteForceMatcher)
     GPU_ON;
     d_matcher.radiusMatch(d_query, d_train, d_matches, max_distance);
     GPU_OFF;
+    */
 }