Commit 6e3a1f7b authored by Vladislav Vinogradov

implement optimized version of gpu::BruteForceMatcher::knnMatch when k == 2

parent c92b040c
...@@ -105,13 +105,13 @@ namespace cv { namespace gpu { namespace bfmatcher ...@@ -105,13 +105,13 @@ namespace cv { namespace gpu { namespace bfmatcher
template <typename T> template <typename T>
void knnMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, void knnMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream); const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
template <typename T> template <typename T>
void knnMatchL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, void knnMatchL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream); const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
template <typename T> template <typename T>
void knnMatchHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, void knnMatchHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream); const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
template <typename T> template <typename T>
void radiusMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, void radiusMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
...@@ -428,7 +428,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con ...@@ -428,7 +428,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
using namespace cv::gpu::bfmatcher; using namespace cv::gpu::bfmatcher;
typedef void (*match_caller_t)(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, typedef void (*match_caller_t)(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream); const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, bool cc_12, cudaStream_t stream);
static const match_caller_t match_callers[3][8] = static const match_caller_t match_callers[3][8] =
{ {
...@@ -454,23 +454,28 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con ...@@ -454,23 +454,28 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx); ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);
ensureSizeIsEnough(nQuery, k, CV_32F, distance); ensureSizeIsEnough(nQuery, k, CV_32F, distance);
ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist); if (k != 2)
ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
if (stream) if (stream)
{ {
stream.enqueueMemSet(trainIdx, Scalar::all(-1)); stream.enqueueMemSet(trainIdx, Scalar::all(-1));
stream.enqueueMemSet(allDist, Scalar::all(numeric_limits<float>::max())); if (k != 2)
stream.enqueueMemSet(allDist, Scalar::all(numeric_limits<float>::max()));
} }
else else
{ {
trainIdx.setTo(Scalar::all(-1)); trainIdx.setTo(Scalar::all(-1));
allDist.setTo(Scalar::all(numeric_limits<float>::max())); if (k != 2)
allDist.setTo(Scalar::all(numeric_limits<float>::max()));
} }
match_caller_t func = match_callers[distType][queryDescs.depth()]; match_caller_t func = match_callers[distType][queryDescs.depth()];
CV_Assert(func != 0); CV_Assert(func != 0);
bool cc_12 = TargetArchs::builtWith(FEATURE_SET_COMPUTE_12) && DeviceInfo().supports(FEATURE_SET_COMPUTE_12);
func(queryDescs, trainDescs, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream)); func(queryDescs, trainDescs, k, mask, trainIdx, distance, allDist, cc_12, StreamAccessor::getStream(stream));
} }
void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
......
This diff is collapsed.
...@@ -320,7 +320,7 @@ TEST_P(BruteForceMatcher, KnnMatch) ...@@ -320,7 +320,7 @@ TEST_P(BruteForceMatcher, KnnMatch)
PRINT_PARAM(distStr); PRINT_PARAM(distStr);
PRINT_PARAM(dim); PRINT_PARAM(dim);
const int knn = 3; const int knn = 2;
std::vector< std::vector<cv::DMatch> > matches; std::vector< std::vector<cv::DMatch> > matches;
......
...@@ -286,7 +286,7 @@ TEST(BruteForceMatcher) ...@@ -286,7 +286,7 @@ TEST(BruteForceMatcher)
{ {
// Init CPU matcher // Init CPU matcher
int desc_len = 128; int desc_len = 64;
BruteForceMatcher< L2<float> > matcher; BruteForceMatcher< L2<float> > matcher;
...@@ -328,7 +328,7 @@ TEST(BruteForceMatcher) ...@@ -328,7 +328,7 @@ TEST(BruteForceMatcher)
d_matcher.knnMatch(d_query, d_train, d_matches, knn); d_matcher.knnMatch(d_query, d_train, d_matches, knn);
GPU_OFF; GPU_OFF;
SUBTEST << "radiusMatch"; /*SUBTEST << "radiusMatch";
float max_distance = 3.8f; float max_distance = 3.8f;
CPU_ON; CPU_ON;
...@@ -337,7 +337,7 @@ TEST(BruteForceMatcher) ...@@ -337,7 +337,7 @@ TEST(BruteForceMatcher)
GPU_ON; GPU_ON;
d_matcher.radiusMatch(d_query, d_train, d_matches, max_distance); d_matcher.radiusMatch(d_query, d_train, d_matches, max_distance);
GPU_OFF; GPU_OFF;*/
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment