Commit b119833a authored by Vladislav Vinogradov's avatar Vladislav Vinogradov

implemented optimized version of gpu::bf_radius_match

parent 961dc4e3
...@@ -76,7 +76,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, std::vector ...@@ -76,7 +76,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, std::vector
#else /* !defined (HAVE_CUDA) */ #else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace bfmatcher namespace cv { namespace gpu { namespace bf_match
{ {
template <typename T> void matchSingleL1_gpu(const DevMem2D& query, const DevMem2D& train, const DevMem2D& mask, template <typename T> void matchSingleL1_gpu(const DevMem2D& query, const DevMem2D& train, const DevMem2D& mask,
const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& trainIdx, const DevMem2D& distance,
...@@ -97,7 +97,10 @@ namespace cv { namespace gpu { namespace bfmatcher ...@@ -97,7 +97,10 @@ namespace cv { namespace gpu { namespace bfmatcher
template <typename T> void matchCollectionHamming_gpu(const DevMem2D& query, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, template <typename T> void matchCollectionHamming_gpu(const DevMem2D& query, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection,
const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance,
int cc, cudaStream_t stream); int cc, cudaStream_t stream);
}}}
namespace cv { namespace gpu { namespace bf_knnmatch
{
template <typename T> void knnMatchL1_gpu(const DevMem2D& query, const DevMem2D& train, int k, const DevMem2D& mask, template <typename T> void knnMatchL1_gpu(const DevMem2D& query, const DevMem2D& train, int k, const DevMem2D& mask,
const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& allDist, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& allDist,
int cc, cudaStream_t stream); int cc, cudaStream_t stream);
...@@ -107,7 +110,10 @@ namespace cv { namespace gpu { namespace bfmatcher ...@@ -107,7 +110,10 @@ namespace cv { namespace gpu { namespace bfmatcher
template <typename T> void knnMatchHamming_gpu(const DevMem2D& query, const DevMem2D& train, int k, const DevMem2D& mask, template <typename T> void knnMatchHamming_gpu(const DevMem2D& query, const DevMem2D& train, int k, const DevMem2D& mask,
const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& allDist, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& allDist,
int cc, cudaStream_t stream); int cc, cudaStream_t stream);
}}}
namespace cv { namespace gpu { namespace bf_radius_match
{
template <typename T> void radiusMatchL1_gpu(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, template <typename T> void radiusMatchL1_gpu(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask,
const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance,
cudaStream_t stream); cudaStream_t stream);
...@@ -170,7 +176,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& queryDescs, ...@@ -170,7 +176,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& queryDescs,
if (queryDescs.empty() || trainDescs.empty()) if (queryDescs.empty() || trainDescs.empty())
return; return;
using namespace cv::gpu::bfmatcher; using namespace cv::gpu::bf_match;
typedef void (*match_caller_t)(const DevMem2D& query, const DevMem2D& train, const DevMem2D& mask, typedef void (*match_caller_t)(const DevMem2D& query, const DevMem2D& train, const DevMem2D& mask,
const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& trainIdx, const DevMem2D& distance,
...@@ -309,7 +315,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& queryDes ...@@ -309,7 +315,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& queryDes
if (queryDescs.empty() || trainCollection.empty()) if (queryDescs.empty() || trainCollection.empty())
return; return;
using namespace cv::gpu::bfmatcher; using namespace cv::gpu::bf_match;
typedef void (*match_caller_t)(const DevMem2D& query, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, typedef void (*match_caller_t)(const DevMem2D& query, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection,
const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance,
...@@ -418,7 +424,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con ...@@ -418,7 +424,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
if (queryDescs.empty() || trainDescs.empty()) if (queryDescs.empty() || trainDescs.empty())
return; return;
using namespace cv::gpu::bfmatcher; using namespace cv::gpu::bf_knnmatch;
typedef void (*match_caller_t)(const DevMem2D& query, const DevMem2D& train, int k, const DevMem2D& mask, typedef void (*match_caller_t)(const DevMem2D& query, const DevMem2D& train, int k, const DevMem2D& mask,
const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& allDist, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& allDist,
...@@ -596,7 +602,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs, ...@@ -596,7 +602,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs,
if (queryDescs.empty() || trainDescs.empty()) if (queryDescs.empty() || trainDescs.empty())
return; return;
using namespace cv::gpu::bfmatcher; using namespace cv::gpu::bf_radius_match;
typedef void (*radiusMatch_caller_t)(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, typedef void (*radiusMatch_caller_t)(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask,
const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance,
...@@ -618,7 +624,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs, ...@@ -618,7 +624,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs,
} }
}; };
CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS)); CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));
const int nQuery = queryDescs.rows; const int nQuery = queryDescs.rows;
const int nTrain = trainDescs.rows; const int nTrain = trainDescs.rows;
......
...@@ -47,7 +47,7 @@ ...@@ -47,7 +47,7 @@
using namespace cv::gpu; using namespace cv::gpu;
using namespace cv::gpu::device; using namespace cv::gpu::device;
namespace cv { namespace gpu { namespace bfmatcher namespace cv { namespace gpu { namespace bf_knnmatch
{ {
template <typename VecDiff, typename Dist, typename T, typename Mask> template <typename VecDiff, typename Dist, typename T, typename Mask>
__device__ void distanceCalcLoop(const PtrStep_<T>& query, const DevMem2D_<T>& train, const Mask& m, int queryIdx, __device__ void distanceCalcLoop(const PtrStep_<T>& query, const DevMem2D_<T>& train, const Mask& m, int queryIdx,
......
...@@ -47,7 +47,7 @@ ...@@ -47,7 +47,7 @@
using namespace cv::gpu; using namespace cv::gpu;
using namespace cv::gpu::device; using namespace cv::gpu::device;
namespace cv { namespace gpu { namespace bfmatcher namespace cv { namespace gpu { namespace bf_match
{ {
template <int BLOCK_DIM_Y, typename T> template <int BLOCK_DIM_Y, typename T>
__device__ void findBestMatch(T& myDist, int2& myIdx, T* smin, int2* sIdx) __device__ void findBestMatch(T& myDist, int2& myIdx, T* smin, int2* sIdx)
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment