Commit 8bb9e430 authored by Vladislav Vinogradov

added registerPageLocked/unregisterPageLocked functions

added convert functions to BruteForceMatcher_GPU
other minor fixes
parent 24279c2c
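For context on the first item in the message, here is a minimal usage sketch of the new page-locked registration API declared in the header below; the surrounding setup (image size, the upload call) is illustrative and not part of this commit.

    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        // An ordinary pageable cv::Mat allocated by OpenCV (sizes are illustrative).
        cv::Mat frame(480, 640, CV_8UC1);

        // Pin the existing buffer; internally this maps to cudaHostRegister (see the
        // implementation further down), so later uploads/downloads can use pinned-memory transfers.
        cv::gpu::registerPageLocked(frame);

        cv::gpu::GpuMat d_frame;
        d_frame.upload(frame);

        // ... GPU processing on d_frame ...

        // Make the memory pageable again before frame is released.
        cv::gpu::unregisterPageLocked(frame);
        return 0;
    }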
@@ -149,6 +149,11 @@ namespace cv
     // It is convertable to cv::Mat header without reference counting
     // so you can use it with other opencv functions.

+    // Page-locks the matrix m memory and maps it for the device(s)
+    CV_EXPORTS void registerPageLocked(Mat& m);
+    // Unmaps the memory of matrix m, and makes it pageable again.
+    CV_EXPORTS void unregisterPageLocked(Mat& m);
+
     class CV_EXPORTS CudaMem
     {
     public:
@@ -1254,8 +1259,10 @@ namespace cv
             GpuMat& trainIdx, GpuMat& distance,
             const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());

-        // Download trainIdx and distance to CPU vector with DMatch
+        // Download trainIdx and distance and convert it to CPU vector with DMatch
         static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches);
+        // Convert trainIdx and distance to vector with DMatch
+        static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches);

         // Find one best match for each query descriptor.
         void match(const GpuMat& queryDescs, const GpuMat& trainDescs, std::vector<DMatch>& matches,
@@ -1273,13 +1280,13 @@ namespace cv
             GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
             const GpuMat& maskCollection, Stream& stream = Stream::Null());

-        // Download trainIdx, imgIdx and distance to CPU vector with DMatch
-        static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
-            std::vector<DMatch>& matches);
+        // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
+        static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector<DMatch>& matches);
+        // Convert trainIdx, imgIdx and distance to vector with DMatch
+        static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches);

         // Find one best match from train collection for each query descriptor.
-        void match(const GpuMat& queryDescs, std::vector<DMatch>& matches,
-            const std::vector<GpuMat>& masks = std::vector<GpuMat>());
+        void match(const GpuMat& queryDescs, std::vector<DMatch>& matches, const std::vector<GpuMat>& masks = std::vector<GpuMat>());

         // Find k best matches for each query descriptor (in increasing order of distances).
         // trainIdx.at<int>(queryIdx, i) will contain index of i'th best trains (i < k).
@@ -1291,12 +1298,15 @@ namespace cv
         void knnMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
             GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k, const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());

-        // Download trainIdx and distance to CPU vector with DMatch
+        // Download trainIdx and distance and convert it to vector with DMatch
         // compactResult is used when mask is not empty. If compactResult is false matches
         // vector will have the same size as queryDescriptors rows. If compactResult is true
         // matches vector will not contain matches for fully masked out query descriptors.
         static void knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
             std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+        // Convert trainIdx and distance to vector with DMatch
+        static void knnMatchConvert(const Mat& trainIdx, const Mat& distance,
+            std::vector< std::vector<DMatch> >& matches, bool compactResult = false);

         // Find k best matches for each query descriptor (in increasing order of distances).
         // compactResult is used when mask is not empty. If compactResult is false matches
@@ -1326,13 +1336,16 @@ namespace cv
             GpuMat& trainIdx, GpuMat& nMatches, GpuMat& distance, float maxDistance,
             const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());

-        // Download trainIdx, nMatches and distance to CPU vector with DMatch.
+        // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
         // matches will be sorted in increasing order of distances.
         // compactResult is used when mask is not empty. If compactResult is false matches
         // vector will have the same size as queryDescriptors rows. If compactResult is true
         // matches vector will not contain matches for fully masked out query descriptors.
         static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& nMatches, const GpuMat& distance,
             std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+        // Convert trainIdx, nMatches and distance to vector with DMatch.
+        static void radiusMatchConvert(const Mat& trainIdx, const Mat& nMatches, const Mat& distance,
+            std::vector< std::vector<DMatch> >& matches, bool compactResult = false);

         // Find best matches for each query descriptor which have distance less than maxDistance
         // in increasing order of distances).
...
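A sketch of how the new *Convert overloads are meant to complement matchDownload: download the raw result matrices yourself (here asynchronously through a Stream into buffers pinned with the new registerPageLocked) and convert them on the CPU. The matcher type, descriptor variables and buffer handling are assumptions for illustration; only the declarations above come from the commit.

    // d_query and d_train are GpuMat descriptor matrices prepared elsewhere (assumed).
    cv::gpu::BruteForceMatcher_GPU< cv::L2<float> > matcher;
    cv::gpu::Stream stream;

    cv::gpu::GpuMat d_trainIdx, d_distance;
    matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance, cv::gpu::GpuMat(), stream);

    // Pre-allocate host buffers and page-lock them so the downloads can overlap other work.
    cv::Mat trainIdxCPU(d_trainIdx.size(), d_trainIdx.type());
    cv::Mat distanceCPU(d_distance.size(), d_distance.type());
    cv::gpu::registerPageLocked(trainIdxCPU);
    cv::gpu::registerPageLocked(distanceCPU);

    stream.enqueueDownload(d_trainIdx, trainIdxCPU);
    stream.enqueueDownload(d_distance, distanceCPU);
    stream.waitForCompletion();

    // matchConvert only reshuffles the already-downloaded data into DMatch objects;
    // matchDownload would perform the device-to-host copy itself and then call it.
    std::vector<cv::DMatch> matches;
    matcher.matchConvert(trainIdxCPU, distanceCPU, matches);

    cv::gpu::unregisterPageLocked(trainIdxCPU);
    cv::gpu::unregisterPageLocked(distanceCPU);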
@@ -56,17 +56,21 @@ bool cv::gpu::BruteForceMatcher_GPU_base::empty() const { throw_nogpu(); return
 bool cv::gpu::BruteForceMatcher_GPU_base::isMaskSupported() const { throw_nogpu(); return true; }
 void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat&, const GpuMat&, vector<DMatch>&) { throw_nogpu(); }
+void cv::gpu::BruteForceMatcher_GPU_base::matchConvert(const Mat&, const Mat&, std::vector<DMatch>&) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat&, const GpuMat&, vector<DMatch>&, const GpuMat&) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::makeGpuCollection(GpuMat&, GpuMat&, const vector<GpuMat>&) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector<DMatch>&) { throw_nogpu(); }
+void cv::gpu::BruteForceMatcher_GPU_base::matchConvert(const Mat&, const Mat&, const Mat&, std::vector<DMatch>&) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat&, std::vector<DMatch>&, const std::vector<GpuMat>&) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, const GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_nogpu(); }
+void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, int, const GpuMat&, bool) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, int, const std::vector<GpuMat>&, bool) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_nogpu(); }
+void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, float, const GpuMat&, bool) { throw_nogpu(); }
 void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, float, const std::vector<GpuMat>&, bool) { throw_nogpu(); }
@@ -216,8 +220,18 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& queryDescs,
     func(queryDescs, trainDescs, mask, trainIdx, trainIdx, distance, cc_12, StreamAccessor::getStream(stream));
 }

-void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, const GpuMat& distance,
-    vector<DMatch>& matches)
+void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, const GpuMat& distance, vector<DMatch>& matches)
+{
+    if (trainIdx.empty() || distance.empty())
+        return;
+
+    Mat trainIdxCPU = trainIdx;
+    Mat distanceCPU = distance;
+
+    matchConvert(trainIdxCPU, distanceCPU, matches);
+}
+
+void cv::gpu::BruteForceMatcher_GPU_base::matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches)
 {
     if (trainIdx.empty() || distance.empty())
         return;
@@ -227,14 +241,11 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx,
     const int nQuery = trainIdx.cols;

-    Mat trainIdxCPU = trainIdx;
-    Mat distanceCPU = distance;
-
     matches.clear();
     matches.reserve(nQuery);

-    const int* trainIdx_ptr = trainIdxCPU.ptr<int>();
-    const float* distance_ptr = distanceCPU.ptr<float>();
+    const int* trainIdx_ptr = trainIdx.ptr<int>();
+    const float* distance_ptr = distance.ptr<float>();
     for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++distance_ptr)
     {
         int trainIdx = *trainIdx_ptr;
@@ -347,8 +358,19 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& queryDes
     func(queryDescs, trainCollection, maskCollection, trainIdx, imgIdx, distance, cc_12, StreamAccessor::getStream(stream));
 }

-void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx,
-    const GpuMat& distance, vector<DMatch>& matches)
+void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, vector<DMatch>& matches)
+{
+    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
+        return;
+
+    Mat trainIdxCPU = trainIdx;
+    Mat imgIdxCPU = imgIdx;
+    Mat distanceCPU = distance;
+
+    matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches);
+}
+
+void cv::gpu::BruteForceMatcher_GPU_base::matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches)
 {
     if (trainIdx.empty() || imgIdx.empty() || distance.empty())
         return;
@@ -359,16 +381,12 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx,
     const int nQuery = trainIdx.cols;

-    Mat trainIdxCPU = trainIdx;
-    Mat imgIdxCPU = imgIdx;
-    Mat distanceCPU = distance;
-
     matches.clear();
     matches.reserve(nQuery);

-    const int* trainIdx_ptr = trainIdxCPU.ptr<int>();
-    const int* imgIdx_ptr = imgIdxCPU.ptr<int>();
-    const float* distance_ptr = distanceCPU.ptr<float>();
+    const int* trainIdx_ptr = trainIdx.ptr<int>();
+    const int* imgIdx_ptr = imgIdx.ptr<int>();
+    const float* distance_ptr = distance.ptr<float>();
     for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
     {
         int trainIdx = *trainIdx_ptr;
@@ -385,8 +403,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx,
     }
 }

-void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat& queryDescs, vector<DMatch>& matches,
-    const vector<GpuMat>& masks)
+void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat& queryDescs, vector<DMatch>& matches, const vector<GpuMat>& masks)
 {
     GpuMat trainCollection;
     GpuMat maskCollection;
@@ -462,15 +479,24 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainId
     if (trainIdx.empty() || distance.empty())
         return;

+    Mat trainIdxCPU = trainIdx;
+    Mat distanceCPU = distance;
+
+    knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult);
+}
+
+void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, const Mat& distance,
+    std::vector< std::vector<DMatch> >& matches, bool compactResult)
+{
+    if (trainIdx.empty() || distance.empty())
+        return;
+
     CV_Assert(trainIdx.type() == CV_32SC1);
     CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());

     const int nQuery = distance.rows;
     const int k = trainIdx.cols;

-    Mat trainIdxCPU = trainIdx;
-    Mat distanceCPU = distance;
-
     matches.clear();
     matches.reserve(nQuery);
@@ -480,8 +506,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainId
         vector<DMatch>& curMatches = matches.back();
         curMatches.reserve(k);

-        int* trainIdx_ptr = trainIdxCPU.ptr<int>(queryIdx);
-        float* distance_ptr = distanceCPU.ptr<float>(queryIdx);
+        const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
+        const float* distance_ptr = distance.ptr<float>(queryIdx);
         for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr)
         {
             int trainIdx = *trainIdx_ptr;
@@ -614,24 +640,33 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trai
     if (trainIdx.empty() || nMatches.empty() || distance.empty())
         return;

+    Mat trainIdxCPU = trainIdx;
+    Mat nMatchesCPU = nMatches;
+    Mat distanceCPU = distance;
+
+    radiusMatchConvert(trainIdxCPU, nMatchesCPU, distanceCPU, matches, compactResult);
+}
+
+void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx, const Mat& nMatches, const Mat& distance,
+    std::vector< std::vector<DMatch> >& matches, bool compactResult)
+{
+    if (trainIdx.empty() || nMatches.empty() || distance.empty())
+        return;
+
     CV_Assert(trainIdx.type() == CV_32SC1);
     CV_Assert(nMatches.type() == CV_32SC1 && nMatches.isContinuous() && nMatches.cols >= trainIdx.rows);
     CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());

     const int nQuery = trainIdx.rows;

-    Mat trainIdxCPU = trainIdx;
-    Mat nMatchesCPU = nMatches;
-    Mat distanceCPU = distance;
-
     matches.clear();
     matches.reserve(nQuery);

-    const unsigned int* nMatches_ptr = nMatchesCPU.ptr<unsigned int>();
+    const unsigned int* nMatches_ptr = nMatches.ptr<unsigned int>();
     for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
     {
-        const int* trainIdx_ptr = trainIdxCPU.ptr<int>(queryIdx);
-        const float* distance_ptr = distanceCPU.ptr<float>(queryIdx);
+        const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
+        const float* distance_ptr = distance.ptr<float>(queryIdx);

         const int nMatches = std::min(static_cast<int>(nMatches_ptr[queryIdx]), trainIdx.cols);
...
@@ -56,9 +56,8 @@ namespace cv { namespace gpu { namespace bfmatcher
     ///////////////////////////////////////////////////////////////////////////////
     // Mask strategy

-    class SingleMask
+    struct SingleMask
     {
-    public:
         explicit SingleMask(const PtrStep& mask_) : mask(mask_) {}

         __device__ __forceinline__ bool operator()(int queryIdx, int trainIdx) const
@@ -66,13 +65,11 @@ namespace cv { namespace gpu { namespace bfmatcher
             return mask.ptr(queryIdx)[trainIdx] != 0;
         }

-    private:
-        PtrStep mask;
+        const PtrStep mask;
     };

-    class MaskCollection
+    struct MaskCollection
     {
-    public:
         explicit MaskCollection(PtrStep* maskCollection_) : maskCollection(maskCollection_) {}

         __device__ __forceinline__ void nextMask()
@@ -86,15 +83,14 @@ namespace cv { namespace gpu { namespace bfmatcher
             return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(queryIdx), trainIdx, val), (val != 0));
         }

-    private:
-        PtrStep* maskCollection;
+        const PtrStep* maskCollection;
         PtrStep curMask;
     };

     class WithOutMask
     {
     public:
-        __device__ __forceinline__ void nextMask()
+        __device__ __forceinline__ void nextMask() const
         {
         }
         __device__ __forceinline__ bool operator()(int queryIdx, int trainIdx) const
@@ -128,9 +124,8 @@ namespace cv { namespace gpu { namespace bfmatcher
     ///////////////////////////////////////////////////////////////////////////////
     // Distance

-    template <typename T> class L1Dist
+    template <typename T> struct L1Dist
     {
-    public:
         typedef int ResultType;
         typedef int ValueType;
@@ -151,12 +146,10 @@ namespace cv { namespace gpu { namespace bfmatcher
             return mySum;
         }

-    private:
         int mySum;
     };

-    template <> class L1Dist<float>
+    template <> struct L1Dist<float>
     {
-    public:
         typedef float ResultType;
         typedef float ValueType;
@@ -177,13 +170,11 @@ namespace cv { namespace gpu { namespace bfmatcher
             return mySum;
         }

-    private:
         float mySum;
     };

-    class L2Dist
+    struct L2Dist
     {
-    public:
         typedef float ResultType;
         typedef float ValueType;
@@ -205,13 +196,11 @@ namespace cv { namespace gpu { namespace bfmatcher
             return sqrtf(mySum);
         }

-    private:
         float mySum;
     };

-    class HammingDist
+    struct HammingDist
     {
-    public:
         typedef int ResultType;
         typedef int ValueType;
@@ -232,7 +221,6 @@ namespace cv { namespace gpu { namespace bfmatcher
             return mySum;
         }

-    private:
         int mySum;
     };
@@ -425,10 +413,8 @@ namespace cv { namespace gpu { namespace bfmatcher
     ///////////////////////////////////////////////////////////////////////////////
     // ReduceDescCalculator

-    template <int BLOCK_DIM_X, typename T>
-    class ReduceDescCalculatorSimple
+    template <int BLOCK_DIM_X, typename T> struct ReduceDescCalculatorSimple
     {
-    public:
         __device__ __forceinline__ void prepare(const T* queryDescs_, int, void*)
         {
             queryDescs = queryDescs_;
@@ -440,14 +426,12 @@ namespace cv { namespace gpu { namespace bfmatcher
             reduceDescDiff<BLOCK_DIM_X>(queryDescs, trainDescs, desc_len, dist, sdiff_row);
         }

-    private:
         const T* queryDescs;
     };

     template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN, typename T, typename U>
-    class ReduceDescCalculatorCached
+    struct ReduceDescCalculatorCached
     {
-    public:
         __device__ __forceinline__ void prepare(const T* queryDescs, int desc_len, U* smem)
         {
             loadDescsVals<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN>(queryDescs, desc_len, queryVals, smem);
@@ -459,7 +443,6 @@ namespace cv { namespace gpu { namespace bfmatcher
             reduceDescDiffCached<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN>(queryVals, trainDescs, desc_len, dist, sdiff_row);
         }

-    private:
         U queryVals[MAX_DESCRIPTORS_LEN / BLOCK_DIM_X];
     };
@@ -497,10 +480,8 @@ namespace cv { namespace gpu { namespace bfmatcher
     ///////////////////////////////////////////////////////////////////////////////
     // Train collection loop strategy

-    template <typename T>
-    class SingleTrain
+    template <typename T> struct SingleTrain
     {
-    public:
         explicit SingleTrain(const DevMem2D_<T>& trainDescs_) : trainDescs(trainDescs_)
         {
         }
@@ -517,14 +498,11 @@ namespace cv { namespace gpu { namespace bfmatcher
             return trainDescs.cols;
         }

-    private:
-        DevMem2D_<T> trainDescs;
+        const DevMem2D_<T> trainDescs;
     };

-    template <typename T>
-    class TrainCollection
+    template <typename T> struct TrainCollection
     {
-    public:
         TrainCollection(const DevMem2D_<T>* trainCollection_, int nImg_, int desclen_) :
             trainCollection(trainCollection_), nImg(nImg_), desclen(desclen_)
         {
@@ -536,7 +514,7 @@ namespace cv { namespace gpu { namespace bfmatcher
         {
             for (int imgIdx = 0; imgIdx < nImg; ++imgIdx)
             {
-                DevMem2D_<T> trainDescs = trainCollection[imgIdx];
+                const DevMem2D_<T> trainDescs = trainCollection[imgIdx];
                 m.nextMask();
                 matchDescs<Dist>(queryIdx, imgIdx, trainDescs, m, reduceDescCalc, myMin, myBestTrainIdx, myBestImgIdx, sdiff_row);
             }
@@ -547,7 +525,6 @@ namespace cv { namespace gpu { namespace bfmatcher
             return desclen;
         }

-    private:
         const DevMem2D_<T>* trainCollection;
         int nImg;
         int desclen;
@@ -806,7 +783,7 @@ namespace cv { namespace gpu { namespace bfmatcher
     // Calc distance kernel

     template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T, typename Mask>
-    __global__ void calcDistance(PtrStep_<T> queryDescs_, DevMem2D_<T> trainDescs_, Mask mask, PtrStepf distance)
+    __global__ void calcDistance(const PtrStep_<T> queryDescs_, const DevMem2D_<T> trainDescs_, const Mask mask, PtrStepf distance)
     {
         __shared__ typename Dist::ResultType sdiff[BLOCK_DIM_X * BLOCK_DIM_Y];
@@ -989,8 +966,7 @@ namespace cv { namespace gpu { namespace bfmatcher
     ///////////////////////////////////////////////////////////////////////////////
     // find knn match kernel

-    template <int BLOCK_SIZE>
-    __global__ void findBestMatch(DevMem2Df allDist_, int i, PtrStepi trainIdx_, PtrStepf distance_)
+    template <int BLOCK_SIZE> __global__ void findBestMatch(DevMem2Df allDist_, int i, PtrStepi trainIdx_, PtrStepf distance_)
     {
         const int SMEM_SIZE = BLOCK_SIZE > 64 ? BLOCK_SIZE : 64;
         __shared__ float sdist[SMEM_SIZE];
@@ -1130,8 +1106,8 @@ namespace cv { namespace gpu { namespace bfmatcher
     // Radius Match kernel

     template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T, typename Mask>
-    __global__ void radiusMatch(PtrStep_<T> queryDescs_, DevMem2D_<T> trainDescs_,
-        float maxDistance, Mask mask, DevMem2Di trainIdx_, unsigned int* nMatches, PtrStepf distance)
+    __global__ void radiusMatch(const PtrStep_<T> queryDescs_, const DevMem2D_<T> trainDescs_,
+        float maxDistance, const Mask mask, DevMem2Di trainIdx_, unsigned int* nMatches, PtrStepf distance)
     {
 #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
...
@@ -42,6 +42,7 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/transform.hpp"
+#include "opencv2/gpu/device/functional.hpp"

 #define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200
@@ -56,9 +57,9 @@ namespace cv { namespace gpu
     __constant__ float3 crot2;
     __constant__ float3 ctransl;

-    struct TransformOp
+    struct TransformOp : unary_function<float3, float3>
     {
-        __device__ __forceinline__ float3 operator()(float3 p) const
+        __device__ __forceinline__ float3 operator()(const float3& p) const
         {
             return make_float3(
                 crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x,
@@ -89,9 +90,9 @@ namespace cv { namespace gpu
     __constant__ float3 cproj0;
     __constant__ float3 cproj1;

-    struct ProjectOp
+    struct ProjectOp : unary_function<float3, float3>
     {
-        __device__ __forceinline__ float2 operator()(float3 p) const
+        __device__ __forceinline__ float2 operator()(const float3& p) const
         {
             // Rotate and translate in 3D
             float3 t = make_float3(
...
@@ -49,7 +49,7 @@ using namespace cv::gpu::device;

 namespace cv { namespace gpu { namespace canny
 {
-    __global__ void calcSobelRowPass(PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
+    __global__ void calcSobelRowPass(const PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
     {
         __shared__ int smem[16][18];
@@ -100,7 +100,8 @@ namespace cv { namespace gpu { namespace canny
         }
     };

-    template <typename Norm> __global__ void calcMagnitude(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
+    template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, const PtrStepi dy_buf,
+        PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
     {
         __shared__ int sdx[18][16];
         __shared__ int sdy[18][16];
@@ -179,7 +180,7 @@ namespace cv { namespace gpu { namespace canny
 #define CANNY_SHIFT 15
 #define TG22 (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)

-    __global__ void calcMap(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
+    __global__ void calcMap(const PtrStepi dx, const PtrStepi dy, const PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
     {
         __shared__ float smem[18][18];
...
@@ -56,10 +56,9 @@ namespace cv { namespace gpu { namespace mathfunc
     //////////////////////////////////////////////////////////////////////////////////////
     // Compare

-    template <typename T1, typename T2>
-    struct NotEqual
+    template <typename T1, typename T2> struct NotEqual : binary_function<T1, T2, uchar>
     {
-        __device__ __forceinline__ uchar operator()(const T1& src1, const T2& src2)
+        __device__ __forceinline__ uchar operator()(const T1& src1, const T2& src2) const
         {
             return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
         }
@@ -467,8 +466,7 @@ namespace cv { namespace gpu { namespace mathfunc
     //////////////////////////////////////////////////////////////////////////
     // pow

-    template<typename T, bool Signed = device::numeric_limits<T>::is_signed>
-    struct PowOp
+    template<typename T, bool Signed = device::numeric_limits<T>::is_signed> struct PowOp : unary_function<T, T>
     {
         float power;
         PowOp(float power_) : power(power_) {}
@@ -479,13 +477,12 @@ namespace cv { namespace gpu { namespace mathfunc
         }
     };

-    template<typename T>
-    struct PowOp<T, true>
+    template<typename T> struct PowOp<T, true> : unary_function<T, T>
     {
         float power;
         PowOp(float power_) : power(power_) {}

-        __device__ __forceinline__ float operator()(const T& e)
+        __device__ __forceinline__ float operator()(const T& e) const
         {
             T res = saturate_cast<T>(__powf((float)e, power));
@@ -495,13 +492,12 @@ namespace cv { namespace gpu { namespace mathfunc
         }
     };

-    template<>
-    struct PowOp<float>
+    template<> struct PowOp<float> : unary_function<float, float>
     {
         float power;
         PowOp(float power_) : power(power_) {}

-        __device__ __forceinline__ float operator()(const float& e)
+        __device__ __forceinline__ float operator()(const float& e) const
         {
             return __powf(fabs(e), power);
         }
...
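The recurring change in these CUDA files is the move from class-based functors with private state to plain structs deriving from unary_function/binary_function, with const call operators, so they can be passed to kernels by value. A standalone sketch of that pattern; the names Scale and apply are made up for illustration and are not part of the commit.

    #include <cuda_runtime.h>

    // Plain aggregate functor: public state, const __device__ operator().
    struct Scale
    {
        float alpha, beta;
        __device__ __forceinline__ float operator()(float x) const { return alpha * x + beta; }
    };

    // The functor is taken by value (and as const), which is how the kernels
    // in this commit receive their Mask / UnOp / BinOp parameters.
    template <typename Op>
    __global__ void apply(const float* src, float* dst, int n, const Op op)
    {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            dst[i] = op(src[i]);
    }

    // Host side (illustrative):
    //   Scale op = {2.0f, 1.0f};
    //   apply<<<grid, block>>>(d_src, d_dst, n, op);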
@@ -105,7 +105,7 @@ namespace cv { namespace gpu { namespace histograms
         if (x + 3 < cols) addByte(s_WarpHist, (data >> 24) & 0xFFU, tag);
     }

-    __global__ void histogram256(PtrStep_<uint> d_Data, uint* d_PartialHistograms, uint dataCount, uint cols)
+    __global__ void histogram256(const PtrStep_<uint> d_Data, uint* d_PartialHistograms, uint dataCount, uint cols)
     {
         //Per-warp subhistogram storage
         __shared__ uint s_Hist[HISTOGRAM256_THREADBLOCK_MEMORY];
@@ -189,21 +189,18 @@ namespace cv { namespace gpu { namespace histograms
         cudaSafeCall( cudaDeviceSynchronize() );
     }

-    __global__ void equalizeHist(DevMem2D src, PtrStep dst, const int* lut)
-    {
-        __shared__ int s_lut[256];
-
-        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
-        s_lut[tid] = lut[tid];
-        __syncthreads();
+    __constant__ int c_lut[256];

+    __global__ void equalizeHist(const DevMem2D src, PtrStep dst)
+    {
         const int x = blockIdx.x * blockDim.x + threadIdx.x;
         const int y = blockIdx.y * blockDim.y + threadIdx.y;

         if (x < src.cols && y < src.rows)
         {
-            dst.ptr(y)[x] = __float2int_rn(255.0f * s_lut[src.ptr(y)[x]] / (src.cols * src.rows));
+            const uchar val = src.ptr(y)[x];
+            const int lut = c_lut[val];
+            dst.ptr(y)[x] = __float2int_rn(255.0f / (src.cols * src.rows) * lut);
         }
     }
@@ -212,7 +209,9 @@
         dim3 block(16, 16);
         dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));

-        equalizeHist<<<grid, block, 0, stream>>>(src, dst, lut);
+        cudaSafeCall( cudaMemcpyToSymbol(cv::gpu::histograms::c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
+        equalizeHist<<<grid, block, 0, stream>>>(src, dst);
         cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
...
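The equalizeHist change above replaces a per-block shared-memory copy of the LUT with a single table in __constant__ memory, filled once via cudaMemcpyToSymbol before the launch. A simplified standalone sketch of that pattern; the names are illustrative, and the table here is copied from host memory rather than device memory as in the diff.

    #include <cuda_runtime.h>

    __constant__ int c_table[256];

    __global__ void applyTable(const unsigned char* src, unsigned char* dst, int n)
    {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            dst[i] = static_cast<unsigned char>(c_table[src[i]]); // constant-cache broadcast read
    }

    void uploadTable(const int* hostTable)
    {
        // One copy before the kernel launch; the diff uses cudaMemcpyDeviceToDevice
        // because its LUT already lives on the device.
        cudaMemcpyToSymbol(c_table, hostTable, 256 * sizeof(int));
    }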
@@ -49,7 +49,7 @@ using namespace cv::gpu::device;
 /////////////////////////////////// Remap ///////////////////////////////////////////////
 namespace cv { namespace gpu { namespace imgproc
 {
-    texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;
+    texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap(0, cudaFilterModeLinear, cudaAddressModeWrap);

     __global__ void remap_1c(const float* mapx, const float* mapy, size_t map_step, uchar* out, size_t out_step, int width, int height)
     {
@@ -131,16 +131,12 @@ namespace cv { namespace gpu { namespace imgproc
         grid.x = divUp(dst.cols, threads.x);
         grid.y = divUp(dst.rows, threads.y);

-        tex_remap.filterMode = cudaFilterModeLinear;
-        tex_remap.addressMode[0] = tex_remap.addressMode[1] = cudaAddressModeWrap;
-        cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
-        cudaSafeCall( cudaBindTexture2D(0, tex_remap, src.data, desc, src.cols, src.rows, src.step) );
+        TextureBinder tex_remap(&tex_remap, src);

         remap_1c<<<grid, threads>>>(xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows);
         cudaSafeCall( cudaGetLastError() );

         cudaSafeCall( cudaDeviceSynchronize() );
-        cudaSafeCall( cudaUnbindTexture(tex_remap) );
     }

     void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
@@ -151,8 +147,8 @@ namespace cv { namespace gpu { namespace imgproc
         grid.y = divUp(dst.rows, threads.y);

         remap_3c<<<grid, threads>>>(src.data, src.step, xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows);
         cudaSafeCall( cudaGetLastError() );

         cudaSafeCall( cudaDeviceSynchronize() );
     }
...
@@ -77,7 +77,6 @@ namespace cv
         // Returns true if the GPU analogue exists, false otherwise.
         bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);

         static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }

         template<class T> static inline void uploadConstant(const char* name, const T& value)
@@ -117,6 +116,49 @@ namespace cv
             cudaSafeCall( cudaUnbindTexture(tex) );
         }

+        class TextureBinder
+        {
+        public:
+            TextureBinder() : tex_(0) {}
+            template <typename T> TextureBinder(const textureReference* tex, const DevMem2D_<T>& img) : tex_(0)
+            {
+                bind(tex, img);
+            }
+            template <typename T> TextureBinder(const char* tex_name, const DevMem2D_<T>& img) : tex_(0)
+            {
+                bind(tex_name, img);
+            }
+            ~TextureBinder() { unbind(); }
+
+            template <typename T> void bind(const textureReference* tex, const DevMem2D_<T>& img)
+            {
+                unbind();
+
+                cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
+                cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
+
+                tex_ = tex;
+            }
+            template <typename T> void bind(const char* tex_name, const DevMem2D_<T>& img)
+            {
+                const textureReference* tex;
+                cudaSafeCall( cudaGetTextureReference(&tex, tex_name) );
+                bind(tex, img);
+            }
+
+            void unbind()
+            {
+                if (tex_)
+                {
+                    cudaUnbindTexture(tex_);
+                    tex_ = 0;
+                }
+            }
+
+        private:
+            const textureReference* tex_;
+        };
+
         class NppStreamHandler
         {
         public:
...
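A sketch of how the new TextureBinder is meant to be used (compare the remap change earlier in this diff, which replaces the manual bind/unbind sequence); the texture and kernel names here are illustrative, and the class is assumed to live in the cv::gpu namespace where internal_shared.hpp declares it.

    texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_src(0, cudaFilterModeLinear, cudaAddressModeWrap);

    void launch_example(const cv::gpu::DevMem2D& src)
    {
        // Binds tex_src to src for the lifetime of this scope; ~TextureBinder
        // calls cudaUnbindTexture, so early returns cannot leak the binding.
        cv::gpu::TextureBinder binder(&tex_src, src);

        // some_kernel<<<grid, block>>>(...);
    }   // texture unbound here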
@@ -43,6 +43,7 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/transform.hpp"
+#include "opencv2/gpu/device/functional.hpp"

 using namespace cv::gpu::device;
@@ -62,7 +63,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
     ///////////////////////////////////////////////////////////////////////////

     template<typename T>
-    __global__ void copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, size_t step_mat, size_t step_mask, int channels)
+    __global__ void copy_to_with_mask(const T* mat_src, T* mat_dst, const uchar* mask, int cols, int rows, size_t step_mat, size_t step_mask, int channels)
     {
         size_t x = blockIdx.x * blockDim.x + threadIdx.x;
         size_t y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -162,7 +163,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
     }

     template<typename T>
-    __global__ void set_to_without_mask(T * mat, int cols, int rows, size_t step, int channels)
+    __global__ void set_to_without_mask(T* mat, int cols, int rows, size_t step, int channels)
     {
         size_t x = blockIdx.x * blockDim.x + threadIdx.x;
         size_t y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -175,7 +176,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
     }

     template<typename T>
-    __global__ void set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, size_t step, int channels, size_t step_mask)
+    __global__ void set_to_with_mask(T* mat, const uchar* mask, int cols, int rows, size_t step, int channels, size_t step_mask)
     {
         size_t x = blockIdx.x * blockDim.x + threadIdx.x;
         size_t y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -237,19 +238,16 @@ namespace cv { namespace gpu { namespace matrix_operations {
     //////////////////////////////// ConvertTo ////////////////////////////////
     ///////////////////////////////////////////////////////////////////////////

-    template <typename T, typename D>
-    class Convertor
+    template <typename T, typename D> struct Convertor : unary_function<T, D>
     {
-    public:
         Convertor(double alpha_, double beta_) : alpha(alpha_), beta(beta_) {}

-        __device__ __forceinline__ D operator()(const T& src)
+        __device__ __forceinline__ D operator()(const T& src) const
         {
             return saturate_cast<D>(alpha * src + beta);
         }

-    private:
-        double alpha, beta;
+        const double alpha, beta;
     };

     template<typename T, typename D>
...
@@ -225,7 +225,7 @@ namespace cv { namespace gpu { namespace surf
     };

     template <typename Mask>
-    __global__ void icvFindMaximaInLayer(PtrStepf det, PtrStepf trace, int4* maxPosBuffer, unsigned int* maxCounter)
+    __global__ void icvFindMaximaInLayer(const PtrStepf det, const PtrStepf trace, int4* maxPosBuffer, unsigned int* maxCounter)
     {
 #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
@@ -346,7 +346,7 @@ namespace cv { namespace gpu { namespace surf
     ////////////////////////////////////////////////////////////////////////
     // INTERPOLATION

-    __global__ void icvInterpolateKeypoint(PtrStepf det, const int4* maxPosBuffer,
+    __global__ void icvInterpolateKeypoint(const PtrStepf det, const int4* maxPosBuffer,
         float* featureX, float* featureY, int* featureLaplacian, float* featureSize, float* featureHessian,
         unsigned int* featureCounter)
     {
...
@@ -45,13 +45,10 @@
 using namespace cv;
 using namespace cv::gpu;

-////////////////////////////////////////////////////////////////////////
-//////////////////////////////// GpuMat ////////////////////////////////
-////////////////////////////////////////////////////////////////////////
-
 #if !defined (HAVE_CUDA)

+void cv::gpu::registerPageLocked(Mat&) { throw_nogpu(); }
+void cv::gpu::unregisterPageLocked(Mat&) { throw_nogpu(); }
 void cv::gpu::CudaMem::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/) { throw_nogpu(); }
 bool cv::gpu::CudaMem::canMapHostMemory() { throw_nogpu(); return false; }
 void cv::gpu::CudaMem::release() { throw_nogpu(); }
@@ -59,9 +56,15 @@ GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_nogpu(); return Gpu
 #else /* !defined (HAVE_CUDA) */

-///////////////////////////////////////////////////////////////////////
-//////////////////////////////// CudaMem //////////////////////////////
-///////////////////////////////////////////////////////////////////////
+void registerPageLocked(Mat& m)
+{
+    cudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
+}
+
+void unregisterPageLocked(Mat& m)
+{
+    cudaSafeCall( cudaHostUnregister(m.ptr()) );
+}

 bool cv::gpu::CudaMem::canMapHostMemory()
 {
...
@@ -52,15 +52,13 @@ namespace cv { namespace gpu { namespace device
 {
     //! Mask accessor

-    class MaskReader
+    struct MaskReader
     {
-    public:
         explicit MaskReader(const PtrStep& mask_): mask(mask_) {}

         __device__ __forceinline__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }

-    private:
-        PtrStep mask;
+        const PtrStep mask;
     };

     struct NoMask
@@ -159,7 +157,7 @@ namespace cv { namespace gpu { namespace device
     template <> struct OpUnroller<3>
     {
         template <typename T, typename D, typename UnOp, typename Mask>
-        static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
+        static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
         {
             if (mask(y, x_shifted))
                 dst.x = op(src.x);
@@ -170,7 +168,7 @@ namespace cv { namespace gpu { namespace device
         }

         template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
-        static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
+        static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
         {
             if (mask(y, x_shifted))
                 dst.x = op(src1.x, src2.x);
@@ -183,7 +181,7 @@ namespace cv { namespace gpu { namespace device
     template <> struct OpUnroller<4>
     {
         template <typename T, typename D, typename UnOp, typename Mask>
-        static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
+        static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
         {
             if (mask(y, x_shifted))
                 dst.x = op(src.x);
@@ -196,7 +194,7 @@ namespace cv { namespace gpu { namespace device
         }

         template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
-        static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
+        static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
         {
             if (mask(y, x_shifted))
                 dst.x = op(src1.x, src2.x);
@@ -210,7 +208,7 @@ namespace cv { namespace gpu { namespace device
     };

     template <typename T, typename D, typename UnOp, typename Mask>
-    __global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep_<D> dst_, const Mask mask, UnOp op)
+    __global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep_<D> dst_, const Mask mask, const UnOp op)
     {
         typedef typename UnReadWriteTraits<T, D>::read_type read_type;
         typedef typename UnReadWriteTraits<T, D>::write_type write_type;
@@ -227,7 +225,7 @@ namespace cv { namespace gpu { namespace device

             if (x_shifted + shift - 1 < src_.cols)
             {
-                read_type src_n_el = ((const read_type*)src)[x];
+                const read_type src_n_el = ((const read_type*)src)[x];
                 write_type dst_n_el;

                 OpUnroller<shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
@@ -246,7 +244,7 @@ namespace cv { namespace gpu { namespace device
     }

     template <typename T, typename D, typename UnOp, typename Mask>
-    static __global__ void transformSimple(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)
+    static __global__ void transformSimple(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, const UnOp op)
     {
         const int x = blockDim.x * blockIdx.x + threadIdx.x;
         const int y = blockDim.y * blockIdx.y + threadIdx.y;
@@ -259,7 +257,7 @@ namespace cv { namespace gpu { namespace device

     template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
     __global__ static void transformSmart(const DevMem2D_<T1> src1_, const PtrStep_<T2> src2_, PtrStep_<D> dst_,
-        const Mask mask, BinOp op)
+        const Mask mask, const BinOp op)
     {
         typedef typename BinReadWriteTraits<T1, T2, D>::read_type1 read_type1;
         typedef typename BinReadWriteTraits<T1, T2, D>::read_type2 read_type2;
@@ -278,8 +276,8 @@ namespace cv { namespace gpu { namespace device

             if (x_shifted + shift - 1 < src1_.cols)
             {
-                read_type1 src1_n_el = ((const read_type1*)src1)[x];
-                read_type2 src2_n_el = ((const read_type2*)src2)[x];
+                const read_type1 src1_n_el = ((const read_type1*)src1)[x];
+                const read_type2 src2_n_el = ((const read_type2*)src2)[x];
                 write_type dst_n_el;

                 OpUnroller<shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
@@ -299,15 +297,15 @@ namespace cv { namespace gpu { namespace device

     template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
     static __global__ void transformSimple(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst,
-        const Mask mask, BinOp op)
+        const Mask mask, const BinOp op)
     {
         const int x = blockDim.x * blockIdx.x + threadIdx.x;
         const int y = blockDim.y * blockIdx.y + threadIdx.y;

         if (x < src1.cols && y < src1.rows && mask(y, x))
         {
-            T1 src1_data = src1.ptr(y)[x];
-            T2 src2_data = src2.ptr(y)[x];
+            const T1 src1_data = src1.ptr(y)[x];
+            const T2 src2_data = src2.ptr(y)[x];
             dst.ptr(y)[x] = op(src1_data, src2_data);
         }
     }
@@ -316,7 +314,7 @@ namespace cv { namespace gpu { namespace device
     template<> struct TransformDispatcher<false>
     {
         template <typename T, typename D, typename UnOp, typename Mask>
-        static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask, cudaStream_t stream)
+        static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const UnOp& op, const Mask& mask, cudaStream_t stream)
         {
             dim3 threads(16, 16, 1);
             dim3 grid(1, 1, 1);
@@ -332,7 +330,7 @@ namespace cv { namespace gpu { namespace device
} }
template <typename T1, typename T2, typename D, typename BinOp, typename Mask> template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, BinOp op, const Mask& mask, cudaStream_t stream) static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, const BinOp& op, const Mask& mask, cudaStream_t stream)
{ {
dim3 threads(16, 16, 1); dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
...@@ -350,7 +348,7 @@ namespace cv { namespace gpu { namespace device ...@@ -350,7 +348,7 @@ namespace cv { namespace gpu { namespace device
template<> struct TransformDispatcher<true> template<> struct TransformDispatcher<true>
{ {
template <typename T, typename D, typename UnOp, typename Mask> template <typename T, typename D, typename UnOp, typename Mask>
static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask, cudaStream_t stream) static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const UnOp& op, const Mask& mask, cudaStream_t stream)
{ {
const int shift = UnReadWriteTraits<T, D>::shift; const int shift = UnReadWriteTraits<T, D>::shift;
...@@ -368,7 +366,7 @@ namespace cv { namespace gpu { namespace device ...@@ -368,7 +366,7 @@ namespace cv { namespace gpu { namespace device
} }
template <typename T1, typename T2, typename D, typename BinOp, typename Mask> template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, BinOp op, const Mask& mask, cudaStream_t stream) static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, const BinOp& op, const Mask& mask, cudaStream_t stream)
{ {
const int shift = BinReadWriteTraits<T1, T2, D>::shift; const int shift = BinReadWriteTraits<T1, T2, D>::shift;
...@@ -413,13 +411,13 @@ namespace cv { namespace gpu { namespace device ...@@ -413,13 +411,13 @@ namespace cv { namespace gpu { namespace device
}; };
template <typename T, typename D, typename UnOp, typename Mask> template <typename T, typename D, typename UnOp, typename Mask>
static void transform_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask, cudaStream_t stream) static void transform_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const UnOp& op, const Mask& mask, cudaStream_t stream)
{ {
TransformDispatcher< UseSmartUn<T, D>::value >::call(src, dst, op, mask, stream); TransformDispatcher< UseSmartUn<T, D>::value >::call(src, dst, op, mask, stream);
} }
template <typename T1, typename T2, typename D, typename BinOp, typename Mask> template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void transform_caller(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, BinOp op, const Mask& mask, cudaStream_t stream) static void transform_caller(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, const BinOp& op, const Mask& mask, cudaStream_t stream)
{ {
TransformDispatcher< UseSmartBin<T1, T2, D>::value >::call(src1, src2, dst, op, mask, stream); TransformDispatcher< UseSmartBin<T1, T2, D>::value >::call(src1, src2, dst, op, mask, stream);
} }
......
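Note on the transform_detail.hpp hunks above: the functor is now taken by const reference on the host side while the kernels still receive it by value, and the choice between the vectorized transformSmart path and the per-element transformSimple path remains a compile-time decision made by TransformDispatcher. The sketch below reproduces only that dispatch idea in plain host code; the UseSmart trait, its sizeof-based heuristic, and the Dispatcher/caller names are illustrative assumptions, not the actual UseSmartUn/UseSmartBin logic.

#include <cstdio>

// Illustrative trait: pretend 1-byte element types can take the "smart"
// (vectorized) path. The real UseSmartUn/UseSmartBin traits differ.
template <typename T> struct UseSmart { enum { value = (sizeof(T) == 1) }; };

template <bool smart> struct Dispatcher;

template <> struct Dispatcher<false>
{
    template <typename T> static void call(const T*, int n)
    {
        std::printf("simple path: %d elements, one per thread\n", n);
    }
};

template <> struct Dispatcher<true>
{
    template <typename T> static void call(const T*, int n)
    {
        std::printf("smart path: %d elements, packed loads/stores\n", n);
    }
};

// Mirrors transform_caller: the branch is resolved at compile time,
// so only one kernel variant is instantiated per element type.
template <typename T> static void caller(const T* data, int n)
{
    Dispatcher< UseSmart<T>::value >::call(data, n);
}

int main()
{
    unsigned char bytes[16] = {0};
    float floats[4] = {0.f};
    caller(bytes, 16);   // smart path in this sketch
    caller(floats, 4);   // simple path
    return 0;
}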
...@@ -73,7 +73,7 @@ namespace cv { namespace gpu { namespace device ...@@ -73,7 +73,7 @@ namespace cv { namespace gpu { namespace device
using thrust::bit_and; using thrust::bit_and;
using thrust::bit_or; using thrust::bit_or;
using thrust::bit_xor; using thrust::bit_xor;
template <typename T> struct bit_not : public unary_function<T, T> template <typename T> struct bit_not : unary_function<T, T>
{ {
__forceinline__ __device__ T operator ()(const T& v) const {return ~v;} __forceinline__ __device__ T operator ()(const T& v) const {return ~v;}
}; };
...@@ -81,12 +81,12 @@ namespace cv { namespace gpu { namespace device ...@@ -81,12 +81,12 @@ namespace cv { namespace gpu { namespace device
using thrust::identity; using thrust::identity;
#define OPENCV_GPU_IMPLEMENT_MINMAX(name, type, op) \ #define OPENCV_GPU_IMPLEMENT_MINMAX(name, type, op) \
template <> struct name<type> : public binary_function<type, type, type> \ template <> struct name<type> : binary_function<type, type, type> \
{ \ { \
__forceinline__ __device__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \ __forceinline__ __device__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
}; };
template <typename T> struct maximum : public binary_function<T, T, T> template <typename T> struct maximum : binary_function<T, T, T>
{ {
__forceinline__ __device__ T operator()(const T& lhs, const T& rhs) const {return lhs < rhs ? rhs : lhs;} __forceinline__ __device__ T operator()(const T& lhs, const T& rhs) const {return lhs < rhs ? rhs : lhs;}
}; };
...@@ -100,7 +100,7 @@ namespace cv { namespace gpu { namespace device ...@@ -100,7 +100,7 @@ namespace cv { namespace gpu { namespace device
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, float, fmax) OPENCV_GPU_IMPLEMENT_MINMAX(maximum, float, fmax)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, double, fmax) OPENCV_GPU_IMPLEMENT_MINMAX(maximum, double, fmax)
template <typename T> struct minimum : public binary_function<T, T, T> template <typename T> struct minimum : binary_function<T, T, T>
{ {
__forceinline__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs < rhs ? lhs : rhs;} __forceinline__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs < rhs ? lhs : rhs;}
}; };
...@@ -126,31 +126,31 @@ namespace cv { namespace gpu { namespace device ...@@ -126,31 +126,31 @@ namespace cv { namespace gpu { namespace device
using thrust::not2; using thrust::not2;
#define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(func) \ #define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(func) \
template <typename T> struct func ## _func : public unary_function<T, float> \ template <typename T> struct func ## _func : unary_function<T, float> \
{ \ { \
__forceinline__ __device__ float operator ()(const T& v) \ __forceinline__ __device__ float operator ()(const T& v) const \
{ \ { \
return func ## f(v); \ return func ## f(v); \
} \ } \
}; \ }; \
template <> struct func ## _func<double> : public unary_function<double, double> \ template <> struct func ## _func<double> : unary_function<double, double> \
{ \ { \
__forceinline__ __device__ double operator ()(double v) \ __forceinline__ __device__ double operator ()(double v) const \
{ \ { \
return func(v); \ return func(v); \
} \ } \
}; };
#define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(func) \ #define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(func) \
template <typename T> struct func ## _func : public binary_function<T, T, float> \ template <typename T> struct func ## _func : binary_function<T, T, float> \
{ \ { \
__forceinline__ __device__ float operator ()(const T& v1, const T& v2) \ __forceinline__ __device__ float operator ()(const T& v1, const T& v2) const \
{ \ { \
return func ## f(v1, v2); \ return func ## f(v1, v2); \
} \ } \
}; \ }; \
template <> struct func ## _func<double> : public binary_function<double, double, double> \ template <> struct func ## _func<double> : binary_function<double, double, double> \
{ \ { \
__forceinline__ __device__ double operator ()(double v1, double v2) \ __forceinline__ __device__ double operator ()(double v1, double v2) const \
{ \ { \
return func(v1, v2); \ return func(v1, v2); \
} \ } \
...@@ -184,7 +184,7 @@ namespace cv { namespace gpu { namespace device ...@@ -184,7 +184,7 @@ namespace cv { namespace gpu { namespace device
#undef OPENCV_GPU_IMPLEMENT_UN_FUNCTOR #undef OPENCV_GPU_IMPLEMENT_UN_FUNCTOR
#undef OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR #undef OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR
template<typename T> struct hypot_sqr_func : public binary_function<T, T, float> template<typename T> struct hypot_sqr_func : binary_function<T, T, float>
{ {
__forceinline__ __device__ T operator ()(T src1, T src2) const __forceinline__ __device__ T operator ()(T src1, T src2) const
{ {
...@@ -192,15 +192,15 @@ namespace cv { namespace gpu { namespace device ...@@ -192,15 +192,15 @@ namespace cv { namespace gpu { namespace device
} }
}; };
template <typename T, typename D> struct saturate_cast_func : public unary_function<T, D> template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
{ {
__forceinline__ __device__ D operator ()(const T& v) __forceinline__ __device__ D operator ()(const T& v) const
{ {
return saturate_cast<D>(v); return saturate_cast<D>(v);
} }
}; };
template <typename T> struct thresh_binary_func : public unary_function<T, T> template <typename T> struct thresh_binary_func : unary_function<T, T>
{ {
__forceinline__ __host__ __device__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {} __forceinline__ __host__ __device__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
...@@ -209,10 +209,10 @@ namespace cv { namespace gpu { namespace device ...@@ -209,10 +209,10 @@ namespace cv { namespace gpu { namespace device
return src > thresh ? maxVal : 0; return src > thresh ? maxVal : 0;
} }
T thresh; const T thresh;
T maxVal; const T maxVal;
}; };
template <typename T> struct thresh_binary_inv_func : public unary_function<T, T> template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
{ {
__forceinline__ __host__ __device__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {} __forceinline__ __host__ __device__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
...@@ -221,10 +221,10 @@ namespace cv { namespace gpu { namespace device ...@@ -221,10 +221,10 @@ namespace cv { namespace gpu { namespace device
return src > thresh ? 0 : maxVal; return src > thresh ? 0 : maxVal;
} }
T thresh; const T thresh;
T maxVal; const T maxVal;
}; };
template <typename T> struct thresh_trunc_func : public unary_function<T, T> template <typename T> struct thresh_trunc_func : unary_function<T, T>
{ {
explicit __forceinline__ __host__ __device__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {} explicit __forceinline__ __host__ __device__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {}
...@@ -233,11 +233,10 @@ namespace cv { namespace gpu { namespace device ...@@ -233,11 +233,10 @@ namespace cv { namespace gpu { namespace device
return minimum<T>()(src, thresh); return minimum<T>()(src, thresh);
} }
T thresh; const T thresh;
}; };
template <typename T> struct thresh_to_zero_func : public unary_function<T, T> template <typename T> struct thresh_to_zero_func : unary_function<T, T>
{ {
public:
explicit __forceinline__ __host__ __device__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {} explicit __forceinline__ __host__ __device__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {}
__forceinline__ __device__ T operator()(const T& src) const __forceinline__ __device__ T operator()(const T& src) const
...@@ -245,11 +244,10 @@ namespace cv { namespace gpu { namespace device ...@@ -245,11 +244,10 @@ namespace cv { namespace gpu { namespace device
return src > thresh ? src : 0; return src > thresh ? src : 0;
} }
T thresh; const T thresh;
}; };
template <typename T> struct thresh_to_zero_inv_func : public unary_function<T, T> template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
{ {
public:
explicit __forceinline__ __host__ __device__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {} explicit __forceinline__ __host__ __device__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {}
__forceinline__ __device__ T operator()(const T& src) const __forceinline__ __device__ T operator()(const T& src) const
...@@ -257,36 +255,36 @@ namespace cv { namespace gpu { namespace device ...@@ -257,36 +255,36 @@ namespace cv { namespace gpu { namespace device
return src > thresh ? 0 : src; return src > thresh ? 0 : src;
} }
T thresh; const T thresh;
}; };
template <typename Op> struct binder1st : public unary_function<typename Op::second_argument_type, typename Op::result_type> template <typename Op> struct binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type>
{ {
__forceinline__ __host__ __device__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {} __forceinline__ __host__ __device__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {}
__forceinline__ __device__ typename Op::result_type operator ()(const typename Op::second_argument_type& a) __forceinline__ __device__ typename Op::result_type operator ()(const typename Op::second_argument_type& a) const
{ {
return op(arg1, a); return op(arg1, a);
} }
Op op; const Op op;
typename Op::first_argument_type arg1; const typename Op::first_argument_type arg1;
}; };
template <typename Op, typename T> static __forceinline__ __host__ __device__ binder1st<Op> bind1st(const Op& op, const T& x) template <typename Op, typename T> static __forceinline__ __host__ __device__ binder1st<Op> bind1st(const Op& op, const T& x)
{ {
return binder1st<Op>(op, typename Op::first_argument_type(x)); return binder1st<Op>(op, typename Op::first_argument_type(x));
} }
template <typename Op> struct binder2nd : public unary_function<typename Op::first_argument_type, typename Op::result_type> template <typename Op> struct binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type>
{ {
__forceinline__ __host__ __device__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {} __forceinline__ __host__ __device__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {}
__forceinline__ __device__ typename Op::result_type operator ()(const typename Op::first_argument_type& a) __forceinline__ __device__ typename Op::result_type operator ()(const typename Op::first_argument_type& a) const
{ {
return op(a, arg2); return op(a, arg2);
} }
Op op; const Op op;
typename Op::second_argument_type arg2; const typename Op::second_argument_type arg2;
}; };
template <typename Op, typename T> static __forceinline__ __host__ __device__ binder2nd<Op> bind2nd(const Op& op, const T& x) template <typename Op, typename T> static __forceinline__ __host__ __device__ binder2nd<Op> bind2nd(const Op& op, const T& x)
{ {
......
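Note on the functional.hpp hunks above: binder1st/binder2nd/bind1st/bind2nd mirror the old std:: binders for device code, and this change makes operator() const and stores the bound operation and argument as const members, so a bound functor can itself be passed by const reference into the kernels. Below is a simplified host-only analogue of the binder2nd pattern; it drops the argument_type typedefs and the __device__ qualifiers, and the names are illustrative rather than the OpenCV implementation.

#include <cstdio>

template <typename T> struct maximum
{
    T operator()(const T& lhs, const T& rhs) const { return lhs < rhs ? rhs : lhs; }
};

// Simplified binder2nd: fixes the second argument of a binary functor.
template <typename Op, typename Arg2> struct binder2nd
{
    binder2nd(const Op& op_, const Arg2& arg2_) : op(op_), arg2(arg2_) {}

    template <typename A> A operator()(const A& a) const { return op(a, arg2); }

    const Op   op;    // const members: the bound functor is immutable once built
    const Arg2 arg2;
};

template <typename Op, typename T>
static binder2nd<Op, T> bind2nd(const Op& op, const T& x)
{
    return binder2nd<Op, T>(op, x);
}

int main()
{
    // Element-wise "clamp below zero": max(v, 0) with the 0 bound in advance.
    const binder2nd< maximum<float>, float > clampLow = bind2nd(maximum<float>(), 0.0f);
    std::printf("%.1f %.1f\n", clampLow(-3.5f), clampLow(2.0f));   // prints "0.0 2.0"
    return 0;
}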
...@@ -48,12 +48,12 @@ ...@@ -48,12 +48,12 @@
namespace cv { namespace gpu { namespace device namespace cv { namespace gpu { namespace device
{ {
template <typename T, typename D, typename UnOp> template <typename T, typename D, typename UnOp>
static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream = 0) static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const UnOp& op, cudaStream_t stream = 0)
{ {
detail::transform_caller(src, dst, op, detail::NoMask(), stream); detail::transform_caller(src, dst, op, detail::NoMask(), stream);
} }
template <typename T, typename D, typename UnOp> template <typename T, typename D, typename UnOp>
static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const PtrStep& mask, UnOp op, static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const PtrStep& mask, const UnOp& op,
cudaStream_t stream = 0) cudaStream_t stream = 0)
{ {
detail::transform_caller(src, dst, op, detail::MaskReader(mask), stream); detail::transform_caller(src, dst, op, detail::MaskReader(mask), stream);
...@@ -61,13 +61,13 @@ namespace cv { namespace gpu { namespace device ...@@ -61,13 +61,13 @@ namespace cv { namespace gpu { namespace device
template <typename T1, typename T2, typename D, typename BinOp> template <typename T1, typename T2, typename D, typename BinOp>
static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
BinOp op, cudaStream_t stream = 0) const BinOp& op, cudaStream_t stream = 0)
{ {
detail::transform_caller(src1, src2, dst, op, detail::NoMask(), stream); detail::transform_caller(src1, src2, dst, op, detail::NoMask(), stream);
} }
template <typename T1, typename T2, typename D, typename BinOp> template <typename T1, typename T2, typename D, typename BinOp>
static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
const PtrStep& mask, BinOp op, cudaStream_t stream = 0) const PtrStep& mask, const BinOp& op, cudaStream_t stream = 0)
{ {
detail::transform_caller(src1, src2, dst, op, detail::MaskReader(mask), stream); detail::transform_caller(src1, src2, dst, op, detail::MaskReader(mask), stream);
} }
......
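Note on the transform.hpp hunks above: the four public transform() overloads differ only in which mask policy (NoMask or MaskReader) they forward to detail::transform_caller together with the user functor. The sketch below shows that same policy idea on the CPU with plain arrays; NoMask, MaskReader and transform_caller here are simplified stand-ins, not the OpenCV device code.

#include <cstdio>

struct NoMask
{
    bool operator()(int, int) const { return true; }   // every element passes
};

struct MaskReader
{
    MaskReader(const unsigned char* mask_, int cols_) : mask(mask_), cols(cols_) {}
    bool operator()(int y, int x) const { return mask[y * cols + x] != 0; }
    const unsigned char* mask;
    int cols;
};

// Simplified stand-in for detail::transform_caller: apply op where the mask allows.
template <typename T, typename D, typename UnOp, typename Mask>
static void transform_caller(const T* src, D* dst, int rows, int cols,
                             const UnOp& op, const Mask& mask)
{
    for (int y = 0; y < rows; ++y)
        for (int x = 0; x < cols; ++x)
            if (mask(y, x))
                dst[y * cols + x] = op(src[y * cols + x]);
}

struct negate_func
{
    int operator()(int v) const { return -v; }
};

int main()
{
    const int src[4] = {1, 2, 3, 4};
    int dstAll[4]    = {0, 0, 0, 0};
    int dstMasked[4] = {0, 0, 0, 0};
    const unsigned char mask[4] = {1, 0, 1, 0};

    transform_caller(src, dstAll,    1, 4, negate_func(), NoMask());             // -1 -2 -3 -4
    transform_caller(src, dstMasked, 1, 4, negate_func(), MaskReader(mask, 4));  // -1  0 -3  0

    std::printf("%d %d %d %d\n", dstAll[0], dstAll[1], dstAll[2], dstAll[3]);
    std::printf("%d %d %d %d\n", dstMasked[0], dstMasked[1], dstMasked[2], dstMasked[3]);
    return 0;
}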
...@@ -65,15 +65,15 @@ ...@@ -65,15 +65,15 @@
namespace cv { namespace gpu { namespace device namespace cv { namespace gpu { namespace device
{ {
template <typename T> void __host__ __device__ __forceinline__ swap(T &a, T &b) template <typename T> void __host__ __device__ __forceinline__ swap(T& a, T& b)
{ {
T temp = a; const T temp = a;
a = b; a = b;
b = temp; b = temp;
} }
// warp-synchronous 32 elements reduction // warp-synchronous 32 elements reduction
template <typename T, typename Op> __device__ __forceinline__ void warpReduce32(volatile T* data, T& partial_reduction, int tid, Op op) template <typename T, typename Op> __device__ __forceinline__ void warpReduce32(volatile T* data, T& partial_reduction, int tid, const Op& op)
{ {
data[tid] = partial_reduction; data[tid] = partial_reduction;
...@@ -88,7 +88,7 @@ namespace cv { namespace gpu { namespace device ...@@ -88,7 +88,7 @@ namespace cv { namespace gpu { namespace device
} }
// warp-synchronous 16 elements reduction // warp-synchronous 16 elements reduction
template <typename T, typename Op> __device__ __forceinline__ void warpReduce16(volatile T* data, T& partial_reduction, int tid, Op op) template <typename T, typename Op> __device__ __forceinline__ void warpReduce16(volatile T* data, T& partial_reduction, int tid, const Op& op)
{ {
data[tid] = partial_reduction; data[tid] = partial_reduction;
...@@ -102,7 +102,7 @@ namespace cv { namespace gpu { namespace device ...@@ -102,7 +102,7 @@ namespace cv { namespace gpu { namespace device
} }
// warp-synchronous reduction // warp-synchronous reduction
template <int n, typename T, typename Op> __device__ __forceinline__ void warpReduce(volatile T* data, T& partial_reduction, int tid, Op op) template <int n, typename T, typename Op> __device__ __forceinline__ void warpReduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
{ {
if (tid < n) if (tid < n)
data[tid] = partial_reduction; data[tid] = partial_reduction;
......
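Note on the utility.hpp hunks above: swap and the warpReduce helpers are warp-synchronous shared-memory reductions, and the change here is again const-correctness on the functor parameter. The CUDA sketch below exercises the 32-element pattern in isolation; the kernel and host scaffolding are illustrative, and it assumes the pre-Volta implicit warp synchrony this code was written for (modern code would use __syncwarp() or warp shuffles instead).

#include <cstdio>
#include <cuda_runtime.h>

struct plus_op
{
    __device__ float operator()(float a, float b) const { return a + b; }
};

// Same shape as warpReduce32 above: 32 partial values folded by one warp
// through volatile shared memory, relying on implicit warp synchrony.
template <typename T, typename Op>
__device__ __forceinline__ void warpReduce32(volatile T* data, T& partial, int tid, const Op& op)
{
    data[tid] = partial;

    if (tid < 16)
    {
        data[tid] = partial = op(partial, data[tid + 16]);
        data[tid] = partial = op(partial, data[tid +  8]);
        data[tid] = partial = op(partial, data[tid +  4]);
        data[tid] = partial = op(partial, data[tid +  2]);
        data[tid] = partial = op(partial, data[tid +  1]);
    }
}

__global__ void sum32(const float* in, float* out)
{
    __shared__ volatile float smem[32];
    float partial = in[threadIdx.x];
    warpReduce32(smem, partial, (int)threadIdx.x, plus_op());
    if (threadIdx.x == 0)
        *out = smem[0];
}

int main()
{
    float h_in[32], h_out = 0.f;
    for (int i = 0; i < 32; ++i)
        h_in[i] = 1.f;                                   // expected sum: 32

    float *d_in = 0, *d_out = 0;
    cudaMalloc((void**)&d_in, 32 * sizeof(float));
    cudaMalloc((void**)&d_out, sizeof(float));
    cudaMemcpy(d_in, h_in, 32 * sizeof(float), cudaMemcpyHostToDevice);

    sum32<<<1, 32>>>(d_in, d_out);

    cudaMemcpy(&h_out, d_out, sizeof(float), cudaMemcpyDeviceToHost);
    printf("warp sum = %f\n", h_out);                    // 32.000000

    cudaFree(d_in);
    cudaFree(d_out);
    return 0;
}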
...@@ -107,9 +107,9 @@ namespace ...@@ -107,9 +107,9 @@ namespace
maxCandidates = min(static_cast<int>(1.5 * maxFeatures), 65535); maxCandidates = min(static_cast<int>(1.5 * maxFeatures), 65535);
CV_Assert(maxFeatures > 0); CV_Assert(maxFeatures > 0);
cudaSafeCall( cudaMalloc((void**)&d_counters, (nOctaves + 1) * sizeof(unsigned int)) ); counters.create(1, nOctaves + 1, CV_32SC1);
cudaSafeCall( cudaMemset(d_counters, 0, (nOctaves + 1) * sizeof(unsigned int)) ); counters.setTo(Scalar::all(0));
uploadConstant("cv::gpu::surf::c_max_candidates", maxCandidates); uploadConstant("cv::gpu::surf::c_max_candidates", maxCandidates);
uploadConstant("cv::gpu::surf::c_max_features", maxFeatures); uploadConstant("cv::gpu::surf::c_max_features", maxFeatures);
...@@ -118,30 +118,20 @@ namespace ...@@ -118,30 +118,20 @@ namespace
uploadConstant("cv::gpu::surf::c_nOctaveLayers", nOctaveLayers); uploadConstant("cv::gpu::surf::c_nOctaveLayers", nOctaveLayers);
uploadConstant("cv::gpu::surf::c_hessianThreshold", static_cast<float>(hessianThreshold)); uploadConstant("cv::gpu::surf::c_hessianThreshold", static_cast<float>(hessianThreshold));
bindTexture("cv::gpu::surf::imgTex", (DevMem2D)img); imgTex.bind("cv::gpu::surf::imgTex", (DevMem2D)img);
integralBuffered(img, sum, intBuffer); integralBuffered(img, sum, intBuffer);
bindTexture("cv::gpu::surf::sumTex", (DevMem2D_<unsigned int>)sum); sumTex.bind("cv::gpu::surf::sumTex", (DevMem2D_<unsigned int>)sum);
if (use_mask) if (use_mask)
{ {
min(mask, 1.0, mask1); min(mask, 1.0, mask1);
integralBuffered(mask1, maskSum, intBuffer); integralBuffered(mask1, maskSum, intBuffer);
bindTexture("cv::gpu::surf::maskSumTex", (DevMem2D_<unsigned int>)maskSum); maskSumTex.bind("cv::gpu::surf::maskSumTex", (DevMem2D_<unsigned int>)maskSum);
} }
} }
~SURF_GPU_Invoker()
{
cudaSafeCall( cudaFree(d_counters) );
unbindTexture("cv::gpu::surf::imgTex");
unbindTexture("cv::gpu::surf::sumTex");
if (use_mask)
unbindTexture("cv::gpu::surf::maskSumTex");
}
void detectKeypoints(GpuMat& keypoints) void detectKeypoints(GpuMat& keypoints)
{ {
ensureSizeIsEnough(img_rows * (nOctaveLayers + 2), img_cols, CV_32FC1, det); ensureSizeIsEnough(img_rows * (nOctaveLayers + 2), img_cols, CV_32FC1, det);
...@@ -162,11 +152,11 @@ namespace ...@@ -162,11 +152,11 @@ namespace
icvCalcLayerDetAndTrace_gpu(det, trace, img_rows, img_cols, octave, nOctaveLayers); icvCalcLayerDetAndTrace_gpu(det, trace, img_rows, img_cols, octave, nOctaveLayers);
icvFindMaximaInLayer_gpu(det, trace, maxPosBuffer.ptr<int4>(), d_counters + 1 + octave, icvFindMaximaInLayer_gpu(det, trace, maxPosBuffer.ptr<int4>(), counters.ptr<unsigned int>() + 1 + octave,
img_rows, img_cols, octave, use_mask, nOctaveLayers); img_rows, img_cols, octave, use_mask, nOctaveLayers);
unsigned int maxCounter; unsigned int maxCounter;
cudaSafeCall( cudaMemcpy(&maxCounter, d_counters + 1 + octave, sizeof(unsigned int), cudaMemcpyDeviceToHost) ); cudaSafeCall( cudaMemcpy(&maxCounter, counters.ptr<unsigned int>() + 1 + octave, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates)); maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));
if (maxCounter > 0) if (maxCounter > 0)
...@@ -174,11 +164,11 @@ namespace ...@@ -174,11 +164,11 @@ namespace
icvInterpolateKeypoint_gpu(det, maxPosBuffer.ptr<int4>(), maxCounter, icvInterpolateKeypoint_gpu(det, maxPosBuffer.ptr<int4>(), maxCounter,
keypoints.ptr<float>(SURF_GPU::SF_X), keypoints.ptr<float>(SURF_GPU::SF_Y), keypoints.ptr<float>(SURF_GPU::SF_X), keypoints.ptr<float>(SURF_GPU::SF_Y),
keypoints.ptr<int>(SURF_GPU::SF_LAPLACIAN), keypoints.ptr<float>(SURF_GPU::SF_SIZE), keypoints.ptr<int>(SURF_GPU::SF_LAPLACIAN), keypoints.ptr<float>(SURF_GPU::SF_SIZE),
keypoints.ptr<float>(SURF_GPU::SF_HESSIAN), d_counters); keypoints.ptr<float>(SURF_GPU::SF_HESSIAN), counters.ptr<unsigned int>());
} }
} }
unsigned int featureCounter; unsigned int featureCounter;
cudaSafeCall( cudaMemcpy(&featureCounter, d_counters, sizeof(unsigned int), cudaMemcpyDeviceToHost) ); cudaSafeCall( cudaMemcpy(&featureCounter, counters.ptr<unsigned int>(), sizeof(unsigned int), cudaMemcpyDeviceToHost) );
featureCounter = std::min(featureCounter, static_cast<unsigned int>(maxFeatures)); featureCounter = std::min(featureCounter, static_cast<unsigned int>(maxFeatures));
keypoints.cols = featureCounter; keypoints.cols = featureCounter;
...@@ -226,7 +216,9 @@ namespace ...@@ -226,7 +216,9 @@ namespace
int maxCandidates; int maxCandidates;
int maxFeatures; int maxFeatures;
unsigned int* d_counters; GpuMat counters;
TextureBinder imgTex, sumTex, maskSumTex;
}; };
} }
......
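Note on the surf.cpp hunks above: the hand-managed d_counters allocation and the explicit bindTexture/unbindTexture calls are replaced by a GpuMat member and TextureBinder members, so the explicit ~SURF_GPU_Invoker destructor can be dropped and cleanup happens automatically when the invoker goes out of scope. Below is a minimal RAII sketch of that idea; ScopedDeviceBuffer is an illustrative stand-in, not the real GpuMat or TextureBinder class.

#include <cstdio>
#include <cuda_runtime.h>

class ScopedDeviceBuffer
{
public:
    explicit ScopedDeviceBuffer(size_t bytes) : ptr_(0)
    {
        cudaMalloc(&ptr_, bytes);        // acquire in the constructor
        cudaMemset(ptr_, 0, bytes);      // zero, like counters.setTo(Scalar::all(0))
    }
    ~ScopedDeviceBuffer() { cudaFree(ptr_); }   // no manual cleanup at call sites
    unsigned int* get() const { return static_cast<unsigned int*>(ptr_); }
private:
    ScopedDeviceBuffer(const ScopedDeviceBuffer&);            // non-copyable
    ScopedDeviceBuffer& operator=(const ScopedDeviceBuffer&);
    void* ptr_;
};

int main()
{
    {
        ScopedDeviceBuffer counters((4 + 1) * sizeof(unsigned int)); // e.g. nOctaves + 1 counters
        unsigned int first = 123;
        cudaMemcpy(&first, counters.get(), sizeof(unsigned int), cudaMemcpyDeviceToHost);
        std::printf("first counter = %u\n", first);               // 0 after the memset
    }   // buffer freed here, like the GpuMat member in the invoker
    return 0;
}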
...@@ -318,7 +318,7 @@ TEST(BruteForceMatcher) ...@@ -318,7 +318,7 @@ TEST(BruteForceMatcher)
GPU_OFF; GPU_OFF;
SUBTEST << "knnMatch"; SUBTEST << "knnMatch";
int knn = 10; int knn = 2;
CPU_ON; CPU_ON;
matcher.knnMatch(query, train, matches, knn); matcher.knnMatch(query, train, matches, knn);
......