Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
f11efdce
Commit
f11efdce
authored
May 16, 2011
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added support of Hamming distance to BruteForceMatcher_GPU
parent
79ed4e4c
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
335 additions
and
166 deletions
+335
-166
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+13
-1
brute_force_matcher.cpp
modules/gpu/src/brute_force_matcher.cpp
+38
-8
brute_force_matcher.cu
modules/gpu/src/cuda/brute_force_matcher.cu
+284
-157
No files found.
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
f11efdce
...
@@ -1382,7 +1382,7 @@ namespace cv
...
@@ -1382,7 +1382,7 @@ namespace cv
class
CV_EXPORTS
BruteForceMatcher_GPU_base
class
CV_EXPORTS
BruteForceMatcher_GPU_base
{
{
public
:
public
:
enum
DistType
{
L1Dist
=
0
,
L2Dist
};
enum
DistType
{
L1Dist
=
0
,
L2Dist
,
HammingDist
};
explicit
BruteForceMatcher_GPU_base
(
DistType
distType
=
L2Dist
);
explicit
BruteForceMatcher_GPU_base
(
DistType
distType
=
L2Dist
);
...
@@ -1522,6 +1522,18 @@ namespace cv
...
@@ -1522,6 +1522,18 @@ namespace cv
explicit
BruteForceMatcher_GPU
()
:
BruteForceMatcher_GPU_base
(
L2Dist
)
{}
explicit
BruteForceMatcher_GPU
()
:
BruteForceMatcher_GPU_base
(
L2Dist
)
{}
explicit
BruteForceMatcher_GPU
(
L2
<
T
>
/*d*/
)
:
BruteForceMatcher_GPU_base
(
L2Dist
)
{}
explicit
BruteForceMatcher_GPU
(
L2
<
T
>
/*d*/
)
:
BruteForceMatcher_GPU_base
(
L2Dist
)
{}
};
};
template
<>
class
CV_EXPORTS
BruteForceMatcher_GPU
<
HammingLUT
>
:
public
BruteForceMatcher_GPU_base
{
public
:
explicit
BruteForceMatcher_GPU
()
:
BruteForceMatcher_GPU_base
(
HammingDist
)
{}
explicit
BruteForceMatcher_GPU
(
HammingLUT
/*d*/
)
:
BruteForceMatcher_GPU_base
(
HammingDist
)
{}
};
template
<>
class
CV_EXPORTS
BruteForceMatcher_GPU
<
Hamming
>
:
public
BruteForceMatcher_GPU_base
{
public
:
explicit
BruteForceMatcher_GPU
()
:
BruteForceMatcher_GPU_base
(
HammingDist
)
{}
explicit
BruteForceMatcher_GPU
(
Hamming
/*d*/
)
:
BruteForceMatcher_GPU_base
(
HammingDist
)
{}
};
////////////////////////////////// CascadeClassifier_GPU //////////////////////////////////////////
////////////////////////////////// CascadeClassifier_GPU //////////////////////////////////////////
// The cascade classifier class for object detection.
// The cascade classifier class for object detection.
...
...
modules/gpu/src/brute_force_matcher.cpp
View file @
f11efdce
...
@@ -83,14 +83,20 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -83,14 +83,20 @@ namespace cv { namespace gpu { namespace bfmatcher
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
bool
cc_12
);
bool
cc_12
);
template
<
typename
T
>
template
<
typename
T
>
void
matchSingleHamming_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainDescs
,
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
bool
cc_12
);
template
<
typename
T
>
void
matchCollectionL1_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainCollection
,
void
matchCollectionL1_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainCollection
,
const
DevMem2D_
<
PtrStep
>&
maskCollection
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2D_
<
PtrStep
>&
maskCollection
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2Df
&
distance
,
bool
cc_12
);
bool
cc_12
);
template
<
typename
T
>
template
<
typename
T
>
void
matchCollectionL2_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainCollection
,
void
matchCollectionL2_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainCollection
,
const
DevMem2D_
<
PtrStep
>&
maskCollection
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2D_
<
PtrStep
>&
maskCollection
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2Df
&
distance
,
bool
cc_12
);
template
<
typename
T
>
void
matchCollectionHamming_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainCollection
,
const
DevMem2D_
<
PtrStep
>&
maskCollection
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
bool
cc_12
);
bool
cc_12
);
template
<
typename
T
>
template
<
typename
T
>
...
@@ -99,6 +105,9 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -99,6 +105,9 @@ namespace cv { namespace gpu { namespace bfmatcher
template
<
typename
T
>
template
<
typename
T
>
void
knnMatchL2_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainDescs
,
int
knn
,
void
knnMatchL2_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainDescs
,
int
knn
,
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2Df
&
allDist
);
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2Df
&
allDist
);
template
<
typename
T
>
void
knnMatchHamming_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainDescs
,
int
knn
,
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2Df
&
allDist
);
template
<
typename
T
>
template
<
typename
T
>
void
radiusMatchL1_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainDescs
,
float
maxDistance
,
void
radiusMatchL1_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainDescs
,
float
maxDistance
,
...
@@ -106,6 +115,9 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -106,6 +115,9 @@ namespace cv { namespace gpu { namespace bfmatcher
template
<
typename
T
>
template
<
typename
T
>
void
radiusMatchL2_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainDescs
,
float
maxDistance
,
void
radiusMatchL2_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainDescs
,
float
maxDistance
,
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
unsigned
int
*
nMatches
,
const
DevMem2Df
&
distance
);
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
unsigned
int
*
nMatches
,
const
DevMem2Df
&
distance
);
template
<
typename
T
>
void
radiusMatchHamming_gpu
(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainDescs
,
float
maxDistance
,
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
unsigned
int
*
nMatches
,
const
DevMem2Df
&
distance
);
}}}
}}}
namespace
namespace
...
@@ -167,7 +179,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& queryDescs,
...
@@ -167,7 +179,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& queryDescs,
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
bool
cc_12
);
bool
cc_12
);
static
const
match_caller_t
match_callers
[
2
][
8
]
=
static
const
match_caller_t
match_callers
[
3
][
8
]
=
{
{
{
{
matchSingleL1_gpu
<
unsigned
char
>
,
matchSingleL1_gpu
<
signed
char
>
,
matchSingleL1_gpu
<
unsigned
char
>
,
matchSingleL1_gpu
<
signed
char
>
,
...
@@ -178,6 +190,11 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& queryDescs,
...
@@ -178,6 +190,11 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& queryDescs,
matchSingleL2_gpu
<
unsigned
char
>
,
matchSingleL2_gpu
<
signed
char
>
,
matchSingleL2_gpu
<
unsigned
char
>
,
matchSingleL2_gpu
<
signed
char
>
,
matchSingleL2_gpu
<
unsigned
short
>
,
matchSingleL2_gpu
<
short
>
,
matchSingleL2_gpu
<
unsigned
short
>
,
matchSingleL2_gpu
<
short
>
,
matchSingleL2_gpu
<
int
>
,
matchSingleL2_gpu
<
float
>
,
0
,
0
matchSingleL2_gpu
<
int
>
,
matchSingleL2_gpu
<
float
>
,
0
,
0
},
{
matchSingleHamming_gpu
<
unsigned
char
>
,
matchSingleHamming_gpu
<
signed
char
>
,
matchSingleHamming_gpu
<
unsigned
short
>
,
matchSingleHamming_gpu
<
short
>
,
matchSingleHamming_gpu
<
int
>
,
0
,
0
,
0
}
}
};
};
...
@@ -295,7 +312,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& queryDes
...
@@ -295,7 +312,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& queryDes
const
DevMem2D_
<
PtrStep
>&
maskCollection
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2D_
<
PtrStep
>&
maskCollection
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
bool
cc_12
);
const
DevMem2Df
&
distance
,
bool
cc_12
);
static
const
match_caller_t
match_callers
[
2
][
8
]
=
static
const
match_caller_t
match_callers
[
3
][
8
]
=
{
{
{
{
matchCollectionL1_gpu
<
unsigned
char
>
,
matchCollectionL1_gpu
<
signed
char
>
,
matchCollectionL1_gpu
<
unsigned
char
>
,
matchCollectionL1_gpu
<
signed
char
>
,
...
@@ -306,6 +323,11 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& queryDes
...
@@ -306,6 +323,11 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& queryDes
matchCollectionL2_gpu
<
unsigned
char
>
,
matchCollectionL2_gpu
<
signed
char
>
,
matchCollectionL2_gpu
<
unsigned
char
>
,
matchCollectionL2_gpu
<
signed
char
>
,
matchCollectionL2_gpu
<
unsigned
short
>
,
matchCollectionL2_gpu
<
short
>
,
matchCollectionL2_gpu
<
unsigned
short
>
,
matchCollectionL2_gpu
<
short
>
,
matchCollectionL2_gpu
<
int
>
,
matchCollectionL2_gpu
<
float
>
,
0
,
0
matchCollectionL2_gpu
<
int
>
,
matchCollectionL2_gpu
<
float
>
,
0
,
0
},
{
matchCollectionHamming_gpu
<
unsigned
char
>
,
matchCollectionHamming_gpu
<
signed
char
>
,
matchCollectionHamming_gpu
<
unsigned
short
>
,
matchCollectionHamming_gpu
<
short
>
,
matchCollectionHamming_gpu
<
int
>
,
0
,
0
,
0
}
}
};
};
...
@@ -391,7 +413,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
...
@@ -391,7 +413,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
typedef
void
(
*
match_caller_t
)(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainDescs
,
int
knn
,
typedef
void
(
*
match_caller_t
)(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainDescs
,
int
knn
,
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2Df
&
allDist
);
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2Df
&
allDist
);
static
const
match_caller_t
match_callers
[
2
][
8
]
=
static
const
match_caller_t
match_callers
[
3
][
8
]
=
{
{
{
{
knnMatchL1_gpu
<
unsigned
char
>
,
knnMatchL1_gpu
<
signed
char
>
,
knnMatchL1_gpu
<
unsigned
short
>
,
knnMatchL1_gpu
<
unsigned
char
>
,
knnMatchL1_gpu
<
signed
char
>
,
knnMatchL1_gpu
<
unsigned
short
>
,
...
@@ -400,6 +422,10 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
...
@@ -400,6 +422,10 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
{
{
knnMatchL2_gpu
<
unsigned
char
>
,
knnMatchL2_gpu
<
signed
char
>
,
knnMatchL2_gpu
<
unsigned
short
>
,
knnMatchL2_gpu
<
unsigned
char
>
,
knnMatchL2_gpu
<
signed
char
>
,
knnMatchL2_gpu
<
unsigned
short
>
,
knnMatchL2_gpu
<
short
>
,
knnMatchL2_gpu
<
int
>
,
knnMatchL2_gpu
<
float
>
,
0
,
0
knnMatchL2_gpu
<
short
>
,
knnMatchL2_gpu
<
int
>
,
knnMatchL2_gpu
<
float
>
,
0
,
0
},
{
knnMatchHamming_gpu
<
unsigned
char
>
,
knnMatchHamming_gpu
<
signed
char
>
,
knnMatchHamming_gpu
<
unsigned
short
>
,
knnMatchHamming_gpu
<
short
>
,
knnMatchHamming_gpu
<
int
>
,
0
,
0
,
0
}
}
};
};
...
@@ -531,7 +557,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs,
...
@@ -531,7 +557,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs,
typedef
void
(
*
radiusMatch_caller_t
)(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainDescs
,
float
maxDistance
,
typedef
void
(
*
radiusMatch_caller_t
)(
const
DevMem2D
&
queryDescs
,
const
DevMem2D
&
trainDescs
,
float
maxDistance
,
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
unsigned
int
*
nMatches
,
const
DevMem2Df
&
distance
);
const
DevMem2D
&
mask
,
const
DevMem2Di
&
trainIdx
,
unsigned
int
*
nMatches
,
const
DevMem2Df
&
distance
);
static
const
radiusMatch_caller_t
radiusMatch_callers
[
2
][
8
]
=
static
const
radiusMatch_caller_t
radiusMatch_callers
[
3
][
8
]
=
{
{
{
{
radiusMatchL1_gpu
<
unsigned
char
>
,
radiusMatchL1_gpu
<
signed
char
>
,
radiusMatchL1_gpu
<
unsigned
short
>
,
radiusMatchL1_gpu
<
unsigned
char
>
,
radiusMatchL1_gpu
<
signed
char
>
,
radiusMatchL1_gpu
<
unsigned
short
>
,
...
@@ -540,6 +566,10 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs,
...
@@ -540,6 +566,10 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs,
{
{
radiusMatchL2_gpu
<
unsigned
char
>
,
radiusMatchL2_gpu
<
signed
char
>
,
radiusMatchL2_gpu
<
unsigned
short
>
,
radiusMatchL2_gpu
<
unsigned
char
>
,
radiusMatchL2_gpu
<
signed
char
>
,
radiusMatchL2_gpu
<
unsigned
short
>
,
radiusMatchL2_gpu
<
short
>
,
radiusMatchL2_gpu
<
int
>
,
radiusMatchL2_gpu
<
float
>
,
0
,
0
radiusMatchL2_gpu
<
short
>
,
radiusMatchL2_gpu
<
int
>
,
radiusMatchL2_gpu
<
float
>
,
0
,
0
},
{
radiusMatchHamming_gpu
<
unsigned
char
>
,
radiusMatchHamming_gpu
<
signed
char
>
,
radiusMatchHamming_gpu
<
unsigned
short
>
,
radiusMatchHamming_gpu
<
short
>
,
radiusMatchHamming_gpu
<
int
>
,
0
,
0
,
0
}
}
};
};
...
...
modules/gpu/src/cuda/brute_force_matcher.cu
View file @
f11efdce
...
@@ -103,30 +103,61 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -103,30 +103,61 @@ namespace cv { namespace gpu { namespace bfmatcher
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Reduce Sum
// Reduce Sum
template <int BLOCK_DIM_X> __device__ void reduceSum(float* sdiff_row, float& mySum);
template <> __device__ void reduceSum<16>(float* sdiff_row, float& mySum)
template <int BLOCK_DIM_X> struct SumReductor;
template <> struct SumReductor<16>
{
{
volatile float* smem = sdiff_row;
template <typename T> static __device__ void reduce(T* sdiff_row, T& mySum)
smem[threadIdx.x] = mySum;
if (threadIdx.x < 8)
{
{
smem[threadIdx.x] = mySum += smem[threadIdx.x + 8];
volatile T* smem = sdiff_row;
smem[threadIdx.x] = mySum += smem[threadIdx.x + 4];
smem[threadIdx.x] = mySum += smem[threadIdx.x + 2];
smem[threadIdx.x] = mySum;
smem[threadIdx.x] = mySum += smem[threadIdx.x + 1];
if (threadIdx.x < 8)
{
smem[threadIdx.x] = mySum += smem[threadIdx.x + 8];
smem[threadIdx.x] = mySum += smem[threadIdx.x + 4];
smem[threadIdx.x] = mySum += smem[threadIdx.x + 2];
smem[threadIdx.x] = mySum += smem[threadIdx.x + 1];
}
}
}
}
}
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Distance
// Distance
class L1Dist
template <typename T>
class L1Dist
{
{
public:
public:
typedef int ResultType;
typedef int ValueType;
__device__ L1Dist() : mySum(0) {}
__device__ void reduceIter(int val1, int val2)
{
mySum = __sad(val1, val2, mySum);
}
template <int BLOCK_DIM_X> __device__ void reduceAll(int* sdiff_row)
{
SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
}
__device__ operator int() const
{
return mySum;
}
private:
int mySum;
};
template <> class L1Dist<float>
{
public:
typedef float ResultType;
typedef float ValueType;
__device__ L1Dist() : mySum(0.0f) {}
__device__ L1Dist() : mySum(0.0f) {}
__device__ void reduceIter(float val1, float val2)
__device__ void reduceIter(float val1, float val2)
...
@@ -134,10 +165,9 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -134,10 +165,9 @@ namespace cv { namespace gpu { namespace bfmatcher
mySum += fabs(val1 - val2);
mySum += fabs(val1 - val2);
}
}
template <int BLOCK_DIM_X>
template <int BLOCK_DIM_X> __device__ void reduceAll(float* sdiff_row)
__device__ void reduceAll(float* sdiff_row)
{
{
reduceSum<BLOCK_DIM_X>
(sdiff_row, mySum);
SumReductor<BLOCK_DIM_X>::reduce
(sdiff_row, mySum);
}
}
__device__ operator float() const
__device__ operator float() const
...
@@ -152,6 +182,9 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -152,6 +182,9 @@ namespace cv { namespace gpu { namespace bfmatcher
class L2Dist
class L2Dist
{
{
public:
public:
typedef float ResultType;
typedef float ValueType;
__device__ L2Dist() : mySum(0.0f) {}
__device__ L2Dist() : mySum(0.0f) {}
__device__ void reduceIter(float val1, float val2)
__device__ void reduceIter(float val1, float val2)
...
@@ -160,10 +193,9 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -160,10 +193,9 @@ namespace cv { namespace gpu { namespace bfmatcher
mySum += reg * reg;
mySum += reg * reg;
}
}
template <int BLOCK_DIM_X>
template <int BLOCK_DIM_X> __device__ void reduceAll(float* sdiff_row)
__device__ void reduceAll(float* sdiff_row)
{
{
reduceSum<BLOCK_DIM_X>
(sdiff_row, mySum);
SumReductor<BLOCK_DIM_X>::reduce
(sdiff_row, mySum);
}
}
__device__ operator float() const
__device__ operator float() const
...
@@ -174,13 +206,39 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -174,13 +206,39 @@ namespace cv { namespace gpu { namespace bfmatcher
private:
private:
float mySum;
float mySum;
};
};
class HammingDist
{
public:
typedef int ResultType;
typedef int ValueType;
__device__ HammingDist() : mySum(0) {}
__device__ void reduceIter(int val1, int val2)
{
mySum += __popc(val1 ^ val2);
}
template <int BLOCK_DIM_X> __device__ void reduceAll(int* sdiff_row)
{
SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
}
__device__ operator int() const
{
return mySum;
}
private:
int mySum;
};
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// reduceDescDiff
// reduceDescDiff
template <int BLOCK_DIM_X, typename Dist, typename T>
template <int BLOCK_DIM_X, typename Dist, typename T>
__device__ void reduceDescDiff(const T* queryDescs, const T* trainDescs, int desc_len, Dist& dist,
__device__ void reduceDescDiff(const T* queryDescs, const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row)
float* sdiff_row)
{
{
for (int i = threadIdx.x; i < desc_len; i += BLOCK_DIM_X)
for (int i = threadIdx.x; i < desc_len; i += BLOCK_DIM_X)
dist.reduceIter(queryDescs[i], trainDescs[i]);
dist.reduceIter(queryDescs[i], trainDescs[i]);
...
@@ -195,14 +253,14 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -195,14 +253,14 @@ namespace cv { namespace gpu { namespace bfmatcher
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// loadDescsVals
// loadDescsVals
template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN, typename T>
template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN, typename T
, typename U
>
__device__ void loadDescsVals(const T* descs, int desc_len,
float* queryVals, float
* smem)
__device__ void loadDescsVals(const T* descs, int desc_len,
U* queryVals, U
* smem)
{
{
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
if (tid < desc_len)
if (tid < desc_len)
{
{
smem[tid] =
(float)
descs[tid];
smem[tid] = descs[tid];
}
}
__syncthreads();
__syncthreads();
...
@@ -220,8 +278,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -220,8 +278,7 @@ namespace cv { namespace gpu { namespace bfmatcher
template <int N> struct UnrollDescDiff
template <int N> struct UnrollDescDiff
{
{
template <typename Dist, typename T>
template <typename Dist, typename T>
static __device__ void calcCheck(const float* queryVals, const T* trainDescs, int desc_len,
static __device__ void calcCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist, int ind)
Dist& dist, int ind)
{
{
if (ind < desc_len)
if (ind < desc_len)
{
{
...
@@ -234,7 +291,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -234,7 +291,7 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}
template <typename Dist, typename T>
template <typename Dist, typename T>
static __device__ void calcWithoutCheck(const
float
* queryVals, const T* trainDescs, Dist& dist)
static __device__ void calcWithoutCheck(const
typename Dist::ValueType
* queryVals, const T* trainDescs, Dist& dist)
{
{
dist.reduceIter(*queryVals, *trainDescs);
dist.reduceIter(*queryVals, *trainDescs);
...
@@ -247,13 +304,13 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -247,13 +304,13 @@ namespace cv { namespace gpu { namespace bfmatcher
template <> struct UnrollDescDiff<0>
template <> struct UnrollDescDiff<0>
{
{
template <typename Dist, typename T>
template <typename Dist, typename T>
static __device__ void calcCheck(const
float
* queryVals, const T* trainDescs, int desc_len,
static __device__ void calcCheck(const
typename Dist::ValueType
* queryVals, const T* trainDescs, int desc_len,
Dist& dist, int ind)
Dist& dist, int ind)
{
{
}
}
template <typename Dist, typename T>
template <typename Dist, typename T>
static __device__ void calcWithoutCheck(const
float
* queryVals, const T* trainDescs, Dist& dist)
static __device__ void calcWithoutCheck(const
typename Dist::ValueType
* queryVals, const T* trainDescs, Dist& dist)
{
{
}
}
};
};
...
@@ -263,29 +320,25 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -263,29 +320,25 @@ namespace cv { namespace gpu { namespace bfmatcher
struct DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, false>
struct DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, false>
{
{
template <typename Dist, typename T>
template <typename Dist, typename T>
static __device__ void calc(const
float
* queryVals, const T* trainDescs, int desc_len, Dist& dist)
static __device__ void calc(const
typename Dist::ValueType
* queryVals, const T* trainDescs, int desc_len, Dist& dist)
{
{
UnrollDescDiff<MAX_DESCRIPTORS_LEN / BLOCK_DIM_X>::calcCheck(queryVals, trainDescs, desc_len,
UnrollDescDiff<MAX_DESCRIPTORS_LEN / BLOCK_DIM_X>::calcCheck(queryVals, trainDescs, desc_len, dist, threadIdx.x);
dist, threadIdx.x);
}
}
};
};
template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN>
template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN>
struct DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, true>
struct DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, true>
{
{
template <typename Dist, typename T>
template <typename Dist, typename T>
static __device__ void calc(const
float
* queryVals, const T* trainDescs, int desc_len, Dist& dist)
static __device__ void calc(const
typename Dist::ValueType
* queryVals, const T* trainDescs, int desc_len, Dist& dist)
{
{
UnrollDescDiff<MAX_DESCRIPTORS_LEN / BLOCK_DIM_X>::calcWithoutCheck(queryVals,
UnrollDescDiff<MAX_DESCRIPTORS_LEN / BLOCK_DIM_X>::calcWithoutCheck(queryVals, trainDescs + threadIdx.x, dist);
trainDescs + threadIdx.x, dist);
}
}
};
};
template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN, typename Dist, typename T>
template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN, typename Dist, typename T>
__device__ void reduceDescDiffCached(const float* queryVals, const T* trainDescs, int desc_len, Dist& dist,
__device__ void reduceDescDiffCached(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row)
float* sdiff_row)
{
{
DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN>::calc(queryVals,
DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN>::calc(queryVals, trainDescs, desc_len, dist);
trainDescs, desc_len, dist);
dist.reduceAll<BLOCK_DIM_X>(sdiff_row);
dist.reduceAll<BLOCK_DIM_X>(sdiff_row);
}
}
...
@@ -293,62 +346,60 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -293,62 +346,60 @@ namespace cv { namespace gpu { namespace bfmatcher
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// warpReduceMinIdxIdx
// warpReduceMinIdxIdx
template <int BLOCK_DIM_Y>
template <int BLOCK_DIM_Y> struct MinIdxIdxWarpReductor;
__device__ void warpReduceMinIdxIdx(float& myMin, int& myBestTrainIdx, int& myBestImgIdx,
template <> struct MinIdxIdxWarpReductor<16>
volatile float* sdata, volatile int* strainIdx, volatile int* simgIdx);
template <>
__device__ void warpReduceMinIdxIdx<16>(float& myMin, int& myBestTrainIdx, int& myBestImgIdx,
volatile float* smin, volatile int* strainIdx, volatile int* simgIdx)
{
{
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
template <typename T>
static __device__ void reduce(T& myMin, int& myBestTrainIdx, int& myBestImgIdx, volatile T* smin, volatile int* strainIdx, volatile int* simgIdx)
if (tid < 8)
{
{
myMin = smin[tid];
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
myBestTrainIdx = strainIdx[tid];
myBestImgIdx = simgIdx[tid];
float reg = smin[tid + 8];
if (tid < 8)
if (reg < myMin)
{
{
smin[tid] = myMin = reg;
myMin = smin[tid];
strainIdx[tid] = myBestTrainIdx = strainIdx[tid + 8];
myBestTrainIdx = strainIdx[tid];
simgIdx[tid] = myBestImgIdx = simgIdx[tid + 8];
myBestImgIdx = simgIdx[tid];
}
reg = smin[tid + 4];
float reg = smin[tid + 8];
if (reg < myMin)
if (reg < myMin)
{
{
smin[tid] = myMin = reg;
smin[tid] = myMin = reg;
strainIdx[tid] = myBestTrainIdx = strainIdx[tid + 4];
strainIdx[tid] = myBestTrainIdx = strainIdx[tid + 8];
simgIdx[tid] = myBestImgIdx = simgIdx[tid + 4];
simgIdx[tid] = myBestImgIdx = simgIdx[tid + 8];
}
}
reg = smin[tid + 2];
reg = smin[tid + 4];
if (reg < myMin)
if (reg < myMin)
{
{
smin[tid] = myMin = reg;
smin[tid] = myMin = reg;
strainIdx[tid] = myBestTrainIdx = strainIdx[tid + 2];
strainIdx[tid] = myBestTrainIdx = strainIdx[tid + 4];
simgIdx[tid] = myBestImgIdx = simgIdx[tid + 2];
simgIdx[tid] = myBestImgIdx = simgIdx[tid + 4];
}
}
reg = smin[tid + 1];
reg = smin[tid + 2];
if (reg < myMin)
if (reg < myMin)
{
{
smin[tid] = myMin = reg;
smin[tid] = myMin = reg;
strainIdx[tid] = myBestTrainIdx = strainIdx[tid + 1];
strainIdx[tid] = myBestTrainIdx = strainIdx[tid + 2];
simgIdx[tid] = myBestImgIdx = simgIdx[tid + 1];
simgIdx[tid] = myBestImgIdx = simgIdx[tid + 2];
}
reg = smin[tid + 1];
if (reg < myMin)
{
smin[tid] = myMin = reg;
strainIdx[tid] = myBestTrainIdx = strainIdx[tid + 1];
simgIdx[tid] = myBestImgIdx = simgIdx[tid + 1];
}
}
}
}
}
}
}
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// findBestMatch
// findBestMatch
template <int BLOCK_DIM_Y>
template <int BLOCK_DIM_Y, typename T>
__device__ void findBestMatch(float& myMin, int& myBestTrainIdx, int& myBestImgIdx,
__device__ void findBestMatch(T& myMin, int& myBestTrainIdx, int& myBestImgIdx, T* smin, int* strainIdx, int* simgIdx)
float* smin, int* strainIdx, int* simgIdx)
{
{
if (threadIdx.x == 0)
if (threadIdx.x == 0)
{
{
...
@@ -358,7 +409,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -358,7 +409,7 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}
__syncthreads();
__syncthreads();
warpReduceMinIdxIdx<BLOCK_DIM_Y>
(myMin, myBestTrainIdx, myBestImgIdx, smin, strainIdx, simgIdx);
MinIdxIdxWarpReductor<BLOCK_DIM_Y>::reduce
(myMin, myBestTrainIdx, myBestImgIdx, smin, strainIdx, simgIdx);
}
}
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
...
@@ -368,13 +419,13 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -368,13 +419,13 @@ namespace cv { namespace gpu { namespace bfmatcher
class ReduceDescCalculatorSimple
class ReduceDescCalculatorSimple
{
{
public:
public:
__device__ void prepare(const T* queryDescs_, int,
float
*)
__device__ void prepare(const T* queryDescs_, int,
void
*)
{
{
queryDescs = queryDescs_;
queryDescs = queryDescs_;
}
}
template <typename Dist>
template <typename Dist>
__device__ void calc(const T* trainDescs, int desc_len, Dist& dist,
float
* sdiff_row) const
__device__ void calc(const T* trainDescs, int desc_len, Dist& dist,
typename Dist::ResultType
* sdiff_row) const
{
{
reduceDescDiff<BLOCK_DIM_X>(queryDescs, trainDescs, desc_len, dist, sdiff_row);
reduceDescDiff<BLOCK_DIM_X>(queryDescs, trainDescs, desc_len, dist, sdiff_row);
}
}
...
@@ -383,24 +434,23 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -383,24 +434,23 @@ namespace cv { namespace gpu { namespace bfmatcher
const T* queryDescs;
const T* queryDescs;
};
};
template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN, typename T>
template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN, typename T
, typename U
>
class ReduceDescCalculatorCached
class ReduceDescCalculatorCached
{
{
public:
public:
__device__ void prepare(const T* queryDescs, int desc_len,
float
* smem)
__device__ void prepare(const T* queryDescs, int desc_len,
U
* smem)
{
{
loadDescsVals<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN>(queryDescs, desc_len, queryVals, smem);
loadDescsVals<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN>(queryDescs, desc_len, queryVals, smem);
}
}
template <typename Dist>
template <typename Dist>
__device__ void calc(const T* trainDescs, int desc_len, Dist& dist,
float
* sdiff_row) const
__device__ void calc(const T* trainDescs, int desc_len, Dist& dist,
typename Dist::ResultType
* sdiff_row) const
{
{
reduceDescDiffCached<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN>(queryVals, trainDescs,
reduceDescDiffCached<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN>(queryVals, trainDescs, desc_len, dist, sdiff_row);
desc_len, dist, sdiff_row);
}
}
private:
private:
float
queryVals[MAX_DESCRIPTORS_LEN / BLOCK_DIM_X];
U
queryVals[MAX_DESCRIPTORS_LEN / BLOCK_DIM_X];
};
};
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
...
@@ -409,7 +459,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -409,7 +459,7 @@ namespace cv { namespace gpu { namespace bfmatcher
template <typename Dist, typename ReduceDescCalculator, typename T, typename Mask>
template <typename Dist, typename ReduceDescCalculator, typename T, typename Mask>
__device__ void matchDescs(int queryIdx, int imgIdx, const DevMem2D_<T>& trainDescs_,
__device__ void matchDescs(int queryIdx, int imgIdx, const DevMem2D_<T>& trainDescs_,
const Mask& m, const ReduceDescCalculator& reduceDescCalc,
const Mask& m, const ReduceDescCalculator& reduceDescCalc,
float& myMin, int& myBestTrainIdx, int& myBestImgIdx, float
* sdiff_row)
typename Dist::ResultType& myMin, int& myBestTrainIdx, int& myBestImgIdx, typename Dist::ResultType
* sdiff_row)
{
{
for (int trainIdx = threadIdx.y; trainIdx < trainDescs_.rows; trainIdx += blockDim.y)
for (int trainIdx = threadIdx.y; trainIdx < trainDescs_.rows; trainIdx += blockDim.y)
{
{
...
@@ -447,10 +497,9 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -447,10 +497,9 @@ namespace cv { namespace gpu { namespace bfmatcher
template <typename Dist, typename ReduceDescCalculator, typename Mask>
template <typename Dist, typename ReduceDescCalculator, typename Mask>
__device__ void loop(int queryIdx, Mask& m, const ReduceDescCalculator& reduceDescCalc,
__device__ void loop(int queryIdx, Mask& m, const ReduceDescCalculator& reduceDescCalc,
float& myMin, int& myBestTrainIdx, int& myBestImgIdx, float
* sdiff_row) const
typename Dist::ResultType& myMin, int& myBestTrainIdx, int& myBestImgIdx, typename Dist::ResultType
* sdiff_row) const
{
{
matchDescs<Dist>(queryIdx, 0, trainDescs, m, reduceDescCalc,
matchDescs<Dist>(queryIdx, 0, trainDescs, m, reduceDescCalc, myMin, myBestTrainIdx, myBestImgIdx, sdiff_row);
myMin, myBestTrainIdx, myBestImgIdx, sdiff_row);
}
}
__device__ int desc_len() const
__device__ int desc_len() const
...
@@ -473,14 +522,13 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -473,14 +522,13 @@ namespace cv { namespace gpu { namespace bfmatcher
template <typename Dist, typename ReduceDescCalculator, typename Mask>
template <typename Dist, typename ReduceDescCalculator, typename Mask>
__device__ void loop(int queryIdx, Mask& m, const ReduceDescCalculator& reduceDescCalc,
__device__ void loop(int queryIdx, Mask& m, const ReduceDescCalculator& reduceDescCalc,
float& myMin, int& myBestTrainIdx, int& myBestImgIdx, float
* sdiff_row) const
typename Dist::ResultType& myMin, int& myBestTrainIdx, int& myBestImgIdx, typename Dist::ResultType
* sdiff_row) const
{
{
for (int imgIdx = 0; imgIdx < nImg; ++imgIdx)
for (int imgIdx = 0; imgIdx < nImg; ++imgIdx)
{
{
DevMem2D_<T> trainDescs = trainCollection[imgIdx];
DevMem2D_<T> trainDescs = trainCollection[imgIdx];
m.nextMask();
m.nextMask();
matchDescs<Dist>(queryIdx, imgIdx, trainDescs, m, reduceDescCalc,
matchDescs<Dist>(queryIdx, imgIdx, trainDescs, m, reduceDescCalc, myMin, myBestTrainIdx, myBestImgIdx, sdiff_row);
myMin, myBestTrainIdx, myBestImgIdx, sdiff_row);
}
}
}
}
...
@@ -498,38 +546,35 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -498,38 +546,35 @@ namespace cv { namespace gpu { namespace bfmatcher
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Match kernel
// Match kernel
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename ReduceDescCalculator, typename Dist, typename T,
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename ReduceDescCalculator, typename Dist, typename T, typename Train, typename Mask>
typename Train, typename Mask>
__global__ void match(const PtrStep_<T> queryDescs_, const Train train, const Mask mask, int* trainIdx, int* imgIdx, float* distance)
__global__ void match(const PtrStep_<T> queryDescs_, const Train train, const Mask mask,
int* trainIdx, int* imgIdx, float* distance)
{
{
__shared__
float
smem[BLOCK_DIM_X * BLOCK_DIM_Y];
__shared__
typename Dist::ResultType
smem[BLOCK_DIM_X * BLOCK_DIM_Y];
const int queryIdx = blockIdx.x;
const int queryIdx = blockIdx.x;
int myBestTrainIdx = -1;
int myBestTrainIdx = -1;
int myBestImgIdx = -1;
int myBestImgIdx = -1;
float myMin = numeric_limits_gpu<float
>::max();
typename Dist::ResultType myMin = numeric_limits_gpu<typename Dist::ResultType
>::max();
{
{
float
* sdiff_row = smem + BLOCK_DIM_X * threadIdx.y;
typename Dist::ResultType
* sdiff_row = smem + BLOCK_DIM_X * threadIdx.y;
Mask m = mask;
Mask m = mask;
ReduceDescCalculator reduceDescCalc;
ReduceDescCalculator reduceDescCalc;
reduceDescCalc.prepare(queryDescs_.ptr(queryIdx), train.desc_len(), smem);
reduceDescCalc.prepare(queryDescs_.ptr(queryIdx), train.desc_len(),
(typename Dist::ValueType*)
smem);
train.template loop<Dist>(queryIdx, m, reduceDescCalc, myMin, myBestTrainIdx, myBestImgIdx, sdiff_row);
train.template loop<Dist>(queryIdx, m, reduceDescCalc, myMin, myBestTrainIdx, myBestImgIdx, sdiff_row);
}
}
__syncthreads();
__syncthreads();
float
* smin = smem;
typename Dist::ResultType
* smin = smem;
int* strainIdx = (int*)(smin + BLOCK_DIM_Y);
int* strainIdx = (int*)(smin + BLOCK_DIM_Y);
int* simgIdx = strainIdx + BLOCK_DIM_Y;
int* simgIdx = strainIdx + BLOCK_DIM_Y;
findBestMatch<BLOCK_DIM_Y>(myMin, myBestTrainIdx, myBestImgIdx,
findBestMatch<BLOCK_DIM_Y>(myMin, myBestTrainIdx, myBestImgIdx, smin, strainIdx, simgIdx);
smin, strainIdx, simgIdx);
if (threadIdx.x == 0 && threadIdx.y == 0)
if (threadIdx.x == 0 && threadIdx.y == 0)
{
{
...
@@ -542,8 +587,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -542,8 +587,7 @@ namespace cv { namespace gpu { namespace bfmatcher
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Match kernel callers
// Match kernel callers
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T,
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T, typename Train, typename Mask>
typename Train, typename Mask>
void matchSimple_caller(const DevMem2D_<T>& queryDescs, const Train& train,
void matchSimple_caller(const DevMem2D_<T>& queryDescs, const Train& train,
const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance)
const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance)
{
{
...
@@ -553,14 +597,12 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -553,14 +597,12 @@ namespace cv { namespace gpu { namespace bfmatcher
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
match<BLOCK_DIM_X, BLOCK_DIM_Y, ReduceDescCalculatorSimple<BLOCK_DIM_X, T>, Dist, T>
match<BLOCK_DIM_X, BLOCK_DIM_Y, ReduceDescCalculatorSimple<BLOCK_DIM_X, T>, Dist, T>
<<<grid, threads>>>(queryDescs, train, mask, trainIdx.data,
<<<grid, threads>>>(queryDescs, train, mask, trainIdx.data, imgIdx.data, distance.data);
imgIdx.data, distance.data);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
}
}
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN,
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN, typename Dist, typename T, typename Train, typename Mask>
typename Dist, typename T, typename Train, typename Mask>
void matchCached_caller(const DevMem2D_<T>& queryDescs, const Train& train,
void matchCached_caller(const DevMem2D_<T>& queryDescs, const Train& train,
const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance)
const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance)
{
{
...
@@ -571,11 +613,8 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -571,11 +613,8 @@ namespace cv { namespace gpu { namespace bfmatcher
dim3 grid(queryDescs.rows, 1, 1);
dim3 grid(queryDescs.rows, 1, 1);
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
match<BLOCK_DIM_X, BLOCK_DIM_Y,
match<BLOCK_DIM_X, BLOCK_DIM_Y, ReduceDescCalculatorCached<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN, T, typename Dist::ValueType>, Dist, T>
ReduceDescCalculatorCached<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN, T>,
<<<grid, threads>>>(queryDescs, train, mask, trainIdx.data, imgIdx.data, distance.data);
Dist, T>
<<<grid, threads>>>(queryDescs, train, mask, trainIdx.data,
imgIdx.data, distance.data);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
...
@@ -616,11 +655,11 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -616,11 +655,11 @@ namespace cv { namespace gpu { namespace bfmatcher
if (mask.data)
if (mask.data)
{
{
SingleMask m(mask);
SingleMask m(mask);
matchDispatcher<
L1Dist
>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12);
matchDispatcher<
L1Dist<T>
>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12);
}
}
else
else
{
{
matchDispatcher<
L1Dist
>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
matchDispatcher<
L1Dist<T>
>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
}
}
}
}
...
@@ -655,6 +694,29 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -655,6 +694,29 @@ namespace cv { namespace gpu { namespace bfmatcher
template void matchSingleL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchSingleL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchSingleL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchSingleL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
// Host-side entry point: runs matchDispatcher with the HammingDist metric for
// queryDescs against one train descriptor matrix.
//
// queryDescs / trainDescs : untyped descriptor matrices, reinterpreted here as
//                           DevMem2D_<T>.
// mask                    : optional; when mask.data is null the WithOutMask
//                           policy is used, otherwise the mask is wrapped in a
//                           SingleMask.
// trainIdx, imgIdx, distance, cc_12 : forwarded unchanged to matchDispatcher
//                           (presumably the result buffers and a compute
//                           capability 1.2 flag -- confirm against matchDispatcher).
template <typename T>
void matchSingleHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
bool cc_12)
{
    SingleTrain<T> train((DevMem2D_<T>)trainDescs);

    // Unmasked case first: nothing to wrap, dispatch and leave.
    if (!mask.data)
    {
        matchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
        return;
    }

    // A mask was supplied: hand it to the dispatcher through SingleMask.
    SingleMask m(mask);
    matchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12);
}
// Explicit instantiations of matchSingleHamming_gpu for the element types
// uchar, schar, ushort, short and int.
template void matchSingleHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchSingleHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchSingleHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchSingleHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchSingleHamming_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template <typename T>
template <typename T>
void matchCollectionL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
void matchCollectionL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx,
const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx,
...
@@ -664,11 +726,11 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -664,11 +726,11 @@ namespace cv { namespace gpu { namespace bfmatcher
if (maskCollection.data)
if (maskCollection.data)
{
{
MaskCollection mask(maskCollection.data);
MaskCollection mask(maskCollection.data);
matchDispatcher<
L1Dist
>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12);
matchDispatcher<
L1Dist<T>
>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12);
}
}
else
else
{
{
matchDispatcher<
L1Dist
>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
matchDispatcher<
L1Dist<T>
>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
}
}
}
}
...
@@ -702,6 +764,29 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -702,6 +764,29 @@ namespace cv { namespace gpu { namespace bfmatcher
template void matchCollectionL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchCollectionL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchCollectionL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchCollectionL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchCollectionL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchCollectionL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
// Host-side entry point: runs matchDispatcher with the HammingDist metric for
// queryDescs against a collection of train descriptor matrices.
//
// trainCollection : its data pointer is reinterpreted as an array of
//                   DevMem2D_<T>; trainCollection.cols and queryDescs.cols are
//                   passed to the TrainCollection constructor (presumably the
//                   image count and the descriptor length -- confirm against
//                   TrainCollection).
// maskCollection  : optional; when maskCollection.data is null the WithOutMask
//                   policy is used, otherwise a MaskCollection is built from it.
// trainIdx, imgIdx, distance, cc_12 : forwarded unchanged to matchDispatcher.
template <typename T>
void matchCollectionHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx,
const DevMem2Df& distance, bool cc_12)
{
    TrainCollection<T> train((DevMem2D_<T>*)trainCollection.ptr(), trainCollection.cols, queryDescs.cols);

    // Unmasked case first: dispatch with the no-op mask policy and leave.
    if (!maskCollection.data)
    {
        matchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
        return;
    }

    // Masks were supplied: wrap the raw pointer in a MaskCollection.
    MaskCollection mask(maskCollection.data);
    matchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12);
}
// Explicit instantiations of matchCollectionHamming_gpu for the element types
// uchar, schar, ushort, short and int.
template void matchCollectionHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchCollectionHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchCollectionHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchCollectionHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
template void matchCollectionHamming_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, bool cc_12);
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////// Knn Match ////////////////////////////////////
//////////////////////////////////// Knn Match ////////////////////////////////////
...
@@ -713,9 +798,9 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -713,9 +798,9 @@ namespace cv { namespace gpu { namespace bfmatcher
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T, typename Mask>
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T, typename Mask>
__global__ void calcDistance(PtrStep_<T> queryDescs_, DevMem2D_<T> trainDescs_, Mask mask, PtrStepf distance)
__global__ void calcDistance(PtrStep_<T> queryDescs_, DevMem2D_<T> trainDescs_, Mask mask, PtrStepf distance)
{
{
__shared__
float
sdiff[BLOCK_DIM_X * BLOCK_DIM_Y];
__shared__
typename Dist::ResultType
sdiff[BLOCK_DIM_X * BLOCK_DIM_Y];
float
* sdiff_row = sdiff + BLOCK_DIM_X * threadIdx.y;
typename Dist::ResultType
* sdiff_row = sdiff + BLOCK_DIM_X * threadIdx.y;
const int queryIdx = blockIdx.x;
const int queryIdx = blockIdx.x;
const T* queryDescs = queryDescs_.ptr(queryIdx);
const T* queryDescs = queryDescs_.ptr(queryIdx);
...
@@ -726,7 +811,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -726,7 +811,7 @@ namespace cv { namespace gpu { namespace bfmatcher
{
{
const T* trainDescs = trainDescs_.ptr(trainIdx);
const T* trainDescs = trainDescs_.ptr(trainIdx);
float myDist = numeric_limits_gpu<float
>::max();
typename Dist::ResultType myDist = numeric_limits_gpu<typename Dist::ResultType
>::max();
if (mask(queryIdx, trainIdx))
if (mask(queryIdx, trainIdx))
{
{
...
@@ -763,14 +848,14 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -763,14 +848,14 @@ namespace cv { namespace gpu { namespace bfmatcher
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// warpReduceMinIdx
// warpReduceMinIdx
template <int BLOCK_SIZE>
template <int BLOCK_SIZE
, typename T
>
__device__ void warpReduceMinIdx(volatile
float* sdist, volatile int* strainIdx, float
& myMin, int tid)
__device__ void warpReduceMinIdx(volatile
T* sdist, volatile int* strainIdx, T
& myMin, int tid)
{
{
if (tid < 32)
if (tid < 32)
{
{
if (BLOCK_SIZE >= 64)
if (BLOCK_SIZE >= 64)
{
{
float
reg = sdist[tid + 32];
T
reg = sdist[tid + 32];
if (reg < myMin)
if (reg < myMin)
{
{
...
@@ -780,7 +865,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -780,7 +865,7 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}
if (BLOCK_SIZE >= 32)
if (BLOCK_SIZE >= 32)
{
{
float
reg = sdist[tid + 16];
T
reg = sdist[tid + 16];
if (reg < myMin)
if (reg < myMin)
{
{
...
@@ -790,7 +875,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -790,7 +875,7 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}
if (BLOCK_SIZE >= 16)
if (BLOCK_SIZE >= 16)
{
{
float
reg = sdist[tid + 8];
T
reg = sdist[tid + 8];
if (reg < myMin)
if (reg < myMin)
{
{
...
@@ -800,7 +885,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -800,7 +885,7 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}
if (BLOCK_SIZE >= 8)
if (BLOCK_SIZE >= 8)
{
{
float
reg = sdist[tid + 4];
T
reg = sdist[tid + 4];
if (reg < myMin)
if (reg < myMin)
{
{
...
@@ -810,7 +895,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -810,7 +895,7 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}
if (BLOCK_SIZE >= 4)
if (BLOCK_SIZE >= 4)
{
{
float
reg = sdist[tid + 2];
T
reg = sdist[tid + 2];
if (reg < myMin)
if (reg < myMin)
{
{
...
@@ -820,7 +905,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -820,7 +905,7 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}
if (BLOCK_SIZE >= 2)
if (BLOCK_SIZE >= 2)
{
{
float
reg = sdist[tid + 1];
T
reg = sdist[tid + 1];
if (reg < myMin)
if (reg < myMin)
{
{
...
@@ -831,17 +916,17 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -831,17 +916,17 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}
}
}
template <int BLOCK_SIZE>
template <int BLOCK_SIZE
, typename T
>
__device__ void reduceMinIdx(const
float* dist, int n, float
* sdist, int* strainIdx)
__device__ void reduceMinIdx(const
T* dist, int n, T
* sdist, int* strainIdx)
{
{
const int tid = threadIdx.x;
const int tid = threadIdx.x;
float myMin = numeric_limits_gpu<float
>::max();
T myMin = numeric_limits_gpu<T
>::max();
int myMinIdx = -1;
int myMinIdx = -1;
for (int i = tid; i < n; i += BLOCK_SIZE)
for (int i = tid; i < n; i += BLOCK_SIZE)
{
{
float
reg = dist[i];
T
reg = dist[i];
if (reg < myMin)
if (reg < myMin)
{
{
myMin = reg;
myMin = reg;
...
@@ -855,7 +940,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -855,7 +940,7 @@ namespace cv { namespace gpu { namespace bfmatcher
if (BLOCK_SIZE >= 512 && tid < 256)
if (BLOCK_SIZE >= 512 && tid < 256)
{
{
float
reg = sdist[tid + 256];
T
reg = sdist[tid + 256];
if (reg < myMin)
if (reg < myMin)
{
{
...
@@ -866,7 +951,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -866,7 +951,7 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}
if (BLOCK_SIZE >= 256 && tid < 128)
if (BLOCK_SIZE >= 256 && tid < 128)
{
{
float
reg = sdist[tid + 128];
T
reg = sdist[tid + 128];
if (reg < myMin)
if (reg < myMin)
{
{
...
@@ -877,7 +962,7 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -877,7 +962,7 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}
if (BLOCK_SIZE >= 128 && tid < 64)
if (BLOCK_SIZE >= 128 && tid < 64)
{
{
float
reg = sdist[tid + 64];
T
reg = sdist[tid + 64];
if (reg < myMin)
if (reg < myMin)
{
{
...
@@ -943,14 +1028,12 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -943,14 +1028,12 @@ namespace cv { namespace gpu { namespace bfmatcher
// knn match caller
// knn match caller
template <typename Dist, typename T, typename Mask>
template <typename Dist, typename T, typename Mask>
void calcDistanceDispatcher(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs,
void calcDistanceDispatcher(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs, const Mask& mask, const DevMem2Df& allDist)
const Mask& mask, const DevMem2Df& allDist)
{
{
calcDistance_caller<16, 16, Dist>(queryDescs, trainDescs, mask, allDist);
calcDistance_caller<16, 16, Dist>(queryDescs, trainDescs, mask, allDist);
}
}
void findKnnMatchDispatcher(int knn, const DevMem2Di& trainIdx, const DevMem2Df& distance,
void findKnnMatchDispatcher(int knn, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist)
const DevMem2Df& allDist)
{
{
findKnnMatch_caller<256>(knn, trainIdx, distance, allDist);
findKnnMatch_caller<256>(knn, trainIdx, distance, allDist);
}
}
...
@@ -961,13 +1044,11 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -961,13 +1044,11 @@ namespace cv { namespace gpu { namespace bfmatcher
{
{
if (mask.data)
if (mask.data)
{
{
calcDistanceDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
calcDistanceDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, SingleMask(mask), allDist);
SingleMask(mask), allDist);
}
}
else
else
{
{
calcDistanceDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
calcDistanceDispatcher< L1Dist<T> >((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, WithOutMask(), allDist);
WithOutMask(), allDist);
}
}
findKnnMatchDispatcher(knn, trainIdx, distance, allDist);
findKnnMatchDispatcher(knn, trainIdx, distance, allDist);
...
@@ -1005,6 +1086,30 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -1005,6 +1086,30 @@ namespace cv { namespace gpu { namespace bfmatcher
template void knnMatchL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
// Host-side entry point for the knn match with the HammingDist metric.
//
// Works in two steps:
//   1. calcDistanceDispatcher<HammingDist> is run on the query and train
//      matrices (both reinterpreted as DevMem2D_<T>) with allDist as its last
//      argument; the mask policy is SingleMask when mask.data is non-null and
//      WithOutMask otherwise.
//   2. findKnnMatchDispatcher is called with knn, trainIdx, distance and
//      allDist.
template <typename T>
void knnMatchHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist)
{
    // Step 1: distance computation, masked or unmasked.
    if (!mask.data)
    {
        calcDistanceDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, WithOutMask(), allDist);
    }
    else
    {
        calcDistanceDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, SingleMask(mask), allDist);
    }

    // Step 2: knn selection over allDist.
    findKnnMatchDispatcher(knn, trainIdx, distance, allDist);
}
// Explicit instantiations of knnMatchHamming_gpu for the element types
// uchar, schar, ushort, short and int.
template void knnMatchHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchHamming_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////// Radius Match //////////////////////////////////
/////////////////////////////////// Radius Match //////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
...
@@ -1018,9 +1123,9 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -1018,9 +1123,9 @@ namespace cv { namespace gpu { namespace bfmatcher
{
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
__shared__
float
smem[BLOCK_DIM_X * BLOCK_DIM_Y];
__shared__
typename Dist::ResultType
smem[BLOCK_DIM_X * BLOCK_DIM_Y];
float
* sdiff_row = smem + BLOCK_DIM_X * threadIdx.y;
typename Dist::ResultType
* sdiff_row = smem + BLOCK_DIM_X * threadIdx.y;
const int queryIdx = blockIdx.x;
const int queryIdx = blockIdx.x;
const T* queryDescs = queryDescs_.ptr(queryIdx);
const T* queryDescs = queryDescs_.ptr(queryIdx);
...
@@ -1091,12 +1196,12 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -1091,12 +1196,12 @@ namespace cv { namespace gpu { namespace bfmatcher
{
{
if (mask.data)
if (mask.data)
{
{
radiusMatchDispatcher<
L1Dist
>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
radiusMatchDispatcher<
L1Dist<T>
>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
maxDistance, SingleMask(mask), trainIdx, nMatches, distance);
maxDistance, SingleMask(mask), trainIdx, nMatches, distance);
}
}
else
else
{
{
radiusMatchDispatcher<
L1Dist
>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
radiusMatchDispatcher<
L1Dist<T>
>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
maxDistance, WithOutMask(), trainIdx, nMatches, distance);
maxDistance, WithOutMask(), trainIdx, nMatches, distance);
}
}
}
}
...
@@ -1130,4 +1235,26 @@ namespace cv { namespace gpu { namespace bfmatcher
...
@@ -1130,4 +1235,26 @@ namespace cv { namespace gpu { namespace bfmatcher
template void radiusMatchL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
// Host-side entry point: runs radiusMatchDispatcher with the HammingDist metric.
//
// queryDescs / trainDescs : untyped descriptor matrices, reinterpreted here as
//                           DevMem2D_<T>.
// maxDistance             : forwarded to the dispatcher (presumably the radius
//                           threshold -- confirm against radiusMatchDispatcher).
// mask                    : optional; SingleMask is used when mask.data is
//                           non-null, WithOutMask otherwise.
// trainIdx, nMatches, distance : forwarded unchanged to the dispatcher.
template <typename T>
void radiusMatchHamming_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance)
{
    // Unmasked case first: dispatch with the no-op mask policy and leave.
    if (!mask.data)
    {
        radiusMatchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, maxDistance, WithOutMask(), trainIdx, nMatches, distance);
        return;
    }

    // A mask was supplied: wrap it in SingleMask for the dispatcher.
    radiusMatchDispatcher<HammingDist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs, maxDistance, SingleMask(mask), trainIdx, nMatches, distance);
}
// Explicit instantiations of radiusMatchHamming_gpu for the element types
// uchar, schar, ushort, short and int.
template void radiusMatchHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchHamming_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
}}}
}}}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment