Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
bd13e947
Commit
bd13e947
authored
Mar 27, 2012
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added assertion on device features (global atomics) into gpu tests
parent
4a996111
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
483 additions
and
343 deletions
+483
-343
brute_force_matcher.cpp
modules/gpu/src/brute_force_matcher.cpp
+98
-96
surf.cu
modules/gpu/src/cuda/surf.cu
+126
-142
fast.cpp
modules/gpu/src/fast.cpp
+9
-6
surf.cpp
modules/gpu/src/surf.cpp
+6
-4
test_features2d.cpp
modules/gpu/test/test_features2d.cpp
+224
-90
test_filters.cpp
modules/gpu/test/test_filters.cpp
+20
-5
No files found.
modules/gpu/src/brute_force_matcher.cpp
View file @
bd13e947
...
...
@@ -82,76 +82,76 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, vector< vec
#else
/* !defined (HAVE_CUDA) */
namespace
cv
{
namespace
gpu
{
namespace
device
namespace
cv
{
namespace
gpu
{
namespace
device
{
namespace
bf_match
{
template
<
typename
T
>
void
matchL1_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
template
<
typename
T
>
void
matchL1_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
matchL2_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
template
<
typename
T
>
void
matchL2_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
matchHamming_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
template
<
typename
T
>
void
matchHamming_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
matchL1_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
template
<
typename
T
>
void
matchL1_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
matchL2_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
template
<
typename
T
>
void
matchL2_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
matchHamming_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
template
<
typename
T
>
void
matchHamming_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
int
cc
,
cudaStream_t
stream
);
}
namespace
bf_knnmatch
{
template
<
typename
T
>
void
matchL1_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
int
k
,
const
DevMem2Db
&
mask
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
distance
,
const
DevMem2Df
&
allDist
,
template
<
typename
T
>
void
matchL1_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
int
k
,
const
DevMem2Db
&
mask
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
distance
,
const
DevMem2Df
&
allDist
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
matchL2_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
int
k
,
const
DevMem2Db
&
mask
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
distance
,
const
DevMem2Df
&
allDist
,
template
<
typename
T
>
void
matchL2_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
int
k
,
const
DevMem2Db
&
mask
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
distance
,
const
DevMem2Df
&
allDist
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
matchHamming_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
int
k
,
const
DevMem2Db
&
mask
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
distance
,
const
DevMem2Df
&
allDist
,
template
<
typename
T
>
void
matchHamming_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
int
k
,
const
DevMem2Db
&
mask
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
distance
,
const
DevMem2Df
&
allDist
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
match2L1_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
imgIdx
,
const
DevMem2Db
&
distance
,
template
<
typename
T
>
void
match2L1_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
imgIdx
,
const
DevMem2Db
&
distance
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
match2L2_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
imgIdx
,
const
DevMem2Db
&
distance
,
template
<
typename
T
>
void
match2L2_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
imgIdx
,
const
DevMem2Db
&
distance
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
match2Hamming_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
imgIdx
,
const
DevMem2Db
&
distance
,
template
<
typename
T
>
void
match2Hamming_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
imgIdx
,
const
DevMem2Db
&
distance
,
int
cc
,
cudaStream_t
stream
);
}
namespace
bf_radius_match
namespace
bf_radius_match
{
template
<
typename
T
>
void
matchL1_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
float
maxDistance
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
template
<
typename
T
>
void
matchL1_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
float
maxDistance
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
matchL2_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
float
maxDistance
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
template
<
typename
T
>
void
matchL2_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
float
maxDistance
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
matchHamming_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
float
maxDistance
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
template
<
typename
T
>
void
matchHamming_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
float
maxDistance
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
matchL1_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
*
trains
,
int
n
,
float
maxDistance
,
const
DevMem2Db
*
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
template
<
typename
T
>
void
matchL1_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
*
trains
,
int
n
,
float
maxDistance
,
const
DevMem2Db
*
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
matchL2_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
*
trains
,
int
n
,
float
maxDistance
,
const
DevMem2Db
*
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
template
<
typename
T
>
void
matchL2_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
*
trains
,
int
n
,
float
maxDistance
,
const
DevMem2Db
*
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
int
cc
,
cudaStream_t
stream
);
template
<
typename
T
>
void
matchHamming_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
*
trains
,
int
n
,
float
maxDistance
,
const
DevMem2Db
*
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
template
<
typename
T
>
void
matchHamming_gpu
(
const
DevMem2Db
&
query
,
const
DevMem2Db
*
trains
,
int
n
,
float
maxDistance
,
const
DevMem2Db
*
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
int
cc
,
cudaStream_t
stream
);
}
}}}
...
...
@@ -192,7 +192,7 @@ bool cv::gpu::BruteForceMatcher_GPU_base::isMaskSupported() const
// Match
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
matchSingle
(
const
GpuMat
&
query
,
const
GpuMat
&
train
,
GpuMat
&
trainIdx
,
GpuMat
&
distance
,
GpuMat
&
trainIdx
,
GpuMat
&
distance
,
const
GpuMat
&
mask
,
Stream
&
stream
)
{
if
(
query
.
empty
()
||
train
.
empty
())
...
...
@@ -200,25 +200,25 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const
using
namespace
::
cv
::
gpu
::
device
::
bf_match
;
typedef
void
(
*
caller_t
)(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
const
DevMem2Db
&
mask
,
typedef
void
(
*
caller_t
)(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
int
cc
,
cudaStream_t
stream
);
static
const
caller_t
callers
[
3
][
6
]
=
{
{
matchL1_gpu
<
unsigned
char
>
,
0
/*matchL1_gpu<signed char>*/
,
matchL1_gpu
<
unsigned
short
>
,
matchL1_gpu
<
short
>
,
matchL1_gpu
<
unsigned
char
>
,
0
/*matchL1_gpu<signed char>*/
,
matchL1_gpu
<
unsigned
short
>
,
matchL1_gpu
<
short
>
,
matchL1_gpu
<
int
>
,
matchL1_gpu
<
float
>
},
{
0
/*matchL2_gpu<unsigned char>*/
,
0
/*matchL2_gpu<signed char>*/
,
0
/*matchL2_gpu<unsigned short>*/
,
0
/*matchL2_gpu<short>*/
,
0
/*matchL2_gpu<unsigned char>*/
,
0
/*matchL2_gpu<signed char>*/
,
0
/*matchL2_gpu<unsigned short>*/
,
0
/*matchL2_gpu<short>*/
,
0
/*matchL2_gpu<int>*/
,
matchL2_gpu
<
float
>
},
{
matchHamming_gpu
<
unsigned
char
>
,
0
/*matchHamming_gpu<signed char>*/
,
matchHamming_gpu
<
unsigned
short
>
,
0
/*matchHamming_gpu<short>*/
,
matchHamming_gpu
<
unsigned
char
>
,
0
/*matchHamming_gpu<signed char>*/
,
matchHamming_gpu
<
unsigned
short
>
,
0
/*matchHamming_gpu<short>*/
,
matchHamming_gpu
<
int
>
,
0
/*matchHamming_gpu<float>*/
}
};
...
...
@@ -334,7 +334,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::makeGpuCollection(GpuMat& trainCollect
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
matchCollection
(
const
GpuMat
&
query
,
const
GpuMat
&
trainCollection
,
GpuMat
&
trainIdx
,
GpuMat
&
imgIdx
,
GpuMat
&
distance
,
GpuMat
&
trainIdx
,
GpuMat
&
imgIdx
,
GpuMat
&
distance
,
const
GpuMat
&
masks
,
Stream
&
stream
)
{
if
(
query
.
empty
()
||
trainCollection
.
empty
())
...
...
@@ -342,8 +342,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c
using
namespace
::
cv
::
gpu
::
device
::
bf_match
;
typedef
void
(
*
caller_t
)(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
typedef
void
(
*
caller_t
)(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
int
cc
,
cudaStream_t
stream
);
static
const
caller_t
callers
[
3
][
6
]
=
...
...
@@ -453,25 +453,25 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co
using
namespace
::
cv
::
gpu
::
device
::
bf_knnmatch
;
typedef
void
(
*
caller_t
)(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
int
k
,
const
DevMem2Db
&
mask
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
distance
,
const
DevMem2Df
&
allDist
,
typedef
void
(
*
caller_t
)(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
int
k
,
const
DevMem2Db
&
mask
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
distance
,
const
DevMem2Df
&
allDist
,
int
cc
,
cudaStream_t
stream
);
static
const
caller_t
callers
[
3
][
6
]
=
{
{
matchL1_gpu
<
unsigned
char
>
,
0
/*matchL1_gpu<signed char>*/
,
matchL1_gpu
<
unsigned
short
>
,
matchL1_gpu
<
short
>
,
matchL1_gpu
<
unsigned
char
>
,
0
/*matchL1_gpu<signed char>*/
,
matchL1_gpu
<
unsigned
short
>
,
matchL1_gpu
<
short
>
,
matchL1_gpu
<
int
>
,
matchL1_gpu
<
float
>
},
{
0
/*matchL2_gpu<unsigned char>*/
,
0
/*matchL2_gpu<signed char>*/
,
0
/*matchL2_gpu<unsigned short>*/
,
0
/*matchL2_gpu<short>*/
,
0
/*matchL2_gpu<unsigned char>*/
,
0
/*matchL2_gpu<signed char>*/
,
0
/*matchL2_gpu<unsigned short>*/
,
0
/*matchL2_gpu<short>*/
,
0
/*matchL2_gpu<int>*/
,
matchL2_gpu
<
float
>
},
{
matchHamming_gpu
<
unsigned
char
>
,
0
/*matchHamming_gpu<signed char>*/
,
matchHamming_gpu
<
unsigned
short
>
,
0
/*matchHamming_gpu<short>*/
,
matchHamming_gpu
<
unsigned
char
>
,
0
/*matchHamming_gpu<signed char>*/
,
matchHamming_gpu
<
unsigned
short
>
,
0
/*matchHamming_gpu<short>*/
,
matchHamming_gpu
<
int
>
,
0
/*matchHamming_gpu<float>*/
}
};
...
...
@@ -501,7 +501,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co
caller_t
func
=
callers
[
distType
][
query
.
depth
()];
CV_Assert
(
func
!=
0
);
DeviceInfo
info
;
int
cc
=
info
.
majorVersion
()
*
10
+
info
.
minorVersion
();
...
...
@@ -520,7 +520,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainId
knnMatchConvert
(
trainIdxCPU
,
distanceCPU
,
matches
,
compactResult
);
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
knnMatchConvert
(
const
Mat
&
trainIdx
,
const
Mat
&
distance
,
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
knnMatchConvert
(
const
Mat
&
trainIdx
,
const
Mat
&
distance
,
vector
<
vector
<
DMatch
>
>&
matches
,
bool
compactResult
)
{
if
(
trainIdx
.
empty
()
||
distance
.
empty
())
...
...
@@ -536,7 +536,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, c
matches
.
clear
();
matches
.
reserve
(
nQuery
);
const
int
*
trainIdx_ptr
=
trainIdx
.
ptr
<
int
>
();
const
float
*
distance_ptr
=
distance
.
ptr
<
float
>
();
...
...
@@ -582,25 +582,25 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer
using
namespace
::
cv
::
gpu
::
device
::
bf_knnmatch
;
typedef
void
(
*
caller_t
)(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
imgIdx
,
const
DevMem2Db
&
distance
,
typedef
void
(
*
caller_t
)(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
trains
,
const
DevMem2D_
<
PtrStepb
>&
masks
,
const
DevMem2Db
&
trainIdx
,
const
DevMem2Db
&
imgIdx
,
const
DevMem2Db
&
distance
,
int
cc
,
cudaStream_t
stream
);
static
const
caller_t
callers
[
3
][
6
]
=
{
{
match2L1_gpu
<
unsigned
char
>
,
0
/*match2L1_gpu<signed char>*/
,
match2L1_gpu
<
unsigned
short
>
,
match2L1_gpu
<
short
>
,
match2L1_gpu
<
unsigned
char
>
,
0
/*match2L1_gpu<signed char>*/
,
match2L1_gpu
<
unsigned
short
>
,
match2L1_gpu
<
short
>
,
match2L1_gpu
<
int
>
,
match2L1_gpu
<
float
>
},
{
0
/*match2L2_gpu<unsigned char>*/
,
0
/*match2L2_gpu<signed char>*/
,
0
/*match2L2_gpu<unsigned short>*/
,
0
/*match2L2_gpu<short>*/
,
0
/*match2L2_gpu<unsigned char>*/
,
0
/*match2L2_gpu<signed char>*/
,
0
/*match2L2_gpu<unsigned short>*/
,
0
/*match2L2_gpu<short>*/
,
0
/*match2L2_gpu<int>*/
,
match2L2_gpu
<
float
>
},
{
match2Hamming_gpu
<
unsigned
char
>
,
0
/*match2Hamming_gpu<signed char>*/
,
match2Hamming_gpu
<
unsigned
short
>
,
0
/*match2Hamming_gpu<short>*/
,
match2Hamming_gpu
<
unsigned
char
>
,
0
/*match2Hamming_gpu<signed char>*/
,
match2Hamming_gpu
<
unsigned
short
>
,
0
/*match2Hamming_gpu<short>*/
,
match2Hamming_gpu
<
int
>
,
0
/*match2Hamming_gpu<float>*/
}
};
...
...
@@ -620,7 +620,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer
caller_t
func
=
callers
[
distType
][
query
.
depth
()];
CV_Assert
(
func
!=
0
);
DeviceInfo
info
;
int
cc
=
info
.
majorVersion
()
*
10
+
info
.
minorVersion
();
...
...
@@ -654,7 +654,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Convert(const Mat& trainIdx,
matches
.
clear
();
matches
.
reserve
(
nQuery
);
const
int
*
trainIdx_ptr
=
trainIdx
.
ptr
<
int
>
();
const
int
*
imgIdx_ptr
=
imgIdx
.
ptr
<
int
>
();
const
float
*
distance_ptr
=
distance
.
ptr
<
float
>
();
...
...
@@ -755,33 +755,33 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& query, vector<
// RadiusMatch
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchSingle
(
const
GpuMat
&
query
,
const
GpuMat
&
train
,
GpuMat
&
trainIdx
,
GpuMat
&
distance
,
GpuMat
&
nMatches
,
float
maxDistance
,
GpuMat
&
trainIdx
,
GpuMat
&
distance
,
GpuMat
&
nMatches
,
float
maxDistance
,
const
GpuMat
&
mask
,
Stream
&
stream
)
{
if
(
query
.
empty
()
||
train
.
empty
())
return
;
using
namespace
::
cv
::
gpu
::
device
::
bf_radius_match
;
using
namespace
cv
::
gpu
::
device
::
bf_radius_match
;
typedef
void
(
*
caller_t
)(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
float
maxDistance
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
typedef
void
(
*
caller_t
)(
const
DevMem2Db
&
query
,
const
DevMem2Db
&
train
,
float
maxDistance
,
const
DevMem2Db
&
mask
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
int
cc
,
cudaStream_t
stream
);
static
const
caller_t
callers
[
3
][
6
]
=
{
{
matchL1_gpu
<
unsigned
char
>
,
0
/*matchL1_gpu<signed char>*/
,
matchL1_gpu
<
unsigned
short
>
,
matchL1_gpu
<
short
>
,
matchL1_gpu
<
unsigned
char
>
,
0
/*matchL1_gpu<signed char>*/
,
matchL1_gpu
<
unsigned
short
>
,
matchL1_gpu
<
short
>
,
matchL1_gpu
<
int
>
,
matchL1_gpu
<
float
>
},
{
0
/*matchL2_gpu<unsigned char>*/
,
0
/*matchL2_gpu<signed char>*/
,
0
/*matchL2_gpu<unsigned short>*/
,
0
/*matchL2_gpu<short>*/
,
0
/*matchL2_gpu<unsigned char>*/
,
0
/*matchL2_gpu<signed char>*/
,
0
/*matchL2_gpu<unsigned short>*/
,
0
/*matchL2_gpu<short>*/
,
0
/*matchL2_gpu<int>*/
,
matchL2_gpu
<
float
>
},
{
matchHamming_gpu
<
unsigned
char
>
,
0
/*matchHamming_gpu<signed char>*/
,
matchHamming_gpu
<
unsigned
short
>
,
0
/*matchHamming_gpu<short>*/
,
matchHamming_gpu
<
unsigned
char
>
,
0
/*matchHamming_gpu<signed char>*/
,
matchHamming_gpu
<
unsigned
short
>
,
0
/*matchHamming_gpu<short>*/
,
matchHamming_gpu
<
int
>
,
0
/*matchHamming_gpu<float>*/
}
};
...
...
@@ -789,7 +789,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
DeviceInfo
info
;
int
cc
=
info
.
majorVersion
()
*
10
+
info
.
minorVersion
();
CV_Assert
(
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
&&
info
.
supports
(
GLOBAL_ATOMICS
));
if
(
!
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
||
!
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
))
CV_Error
(
CV_StsNotImplemented
,
"The device doesn't support global atomics"
);
const
int
nQuery
=
query
.
rows
;
const
int
nTrain
=
train
.
rows
;
...
...
@@ -804,19 +805,19 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
ensureSizeIsEnough
(
nQuery
,
std
::
max
((
nTrain
/
100
),
10
),
CV_32SC1
,
trainIdx
);
ensureSizeIsEnough
(
nQuery
,
std
::
max
((
nTrain
/
100
),
10
),
CV_32FC1
,
distance
);
}
if
(
stream
)
stream
.
enqueueMemSet
(
nMatches
,
Scalar
::
all
(
0
));
else
nMatches
.
setTo
(
Scalar
::
all
(
0
));
caller_t
func
=
callers
[
distType
][
query
.
depth
()];
CV_Assert
(
func
!=
0
);
CV_Assert
(
func
!=
0
);
func
(
query
,
train
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
cc
,
StreamAccessor
::
getStream
(
stream
));
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchDownload
(
const
GpuMat
&
trainIdx
,
const
GpuMat
&
distance
,
const
GpuMat
&
nMatches
,
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchDownload
(
const
GpuMat
&
trainIdx
,
const
GpuMat
&
distance
,
const
GpuMat
&
nMatches
,
vector
<
vector
<
DMatch
>
>&
matches
,
bool
compactResult
)
{
if
(
trainIdx
.
empty
()
||
distance
.
empty
()
||
nMatches
.
empty
())
...
...
@@ -886,33 +887,33 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& query, const
radiusMatchDownload
(
trainIdx
,
distance
,
nMatches
,
matches
,
compactResult
);
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchCollection
(
const
GpuMat
&
query
,
GpuMat
&
trainIdx
,
GpuMat
&
imgIdx
,
GpuMat
&
distance
,
GpuMat
&
nMatches
,
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchCollection
(
const
GpuMat
&
query
,
GpuMat
&
trainIdx
,
GpuMat
&
imgIdx
,
GpuMat
&
distance
,
GpuMat
&
nMatches
,
float
maxDistance
,
const
vector
<
GpuMat
>&
masks
,
Stream
&
stream
)
{
if
(
query
.
empty
()
||
empty
())
return
;
using
namespace
::
cv
::
gpu
::
device
::
bf_radius_match
;
using
namespace
cv
::
gpu
::
device
::
bf_radius_match
;
typedef
void
(
*
caller_t
)(
const
DevMem2Db
&
query
,
const
DevMem2Db
*
trains
,
int
n
,
float
maxDistance
,
const
DevMem2Db
*
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
typedef
void
(
*
caller_t
)(
const
DevMem2Db
&
query
,
const
DevMem2Db
*
trains
,
int
n
,
float
maxDistance
,
const
DevMem2Db
*
masks
,
const
DevMem2Di
&
trainIdx
,
const
DevMem2Di
&
imgIdx
,
const
DevMem2Df
&
distance
,
const
DevMem2D_
<
unsigned
int
>&
nMatches
,
int
cc
,
cudaStream_t
stream
);
static
const
caller_t
callers
[
3
][
6
]
=
{
{
matchL1_gpu
<
unsigned
char
>
,
0
/*matchL1_gpu<signed char>*/
,
matchL1_gpu
<
unsigned
short
>
,
matchL1_gpu
<
short
>
,
matchL1_gpu
<
unsigned
char
>
,
0
/*matchL1_gpu<signed char>*/
,
matchL1_gpu
<
unsigned
short
>
,
matchL1_gpu
<
short
>
,
matchL1_gpu
<
int
>
,
matchL1_gpu
<
float
>
},
{
0
/*matchL2_gpu<unsigned char>*/
,
0
/*matchL2_gpu<signed char>*/
,
0
/*matchL2_gpu<unsigned short>*/
,
0
/*matchL2_gpu<short>*/
,
0
/*matchL2_gpu<unsigned char>*/
,
0
/*matchL2_gpu<signed char>*/
,
0
/*matchL2_gpu<unsigned short>*/
,
0
/*matchL2_gpu<short>*/
,
0
/*matchL2_gpu<int>*/
,
matchL2_gpu
<
float
>
},
{
matchHamming_gpu
<
unsigned
char
>
,
0
/*matchHamming_gpu<signed char>*/
,
matchHamming_gpu
<
unsigned
short
>
,
0
/*matchHamming_gpu<short>*/
,
matchHamming_gpu
<
unsigned
char
>
,
0
/*matchHamming_gpu<signed char>*/
,
matchHamming_gpu
<
unsigned
short
>
,
0
/*matchHamming_gpu<short>*/
,
matchHamming_gpu
<
int
>
,
0
/*matchHamming_gpu<float>*/
}
};
...
...
@@ -920,7 +921,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
DeviceInfo
info
;
int
cc
=
info
.
majorVersion
()
*
10
+
info
.
minorVersion
();
CV_Assert
(
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
&&
info
.
supports
(
GLOBAL_ATOMICS
));
if
(
!
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
||
!
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
))
CV_Error
(
CV_StsNotImplemented
,
"The device doesn't support global atomics"
);
const
int
nQuery
=
query
.
rows
;
...
...
@@ -934,7 +936,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
ensureSizeIsEnough
(
nQuery
,
std
::
max
((
nQuery
/
100
),
10
),
CV_32SC1
,
imgIdx
);
ensureSizeIsEnough
(
nQuery
,
std
::
max
((
nQuery
/
100
),
10
),
CV_32FC1
,
distance
);
}
if
(
stream
)
stream
.
enqueueMemSet
(
nMatches
,
Scalar
::
all
(
0
));
else
...
...
@@ -946,7 +948,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
vector
<
DevMem2Db
>
trains_
(
trainDescCollection
.
begin
(),
trainDescCollection
.
end
());
vector
<
DevMem2Db
>
masks_
(
masks
.
begin
(),
masks
.
end
());
func
(
query
,
&
trains_
[
0
],
static_cast
<
int
>
(
trains_
.
size
()),
maxDistance
,
masks_
.
size
()
==
0
?
0
:
&
masks_
[
0
],
func
(
query
,
&
trains_
[
0
],
static_cast
<
int
>
(
trains_
.
size
()),
maxDistance
,
masks_
.
size
()
==
0
?
0
:
&
masks_
[
0
],
trainIdx
,
imgIdx
,
distance
,
nMatches
,
cc
,
StreamAccessor
::
getStream
(
stream
));
}
...
...
modules/gpu/src/cuda/surf.cu
View file @
bd13e947
...
...
@@ -40,7 +40,7 @@
//
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
//
// The original code was written by Paul Furgale and Chi Hay Tong
// The original code was written by Paul Furgale and Chi Hay Tong
// and later optimized and prepared for integration into OpenCV by Itseez.
//
//M*/
...
...
@@ -52,9 +52,9 @@
#include "opencv2/gpu/device/functional.hpp"
#include "opencv2/gpu/device/filters.hpp"
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
namespace surf
namespace surf
{
////////////////////////////////////////////////////////////////////////
// Global parameters
...
...
@@ -123,7 +123,7 @@ namespace cv { namespace gpu { namespace device
#endif
float ratio = (float)newSize / oldSize;
real_t d = 0;
#pragma unroll
...
...
@@ -225,7 +225,7 @@ namespace cv { namespace gpu { namespace device
static __device__ bool check(int sum_i, int sum_j, int size)
{
float ratio = (float)size / 9.0f;
float d = 0;
int dx1 = __float2int_rn(ratio * c_DM[0]);
...
...
@@ -423,12 +423,12 @@ namespace cv { namespace gpu { namespace device
if (::fabs(x[0]) <= 1.f && ::fabs(x[1]) <= 1.f && ::fabs(x[2]) <= 1.f)
{
// if the step is within the interpolation region, perform it
const int size = calcSize(c_octave, maxPos.z);
const int sum_i = (maxPos.y - ((size >> 1) >> c_octave)) << c_octave;
const int sum_j = (maxPos.x - ((size >> 1) >> c_octave)) << c_octave;
const float center_i = sum_i + (float)(size - 1) / 2;
const float center_j = sum_j + (float)(size - 1) / 2;
...
...
@@ -471,8 +471,8 @@ namespace cv { namespace gpu { namespace device
#endif
}
void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter,
float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian,
void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter,
float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian,
unsigned int* featureCounter)
{
dim3 threads;
...
...
@@ -509,7 +509,8 @@ namespace cv { namespace gpu { namespace device
__shared__ float s_Y[128];
__shared__ float s_angle[128];
__shared__ float s_sum[32 * 4];
__shared__ float s_sumx[32 * 4];
__shared__ float s_sumy[32 * 4];
/* The sampling intervals and wavelet sized for selecting an orientation
and building the keypoint descriptor are defined relative to 's' */
...
...
@@ -522,126 +523,109 @@ namespace cv { namespace gpu { namespace device
const int grad_wav_size = 2 * __float2int_rn(2.0f * s);
// check when grad_wav_size is too big
if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
{
// Calc X, Y, angle and store it to shared memory
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
float X = 0.0f, Y = 0.0f, angle = 0.0f;
if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size)
return;
if (tid < ORI_SAMPLES)
{
const float margin = (float)(grad_wav_size - 1) / 2.0f;
const int x = __float2int_rn(featureX[blockIdx.x] + c_aptX[tid] * s - margin);
const int y = __float2int_rn(featureY[blockIdx.x] + c_aptY[tid] * s - margin);
// Calc X, Y, angle and store it to shared memory
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
if ((unsigned)y < (unsigned)((c_img_rows + 1) - grad_wav_size) && (unsigned)x < (unsigned)((c_img_cols + 1) - grad_wav_size))
{
X = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NX, 4, grad_wav_size, y, x);
Y = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NY, 4, grad_wav_size, y, x);
angle = atan2f(Y, X);
if (angle < 0)
angle += 2.0f * CV_PI_F;
angle *= 180.0f / CV_PI_F;
}
}
s_X[tid] = X;
s_Y[tid] = Y;
s_angle[tid] = angle;
__syncthreads();
float X = 0.0f, Y = 0.0f, angle = 0.0f;
float bestx = 0, besty = 0, best_mod = 0;
if (tid < ORI_SAMPLES)
{
const float margin = (float)(grad_wav_size - 1) / 2.0f;
const int x = __float2int_rn(featureX[blockIdx.x] + c_aptX[tid] * s - margin);
const int y = __float2int_rn(featureY[blockIdx.x] + c_aptY[tid] * s - margin);
#pragma unroll
for (int i = 0; i < 18; ++i
)
if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size &&
x >= 0 && x < (c_img_cols + 1) - grad_wav_size
)
{
const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC;
X = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NX, 4, grad_wav_size, y, x);
Y = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NY, 4, grad_wav_size, y, x);
float sumx = 0.0f, sumy = 0.0f;
int d = ::abs(__float2int_rn(s_angle[threadIdx.x]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx = s_X[threadIdx.x];
sumy = s_Y[threadIdx.x];
}
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[threadIdx.x + 32];
sumy += s_Y[threadIdx.x + 32];
}
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[threadIdx.x + 64];
sumy += s_Y[threadIdx.x + 64];
}
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[threadIdx.x + 96];
sumy += s_Y[threadIdx.x + 96];
}
float* s_sum_row = s_sum + threadIdx.y * 32;
angle = atan2f(Y, X);
if (angle < 0)
angle += 2.0f * CV_PI_F;
angle *= 180.0f / CV_PI_F;
}
}
s_X[tid] = X;
s_Y[tid] = Y;
s_angle[tid] = angle;
__syncthreads();
device::reduce<32>(s_sum_row, sumx, threadIdx.x, plus<volatile float>());
device::reduce<32>(s_sum_row, sumy, threadIdx.x, plus<volatile float>());
float bestx = 0, besty = 0, best_mod = 0;
const float temp_mod = sumx * sumx + sumy * sumy;
if (temp_mod > best_mod)
{
best_mod = temp_mod;
bestx = sumx;
besty = sumy;
}
#pragma unroll
for (int i = 0; i < 18; ++i)
{
const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC;
__syncthreads();
float sumx = 0.0f, sumy = 0.0f;
int d = ::abs(__float2int_rn(s_angle[threadIdx.x]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx = s_X[threadIdx.x];
sumy = s_Y[threadIdx.x];
}
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[threadIdx.x + 32];
sumy += s_Y[threadIdx.x + 32];
}
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[threadIdx.x + 64];
sumy += s_Y[threadIdx.x + 64];
}
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[threadIdx.x + 96];
sumy += s_Y[threadIdx.x + 96];
}
if (threadIdx.x == 0)
device::reduce<32>(s_sumx + threadIdx.y * 32, sumx, threadIdx.x, plus<volatile float>());
device::reduce<32>(s_sumy + threadIdx.y * 32, sumy, threadIdx.x, plus<volatile float>());
const float temp_mod = sumx * sumx + sumy * sumy;
if (temp_mod > best_mod)
{
s_X[threadIdx.y] = bestx
;
s_Y[threadIdx.y] = besty
;
s_angle[threadIdx.y] = best_mod
;
best_mod = temp_mod
;
bestx = sumx
;
besty = sumy
;
}
__syncthreads();
}
if (threadIdx.x < 2 && threadIdx.y == 0)
{
volatile float* v_x = s_X;
volatile float* v_y = s_Y;
volatile float* v_mod = s_angle;
if (threadIdx.x == 0)
{
s_X[threadIdx.y] = bestx;
s_Y[threadIdx.y] = besty;
s_angle[threadIdx.y] = best_mod;
}
__syncthreads();
bestx = v_x[threadIdx.x];
besty = v_y[threadIdx.x];
best_mod = v_mod[threadIdx.x]
;
if (threadIdx.x == 0 && threadIdx.y == 0)
{
int bestIdx = 0
;
float temp_mod = v_mod[threadIdx.x + 2];
if (temp_mod > best_mod)
{
v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 2];
v_y[threadIdx.x] = besty = v_y[threadIdx.x + 2];
v_mod[threadIdx.x] = best_mod = temp_mod;
}
temp_mod = v_mod[threadIdx.x + 1];
if (temp_mod > best_mod)
{
v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 1];
v_y[threadIdx.x] = besty = v_y[threadIdx.x + 1];
}
}
if (s_angle[1] > s_angle[bestIdx])
bestIdx = 1;
if (s_angle[2] > s_angle[bestIdx])
bestIdx = 2;
if (s_angle[3] > s_angle[bestIdx])
bestIdx = 3;
if (threadIdx.x == 0 && threadIdx.y == 0 && best_mod != 0)
{
float kp_dir = atan2f(besty, bestx);
if (kp_dir < 0)
kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F;
float kp_dir = atan2f(s_Y[bestIdx], s_X[bestIdx]);
if (kp_dir < 0)
kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F;
featureDir[blockIdx.x] = kp_dir;
}
featureDir[blockIdx.x] = kp_dir;
}
}
...
...
@@ -649,7 +633,7 @@ namespace cv { namespace gpu { namespace device
#undef ORI_WIN
#undef ORI_SAMPLES
void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures)
void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures)
{
dim3 threads;
threads.x = 32;
...
...
@@ -669,27 +653,27 @@ namespace cv { namespace gpu { namespace device
#define PATCH_SZ 20
__constant__ float c_DW[PATCH_SZ * PATCH_SZ] =
__constant__ float c_DW[PATCH_SZ * PATCH_SZ] =
{
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f,
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f,
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f,
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f,
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f,
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f,
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f,
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f,
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f,
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f,
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f,
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f,
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f,
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f,
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f,
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f,
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f,
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f,
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f,
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f,
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f,
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f,
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f,
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f,
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f,
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f,
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f,
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f,
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f,
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f,
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f
};
...
...
@@ -697,7 +681,7 @@ namespace cv { namespace gpu { namespace device
{
typedef uchar elem_type;
__device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) :
__device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) :
centerX(centerX_), centerY(centerY_), win_offset(win_offset_), cos_dir(cos_dir_), sin_dir(sin_dir_)
{
}
...
...
@@ -710,14 +694,14 @@ namespace cv { namespace gpu { namespace device
return tex2D(imgTex, pixel_x, pixel_y);
}
float centerX;
float centerX;
float centerY;
float win_offset;
float cos_dir;
float win_offset;
float cos_dir;
float sin_dir;
};
__device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25],
__device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25],
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir)
{
__shared__ float s_PATCH[6][6];
...
...
@@ -739,7 +723,7 @@ namespace cv { namespace gpu { namespace device
sincosf(descriptor_dir, &sin_dir, &cos_dir);
/* Nearest neighbour version (faster) */
const float win_offset = -(float)(win_size - 1) / 2;
const float win_offset = -(float)(win_size - 1) / 2;
// Compute sampling points
// since grids are 2D, need to compute xBlock and yBlock indices
...
...
@@ -966,11 +950,11 @@ namespace cv { namespace gpu { namespace device
descriptor_base[threadIdx.x] = lookup / len;
}
void compute_descriptors_gpu(const DevMem2Df& descriptors,
void compute_descriptors_gpu(const DevMem2Df& descriptors,
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures)
{
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
if (descriptors.cols == 64)
{
compute_descriptors64<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir);
...
...
@@ -985,12 +969,12 @@ namespace cv { namespace gpu { namespace device
}
else
{
compute_descriptors128<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir);
compute_descriptors128<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
normalize_descriptors<128><<<dim3(nFeatures, 1, 1), dim3(128, 1, 1)>>>(descriptors);
normalize_descriptors<128><<<dim3(nFeatures, 1, 1), dim3(128, 1, 1)>>>(descriptors);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
...
...
modules/gpu/src/fast.cpp
View file @
bd13e947
...
...
@@ -59,7 +59,7 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat&) { throw_nogpu(); return 0; }
#else
/* !defined (HAVE_CUDA) */
cv
::
gpu
::
FAST_GPU
::
FAST_GPU
(
int
_threshold
,
bool
_nonmaxSupression
,
double
_keypointsRatio
)
:
cv
::
gpu
::
FAST_GPU
::
FAST_GPU
(
int
_threshold
,
bool
_nonmaxSupression
,
double
_keypointsRatio
)
:
nonmaxSupression
(
_nonmaxSupression
),
threshold
(
_threshold
),
keypointsRatio
(
_keypointsRatio
),
count_
(
0
)
{
}
...
...
@@ -109,9 +109,9 @@ void cv::gpu::FAST_GPU::operator ()(const GpuMat& img, const GpuMat& mask, GpuMa
keypoints
.
cols
=
getKeyPoints
(
keypoints
);
}
namespace
cv
{
namespace
gpu
{
namespace
device
namespace
cv
{
namespace
gpu
{
namespace
device
{
namespace
fast
namespace
fast
{
int
calcKeypoints_gpu
(
DevMem2Db
img
,
DevMem2Db
mask
,
short2
*
kpLoc
,
int
maxKeypoints
,
DevMem2Di
score
,
int
threshold
);
int
nonmaxSupression_gpu
(
const
short2
*
kpLoc
,
int
count
,
DevMem2Di
score
,
short2
*
loc
,
float
*
response
);
...
...
@@ -124,7 +124,9 @@ int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& ma
CV_Assert
(
img
.
type
()
==
CV_8UC1
);
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8UC1
&&
mask
.
size
()
==
img
.
size
()));
CV_Assert
(
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
&&
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
));
if
(
!
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
||
!
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
))
CV_Error
(
CV_StsNotImplemented
,
"The device doesn't support global atomics"
);
int
maxKeypoints
=
static_cast
<
int
>
(
keypointsRatio
*
img
.
size
().
area
());
...
...
@@ -146,7 +148,8 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
{
using
namespace
cv
::
gpu
::
device
::
fast
;
CV_Assert
(
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
&&
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
));
if
(
!
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
||
!
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
))
CV_Error
(
CV_StsNotImplemented
,
"The device doesn't support global atomics"
);
if
(
count_
==
0
)
return
0
;
...
...
@@ -160,7 +163,7 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
kpLoc_
.
colRange
(
0
,
count_
).
copyTo
(
locRow
);
keypoints
.
row
(
1
).
setTo
(
Scalar
::
all
(
0
));
return
count_
;
return
count_
;
}
void
cv
::
gpu
::
FAST_GPU
::
release
()
...
...
modules/gpu/src/surf.cpp
View file @
bd13e947
...
...
@@ -120,7 +120,9 @@ namespace
CV_Assert
(
!
img
.
empty
()
&&
img
.
type
()
==
CV_8UC1
);
CV_Assert
(
mask
.
empty
()
||
(
mask
.
size
()
==
img
.
size
()
&&
mask
.
type
()
==
CV_8UC1
));
CV_Assert
(
surf_
.
nOctaves
>
0
&&
surf_
.
nOctaveLayers
>
0
);
CV_Assert
(
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
&&
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
));
if
(
!
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
||
!
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
))
CV_Error
(
CV_StsNotImplemented
,
"The device doesn't support global atomics"
);
const
int
min_size
=
calcSize
(
surf_
.
nOctaves
-
1
,
0
);
CV_Assert
(
img_rows
-
min_size
>=
0
);
...
...
@@ -184,8 +186,8 @@ namespace
{
icvInterpolateKeypoint_gpu
(
surf_
.
det
,
surf_
.
maxPosBuffer
.
ptr
<
int4
>
(),
maxCounter
,
keypoints
.
ptr
<
float
>
(
SURF_GPU
::
X_ROW
),
keypoints
.
ptr
<
float
>
(
SURF_GPU
::
Y_ROW
),
keypoints
.
ptr
<
int
>
(
SURF_GPU
::
LAPLACIAN_ROW
),
keypoints
.
ptr
<
int
>
(
SURF_GPU
::
OCTAVE_ROW
),
keypoints
.
ptr
<
float
>
(
SURF_GPU
::
SIZE_ROW
),
keypoints
.
ptr
<
float
>
(
SURF_GPU
::
HESSIAN_ROW
),
keypoints
.
ptr
<
int
>
(
SURF_GPU
::
LAPLACIAN_ROW
),
keypoints
.
ptr
<
int
>
(
SURF_GPU
::
OCTAVE_ROW
),
keypoints
.
ptr
<
float
>
(
SURF_GPU
::
SIZE_ROW
),
keypoints
.
ptr
<
float
>
(
SURF_GPU
::
HESSIAN_ROW
),
counters
.
ptr
<
unsigned
int
>
());
}
}
...
...
@@ -306,7 +308,7 @@ void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, vector<Key
Mat
keypointsCPU
(
keypointsGPU
);
keypoints
.
resize
(
nFeatures
);
float
*
kp_x
=
keypointsCPU
.
ptr
<
float
>
(
SURF_GPU
::
X_ROW
);
float
*
kp_y
=
keypointsCPU
.
ptr
<
float
>
(
SURF_GPU
::
Y_ROW
);
int
*
kp_laplacian
=
keypointsCPU
.
ptr
<
int
>
(
SURF_GPU
::
LAPLACIAN_ROW
);
...
...
modules/gpu/test/test_features2d.cpp
View file @
bd13e947
...
...
@@ -108,6 +108,25 @@ testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char
#define ASSERT_KEYPOINTS_EQ(gold, actual) EXPECT_PRED_FORMAT2(assertKeyPointsEquals, gold, actual);
// Counts how many keypoints in `gold` have an equal counterpart at the same
// position in `actual` once both sequences are sorted with KeyPointLess.
//
// Side effect: both input vectors are sorted in place.
//
// Only the first min(gold.size(), actual.size()) positions are compared, so a
// size mismatch lowers the count instead of reading past the end of the
// shorter vector. Callers that require equal sizes should still check that
// separately.
//
// Returns the number of index-aligned pairs for which keyPointsEquals() holds.
int getMatchedPointsCount(std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual)
{
    std::sort(actual.begin(), actual.end(), KeyPointLess());
    std::sort(gold.begin(), gold.end(), KeyPointLess());

    // Never index beyond the shorter of the two sequences.
    const size_t comparableCount = std::min(gold.size(), actual.size());

    int validCount = 0;

    for (size_t i = 0; i < comparableCount; ++i)
    {
        const cv::KeyPoint& p1 = gold[i];
        const cv::KeyPoint& p2 = actual[i];

        if (keyPointsEquals(p1, p2))
            ++validCount;
    }

    return validCount;
}
int
getMatchedPointsCount
(
const
std
::
vector
<
cv
::
KeyPoint
>&
keypoints1
,
const
std
::
vector
<
cv
::
KeyPoint
>&
keypoints2
,
const
std
::
vector
<
cv
::
DMatch
>&
matches
)
{
int
validCount
=
0
;
...
...
@@ -170,20 +189,39 @@ TEST_P(SURF, Detector)
surf
.
upright
=
upright
;
surf
.
keypointsRatio
=
0.05
f
;
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
surf
(
loadMat
(
image
),
cv
::
gpu
::
GpuMat
(),
keypoints
);
if
(
!
supportFeature
(
devInfo
,
cv
::
gpu
::
GLOBAL_ATOMICS
))
{
try
{
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
surf
(
loadMat
(
image
),
cv
::
gpu
::
GpuMat
(),
keypoints
);
}
catch
(
const
cv
::
Exception
&
e
)
{
ASSERT_EQ
(
CV_StsNotImplemented
,
e
.
code
);
}
}
else
{
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
surf
(
loadMat
(
image
),
cv
::
gpu
::
GpuMat
(),
keypoints
);
cv
::
SURF
surf_gold
;
surf_gold
.
hessianThreshold
=
hessianThreshold
;
surf_gold
.
nOctaves
=
nOctaves
;
surf_gold
.
nOctaveLayers
=
nOctaveLayers
;
surf_gold
.
extended
=
extended
;
surf_gold
.
upright
=
upright
;
cv
::
SURF
surf_gold
;
surf_gold
.
hessianThreshold
=
hessianThreshold
;
surf_gold
.
nOctaves
=
nOctaves
;
surf_gold
.
nOctaveLayers
=
nOctaveLayers
;
surf_gold
.
extended
=
extended
;
surf_gold
.
upright
=
upright
;
std
::
vector
<
cv
::
KeyPoint
>
keypoints_gold
;
surf_gold
(
image
,
cv
::
noArray
(),
keypoints_gold
);
std
::
vector
<
cv
::
KeyPoint
>
keypoints_gold
;
surf_gold
(
image
,
cv
::
noArray
(),
keypoints_gold
);
ASSERT_KEYPOINTS_EQ
(
keypoints_gold
,
keypoints
);
ASSERT_EQ
(
keypoints_gold
.
size
(),
keypoints
.
size
());
int
matchedCount
=
getMatchedPointsCount
(
keypoints_gold
,
keypoints
);
double
matchedRatio
=
static_cast
<
double
>
(
matchedCount
)
/
keypoints_gold
.
size
();
EXPECT_GT
(
matchedRatio
,
0.95
);
}
}
TEST_P
(
SURF
,
Detector_Masked
)
...
...
@@ -202,20 +240,39 @@ TEST_P(SURF, Detector_Masked)
surf
.
upright
=
upright
;
surf
.
keypointsRatio
=
0.05
f
;
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
surf
(
loadMat
(
image
),
loadMat
(
mask
),
keypoints
);
if
(
!
supportFeature
(
devInfo
,
cv
::
gpu
::
GLOBAL_ATOMICS
))
{
try
{
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
surf
(
loadMat
(
image
),
loadMat
(
mask
),
keypoints
);
}
catch
(
const
cv
::
Exception
&
e
)
{
ASSERT_EQ
(
CV_StsNotImplemented
,
e
.
code
);
}
}
else
{
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
surf
(
loadMat
(
image
),
loadMat
(
mask
),
keypoints
);
cv
::
SURF
surf_gold
;
surf_gold
.
hessianThreshold
=
hessianThreshold
;
surf_gold
.
nOctaves
=
nOctaves
;
surf_gold
.
nOctaveLayers
=
nOctaveLayers
;
surf_gold
.
extended
=
extended
;
surf_gold
.
upright
=
upright
;
cv
::
SURF
surf_gold
;
surf_gold
.
hessianThreshold
=
hessianThreshold
;
surf_gold
.
nOctaves
=
nOctaves
;
surf_gold
.
nOctaveLayers
=
nOctaveLayers
;
surf_gold
.
extended
=
extended
;
surf_gold
.
upright
=
upright
;
std
::
vector
<
cv
::
KeyPoint
>
keypoints_gold
;
surf_gold
(
image
,
mask
,
keypoints_gold
);
std
::
vector
<
cv
::
KeyPoint
>
keypoints_gold
;
surf_gold
(
image
,
mask
,
keypoints_gold
);
ASSERT_EQ
(
keypoints_gold
.
size
(),
keypoints
.
size
());
int
matchedCount
=
getMatchedPointsCount
(
keypoints_gold
,
keypoints
);
double
matchedRatio
=
static_cast
<
double
>
(
matchedCount
)
/
keypoints_gold
.
size
();
ASSERT_KEYPOINTS_EQ
(
keypoints_gold
,
keypoints
);
EXPECT_GT
(
matchedRatio
,
0.95
);
}
}
TEST_P
(
SURF
,
Descriptor
)
...
...
@@ -238,23 +295,39 @@ TEST_P(SURF, Descriptor)
surf_gold
.
extended
=
extended
;
surf_gold
.
upright
=
upright
;
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
surf_gold
(
image
,
cv
::
noArray
(),
keypoints
);
if
(
!
supportFeature
(
devInfo
,
cv
::
gpu
::
GLOBAL_ATOMICS
))
{
try
{
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
cv
::
gpu
::
GpuMat
descriptors
;
surf
(
loadMat
(
image
),
cv
::
gpu
::
GpuMat
(),
keypoints
,
descriptors
);
}
catch
(
const
cv
::
Exception
&
e
)
{
ASSERT_EQ
(
CV_StsNotImplemented
,
e
.
code
);
}
}
else
{
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
surf_gold
(
image
,
cv
::
noArray
(),
keypoints
);
cv
::
gpu
::
GpuMat
descriptors
;
surf
(
loadMat
(
image
),
cv
::
gpu
::
GpuMat
(),
keypoints
,
descriptors
,
true
);
cv
::
gpu
::
GpuMat
descriptors
;
surf
(
loadMat
(
image
),
cv
::
gpu
::
GpuMat
(),
keypoints
,
descriptors
,
true
);
cv
::
Mat
descriptors_gold
;
surf_gold
(
image
,
cv
::
noArray
(),
keypoints
,
descriptors_gold
,
true
);
cv
::
Mat
descriptors_gold
;
surf_gold
(
image
,
cv
::
noArray
(),
keypoints
,
descriptors_gold
,
true
);
cv
::
BFMatcher
matcher
(
cv
::
NORM_L2
);
std
::
vector
<
cv
::
DMatch
>
matches
;
matcher
.
match
(
descriptors_gold
,
cv
::
Mat
(
descriptors
),
matches
);
cv
::
BFMatcher
matcher
(
cv
::
NORM_L2
);
std
::
vector
<
cv
::
DMatch
>
matches
;
matcher
.
match
(
descriptors_gold
,
cv
::
Mat
(
descriptors
),
matches
);
int
matchedCount
=
getMatchedPointsCount
(
keypoints
,
keypoints
,
matches
);
double
matchedRatio
=
static_cast
<
double
>
(
matchedCount
)
/
keypoints
.
size
();
int
matchedCount
=
getMatchedPointsCount
(
keypoints
,
keypoints
,
matches
);
double
matchedRatio
=
static_cast
<
double
>
(
matchedCount
)
/
keypoints
.
size
();
EXPECT_GT
(
matchedRatio
,
0.35
);
EXPECT_GT
(
matchedRatio
,
0.35
);
}
}
INSTANTIATE_TEST_CASE_P
(
GPU_Features2D
,
SURF
,
testing
::
Combine
(
...
...
@@ -295,13 +368,28 @@ TEST_P(FAST, Accuracy)
cv
::
gpu
::
FAST_GPU
fast
(
threshold
);
fast
.
nonmaxSupression
=
nonmaxSupression
;
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
fast
(
loadMat
(
image
),
cv
::
gpu
::
GpuMat
(),
keypoints
);
if
(
!
supportFeature
(
devInfo
,
cv
::
gpu
::
GLOBAL_ATOMICS
))
{
try
{
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
fast
(
loadMat
(
image
),
cv
::
gpu
::
GpuMat
(),
keypoints
);
}
catch
(
const
cv
::
Exception
&
e
)
{
ASSERT_EQ
(
CV_StsNotImplemented
,
e
.
code
);
}
}
else
{
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
fast
(
loadMat
(
image
),
cv
::
gpu
::
GpuMat
(),
keypoints
);
std
::
vector
<
cv
::
KeyPoint
>
keypoints_gold
;
cv
::
FAST
(
image
,
keypoints_gold
,
threshold
,
nonmaxSupression
);
std
::
vector
<
cv
::
KeyPoint
>
keypoints_gold
;
cv
::
FAST
(
image
,
keypoints_gold
,
threshold
,
nonmaxSupression
);
ASSERT_KEYPOINTS_EQ
(
keypoints_gold
,
keypoints
);
ASSERT_KEYPOINTS_EQ
(
keypoints_gold
,
keypoints
);
}
}
INSTANTIATE_TEST_CASE_P
(
GPU_Features2D
,
FAST
,
testing
::
Combine
(
...
...
@@ -364,24 +452,40 @@ TEST_P(ORB, Accuracy)
cv
::
gpu
::
ORB_GPU
orb
(
nFeatures
,
scaleFactor
,
nLevels
,
edgeThreshold
,
firstLevel
,
WTA_K
,
scoreType
,
patchSize
);
orb
.
blurForDescriptor
=
blurForDescriptor
;
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
cv
::
gpu
::
GpuMat
descriptors
;
orb
(
loadMat
(
image
),
loadMat
(
mask
),
keypoints
,
descriptors
);
if
(
!
supportFeature
(
devInfo
,
cv
::
gpu
::
GLOBAL_ATOMICS
))
{
try
{
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
cv
::
gpu
::
GpuMat
descriptors
;
orb
(
loadMat
(
image
),
loadMat
(
mask
),
keypoints
,
descriptors
);
}
catch
(
const
cv
::
Exception
&
e
)
{
ASSERT_EQ
(
CV_StsNotImplemented
,
e
.
code
);
}
}
else
{
std
::
vector
<
cv
::
KeyPoint
>
keypoints
;
cv
::
gpu
::
GpuMat
descriptors
;
orb
(
loadMat
(
image
),
loadMat
(
mask
),
keypoints
,
descriptors
);
cv
::
ORB
orb_gold
(
nFeatures
,
scaleFactor
,
nLevels
,
edgeThreshold
,
firstLevel
,
WTA_K
,
scoreType
,
patchSize
);
cv
::
ORB
orb_gold
(
nFeatures
,
scaleFactor
,
nLevels
,
edgeThreshold
,
firstLevel
,
WTA_K
,
scoreType
,
patchSize
);
std
::
vector
<
cv
::
KeyPoint
>
keypoints_gold
;
cv
::
Mat
descriptors_gold
;
orb_gold
(
image
,
mask
,
keypoints_gold
,
descriptors_gold
);
std
::
vector
<
cv
::
KeyPoint
>
keypoints_gold
;
cv
::
Mat
descriptors_gold
;
orb_gold
(
image
,
mask
,
keypoints_gold
,
descriptors_gold
);
cv
::
BFMatcher
matcher
(
cv
::
NORM_HAMMING
);
std
::
vector
<
cv
::
DMatch
>
matches
;
matcher
.
match
(
descriptors_gold
,
cv
::
Mat
(
descriptors
),
matches
);
cv
::
BFMatcher
matcher
(
cv
::
NORM_HAMMING
);
std
::
vector
<
cv
::
DMatch
>
matches
;
matcher
.
match
(
descriptors_gold
,
cv
::
Mat
(
descriptors
),
matches
);
int
matchedCount
=
getMatchedPointsCount
(
keypoints_gold
,
keypoints
,
matches
);
double
matchedRatio
=
static_cast
<
double
>
(
matchedCount
)
/
keypoints
.
size
();
int
matchedCount
=
getMatchedPointsCount
(
keypoints_gold
,
keypoints
,
matches
);
double
matchedRatio
=
static_cast
<
double
>
(
matchedCount
)
/
keypoints
.
size
();
EXPECT_GT
(
matchedRatio
,
0.35
);
EXPECT_GT
(
matchedRatio
,
0.35
);
}
}
INSTANTIATE_TEST_CASE_P
(
GPU_Features2D
,
ORB
,
testing
::
Combine
(
...
...
@@ -713,25 +817,40 @@ TEST_P(BruteForceMatcher, RadiusMatch)
cv
::
gpu
::
BruteForceMatcher_GPU_base
matcher
(
distType
);
std
::
vector
<
std
::
vector
<
cv
::
DMatch
>
>
matches
;
matcher
.
radiusMatch
(
loadMat
(
query
),
loadMat
(
train
),
matches
,
radius
);
if
(
!
supportFeature
(
devInfo
,
cv
::
gpu
::
GLOBAL_ATOMICS
))
{
try
{
std
::
vector
<
std
::
vector
<
cv
::
DMatch
>
>
matches
;
matcher
.
radiusMatch
(
loadMat
(
query
),
loadMat
(
train
),
matches
,
radius
);
}
catch
(
const
cv
::
Exception
&
e
)
{
ASSERT_EQ
(
CV_StsNotImplemented
,
e
.
code
);
}
}
else
{
std
::
vector
<
std
::
vector
<
cv
::
DMatch
>
>
matches
;
matcher
.
radiusMatch
(
loadMat
(
query
),
loadMat
(
train
),
matches
,
radius
);
ASSERT_EQ
(
static_cast
<
size_t
>
(
queryDescCount
),
matches
.
size
());
ASSERT_EQ
(
static_cast
<
size_t
>
(
queryDescCount
),
matches
.
size
());
int
badCount
=
0
;
for
(
size_t
i
=
0
;
i
<
matches
.
size
();
i
++
)
{
if
((
int
)
matches
[
i
].
size
()
!=
1
)
badCount
++
;
else
int
badCount
=
0
;
for
(
size_t
i
=
0
;
i
<
matches
.
size
();
i
++
)
{
cv
::
DMatch
match
=
matches
[
i
][
0
];
if
((
match
.
queryIdx
!=
(
int
)
i
)
||
(
match
.
trainIdx
!=
(
int
)
i
*
countFactor
)
||
(
match
.
imgIdx
!=
0
))
if
((
int
)
matches
[
i
].
size
()
!=
1
)
badCount
++
;
else
{
cv
::
DMatch
match
=
matches
[
i
][
0
];
if
((
match
.
queryIdx
!=
(
int
)
i
)
||
(
match
.
trainIdx
!=
(
int
)
i
*
countFactor
)
||
(
match
.
imgIdx
!=
0
))
badCount
++
;
}
}
}
ASSERT_EQ
(
0
,
badCount
);
ASSERT_EQ
(
0
,
badCount
);
}
}
TEST_P
(
BruteForceMatcher
,
RadiusMatchAdd
)
...
...
@@ -756,42 +875,57 @@ TEST_P(BruteForceMatcher, RadiusMatchAdd)
masks
[
mi
].
col
(
di
*
countFactor
).
setTo
(
cv
::
Scalar
::
all
(
0
));
}
std
::
vector
<
std
::
vector
<
cv
::
DMatch
>
>
matches
;
matcher
.
radiusMatch
(
cv
::
gpu
::
GpuMat
(
query
),
matches
,
radius
,
masks
);
if
(
!
supportFeature
(
devInfo
,
cv
::
gpu
::
GLOBAL_ATOMICS
))
{
try
{
std
::
vector
<
std
::
vector
<
cv
::
DMatch
>
>
matches
;
matcher
.
radiusMatch
(
cv
::
gpu
::
GpuMat
(
query
),
matches
,
radius
,
masks
);
}
catch
(
const
cv
::
Exception
&
e
)
{
ASSERT_EQ
(
CV_StsNotImplemented
,
e
.
code
);
}
}
else
{
std
::
vector
<
std
::
vector
<
cv
::
DMatch
>
>
matches
;
matcher
.
radiusMatch
(
cv
::
gpu
::
GpuMat
(
query
),
matches
,
radius
,
masks
);
ASSERT_EQ
(
static_cast
<
size_t
>
(
queryDescCount
),
matches
.
size
());
ASSERT_EQ
(
static_cast
<
size_t
>
(
queryDescCount
),
matches
.
size
());
int
badCount
=
0
;
int
shift
=
matcher
.
isMaskSupported
()
?
1
:
0
;
int
needMatchCount
=
matcher
.
isMaskSupported
()
?
n
-
1
:
n
;
for
(
size_t
i
=
0
;
i
<
matches
.
size
();
i
++
)
{
if
((
int
)
matches
[
i
].
size
()
!=
needMatchCount
)
badCount
++
;
else
int
badCount
=
0
;
int
shift
=
matcher
.
isMaskSupported
()
?
1
:
0
;
int
needMatchCount
=
matcher
.
isMaskSupported
()
?
n
-
1
:
n
;
for
(
size_t
i
=
0
;
i
<
matches
.
size
();
i
++
)
{
int
localBadCount
=
0
;
for
(
int
k
=
0
;
k
<
needMatchCount
;
k
++
)
if
((
int
)
matches
[
i
].
size
()
!=
needMatchCount
)
badCount
++
;
else
{
cv
::
DMatch
match
=
matches
[
i
][
k
];
int
localBadCount
=
0
;
for
(
int
k
=
0
;
k
<
needMatchCount
;
k
++
)
{
if
((
int
)
i
<
queryDescCount
/
2
)
cv
::
DMatch
match
=
matches
[
i
][
k
];
{
if
((
match
.
queryIdx
!=
(
int
)
i
)
||
(
match
.
trainIdx
!=
(
int
)
i
*
countFactor
+
k
+
shift
)
||
(
match
.
imgIdx
!=
0
)
)
localBadCount
++
;
}
else
{
if
((
match
.
queryIdx
!=
(
int
)
i
)
||
(
match
.
trainIdx
!=
((
int
)
i
-
queryDescCount
/
2
)
*
countFactor
+
k
+
shift
)
||
(
match
.
imgIdx
!=
1
)
)
localBadCount
++
;
if
((
int
)
i
<
queryDescCount
/
2
)
{
if
((
match
.
queryIdx
!=
(
int
)
i
)
||
(
match
.
trainIdx
!=
(
int
)
i
*
countFactor
+
k
+
shift
)
||
(
match
.
imgIdx
!=
0
)
)
localBadCount
++
;
}
else
{
if
((
match
.
queryIdx
!=
(
int
)
i
)
||
(
match
.
trainIdx
!=
((
int
)
i
-
queryDescCount
/
2
)
*
countFactor
+
k
+
shift
)
||
(
match
.
imgIdx
!=
1
)
)
localBadCount
++
;
}
}
}
badCount
+=
localBadCount
>
0
?
1
:
0
;
}
badCount
+=
localBadCount
>
0
?
1
:
0
;
}
}
ASSERT_EQ
(
0
,
badCount
);
ASSERT_EQ
(
0
,
badCount
);
}
}
INSTANTIATE_TEST_CASE_P
(
GPU_Features2D
,
BruteForceMatcher
,
testing
::
Combine
(
...
...
modules/gpu/test/test_filters.cpp
View file @
bd13e947
...
...
@@ -258,13 +258,28 @@ TEST_P(GaussianBlur, Accuracy)
double
sigma1
=
randomDouble
(
0.1
,
1.0
);
double
sigma2
=
randomDouble
(
0.1
,
1.0
);
cv
::
gpu
::
GpuMat
dst
=
createMat
(
size
,
type
,
useRoi
);
cv
::
gpu
::
GaussianBlur
(
loadMat
(
src
,
useRoi
),
dst
,
ksize
,
sigma1
,
sigma2
,
borderType
);
if
(
ksize
.
height
>
16
&&
!
supportFeature
(
devInfo
,
cv
::
gpu
::
FEATURE_SET_COMPUTE_20
))
{
try
{
cv
::
gpu
::
GpuMat
dst
;
cv
::
gpu
::
GaussianBlur
(
loadMat
(
src
),
dst
,
ksize
,
sigma1
,
sigma2
,
borderType
);
}
catch
(
const
cv
::
Exception
&
e
)
{
ASSERT_EQ
(
CV_StsNotImplemented
,
e
.
code
);
}
}
else
{
cv
::
gpu
::
GpuMat
dst
=
createMat
(
size
,
type
,
useRoi
);
cv
::
gpu
::
GaussianBlur
(
loadMat
(
src
,
useRoi
),
dst
,
ksize
,
sigma1
,
sigma2
,
borderType
);
cv
::
Mat
dst_gold
;
cv
::
GaussianBlur
(
src
,
dst_gold
,
ksize
,
sigma1
,
sigma2
,
borderType
);
cv
::
Mat
dst_gold
;
cv
::
GaussianBlur
(
src
,
dst_gold
,
ksize
,
sigma1
,
sigma2
,
borderType
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
4.0
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
4.0
);
}
}
INSTANTIATE_TEST_CASE_P
(
GPU_Filter
,
GaussianBlur
,
testing
::
Combine
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment