Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
79ecefb5
Commit
79ecefb5
authored
Dec 09, 2015
by
Dan
Committed by
Dan
Mar 15, 2016
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implemented async calls.
parent
77832069
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
43 additions
and
37 deletions
+43
-37
orb.cu
modules/cudafeatures2d/src/cuda/orb.cu
+10
-4
orb.cpp
modules/cudafeatures2d/src/orb.cpp
+33
-33
No files found.
modules/cudafeatures2d/src/cuda/orb.cu
View file @
79ecefb5
...
@@ -44,6 +44,8 @@
...
@@ -44,6 +44,8 @@
#include <thrust/device_ptr.h>
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#include <thrust/sort.h>
#include <thrust/system/cuda/execution_policy.h>
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/reduce.hpp"
...
@@ -56,13 +58,17 @@ namespace cv { namespace cuda { namespace device
...
@@ -56,13 +58,17 @@ namespace cv { namespace cuda { namespace device
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
// cull
// cull
int cull_gpu(int* loc, float* response, int size, int n_points)
int cull_gpu(int* loc, float* response, int size, int n_points
, cudaStream_t stream
)
{
{
thrust::device_ptr<int> loc_ptr(loc);
thrust::device_ptr<int> loc_ptr(loc);
thrust::device_ptr<float> response_ptr(response);
thrust::device_ptr<float> response_ptr(response);
if(stream)
thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
{
thrust::sort_by_key(thrust::cuda::par.on(stream), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}else
{
thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}
return n_points;
return n_points;
}
}
...
...
modules/cudafeatures2d/src/orb.cpp
View file @
79ecefb5
...
@@ -55,7 +55,7 @@ namespace cv { namespace cuda { namespace device
...
@@ -55,7 +55,7 @@ namespace cv { namespace cuda { namespace device
{
{
namespace
orb
namespace
orb
{
{
int
cull_gpu
(
int
*
loc
,
float
*
response
,
int
size
,
int
n_points
);
int
cull_gpu
(
int
*
loc
,
float
*
response
,
int
size
,
int
n_points
,
cudaStream_t
stream
);
void
HarrisResponses_gpu
(
PtrStepSzb
img
,
const
short2
*
loc
,
float
*
response
,
const
int
npoints
,
int
blockSize
,
float
harris_k
,
cudaStream_t
stream
);
void
HarrisResponses_gpu
(
PtrStepSzb
img
,
const
short2
*
loc
,
float
*
response
,
const
int
npoints
,
int
blockSize
,
float
harris_k
,
cudaStream_t
stream
);
...
@@ -401,10 +401,10 @@ namespace
...
@@ -401,10 +401,10 @@ namespace
bool
blurForDescriptor_
;
bool
blurForDescriptor_
;
private
:
private
:
void
buildScalePyramids
(
InputArray
_image
,
InputArray
_mask
);
void
buildScalePyramids
(
InputArray
_image
,
InputArray
_mask
,
Stream
&
stream
);
void
computeKeyPointsPyramid
();
void
computeKeyPointsPyramid
(
Stream
&
stream
);
void
computeDescriptors
(
OutputArray
_descriptors
);
void
computeDescriptors
(
OutputArray
_descriptors
,
Stream
&
stream
);
void
mergeKeyPoints
(
OutputArray
_keypoints
);
void
mergeKeyPoints
(
OutputArray
_keypoints
,
Stream
&
stream
);
private
:
private
:
Ptr
<
cv
::
cuda
::
FastFeatureDetector
>
fastDetector_
;
Ptr
<
cv
::
cuda
::
FastFeatureDetector
>
fastDetector_
;
...
@@ -582,13 +582,13 @@ namespace
...
@@ -582,13 +582,13 @@ namespace
{
{
CV_Assert
(
useProvidedKeypoints
==
false
);
CV_Assert
(
useProvidedKeypoints
==
false
);
buildScalePyramids
(
_image
,
_mask
);
buildScalePyramids
(
_image
,
_mask
,
stream
);
computeKeyPointsPyramid
();
computeKeyPointsPyramid
(
stream
);
if
(
_descriptors
.
needed
())
if
(
_descriptors
.
needed
())
{
{
computeDescriptors
(
_descriptors
);
computeDescriptors
(
_descriptors
,
stream
);
}
}
mergeKeyPoints
(
_keypoints
);
mergeKeyPoints
(
_keypoints
,
stream
);
}
}
static
float
getScale
(
float
scaleFactor
,
int
firstLevel
,
int
level
)
static
float
getScale
(
float
scaleFactor
,
int
firstLevel
,
int
level
)
...
@@ -596,7 +596,7 @@ namespace
...
@@ -596,7 +596,7 @@ namespace
return
pow
(
scaleFactor
,
level
-
firstLevel
);
return
pow
(
scaleFactor
,
level
-
firstLevel
);
}
}
void
ORB_Impl
::
buildScalePyramids
(
InputArray
_image
,
InputArray
_mask
)
void
ORB_Impl
::
buildScalePyramids
(
InputArray
_image
,
InputArray
_mask
,
Stream
&
stream
)
{
{
const
GpuMat
image
=
_image
.
getGpuMat
();
const
GpuMat
image
=
_image
.
getGpuMat
();
const
GpuMat
mask
=
_mask
.
getGpuMat
();
const
GpuMat
mask
=
_mask
.
getGpuMat
();
...
@@ -622,28 +622,28 @@ namespace
...
@@ -622,28 +622,28 @@ namespace
{
{
if
(
level
<
firstLevel_
)
if
(
level
<
firstLevel_
)
{
{
cuda
::
resize
(
image
,
imagePyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
);
cuda
::
resize
(
image
,
imagePyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
,
stream
);
if
(
!
mask
.
empty
())
if
(
!
mask
.
empty
())
cuda
::
resize
(
mask
,
maskPyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
);
cuda
::
resize
(
mask
,
maskPyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
,
stream
);
}
}
else
else
{
{
cuda
::
resize
(
imagePyr_
[
level
-
1
],
imagePyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
);
cuda
::
resize
(
imagePyr_
[
level
-
1
],
imagePyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
,
stream
);
if
(
!
mask
.
empty
())
if
(
!
mask
.
empty
())
{
{
cuda
::
resize
(
maskPyr_
[
level
-
1
],
maskPyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
);
cuda
::
resize
(
maskPyr_
[
level
-
1
],
maskPyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
,
stream
);
cuda
::
threshold
(
maskPyr_
[
level
],
maskPyr_
[
level
],
254
,
0
,
THRESH_TOZERO
);
cuda
::
threshold
(
maskPyr_
[
level
],
maskPyr_
[
level
],
254
,
0
,
THRESH_TOZERO
,
stream
);
}
}
}
}
}
}
else
else
{
{
image
.
copyTo
(
imagePyr_
[
level
]);
image
.
copyTo
(
imagePyr_
[
level
]
,
stream
);
if
(
!
mask
.
empty
())
if
(
!
mask
.
empty
())
mask
.
copyTo
(
maskPyr_
[
level
]);
mask
.
copyTo
(
maskPyr_
[
level
]
,
stream
);
}
}
// Filter keypoints by image border
// Filter keypoints by image border
...
@@ -652,12 +652,12 @@ namespace
...
@@ -652,12 +652,12 @@ namespace
Rect
inner
(
edgeThreshold_
,
edgeThreshold_
,
sz
.
width
-
2
*
edgeThreshold_
,
sz
.
height
-
2
*
edgeThreshold_
);
Rect
inner
(
edgeThreshold_
,
edgeThreshold_
,
sz
.
width
-
2
*
edgeThreshold_
,
sz
.
height
-
2
*
edgeThreshold_
);
buf_
(
inner
).
setTo
(
Scalar
::
all
(
255
));
buf_
(
inner
).
setTo
(
Scalar
::
all
(
255
));
cuda
::
bitwise_and
(
maskPyr_
[
level
],
buf_
,
maskPyr_
[
level
]);
cuda
::
bitwise_and
(
maskPyr_
[
level
],
buf_
,
maskPyr_
[
level
]
,
stream
);
}
}
}
}
// takes keypoints and culls them by the response
// takes keypoints and culls them by the response
static
void
cull
(
GpuMat
&
keypoints
,
int
&
count
,
int
n_points
)
static
void
cull
(
GpuMat
&
keypoints
,
int
&
count
,
int
n_points
,
Stream
&
stream
)
{
{
using
namespace
cv
::
cuda
::
device
::
orb
;
using
namespace
cv
::
cuda
::
device
::
orb
;
...
@@ -670,11 +670,11 @@ namespace
...
@@ -670,11 +670,11 @@ namespace
return
;
return
;
}
}
count
=
cull_gpu
(
keypoints
.
ptr
<
int
>
(
cuda
::
FastFeatureDetector
::
LOCATION_ROW
),
keypoints
.
ptr
<
float
>
(
cuda
::
FastFeatureDetector
::
RESPONSE_ROW
),
count
,
n_points
);
count
=
cull_gpu
(
keypoints
.
ptr
<
int
>
(
cuda
::
FastFeatureDetector
::
LOCATION_ROW
),
keypoints
.
ptr
<
float
>
(
cuda
::
FastFeatureDetector
::
RESPONSE_ROW
),
count
,
n_points
,
StreamAccessor
::
getStream
(
stream
)
);
}
}
}
}
void
ORB_Impl
::
computeKeyPointsPyramid
()
void
ORB_Impl
::
computeKeyPointsPyramid
(
Stream
&
stream
)
{
{
using
namespace
cv
::
cuda
::
device
::
orb
;
using
namespace
cv
::
cuda
::
device
::
orb
;
...
@@ -690,7 +690,7 @@ namespace
...
@@ -690,7 +690,7 @@ namespace
fastDetector_
->
setMaxNumPoints
(
0.05
*
imagePyr_
[
level
].
size
().
area
());
fastDetector_
->
setMaxNumPoints
(
0.05
*
imagePyr_
[
level
].
size
().
area
());
GpuMat
fastKpRange
;
GpuMat
fastKpRange
;
fastDetector_
->
detectAsync
(
imagePyr_
[
level
],
fastKpRange
,
maskPyr_
[
level
],
Stream
::
Null
()
);
fastDetector_
->
detectAsync
(
imagePyr_
[
level
],
fastKpRange
,
maskPyr_
[
level
],
stream
);
keyPointsCount_
[
level
]
=
fastKpRange
.
cols
;
keyPointsCount_
[
level
]
=
fastKpRange
.
cols
;
...
@@ -698,28 +698,28 @@ namespace
...
@@ -698,28 +698,28 @@ namespace
continue
;
continue
;
ensureSizeIsEnough
(
3
,
keyPointsCount_
[
level
],
fastKpRange
.
type
(),
keyPointsPyr_
[
level
]);
ensureSizeIsEnough
(
3
,
keyPointsCount_
[
level
],
fastKpRange
.
type
(),
keyPointsPyr_
[
level
]);
fastKpRange
.
copyTo
(
keyPointsPyr_
[
level
].
rowRange
(
0
,
2
));
fastKpRange
.
copyTo
(
keyPointsPyr_
[
level
].
rowRange
(
0
,
2
)
,
stream
);
const
int
n_features
=
static_cast
<
int
>
(
n_features_per_level_
[
level
]);
const
int
n_features
=
static_cast
<
int
>
(
n_features_per_level_
[
level
]);
if
(
scoreType_
==
ORB
::
HARRIS_SCORE
)
if
(
scoreType_
==
ORB
::
HARRIS_SCORE
)
{
{
// Keep more points than necessary as FAST does not give amazing corners
// Keep more points than necessary as FAST does not give amazing corners
cull
(
keyPointsPyr_
[
level
],
keyPointsCount_
[
level
],
2
*
n_features
);
cull
(
keyPointsPyr_
[
level
],
keyPointsCount_
[
level
],
2
*
n_features
,
stream
);
// Compute the Harris cornerness (better scoring than FAST)
// Compute the Harris cornerness (better scoring than FAST)
HarrisResponses_gpu
(
imagePyr_
[
level
],
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsPyr_
[
level
].
ptr
<
float
>
(
1
),
keyPointsCount_
[
level
],
7
,
HARRIS_K
,
0
);
HarrisResponses_gpu
(
imagePyr_
[
level
],
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsPyr_
[
level
].
ptr
<
float
>
(
1
),
keyPointsCount_
[
level
],
7
,
HARRIS_K
,
StreamAccessor
::
getStream
(
stream
)
);
}
}
//cull to the final desired level, using the new Harris scores or the original FAST scores.
//cull to the final desired level, using the new Harris scores or the original FAST scores.
cull
(
keyPointsPyr_
[
level
],
keyPointsCount_
[
level
],
n_features
);
cull
(
keyPointsPyr_
[
level
],
keyPointsCount_
[
level
],
n_features
,
stream
);
// Compute orientation
// Compute orientation
IC_Angle_gpu
(
imagePyr_
[
level
],
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsPyr_
[
level
].
ptr
<
float
>
(
2
),
keyPointsCount_
[
level
],
half_patch_size
,
0
);
IC_Angle_gpu
(
imagePyr_
[
level
],
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsPyr_
[
level
].
ptr
<
float
>
(
2
),
keyPointsCount_
[
level
],
half_patch_size
,
StreamAccessor
::
getStream
(
stream
)
);
}
}
}
}
void
ORB_Impl
::
computeDescriptors
(
OutputArray
_descriptors
)
void
ORB_Impl
::
computeDescriptors
(
OutputArray
_descriptors
,
Stream
&
stream
)
{
{
using
namespace
cv
::
cuda
::
device
::
orb
;
using
namespace
cv
::
cuda
::
device
::
orb
;
...
@@ -750,17 +750,17 @@ namespace
...
@@ -750,17 +750,17 @@ namespace
{
{
// preprocess the resized image
// preprocess the resized image
ensureSizeIsEnough
(
imagePyr_
[
level
].
size
(),
imagePyr_
[
level
].
type
(),
buf_
);
ensureSizeIsEnough
(
imagePyr_
[
level
].
size
(),
imagePyr_
[
level
].
type
(),
buf_
);
blurFilter_
->
apply
(
imagePyr_
[
level
],
buf_
);
blurFilter_
->
apply
(
imagePyr_
[
level
],
buf_
,
stream
);
}
}
computeOrbDescriptor_gpu
(
blurForDescriptor_
?
buf_
:
imagePyr_
[
level
],
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsPyr_
[
level
].
ptr
<
float
>
(
2
),
computeOrbDescriptor_gpu
(
blurForDescriptor_
?
buf_
:
imagePyr_
[
level
],
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsPyr_
[
level
].
ptr
<
float
>
(
2
),
keyPointsCount_
[
level
],
pattern_
.
ptr
<
int
>
(
0
),
pattern_
.
ptr
<
int
>
(
1
),
descRange
,
descriptorSize
(),
WTA_K_
,
0
);
keyPointsCount_
[
level
],
pattern_
.
ptr
<
int
>
(
0
),
pattern_
.
ptr
<
int
>
(
1
),
descRange
,
descriptorSize
(),
WTA_K_
,
StreamAccessor
::
getStream
(
stream
)
);
offset
+=
keyPointsCount_
[
level
];
offset
+=
keyPointsCount_
[
level
];
}
}
}
}
void
ORB_Impl
::
mergeKeyPoints
(
OutputArray
_keypoints
)
void
ORB_Impl
::
mergeKeyPoints
(
OutputArray
_keypoints
,
Stream
&
stream
)
{
{
using
namespace
cv
::
cuda
::
device
::
orb
;
using
namespace
cv
::
cuda
::
device
::
orb
;
...
@@ -791,10 +791,10 @@ namespace
...
@@ -791,10 +791,10 @@ namespace
float
locScale
=
level
!=
firstLevel_
?
sf
:
1.0
f
;
float
locScale
=
level
!=
firstLevel_
?
sf
:
1.0
f
;
mergeLocation_gpu
(
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsRange
.
ptr
<
float
>
(
0
),
keyPointsRange
.
ptr
<
float
>
(
1
),
keyPointsCount_
[
level
],
locScale
,
0
);
mergeLocation_gpu
(
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsRange
.
ptr
<
float
>
(
0
),
keyPointsRange
.
ptr
<
float
>
(
1
),
keyPointsCount_
[
level
],
locScale
,
StreamAccessor
::
getStream
(
stream
)
);
GpuMat
range
=
keyPointsRange
.
rowRange
(
2
,
4
);
GpuMat
range
=
keyPointsRange
.
rowRange
(
2
,
4
);
keyPointsPyr_
[
level
](
Range
(
1
,
3
),
Range
(
0
,
keyPointsCount_
[
level
])).
copyTo
(
range
);
keyPointsPyr_
[
level
](
Range
(
1
,
3
),
Range
(
0
,
keyPointsCount_
[
level
])).
copyTo
(
range
,
stream
);
keyPointsRange
.
row
(
4
).
setTo
(
Scalar
::
all
(
level
));
keyPointsRange
.
row
(
4
).
setTo
(
Scalar
::
all
(
level
));
keyPointsRange
.
row
(
5
).
setTo
(
Scalar
::
all
(
patchSize_
*
sf
));
keyPointsRange
.
row
(
5
).
setTo
(
Scalar
::
all
(
patchSize_
*
sf
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment