Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
4e479d58
Commit
4e479d58
authored
Mar 18, 2016
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #6290 from dtmoodie:thrust_allocator_usage_pr
parents
2f4e38c8
1d214933
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
91 additions
and
65 deletions
+91
-65
orb.cu
modules/cudafeatures2d/src/cuda/orb.cu
+26
-4
orb.cpp
modules/cudafeatures2d/src/orb.cpp
+37
-37
gftt.cu
modules/cudaimgproc/src/cuda/gftt.cu
+18
-11
gftt.cpp
modules/cudaimgproc/src/gftt.cpp
+10
-13
No files found.
modules/cudafeatures2d/src/cuda/orb.cu
View file @
4e479d58
...
...
@@ -44,11 +44,14 @@
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#include <thrust/system/cuda/execution_policy.h>
#include <thrust/version.h>
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/utility.hpp"
namespace cv { namespace cuda { namespace device
{
namespace orb
...
...
@@ -56,13 +59,32 @@ namespace cv { namespace cuda { namespace device
////////////////////////////////////////////////////////////////////////////////////////////////////////
// cull
int cull_gpu(int* loc, float* response, int size, int n_points)
int cull_gpu(int* loc, float* response, int size, int n_points
, cudaStream_t stream
)
{
thrust::device_ptr<int> loc_ptr(loc);
thrust::device_ptr<float> response_ptr(response);
#if THRUST_VERSION >= 100800
#if THRUST_VERSION >= 100802
if (stream)
{
thrust::sort_by_key(thrust::cuda::par(ThrustAllocator::getAllocator()).on(stream), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}
else
{
thrust::sort_by_key(thrust::cuda::par(ThrustAllocator::getAllocator()), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}
#else
if(stream)
{
thrust::sort_by_key(thrust::cuda::par.on(stream), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}else
{
thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}
#endif
#else
thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
#endif
return n_points;
}
...
...
modules/cudafeatures2d/src/orb.cpp
View file @
4e479d58
...
...
@@ -55,7 +55,7 @@ namespace cv { namespace cuda { namespace device
{
namespace
orb
{
int
cull_gpu
(
int
*
loc
,
float
*
response
,
int
size
,
int
n_points
);
int
cull_gpu
(
int
*
loc
,
float
*
response
,
int
size
,
int
n_points
,
cudaStream_t
stream
);
void
HarrisResponses_gpu
(
PtrStepSzb
img
,
const
short2
*
loc
,
float
*
response
,
const
int
npoints
,
int
blockSize
,
float
harris_k
,
cudaStream_t
stream
);
...
...
@@ -401,10 +401,10 @@ namespace
bool
blurForDescriptor_
;
private
:
void
buildScalePyramids
(
InputArray
_image
,
InputArray
_mask
);
void
computeKeyPointsPyramid
();
void
computeDescriptors
(
OutputArray
_descriptors
);
void
mergeKeyPoints
(
OutputArray
_keypoints
);
void
buildScalePyramids
(
InputArray
_image
,
InputArray
_mask
,
Stream
&
stream
);
void
computeKeyPointsPyramid
(
Stream
&
stream
);
void
computeDescriptors
(
OutputArray
_descriptors
,
Stream
&
stream
);
void
mergeKeyPoints
(
OutputArray
_keypoints
,
Stream
&
stream
);
private
:
Ptr
<
cv
::
cuda
::
FastFeatureDetector
>
fastDetector_
;
...
...
@@ -582,13 +582,13 @@ namespace
{
CV_Assert
(
useProvidedKeypoints
==
false
);
buildScalePyramids
(
_image
,
_mask
);
computeKeyPointsPyramid
();
buildScalePyramids
(
_image
,
_mask
,
stream
);
computeKeyPointsPyramid
(
stream
);
if
(
_descriptors
.
needed
())
{
computeDescriptors
(
_descriptors
);
computeDescriptors
(
_descriptors
,
stream
);
}
mergeKeyPoints
(
_keypoints
);
mergeKeyPoints
(
_keypoints
,
stream
);
}
static
float
getScale
(
float
scaleFactor
,
int
firstLevel
,
int
level
)
...
...
@@ -596,7 +596,7 @@ namespace
return
pow
(
scaleFactor
,
level
-
firstLevel
);
}
void
ORB_Impl
::
buildScalePyramids
(
InputArray
_image
,
InputArray
_mask
)
void
ORB_Impl
::
buildScalePyramids
(
InputArray
_image
,
InputArray
_mask
,
Stream
&
stream
)
{
const
GpuMat
image
=
_image
.
getGpuMat
();
const
GpuMat
mask
=
_mask
.
getGpuMat
();
...
...
@@ -622,42 +622,42 @@ namespace
{
if
(
level
<
firstLevel_
)
{
cuda
::
resize
(
image
,
imagePyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
);
cuda
::
resize
(
image
,
imagePyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
,
stream
);
if
(
!
mask
.
empty
())
cuda
::
resize
(
mask
,
maskPyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
);
cuda
::
resize
(
mask
,
maskPyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
,
stream
);
}
else
{
cuda
::
resize
(
imagePyr_
[
level
-
1
],
imagePyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
);
cuda
::
resize
(
imagePyr_
[
level
-
1
],
imagePyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
,
stream
);
if
(
!
mask
.
empty
())
{
cuda
::
resize
(
maskPyr_
[
level
-
1
],
maskPyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
);
cuda
::
threshold
(
maskPyr_
[
level
],
maskPyr_
[
level
],
254
,
0
,
THRESH_TOZERO
);
cuda
::
resize
(
maskPyr_
[
level
-
1
],
maskPyr_
[
level
],
sz
,
0
,
0
,
INTER_LINEAR
,
stream
);
cuda
::
threshold
(
maskPyr_
[
level
],
maskPyr_
[
level
],
254
,
0
,
THRESH_TOZERO
,
stream
);
}
}
}
else
{
image
.
copyTo
(
imagePyr_
[
level
]);
image
.
copyTo
(
imagePyr_
[
level
]
,
stream
);
if
(
!
mask
.
empty
())
mask
.
copyTo
(
maskPyr_
[
level
]);
mask
.
copyTo
(
maskPyr_
[
level
]
,
stream
);
}
// Filter keypoints by image border
ensureSizeIsEnough
(
sz
,
CV_8UC1
,
buf_
);
buf_
.
setTo
(
Scalar
::
all
(
0
));
buf_
.
setTo
(
Scalar
::
all
(
0
)
,
stream
);
Rect
inner
(
edgeThreshold_
,
edgeThreshold_
,
sz
.
width
-
2
*
edgeThreshold_
,
sz
.
height
-
2
*
edgeThreshold_
);
buf_
(
inner
).
setTo
(
Scalar
::
all
(
255
));
buf_
(
inner
).
setTo
(
Scalar
::
all
(
255
)
,
stream
);
cuda
::
bitwise_and
(
maskPyr_
[
level
],
buf_
,
maskPyr_
[
level
]);
cuda
::
bitwise_and
(
maskPyr_
[
level
],
buf_
,
maskPyr_
[
level
]
,
cv
::
noArray
(),
stream
);
}
}
// takes keypoints and culls them by the response
static
void
cull
(
GpuMat
&
keypoints
,
int
&
count
,
int
n_points
)
static
void
cull
(
GpuMat
&
keypoints
,
int
&
count
,
int
n_points
,
Stream
&
stream
)
{
using
namespace
cv
::
cuda
::
device
::
orb
;
...
...
@@ -670,11 +670,11 @@ namespace
return
;
}
count
=
cull_gpu
(
keypoints
.
ptr
<
int
>
(
cuda
::
FastFeatureDetector
::
LOCATION_ROW
),
keypoints
.
ptr
<
float
>
(
cuda
::
FastFeatureDetector
::
RESPONSE_ROW
),
count
,
n_points
);
count
=
cull_gpu
(
keypoints
.
ptr
<
int
>
(
cuda
::
FastFeatureDetector
::
LOCATION_ROW
),
keypoints
.
ptr
<
float
>
(
cuda
::
FastFeatureDetector
::
RESPONSE_ROW
),
count
,
n_points
,
StreamAccessor
::
getStream
(
stream
)
);
}
}
void
ORB_Impl
::
computeKeyPointsPyramid
()
void
ORB_Impl
::
computeKeyPointsPyramid
(
Stream
&
stream
)
{
using
namespace
cv
::
cuda
::
device
::
orb
;
...
...
@@ -690,7 +690,7 @@ namespace
fastDetector_
->
setMaxNumPoints
(
0.05
*
imagePyr_
[
level
].
size
().
area
());
GpuMat
fastKpRange
;
fastDetector_
->
detectAsync
(
imagePyr_
[
level
],
fastKpRange
,
maskPyr_
[
level
],
Stream
::
Null
()
);
fastDetector_
->
detectAsync
(
imagePyr_
[
level
],
fastKpRange
,
maskPyr_
[
level
],
stream
);
keyPointsCount_
[
level
]
=
fastKpRange
.
cols
;
...
...
@@ -698,28 +698,28 @@ namespace
continue
;
ensureSizeIsEnough
(
3
,
keyPointsCount_
[
level
],
fastKpRange
.
type
(),
keyPointsPyr_
[
level
]);
fastKpRange
.
copyTo
(
keyPointsPyr_
[
level
].
rowRange
(
0
,
2
));
fastKpRange
.
copyTo
(
keyPointsPyr_
[
level
].
rowRange
(
0
,
2
)
,
stream
);
const
int
n_features
=
static_cast
<
int
>
(
n_features_per_level_
[
level
]);
if
(
scoreType_
==
ORB
::
HARRIS_SCORE
)
{
// Keep more points than necessary as FAST does not give amazing corners
cull
(
keyPointsPyr_
[
level
],
keyPointsCount_
[
level
],
2
*
n_features
);
cull
(
keyPointsPyr_
[
level
],
keyPointsCount_
[
level
],
2
*
n_features
,
stream
);
// Compute the Harris cornerness (better scoring than FAST)
HarrisResponses_gpu
(
imagePyr_
[
level
],
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsPyr_
[
level
].
ptr
<
float
>
(
1
),
keyPointsCount_
[
level
],
7
,
HARRIS_K
,
0
);
HarrisResponses_gpu
(
imagePyr_
[
level
],
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsPyr_
[
level
].
ptr
<
float
>
(
1
),
keyPointsCount_
[
level
],
7
,
HARRIS_K
,
StreamAccessor
::
getStream
(
stream
)
);
}
//cull to the final desired level, using the new Harris scores or the original FAST scores.
cull
(
keyPointsPyr_
[
level
],
keyPointsCount_
[
level
],
n_features
);
cull
(
keyPointsPyr_
[
level
],
keyPointsCount_
[
level
],
n_features
,
stream
);
// Compute orientation
IC_Angle_gpu
(
imagePyr_
[
level
],
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsPyr_
[
level
].
ptr
<
float
>
(
2
),
keyPointsCount_
[
level
],
half_patch_size
,
0
);
IC_Angle_gpu
(
imagePyr_
[
level
],
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsPyr_
[
level
].
ptr
<
float
>
(
2
),
keyPointsCount_
[
level
],
half_patch_size
,
StreamAccessor
::
getStream
(
stream
)
);
}
}
void
ORB_Impl
::
computeDescriptors
(
OutputArray
_descriptors
)
void
ORB_Impl
::
computeDescriptors
(
OutputArray
_descriptors
,
Stream
&
stream
)
{
using
namespace
cv
::
cuda
::
device
::
orb
;
...
...
@@ -750,17 +750,17 @@ namespace
{
// preprocess the resized image
ensureSizeIsEnough
(
imagePyr_
[
level
].
size
(),
imagePyr_
[
level
].
type
(),
buf_
);
blurFilter_
->
apply
(
imagePyr_
[
level
],
buf_
);
blurFilter_
->
apply
(
imagePyr_
[
level
],
buf_
,
stream
);
}
computeOrbDescriptor_gpu
(
blurForDescriptor_
?
buf_
:
imagePyr_
[
level
],
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsPyr_
[
level
].
ptr
<
float
>
(
2
),
keyPointsCount_
[
level
],
pattern_
.
ptr
<
int
>
(
0
),
pattern_
.
ptr
<
int
>
(
1
),
descRange
,
descriptorSize
(),
WTA_K_
,
0
);
keyPointsCount_
[
level
],
pattern_
.
ptr
<
int
>
(
0
),
pattern_
.
ptr
<
int
>
(
1
),
descRange
,
descriptorSize
(),
WTA_K_
,
StreamAccessor
::
getStream
(
stream
)
);
offset
+=
keyPointsCount_
[
level
];
}
}
void
ORB_Impl
::
mergeKeyPoints
(
OutputArray
_keypoints
)
void
ORB_Impl
::
mergeKeyPoints
(
OutputArray
_keypoints
,
Stream
&
stream
)
{
using
namespace
cv
::
cuda
::
device
::
orb
;
...
...
@@ -791,13 +791,13 @@ namespace
float
locScale
=
level
!=
firstLevel_
?
sf
:
1.0
f
;
mergeLocation_gpu
(
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsRange
.
ptr
<
float
>
(
0
),
keyPointsRange
.
ptr
<
float
>
(
1
),
keyPointsCount_
[
level
],
locScale
,
0
);
mergeLocation_gpu
(
keyPointsPyr_
[
level
].
ptr
<
short2
>
(
0
),
keyPointsRange
.
ptr
<
float
>
(
0
),
keyPointsRange
.
ptr
<
float
>
(
1
),
keyPointsCount_
[
level
],
locScale
,
StreamAccessor
::
getStream
(
stream
)
);
GpuMat
range
=
keyPointsRange
.
rowRange
(
2
,
4
);
keyPointsPyr_
[
level
](
Range
(
1
,
3
),
Range
(
0
,
keyPointsCount_
[
level
])).
copyTo
(
range
);
keyPointsPyr_
[
level
](
Range
(
1
,
3
),
Range
(
0
,
keyPointsCount_
[
level
])).
copyTo
(
range
,
stream
);
keyPointsRange
.
row
(
4
).
setTo
(
Scalar
::
all
(
level
));
keyPointsRange
.
row
(
5
).
setTo
(
Scalar
::
all
(
patchSize_
*
sf
));
keyPointsRange
.
row
(
4
).
setTo
(
Scalar
::
all
(
level
)
,
stream
);
keyPointsRange
.
row
(
5
).
setTo
(
Scalar
::
all
(
patchSize_
*
sf
)
,
stream
);
offset
+=
keyPointsCount_
[
level
];
}
...
...
modules/cudaimgproc/src/cuda/gftt.cu
View file @
4e479d58
...
...
@@ -47,7 +47,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/utility.hpp"
#include <thrust/execution_policy.h>
namespace cv { namespace cuda { namespace device
{
namespace gfft
...
...
@@ -91,12 +91,12 @@ namespace cv { namespace cuda { namespace device
}
}
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count)
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count
, cudaStream_t stream
)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
cudaSafeCall( cudaMemset
(counter_ptr, 0, sizeof(int)
) );
cudaSafeCall( cudaMemset
Async(counter_ptr, 0, sizeof(int), stream
) );
bindTexture(&eigTex, eig);
...
...
@@ -104,17 +104,18 @@ namespace cv { namespace cuda { namespace device
dim3 grid(divUp(eig.cols, block.x), divUp(eig.rows, block.y));
if (mask.data)
findCorners<<<grid, block>>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols);
findCorners<<<grid, block
, 0, stream
>>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols);
else
findCorners<<<grid, block>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols);
findCorners<<<grid, block
, 0, stream
>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
if (stream)
cudaSafeCall(cudaStreamSynchronize(stream));
else
cudaSafeCall( cudaDeviceSynchronize() );
return std::min(count, max_count);
}
...
...
@@ -128,13 +129,19 @@ namespace cv { namespace cuda { namespace device
};
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count)
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count
, cudaStream_t stream
)
{
bindTexture(&eigTex, eig);
thrust::device_ptr<float2> ptr(corners);
#if THRUST_VERSION >= 100802
if (stream)
thrust::sort(thrust::cuda::par(ThrustAllocator::getAllocator()).on(stream), ptr, ptr + count, EigGreater());
else
thrust::sort(thrust::cuda::par(ThrustAllocator::getAllocator()), ptr, ptr + count, EigGreater());
#else
thrust::sort(ptr, ptr + count, EigGreater());
#endif
}
} // namespace optical_flow
}}}
...
...
modules/cudaimgproc/src/gftt.cpp
View file @
4e479d58
...
...
@@ -55,8 +55,8 @@ namespace cv { namespace cuda { namespace device
{
namespace
gfft
{
int
findCorners_gpu
(
PtrStepSzf
eig
,
float
threshold
,
PtrStepSzb
mask
,
float2
*
corners
,
int
max_count
);
void
sortCorners_gpu
(
PtrStepSzf
eig
,
float2
*
corners
,
int
count
);
int
findCorners_gpu
(
PtrStepSzf
eig
,
float
threshold
,
PtrStepSzb
mask
,
float2
*
corners
,
int
max_count
,
cudaStream_t
stream
);
void
sortCorners_gpu
(
PtrStepSzf
eig
,
float2
*
corners
,
int
count
,
cudaStream_t
stream
);
}
}}}
...
...
@@ -97,9 +97,6 @@ namespace
void
GoodFeaturesToTrackDetector
::
detect
(
InputArray
_image
,
OutputArray
_corners
,
InputArray
_mask
,
Stream
&
stream
)
{
// TODO : implement async version
(
void
)
stream
;
using
namespace
cv
::
cuda
::
device
::
gfft
;
GpuMat
image
=
_image
.
getGpuMat
();
...
...
@@ -108,14 +105,14 @@ namespace
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8UC1
&&
mask
.
size
()
==
image
.
size
())
);
ensureSizeIsEnough
(
image
.
size
(),
CV_32FC1
,
eig_
);
cornerCriteria_
->
compute
(
image
,
eig_
);
cornerCriteria_
->
compute
(
image
,
eig_
,
stream
);
double
maxVal
=
0
;
cuda
::
minMax
(
eig_
,
0
,
&
maxVal
);
cudaStream_t
stream_
=
StreamAccessor
::
getStream
(
stream
);
ensureSizeIsEnough
(
1
,
std
::
max
(
1000
,
static_cast
<
int
>
(
image
.
size
().
area
()
*
0.05
)),
CV_32FC2
,
tmpCorners_
);
int
total
=
findCorners_gpu
(
eig_
,
static_cast
<
float
>
(
maxVal
*
qualityLevel_
),
mask
,
tmpCorners_
.
ptr
<
float2
>
(),
tmpCorners_
.
cols
);
int
total
=
findCorners_gpu
(
eig_
,
static_cast
<
float
>
(
maxVal
*
qualityLevel_
),
mask
,
tmpCorners_
.
ptr
<
float2
>
(),
tmpCorners_
.
cols
,
stream_
);
if
(
total
==
0
)
{
...
...
@@ -123,18 +120,18 @@ namespace
return
;
}
sortCorners_gpu
(
eig_
,
tmpCorners_
.
ptr
<
float2
>
(),
total
);
sortCorners_gpu
(
eig_
,
tmpCorners_
.
ptr
<
float2
>
(),
total
,
stream_
);
if
(
minDistance_
<
1
)
{
tmpCorners_
.
colRange
(
0
,
maxCorners_
>
0
?
std
::
min
(
maxCorners_
,
total
)
:
total
).
copyTo
(
_corners
);
tmpCorners_
.
colRange
(
0
,
maxCorners_
>
0
?
std
::
min
(
maxCorners_
,
total
)
:
total
).
copyTo
(
_corners
,
stream
);
}
else
{
std
::
vector
<
Point2f
>
tmp
(
total
);
Mat
tmpMat
(
1
,
total
,
CV_32FC2
,
(
void
*
)
&
tmp
[
0
]);
tmpCorners_
.
colRange
(
0
,
total
).
download
(
tmpMat
);
tmpCorners_
.
colRange
(
0
,
total
).
download
(
tmpMat
,
stream
);
stream
.
waitForCompletion
();
std
::
vector
<
Point2f
>
tmp2
;
tmp2
.
reserve
(
total
);
...
...
@@ -203,7 +200,7 @@ namespace
_corners
.
create
(
1
,
static_cast
<
int
>
(
tmp2
.
size
()),
CV_32FC2
);
GpuMat
corners
=
_corners
.
getGpuMat
();
corners
.
upload
(
Mat
(
1
,
static_cast
<
int
>
(
tmp2
.
size
()),
CV_32FC2
,
&
tmp2
[
0
]));
corners
.
upload
(
Mat
(
1
,
static_cast
<
int
>
(
tmp2
.
size
()),
CV_32FC2
,
&
tmp2
[
0
])
,
stream
);
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment