Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
a21ede94
Commit
a21ede94
authored
Mar 10, 2016
by
Dan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Thrust allocator usage.
parent
7a934f9e
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
40 additions
and
25 deletions
+40
-25
orb.cu
modules/cudafeatures2d/src/cuda/orb.cu
+12
-1
gftt.cu
modules/cudaimgproc/src/cuda/gftt.cu
+18
-11
gftt.cpp
modules/cudaimgproc/src/gftt.cpp
+10
-13
No files found.
modules/cudafeatures2d/src/cuda/orb.cu
View file @
a21ede94
...
...
@@ -51,7 +51,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/utility.hpp"
namespace cv { namespace cuda { namespace device
{
namespace orb
...
...
@@ -64,6 +64,16 @@ namespace cv { namespace cuda { namespace device
thrust::device_ptr<int> loc_ptr(loc);
thrust::device_ptr<float> response_ptr(response);
#if THRUST_VERSION >= 100800
#if THRUST_VERSION >= 100802
if (stream)
{
thrust::sort_by_key(thrust::cuda::par(ThrustAllocator::getAllocator()).on(stream), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}
else
{
thrust::sort_by_key(thrust::cuda::par(ThrustAllocator::getAllocator()), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}
#else
if(stream)
{
thrust::sort_by_key(thrust::cuda::par.on(stream), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
...
...
@@ -71,6 +81,7 @@ namespace cv { namespace cuda { namespace device
{
thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}
#endif
#else
thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
#endif
...
...
modules/cudaimgproc/src/cuda/gftt.cu
View file @
a21ede94
...
...
@@ -47,7 +47,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/utility.hpp"
#include <thrust/execution_policy.h>
namespace cv { namespace cuda { namespace device
{
namespace gfft
...
...
@@ -91,12 +91,12 @@ namespace cv { namespace cuda { namespace device
}
}
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count)
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count
, cudaStream_t stream
)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
cudaSafeCall( cudaMemset
(counter_ptr, 0, sizeof(int)
) );
cudaSafeCall( cudaMemset
Async(counter_ptr, 0, sizeof(int), stream
) );
bindTexture(&eigTex, eig);
...
...
@@ -104,17 +104,18 @@ namespace cv { namespace cuda { namespace device
dim3 grid(divUp(eig.cols, block.x), divUp(eig.rows, block.y));
if (mask.data)
findCorners<<<grid, block>>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols);
findCorners<<<grid, block
, 0, stream
>>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols);
else
findCorners<<<grid, block>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols);
findCorners<<<grid, block
, 0, stream
>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
if (stream)
cudaSafeCall(cudaStreamSynchronize(stream));
else
cudaSafeCall( cudaDeviceSynchronize() );
return std::min(count, max_count);
}
...
...
@@ -128,13 +129,19 @@ namespace cv { namespace cuda { namespace device
};
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count)
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count
, cudaStream_t stream
)
{
bindTexture(&eigTex, eig);
thrust::device_ptr<float2> ptr(corners);
#if THRUST_VERSION >= 100802
if (stream)
thrust::sort(thrust::cuda::par(ThrustAllocator::getAllocator()).on(stream), ptr, ptr + count, EigGreater());
else
thrust::sort(thrust::cuda::par(ThrustAllocator::getAllocator()), ptr, ptr + count, EigGreater());
#else
thrust::sort(ptr, ptr + count, EigGreater());
#endif
}
} // namespace optical_flow
}}}
...
...
modules/cudaimgproc/src/gftt.cpp
View file @
a21ede94
...
...
@@ -55,8 +55,8 @@ namespace cv { namespace cuda { namespace device
{
namespace
gfft
{
int
findCorners_gpu
(
PtrStepSzf
eig
,
float
threshold
,
PtrStepSzb
mask
,
float2
*
corners
,
int
max_count
);
void
sortCorners_gpu
(
PtrStepSzf
eig
,
float2
*
corners
,
int
count
);
int
findCorners_gpu
(
PtrStepSzf
eig
,
float
threshold
,
PtrStepSzb
mask
,
float2
*
corners
,
int
max_count
,
cudaStream_t
stream
);
void
sortCorners_gpu
(
PtrStepSzf
eig
,
float2
*
corners
,
int
count
,
cudaStream_t
stream
);
}
}}}
...
...
@@ -97,9 +97,6 @@ namespace
void
GoodFeaturesToTrackDetector
::
detect
(
InputArray
_image
,
OutputArray
_corners
,
InputArray
_mask
,
Stream
&
stream
)
{
// TODO : implement async version
(
void
)
stream
;
using
namespace
cv
::
cuda
::
device
::
gfft
;
GpuMat
image
=
_image
.
getGpuMat
();
...
...
@@ -108,14 +105,14 @@ namespace
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8UC1
&&
mask
.
size
()
==
image
.
size
())
);
ensureSizeIsEnough
(
image
.
size
(),
CV_32FC1
,
eig_
);
cornerCriteria_
->
compute
(
image
,
eig_
);
cornerCriteria_
->
compute
(
image
,
eig_
,
stream
);
double
maxVal
=
0
;
cuda
::
minMax
(
eig_
,
0
,
&
maxVal
);
cudaStream_t
stream_
=
StreamAccessor
::
getStream
(
stream
);
ensureSizeIsEnough
(
1
,
std
::
max
(
1000
,
static_cast
<
int
>
(
image
.
size
().
area
()
*
0.05
)),
CV_32FC2
,
tmpCorners_
);
int
total
=
findCorners_gpu
(
eig_
,
static_cast
<
float
>
(
maxVal
*
qualityLevel_
),
mask
,
tmpCorners_
.
ptr
<
float2
>
(),
tmpCorners_
.
cols
);
int
total
=
findCorners_gpu
(
eig_
,
static_cast
<
float
>
(
maxVal
*
qualityLevel_
),
mask
,
tmpCorners_
.
ptr
<
float2
>
(),
tmpCorners_
.
cols
,
stream_
);
if
(
total
==
0
)
{
...
...
@@ -123,18 +120,18 @@ namespace
return
;
}
sortCorners_gpu
(
eig_
,
tmpCorners_
.
ptr
<
float2
>
(),
total
);
sortCorners_gpu
(
eig_
,
tmpCorners_
.
ptr
<
float2
>
(),
total
,
stream_
);
if
(
minDistance_
<
1
)
{
tmpCorners_
.
colRange
(
0
,
maxCorners_
>
0
?
std
::
min
(
maxCorners_
,
total
)
:
total
).
copyTo
(
_corners
);
tmpCorners_
.
colRange
(
0
,
maxCorners_
>
0
?
std
::
min
(
maxCorners_
,
total
)
:
total
).
copyTo
(
_corners
,
stream
);
}
else
{
std
::
vector
<
Point2f
>
tmp
(
total
);
Mat
tmpMat
(
1
,
total
,
CV_32FC2
,
(
void
*
)
&
tmp
[
0
]);
tmpCorners_
.
colRange
(
0
,
total
).
download
(
tmpMat
);
tmpCorners_
.
colRange
(
0
,
total
).
download
(
tmpMat
,
stream
);
stream
.
waitForCompletion
();
std
::
vector
<
Point2f
>
tmp2
;
tmp2
.
reserve
(
total
);
...
...
@@ -203,7 +200,7 @@ namespace
_corners
.
create
(
1
,
static_cast
<
int
>
(
tmp2
.
size
()),
CV_32FC2
);
GpuMat
corners
=
_corners
.
getGpuMat
();
corners
.
upload
(
Mat
(
1
,
static_cast
<
int
>
(
tmp2
.
size
()),
CV_32FC2
,
&
tmp2
[
0
]));
corners
.
upload
(
Mat
(
1
,
static_cast
<
int
>
(
tmp2
.
size
()),
CV_32FC2
,
&
tmp2
[
0
])
,
stream
);
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment