Commit a21ede94 authored by Dan

Thrust allocator usage.

parent 7a934f9e
...@@ -51,7 +51,7 @@ ...@@ -51,7 +51,7 @@
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/functional.hpp" #include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/utility.hpp"
namespace cv { namespace cuda { namespace device namespace cv { namespace cuda { namespace device
{ {
namespace orb namespace orb
...@@ -64,6 +64,16 @@ namespace cv { namespace cuda { namespace device ...@@ -64,6 +64,16 @@ namespace cv { namespace cuda { namespace device
thrust::device_ptr<int> loc_ptr(loc); thrust::device_ptr<int> loc_ptr(loc);
thrust::device_ptr<float> response_ptr(response); thrust::device_ptr<float> response_ptr(response);
#if THRUST_VERSION >= 100800 #if THRUST_VERSION >= 100800
#if THRUST_VERSION >= 100802
if (stream)
{
thrust::sort_by_key(thrust::cuda::par(ThrustAllocator::getAllocator()).on(stream), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}
else
{
thrust::sort_by_key(thrust::cuda::par(ThrustAllocator::getAllocator()), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
}
#else
if(stream) if(stream)
{ {
thrust::sort_by_key(thrust::cuda::par.on(stream), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>()); thrust::sort_by_key(thrust::cuda::par.on(stream), response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
...@@ -71,6 +81,7 @@ namespace cv { namespace cuda { namespace device ...@@ -71,6 +81,7 @@ namespace cv { namespace cuda { namespace device
{ {
thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>()); thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
} }
#endif
#else #else
thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>()); thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
#endif #endif
......
...@@ -47,7 +47,7 @@ ...@@ -47,7 +47,7 @@
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/utility.hpp" #include "opencv2/core/cuda/utility.hpp"
#include <thrust/execution_policy.h>
namespace cv { namespace cuda { namespace device namespace cv { namespace cuda { namespace device
{ {
namespace gfft namespace gfft
...@@ -91,12 +91,12 @@ namespace cv { namespace cuda { namespace device ...@@ -91,12 +91,12 @@ namespace cv { namespace cuda { namespace device
} }
} }
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count) int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count, cudaStream_t stream)
{ {
void* counter_ptr; void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) ); cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) ); cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(int), stream) );
bindTexture(&eigTex, eig); bindTexture(&eigTex, eig);
...@@ -104,17 +104,18 @@ namespace cv { namespace cuda { namespace device ...@@ -104,17 +104,18 @@ namespace cv { namespace cuda { namespace device
dim3 grid(divUp(eig.cols, block.x), divUp(eig.rows, block.y)); dim3 grid(divUp(eig.cols, block.x), divUp(eig.rows, block.y));
if (mask.data) if (mask.data)
findCorners<<<grid, block>>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols); findCorners<<<grid, block, 0, stream>>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols);
else else
findCorners<<<grid, block>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols); findCorners<<<grid, block, 0, stream>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int count; int count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) ); cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
if (stream)
cudaSafeCall(cudaStreamSynchronize(stream));
else
cudaSafeCall( cudaDeviceSynchronize() );
return std::min(count, max_count); return std::min(count, max_count);
} }
...@@ -128,13 +129,19 @@ namespace cv { namespace cuda { namespace device ...@@ -128,13 +129,19 @@ namespace cv { namespace cuda { namespace device
}; };
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count) void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count, cudaStream_t stream)
{ {
bindTexture(&eigTex, eig); bindTexture(&eigTex, eig);
thrust::device_ptr<float2> ptr(corners); thrust::device_ptr<float2> ptr(corners);
#if THRUST_VERSION >= 100802
if (stream)
thrust::sort(thrust::cuda::par(ThrustAllocator::getAllocator()).on(stream), ptr, ptr + count, EigGreater());
else
thrust::sort(thrust::cuda::par(ThrustAllocator::getAllocator()), ptr, ptr + count, EigGreater());
#else
thrust::sort(ptr, ptr + count, EigGreater()); thrust::sort(ptr, ptr + count, EigGreater());
#endif
} }
} // namespace optical_flow } // namespace optical_flow
}}} }}}
......
...@@ -55,8 +55,8 @@ namespace cv { namespace cuda { namespace device ...@@ -55,8 +55,8 @@ namespace cv { namespace cuda { namespace device
{ {
namespace gfft namespace gfft
{ {
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count); int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count, cudaStream_t stream);
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count); void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count, cudaStream_t stream);
} }
}}} }}}
...@@ -97,9 +97,6 @@ namespace ...@@ -97,9 +97,6 @@ namespace
void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask, Stream& stream) void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask, Stream& stream)
{ {
// TODO : implement async version
(void) stream;
using namespace cv::cuda::device::gfft; using namespace cv::cuda::device::gfft;
GpuMat image = _image.getGpuMat(); GpuMat image = _image.getGpuMat();
...@@ -108,14 +105,14 @@ namespace ...@@ -108,14 +105,14 @@ namespace
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) ); CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) );
ensureSizeIsEnough(image.size(), CV_32FC1, eig_); ensureSizeIsEnough(image.size(), CV_32FC1, eig_);
cornerCriteria_->compute(image, eig_); cornerCriteria_->compute(image, eig_, stream);
double maxVal = 0; double maxVal = 0;
cuda::minMax(eig_, 0, &maxVal); cuda::minMax(eig_, 0, &maxVal);
cudaStream_t stream_ = StreamAccessor::getStream(stream);
ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_); ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
int total = findCorners_gpu(eig_, static_cast<float>(maxVal * qualityLevel_), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols); int total = findCorners_gpu(eig_, static_cast<float>(maxVal * qualityLevel_), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols, stream_);
if (total == 0) if (total == 0)
{ {
...@@ -123,18 +120,18 @@ namespace ...@@ -123,18 +120,18 @@ namespace
return; return;
} }
sortCorners_gpu(eig_, tmpCorners_.ptr<float2>(), total); sortCorners_gpu(eig_, tmpCorners_.ptr<float2>(), total, stream_);
if (minDistance_ < 1) if (minDistance_ < 1)
{ {
tmpCorners_.colRange(0, maxCorners_ > 0 ? std::min(maxCorners_, total) : total).copyTo(_corners); tmpCorners_.colRange(0, maxCorners_ > 0 ? std::min(maxCorners_, total) : total).copyTo(_corners, stream);
} }
else else
{ {
std::vector<Point2f> tmp(total); std::vector<Point2f> tmp(total);
Mat tmpMat(1, total, CV_32FC2, (void*)&tmp[0]); Mat tmpMat(1, total, CV_32FC2, (void*)&tmp[0]);
tmpCorners_.colRange(0, total).download(tmpMat); tmpCorners_.colRange(0, total).download(tmpMat, stream);
stream.waitForCompletion();
std::vector<Point2f> tmp2; std::vector<Point2f> tmp2;
tmp2.reserve(total); tmp2.reserve(total);
...@@ -203,7 +200,7 @@ namespace ...@@ -203,7 +200,7 @@ namespace
_corners.create(1, static_cast<int>(tmp2.size()), CV_32FC2); _corners.create(1, static_cast<int>(tmp2.size()), CV_32FC2);
GpuMat corners = _corners.getGpuMat(); GpuMat corners = _corners.getGpuMat();
corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0])); corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]), stream);
} }
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment