Commit 7376c531 authored by Dan Moodie's avatar Dan Moodie

Fixed tabs in whitespace.

parent 12dcb155
...@@ -23,13 +23,11 @@ The following code will produce an iterator for a GpuMat ...@@ -23,13 +23,11 @@ The following code will produce an iterator for a GpuMat
@snippet samples/cpp/tutorial_code/gpu/gpu-thrust-interop/Thrust_interop.hpp begin_itr @snippet samples/cpp/tutorial_code/gpu/gpu-thrust-interop/Thrust_interop.hpp begin_itr
@snippet samples/cpp/tutorial_code/gpu/gpu-thrust-interop/Thrust_interop.hpp end_itr @snippet samples/cpp/tutorial_code/gpu/gpu-thrust-interop/Thrust_interop.hpp end_itr
Our goal is to have an iterator that will start at the beginning of the matrix, and increment correctly to access continuous matrix elements. This is trivial for a continuous row, but how about for a column Our goal is to have an iterator that will start at the beginning of the matrix, and increment correctly to access continuous matrix elements. This is trivial for a continuous row, but how about for a column of a pitched matrix? To do this we need the iterator to be aware of the matrix dimensions and step. This information is embedded in the step_functor.
of a pitched matrix? To do this we need the iterator to be aware of the matrix dimensions and step. This information is embedded in the step_functor.
@snippet samples/cpp/tutorial_code/gpu/gpu-thrust-interop/Thrust_interop.hpp step_functor @snippet samples/cpp/tutorial_code/gpu/gpu-thrust-interop/Thrust_interop.hpp step_functor
The step functor takes in an index value and returns the appropriate The step functor takes in an index value and returns the appropriate
offset from the beginning of the matrix. The counting iterator simply increments over the range of pixel elements. Combined into the transform_iterator we have an iterator that counts from 0 to M*N and correctly offset from the beginning of the matrix. The counting iterator simply increments over the range of pixel elements. Combined into the transform_iterator we have an iterator that counts from 0 to M*N and correctly
increments to account for the pitched memory of a GpuMat. Unfortunately this does not include any memory location information, for that we need a thrust::device_ptr. By combining a device pointer with the increments to account for the pitched memory of a GpuMat. Unfortunately this does not include any memory location information, for that we need a thrust::device_ptr. By combining a device pointer with the transform_iterator we can point thrust to the first element of our matrix and have it step accordingly.
transform_iterator we can point thrust to the first element of our matrix and have it step accordingly.
Fill a GpuMat with random numbers Fill a GpuMat with random numbers
---- ----
...@@ -47,13 +45,12 @@ Now we will populate our matrix with values between 0 and 10 with a thrust trans ...@@ -47,13 +45,12 @@ Now we will populate our matrix with values between 0 and 10 with a thrust trans
Sort a column of a GpuMat in place Sort a column of a GpuMat in place
---- ----
Let's fill matrix elements with random values and an index. Afterwards we will sort the random numbers and the indices. Let's fill matrix elements with random values and an index. Afterwards we will sort the random numbers and the indices.
@snippet samples/cpp/tutorial_code/gpu/gpu-thrust-interop/main.cu sort @snippet samples/cpp/tutorial_code/gpu/gpu-thrust-interop/main.cu sort
Copy values greater than 0 to a new gpu matrix while using streams Copy values greater than 0 to a new gpu matrix while using streams
---- ----
In this example we're going to see how cv::cuda::Streams can be used with thrust. Unfortunately this specific example uses functions that must return In this example we're going to see how cv::cuda::Streams can be used with thrust. Unfortunately this specific example uses functions that must return results to the CPU so it isn't the optimal use of streams.
results to the CPU so it isn't the optimal use of streams.
@snippet samples/cpp/tutorial_code/gpu/gpu-thrust-interop/main.cu copy_greater @snippet samples/cpp/tutorial_code/gpu/gpu-thrust-interop/main.cu copy_greater
......
...@@ -7,63 +7,63 @@ ...@@ -7,63 +7,63 @@
#include <thrust/device_ptr.h> #include <thrust/device_ptr.h>
/* /*
@Brief step_functor is an object to correctly step a thrust iterator according to the stride of a matrix @Brief step_functor is an object to correctly step a thrust iterator according to the stride of a matrix
*/ */
//! [step_functor] //! [step_functor]
/*
  step_functor converts a linear element index into an offset, counted in elements of T,
  that accounts for the row step of a matrix.
    columns  - number of columns in one row
    step     - distance between the starts of two consecutive rows, in elements of T
    channels - number of elements of T stored per column
*/
template<typename T> struct step_functor : public thrust::unary_function<int, int>
{
    int columns;
    int step;
    int channels;

    __host__ __device__ step_functor(int columns_, int step_, int channels_ = 1)
        : columns(columns_), step(step_), channels(channels_)
    {
    }

    // Host only: read the layout from a GpuMat. The depth of the matrix must match T.
    __host__ step_functor(cv::cuda::GpuMat& mat)
    {
        CV_Assert(mat.depth() == cv::DataType<T>::depth);
        channels = mat.channels();
        // mat.step is divided by sizeof(T) so that the stored step is expressed in elements of T
        step = static_cast<int>(mat.step / sizeof(T));
        columns = mat.cols;
    }

    // x is the linear index of the element; the result is its offset from the first element.
    __host__ __device__
    int operator()(int x) const
    {
        const int row = x / columns;
        const int col = x % columns;
        return row * step + col * channels;
    }
};
//! [step_functor] //! [step_functor]
//! [begin_itr] //! [begin_itr]
/* /*
@Brief GpuMatBeginItr returns a thrust compatible iterator to the beginning of a GPU mat's memory. @Brief GpuMatBeginItr returns a thrust compatible iterator to the beginning of a GPU mat's memory.
@Param mat is the input matrix @Param mat is the input matrix
@Param channel is the channel of the matrix that the iterator is accessing. If set to -1, the iterator will access every element in sequential order @Param channel is the channel of the matrix that the iterator is accessing. If set to -1, the iterator will access every element in sequential order
*/ */
template<typename T> template<typename T>
thrust::permutation_iterator<thrust::device_ptr<T>, thrust::transform_iterator<step_functor<T>, thrust::counting_iterator<int>>> GpuMatBeginItr(cv::cuda::GpuMat mat, int channel = 0) thrust::permutation_iterator<thrust::device_ptr<T>, thrust::transform_iterator<step_functor<T>, thrust::counting_iterator<int>>> GpuMatBeginItr(cv::cuda::GpuMat mat, int channel = 0)
{ {
if (channel == -1) if (channel == -1)
mat = mat.reshape(1); mat = mat.reshape(1);
CV_Assert(mat.depth() == cv::DataType<T>::depth); CV_Assert(mat.depth() == cv::DataType<T>::depth);
CV_Assert(channel < mat.channels()); CV_Assert(channel < mat.channels());
return thrust::make_permutation_iterator(thrust::device_pointer_cast(mat.ptr<T>(0) + channel), return thrust::make_permutation_iterator(thrust::device_pointer_cast(mat.ptr<T>(0) + channel),
thrust::make_transform_iterator(thrust::make_counting_iterator(0), step_functor<T>(mat.cols, mat.step / sizeof(T), mat.channels()))); thrust::make_transform_iterator(thrust::make_counting_iterator(0), step_functor<T>(mat.cols, mat.step / sizeof(T), mat.channels())));
} }
//! [begin_itr] //! [begin_itr]
//! [end_itr] //! [end_itr]
/* /*
@Brief GpuMatEndItr returns a thrust compatible iterator to the end of a GPU mat's memory. @Brief GpuMatEndItr returns a thrust compatible iterator to the end of a GPU mat's memory.
@Param mat is the input matrix @Param mat is the input matrix
@Param channel is the channel of the matrix that the iterator is accessing. If set to -1, the iterator will access every element in sequential order @Param channel is the channel of the matrix that the iterator is accessing. If set to -1, the iterator will access every element in sequential order
*/ */
template<typename T> template<typename T>
thrust::permutation_iterator<thrust::device_ptr<T>, thrust::transform_iterator<step_functor<T>, thrust::counting_iterator<int>>> GpuMatEndItr(cv::cuda::GpuMat mat, int channel = 0) thrust::permutation_iterator<thrust::device_ptr<T>, thrust::transform_iterator<step_functor<T>, thrust::counting_iterator<int>>> GpuMatEndItr(cv::cuda::GpuMat mat, int channel = 0)
{ {
if (channel == -1) if (channel == -1)
mat = mat.reshape(1); mat = mat.reshape(1);
CV_Assert(mat.depth() == cv::DataType<T>::depth); CV_Assert(mat.depth() == cv::DataType<T>::depth);
CV_Assert(channel < mat.channels()); CV_Assert(channel < mat.channels());
return thrust::make_permutation_iterator(thrust::device_pointer_cast(mat.ptr<T>(0) + channel), return thrust::make_permutation_iterator(thrust::device_pointer_cast(mat.ptr<T>(0) + channel),
thrust::make_transform_iterator(thrust::make_counting_iterator(mat.rows*mat.cols), step_functor<T>(mat.cols, mat.step / sizeof(T), mat.channels()))); thrust::make_transform_iterator(thrust::make_counting_iterator(mat.rows*mat.cols), step_functor<T>(mat.cols, mat.step / sizeof(T), mat.channels())));
} }
//! [end_itr] //! [end_itr]
......
...@@ -8,20 +8,19 @@ ...@@ -8,20 +8,19 @@
//! [prg] //! [prg]
struct prg struct prg
{ {
float a, b; float a, b;
__host__ __device__ __host__ __device__
prg(float _a = 0.f, float _b = 1.f) : a(_a), b(_b) {}; prg(float _a = 0.f, float _b = 1.f) : a(_a), b(_b) {};
__host__ __device__ __host__ __device__
float operator()(const unsigned int n) const float operator()(const unsigned int n) const
{ {
thrust::default_random_engine rng; thrust::default_random_engine rng;
thrust::uniform_real_distribution<float> dist(a, b); thrust::uniform_real_distribution<float> dist(a, b);
rng.discard(n); rng.discard(n);
return dist(rng);
return dist(rng); }
}
}; };
//! [prg] //! [prg]
...@@ -29,83 +28,83 @@ struct prg ...@@ -29,83 +28,83 @@ struct prg
//! [pred_greater] //! [pred_greater]
template<typename T> struct pred_greater template<typename T> struct pred_greater
{ {
T value; T value;
__host__ __device__ pred_greater(T value_) : value(value_){} __host__ __device__ pred_greater(T value_) : value(value_){}
__host__ __device__ bool operator()(const T& val) const __host__ __device__ bool operator()(const T& val) const
{ {
return val > value; return val > value;
} }
}; };
//! [pred_greater] //! [pred_greater]
int main(void) int main(void)
{ {
// Generate a 2 channel row matrix with 100 elements. Set the first channel to be the element index, and the second to be a randomly // Generate a 2 channel row matrix with 100 elements. Set the first channel to be the element index, and the second to be a randomly
// generated value. Sort by the randomly generated value while maintaining index association. // generated value. Sort by the randomly generated value while maintaining index association.
//! [sort] //! [sort]
{ {
cv::cuda::GpuMat d_data(1, 100, CV_32SC2); cv::cuda::GpuMat d_data(1, 100, CV_32SC2);
// Thrust compatible begin and end iterators to channel 1 of this matrix // Thrust compatible begin and end iterators to channel 1 of this matrix
auto keyBegin = GpuMatBeginItr<int>(d_data, 1); auto keyBegin = GpuMatBeginItr<int>(d_data, 1);
auto keyEnd = GpuMatEndItr<int>(d_data, 1); auto keyEnd = GpuMatEndItr<int>(d_data, 1);
// Thrust compatible begin and end iterators to channel 0 of this matrix // Thrust compatible begin and end iterators to channel 0 of this matrix
auto idxBegin = GpuMatBeginItr<int>(d_data, 0); auto idxBegin = GpuMatBeginItr<int>(d_data, 0);
auto idxEnd = GpuMatEndItr<int>(d_data, 0); auto idxEnd = GpuMatEndItr<int>(d_data, 0);
// Fill the index channel with a sequence of numbers from 0 to 100 // Fill the index channel with a sequence of numbers from 0 to 100
thrust::sequence(idxBegin, idxEnd); thrust::sequence(idxBegin, idxEnd);
// Fill the key channel with random numbers between 0 and 10. A counting iterator is used here to give an integer value for each location as an input to prg::operator() // Fill the key channel with random numbers between 0 and 10. A counting iterator is used here to give an integer value for each location as an input to prg::operator()
thrust::transform(thrust::make_counting_iterator(0), thrust::make_counting_iterator(d_data.cols), keyBegin, prg(0, 10)); thrust::transform(thrust::make_counting_iterator(0), thrust::make_counting_iterator(d_data.cols), keyBegin, prg(0, 10));
// Sort the key channel and index channel such that the keys and indecies stay together // Sort the key channel and index channel such that the keys and indecies stay together
thrust::sort_by_key(keyBegin, keyEnd, idxBegin); thrust::sort_by_key(keyBegin, keyEnd, idxBegin);
cv::Mat h_idx(d_data);
}
//! [sort]
cv::Mat h_idx(d_data); // Randomly fill a row matrix with 100 elements between -1 and 1
} //! [random]
//! [sort] {
cv::cuda::GpuMat d_value(1, 100, CV_32F);
auto valueBegin = GpuMatBeginItr<float>(d_value);
auto valueEnd = GpuMatEndItr<float>(d_value);
thrust::transform(thrust::make_counting_iterator(0), thrust::make_counting_iterator(d_value.cols), valueBegin, prg(-1, 1));
// Randomly fill a row matrix with 100 elements between -1 and 1 cv::Mat h_value(d_value);
//! [random] }
{ //! [random]
cv::cuda::GpuMat d_value(1, 100, CV_32F);
auto valueBegin = GpuMatBeginItr<float>(d_value);
auto valueEnd = GpuMatEndItr<float>(d_value);
thrust::transform(thrust::make_counting_iterator(0), thrust::make_counting_iterator(d_value.cols), valueBegin, prg(-1, 1));
cv::Mat h_value(d_value); // OpenCV has count non zero, but what if you want to count a specific value?
} //! [count_value]
//! [random] {
cv::cuda::GpuMat d_value(1, 100, CV_32S);
d_value.setTo(cv::Scalar(0));
d_value.colRange(10, 50).setTo(cv::Scalar(15));
auto count = thrust::count(GpuMatBeginItr<int>(d_value), GpuMatEndItr<int>(d_value), 15);
std::cout << count << std::endl;
}
//! [count_value]
// OpenCV has count non zero, but what if you want to count a specific value? // Randomly fill an array then copy only values greater than 0. Perform these tasks on a stream.
//! [count_value] //! [copy_greater]
{ {
cv::cuda::GpuMat d_value(1, 100, CV_32S); cv::cuda::GpuMat d_value(1, 100, CV_32F);
d_value.setTo(cv::Scalar(0)); auto valueBegin = GpuMatBeginItr<float>(d_value);
d_value.colRange(10, 50).setTo(cv::Scalar(15)); auto valueEnd = GpuMatEndItr<float>(d_value);
auto count = thrust::count(GpuMatBeginItr<int>(d_value), GpuMatEndItr<int>(d_value), 15); cv::cuda::Stream stream;
std::cout << count << std::endl; //! [random_gen_stream]
} // Same as the random generation code from before except now the transformation is being performed on a stream
//! [count_value] thrust::transform(thrust::system::cuda::par.on(cv::cuda::StreamAccessor::getStream(stream)), thrust::make_counting_iterator(0), thrust::make_counting_iterator(d_value.cols), valueBegin, prg(-1, 1));
//! [random_gen_stream]
// Count the number of values we are going to copy
int count = thrust::count_if(thrust::system::cuda::par.on(cv::cuda::StreamAccessor::getStream(stream)), valueBegin, valueEnd, pred_greater<float>(0.0));
// Allocate a destination for copied values
cv::cuda::GpuMat d_valueGreater(1, count, CV_32F);
// Copy values that satisfy the predicate.
thrust::copy_if(thrust::system::cuda::par.on(cv::cuda::StreamAccessor::getStream(stream)), valueBegin, valueEnd, GpuMatBeginItr<float>(d_valueGreater), pred_greater<float>(0.0));
cv::Mat h_greater(d_valueGreater);
}
//! [copy_greater]
// Randomly fill an array then copy only values greater than 0. Perform these tasks on a stream. return 0;
//! [copy_greater]
{
cv::cuda::GpuMat d_value(1, 100, CV_32F);
auto valueBegin = GpuMatBeginItr<float>(d_value);
auto valueEnd = GpuMatEndItr<float>(d_value);
cv::cuda::Stream stream;
//! [random_gen_stream]
// Same as the random generation code from before except now the transformation is being performed on a stream
thrust::transform(thrust::system::cuda::par.on(cv::cuda::StreamAccessor::getStream(stream)), thrust::make_counting_iterator(0), thrust::make_counting_iterator(d_value.cols), valueBegin, prg(-1, 1));
//! [random_gen_stream]
// Count the number of values we are going to copy
int count = thrust::count_if(thrust::system::cuda::par.on(cv::cuda::StreamAccessor::getStream(stream)), valueBegin, valueEnd, pred_greater<float>(0.0));
// Allocate a destination for copied values
cv::cuda::GpuMat d_valueGreater(1, count, CV_32F);
// Copy values that satisfy the predicate.
thrust::copy_if(thrust::system::cuda::par.on(cv::cuda::StreamAccessor::getStream(stream)), valueBegin, valueEnd, GpuMatBeginItr<float>(d_valueGreater), pred_greater<float>(0.0));
cv::Mat h_greater(d_valueGreater);
}
//! [copy_greater]
return 0;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment