Commit 6620c5c9 authored by Marina Kolpakova

Merge branch 'work'

parent 18918a5e
......@@ -44,25 +44,25 @@
#ifndef OPENCV_GPU_WARP_REDUCE_HPP__
#define OPENCV_GPU_WARP_REDUCE_HPP__
namespace cv { namespace gpu { namespace device
{
    /**
     * Warp-wide sum over a 32-element window of `ptr`.
     *
     * The window starts at `tid - lane`, i.e. `tid` with its low five bits
     * cleared, and spans the following 32 elements. Lanes 0..15 fold the
     * window in five halving steps (offsets 16, 8, 4, 2, 1), writing the
     * running partial sum back into their own slot after every step.
     *
     * @param ptr  Buffer holding one element per thread. It is read and
     *             written in place: slots [tid - lane, tid - lane + 15] are
     *             overwritten with partial sums. Presumably a shared-memory
     *             array -- confirm against callers.
     * @param tid  Index of the calling thread into `ptr`; defaults to
     *             threadIdx.x.
     * @return     The value stored in the window's first slot
     *             (`ptr[tid - lane]`) after the reduction, returned to every
     *             calling lane.
     *
     * NOTE(review): no barrier is issued between the steps or before the final
     * read. The only ordering aid is the `volatile` qualifier, so the result is
     * the full 32-element sum only if all lanes of the warp execute these
     * statements in lockstep. On architectures with independent thread
     * scheduling that assumption needs to be re-validated.
     */
    template <class T>
    __device__ __forceinline__ T warp_reduce(volatile T *ptr, const unsigned int tid = threadIdx.x)
    {
        const unsigned int lane = tid & 31; // index of thread in warp (0..31)

        // Only the lower half of the warp accumulates; the upper half's slots
        // are consumed by the first step and never written.
        if (lane < 16)
        {
            T partial = ptr[tid];

            // Each step stores the partial back so neighbouring lanes can pick
            // it up in the next step.
            ptr[tid] = partial = partial + ptr[tid + 16];
            ptr[tid] = partial = partial + ptr[tid +  8];
            ptr[tid] = partial = partial + ptr[tid +  4];
            ptr[tid] = partial = partial + ptr[tid +  2];
            ptr[tid] = partial = partial + ptr[tid +  1];
        }

        // Slot of lane 0 in this window carries the final result.
        return ptr[tid - lane];
    }
}}} // namespace cv { namespace gpu { namespace device {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment