Commit 50f28c9e authored by Vladislav Vinogradov's avatar Vladislav Vinogradov

added Warp::reduce function

parent 36e42084
......@@ -97,6 +97,25 @@ namespace cv { namespace gpu { namespace device
return out;
}
template <class T, class BinOp>
static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op)
{
const unsigned int lane = laneId();
if (lane < 16)
{
T partial = ptr[lane];
ptr[lane] = partial = op(partial, ptr[lane + 16]);
ptr[lane] = partial = op(partial, ptr[lane + 8]);
ptr[lane] = partial = op(partial, ptr[lane + 4]);
ptr[lane] = partial = op(partial, ptr[lane + 2]);
ptr[lane] = partial = op(partial, ptr[lane + 1]);
}
return *ptr;
}
template<typename OutIt, typename T>
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
{
......@@ -109,4 +128,4 @@ namespace cv { namespace gpu { namespace device
};
}}} // namespace cv { namespace gpu { namespace device
#endif /* __OPENCV_GPU_DEVICE_WARP_HPP__ */
\ No newline at end of file
#endif /* __OPENCV_GPU_DEVICE_WARP_HPP__ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment