Commit 5c572ffa authored by Alexander Alekhin's avatar Alexander Alekhin

Merge pull request #9509 from borisfom:cuda9-shfl

parents cae467cd dde04d5d
......@@ -51,6 +51,11 @@
namespace cv { namespace cuda { namespace device
{
#if __CUDACC_VER_MAJOR__ >= 9
# define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z)
# define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
# define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z)
#endif
template <typename T>
__device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
{
......@@ -148,6 +153,10 @@ namespace cv { namespace cuda { namespace device
}
}}}
# undef __shfl
# undef __shfl_up
# undef __shfl_down
//! @endcond
#endif // OPENCV_CUDA_WARP_SHUFFLE_HPP
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment