Commit 74f1162a authored by Anatoly Baksheev's avatar Anatoly Baksheev

tabs

parent 35d5a671
@@ -38,68 +38,68 @@
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__
#define __OPENCV_GPU_DATAMOV_UTILS_HPP__

#include "internal_shared.hpp"

namespace cv { namespace gpu { namespace device
{
    #if __CUDA_ARCH__ >= 200

        // for Fermi memory space is detected automatically
        template <typename T> struct ForceGlob
        {
            __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
        };

    #else // __CUDA_ARCH__ >= 200

        #if defined(_WIN64) || defined(__LP64__)
            // 64-bit register modifier for inlined asm
            #define _OPENCV_ASM_PTR_ "l"
        #else
            // 32-bit register modifier for inlined asm
            #define _OPENCV_ASM_PTR_ "r"
        #endif

        template<class T> struct ForceGlob;

        #define DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
            template <> struct ForceGlob<base_type> \
            { \
                __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
                { \
                    asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : _OPENCV_ASM_PTR_(ptr + offset)); \
                } \
            };

        #define DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
            template <> struct ForceGlob<base_type> \
            { \
                __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
                { \
                    asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : _OPENCV_ASM_PTR_(ptr + offset)); \
                } \
            };

        DEFINE_FORCE_GLOB_B(uchar,  u8)
        DEFINE_FORCE_GLOB_B(schar,  s8)
        DEFINE_FORCE_GLOB_B(char,   b8)
        DEFINE_FORCE_GLOB  (ushort, u16, h)
        DEFINE_FORCE_GLOB  (short,  s16, h)
        DEFINE_FORCE_GLOB  (uint,   u32, r)
        DEFINE_FORCE_GLOB  (int,    s32, r)
        DEFINE_FORCE_GLOB  (float,  f32, f)
        DEFINE_FORCE_GLOB  (double, f64, d)

        #undef DEFINE_FORCE_GLOB
        #undef DEFINE_FORCE_GLOB_B
        #undef _OPENCV_ASM_PTR_

    #endif // __CUDA_ARCH__ >= 200
}}}

#endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__
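
For context, a minimal sketch of how ForceGlob<T>::Load might be called from a kernel. The kernel name, data layout, and scaling operation below are illustrative assumptions and not part of this commit; only the ForceGlob interface comes from the header above. On __CUDA_ARCH__ >= 200 the call compiles to a plain indexed load, while on older architectures it emits the ld.global PTX instruction defined by the macros.

    // Hypothetical usage sketch, assuming this header is included as datamov_utils.hpp.
    #include "datamov_utils.hpp"

    using namespace cv::gpu::device;

    __global__ void scaleKernel(const float* src, float* dst, int n, float scale)
    {
        int idx = blockIdx.x * blockDim.x + threadIdx.x;
        if (idx < n)
        {
            float val;
            ForceGlob<float>::Load(src, idx, val); // force a global-memory load of src[idx]
            dst[idx] = val * scale;
        }
    }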