Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
1c1a61dd
Commit
1c1a61dd
authored
Jun 14, 2011
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added __forceinline__ to device functions
fixed BFM warning ("cannot tell what pointer points to")
parent
79f3260b
Hide whitespace changes
Inline
Side-by-side
Showing
21 changed files
with
982 additions
and
915 deletions
+982
-915
devmem2d.hpp
modules/gpu/include/opencv2/gpu/devmem2d.hpp
+1
-1
brute_force_matcher.cu
modules/gpu/src/cuda/brute_force_matcher.cu
+48
-38
calib3d.cu
modules/gpu/src/cuda/calib3d.cu
+3
-3
color.cu
modules/gpu/src/cuda/color.cu
+44
-44
element_operations.cu
modules/gpu/src/cuda/element_operations.cu
+24
-24
filters.cu
modules/gpu/src/cuda/filters.cu
+2
-2
match_template.cu
modules/gpu/src/cuda/match_template.cu
+29
-29
mathfunc.cu
modules/gpu/src/cuda/mathfunc.cu
+6
-6
matrix_operations.cu
modules/gpu/src/cuda/matrix_operations.cu
+9
-9
matrix_reductions.cu
modules/gpu/src/cuda/matrix_reductions.cu
+7
-7
stereobm.cu
modules/gpu/src/cuda/stereobm.cu
+2
-2
stereobp.cu
modules/gpu/src/cuda/stereobp.cu
+6
-6
stereocsbp.cu
modules/gpu/src/cuda/stereocsbp.cu
+3
-3
surf.cu
modules/gpu/src/cuda/surf.cu
+3
-3
border_interpolate.hpp
modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
+200
-202
datamov_utils.hpp
modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp
+98
-32
dynamic_smem.hpp
modules/gpu/src/opencv2/gpu/device/dynamic_smem.hpp
+31
-35
limits_gpu.hpp
modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp
+164
-167
saturate_cast.hpp
modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp
+37
-37
transform.hpp
modules/gpu/src/opencv2/gpu/device/transform.hpp
+10
-10
vecmath.hpp
modules/gpu/src/opencv2/gpu/device/vecmath.hpp
+255
-255
No files found.
modules/gpu/include/opencv2/gpu/devmem2d.hpp
View file @
1c1a61dd
...
...
@@ -56,7 +56,7 @@ namespace cv
// It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
#if defined(__CUDACC__)
#define __CV_GPU_HOST_DEVICE__ __host__ __device__
#define __CV_GPU_HOST_DEVICE__ __host__ __device__
__forceinline__
#else
#define __CV_GPU_HOST_DEVICE__
#endif
...
...
modules/gpu/src/cuda/brute_force_matcher.cu
View file @
1c1a61dd
...
...
@@ -42,6 +42,7 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/limits_gpu.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
...
...
@@ -60,7 +61,7 @@ namespace cv { namespace gpu { namespace bfmatcher
public:
explicit SingleMask(const PtrStep& mask_) : mask(mask_) {}
__device__ bool operator()(int queryIdx, int trainIdx) const
__device__
__forceinline__
bool operator()(int queryIdx, int trainIdx) const
{
return mask.ptr(queryIdx)[trainIdx] != 0;
}
...
...
@@ -74,14 +75,15 @@ namespace cv { namespace gpu { namespace bfmatcher
public:
explicit MaskCollection(PtrStep* maskCollection_) : maskCollection(maskCollection_) {}
__device__ void nextMask()
__device__
__forceinline__
void nextMask()
{
curMask = *maskCollection++;
}
__device__ bool operator()(int queryIdx, int trainIdx) const
{
return curMask.data == 0 || curMask.ptr(queryIdx)[trainIdx] != 0;
__device__ __forceinline__ bool operator()(int queryIdx, int trainIdx) const
{
uchar val;
return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(queryIdx), trainIdx, val), (val != 0));
}
private:
...
...
@@ -92,10 +94,10 @@ namespace cv { namespace gpu { namespace bfmatcher
class WithOutMask
{
public:
__device__ void nextMask()
__device__
__forceinline__
void nextMask()
{
}
__device__ bool operator()(int queryIdx, int trainIdx) const
__device__
__forceinline__
bool operator()(int queryIdx, int trainIdx) const
{
return true;
}
...
...
@@ -132,19 +134,19 @@ namespace cv { namespace gpu { namespace bfmatcher
typedef int ResultType;
typedef int ValueType;
__device__ L1Dist() : mySum(0) {}
__device__
__forceinline__
L1Dist() : mySum(0) {}
__device__ void reduceIter(int val1, int val2)
__device__
__forceinline__
void reduceIter(int val1, int val2)
{
mySum = __sad(val1, val2, mySum);
}
template <int BLOCK_DIM_X> __device__ void reduceAll(int* sdiff_row)
template <int BLOCK_DIM_X> __device__
__forceinline__
void reduceAll(int* sdiff_row)
{
SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
}
__device__ operator int() const
__device__
__forceinline__
operator int() const
{
return mySum;
}
...
...
@@ -158,19 +160,19 @@ namespace cv { namespace gpu { namespace bfmatcher
typedef float ResultType;
typedef float ValueType;
__device__ L1Dist() : mySum(0.0f) {}
__device__
__forceinline__
L1Dist() : mySum(0.0f) {}
__device__ void reduceIter(float val1, float val2)
__device__
__forceinline__
void reduceIter(float val1, float val2)
{
mySum += fabs(val1 - val2);
}
template <int BLOCK_DIM_X> __device__ void reduceAll(float* sdiff_row)
template <int BLOCK_DIM_X> __device__
__forceinline__
void reduceAll(float* sdiff_row)
{
SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
}
__device__ operator float() const
__device__
__forceinline__
operator float() const
{
return mySum;
}
...
...
@@ -185,20 +187,20 @@ namespace cv { namespace gpu { namespace bfmatcher
typedef float ResultType;
typedef float ValueType;
__device__ L2Dist() : mySum(0.0f) {}
__device__
__forceinline__
L2Dist() : mySum(0.0f) {}
__device__ void reduceIter(float val1, float val2)
__device__
__forceinline__
void reduceIter(float val1, float val2)
{
float reg = val1 - val2;
mySum += reg * reg;
}
template <int BLOCK_DIM_X> __device__ void reduceAll(float* sdiff_row)
template <int BLOCK_DIM_X> __device__
__forceinline__
void reduceAll(float* sdiff_row)
{
SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
}
__device__ operator float() const
__device__
__forceinline__
operator float() const
{
return sqrtf(mySum);
}
...
...
@@ -213,19 +215,19 @@ namespace cv { namespace gpu { namespace bfmatcher
typedef int ResultType;
typedef int ValueType;
__device__ HammingDist() : mySum(0) {}
__device__
__forceinline__
HammingDist() : mySum(0) {}
__device__ void reduceIter(int val1, int val2)
__device__
__forceinline__
void reduceIter(int val1, int val2)
{
mySum += __popc(val1 ^ val2);
}
template <int BLOCK_DIM_X> __device__ void reduceAll(int* sdiff_row)
template <int BLOCK_DIM_X> __device__
__forceinline__
void reduceAll(int* sdiff_row)
{
SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
}
__device__ operator int() const
__device__
__forceinline__
operator int() const
{
return mySum;
}
...
...
@@ -241,7 +243,11 @@ namespace cv { namespace gpu { namespace bfmatcher
__device__ void reduceDescDiff(const T* queryDescs, const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row)
{
for (int i = threadIdx.x; i < desc_len; i += BLOCK_DIM_X)
dist.reduceIter(queryDescs[i], trainDescs[i]);
{
T trainVal;
ForceGlob<T>::Load(trainDescs, i, trainVal);
dist.reduceIter(queryDescs[i], trainVal);
}
dist.reduceAll<BLOCK_DIM_X>(sdiff_row);
}
...
...
@@ -282,7 +288,9 @@ namespace cv { namespace gpu { namespace bfmatcher
{
if (ind < desc_len)
{
dist.reduceIter(*queryVals, trainDescs[ind]);
T trainVal;
ForceGlob<T>::Load(trainDescs, ind, trainVal);
dist.reduceIter(*queryVals, trainVal);
++queryVals;
...
...
@@ -293,7 +301,9 @@ namespace cv { namespace gpu { namespace bfmatcher
template <typename Dist, typename T>
static __device__ void calcWithoutCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, Dist& dist)
{
dist.reduceIter(*queryVals, *trainDescs);
T trainVal;
ForceGlob<T>::Load(trainDescs, 0, trainVal);
dist.reduceIter(*queryVals, trainVal);
++queryVals;
trainDescs += blockDim.x;
...
...
@@ -304,13 +314,13 @@ namespace cv { namespace gpu { namespace bfmatcher
template <> struct UnrollDescDiff<0>
{
template <typename Dist, typename T>
static __device__ void calcCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len,
static __device__
__forceinline__
void calcCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len,
Dist& dist, int ind)
{
}
template <typename Dist, typename T>
static __device__ void calcWithoutCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, Dist& dist)
static __device__
__forceinline__
void calcWithoutCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, Dist& dist)
{
}
};
...
...
@@ -320,7 +330,7 @@ namespace cv { namespace gpu { namespace bfmatcher
struct DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, false>
{
template <typename Dist, typename T>
static __device__ void calc(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist)
static __device__
__forceinline__
void calc(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist)
{
UnrollDescDiff<MAX_DESCRIPTORS_LEN / BLOCK_DIM_X>::calcCheck(queryVals, trainDescs, desc_len, dist, threadIdx.x);
}
...
...
@@ -329,14 +339,14 @@ namespace cv { namespace gpu { namespace bfmatcher
struct DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, true>
{
template <typename Dist, typename T>
static __device__ void calc(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist)
static __device__
__forceinline__
void calc(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist)
{
UnrollDescDiff<MAX_DESCRIPTORS_LEN / BLOCK_DIM_X>::calcWithoutCheck(queryVals, trainDescs + threadIdx.x, dist);
}
};
template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN, typename Dist, typename T>
__device__ void reduceDescDiffCached(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row)
__device__
__forceinline__
void reduceDescDiffCached(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row)
{
DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN>::calc(queryVals, trainDescs, desc_len, dist);
...
...
@@ -419,13 +429,13 @@ namespace cv { namespace gpu { namespace bfmatcher
class ReduceDescCalculatorSimple
{
public:
__device__ void prepare(const T* queryDescs_, int, void*)
__device__
__forceinline__
void prepare(const T* queryDescs_, int, void*)
{
queryDescs = queryDescs_;
}
template <typename Dist>
__device__ void calc(const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row) const
__device__
__forceinline__
void calc(const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row) const
{
reduceDescDiff<BLOCK_DIM_X>(queryDescs, trainDescs, desc_len, dist, sdiff_row);
}
...
...
@@ -438,13 +448,13 @@ namespace cv { namespace gpu { namespace bfmatcher
class ReduceDescCalculatorCached
{
public:
__device__ void prepare(const T* queryDescs, int desc_len, U* smem)
__device__
__forceinline__
void prepare(const T* queryDescs, int desc_len, U* smem)
{
loadDescsVals<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN>(queryDescs, desc_len, queryVals, smem);
}
template <typename Dist>
__device__ void calc(const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row) const
__device__
__forceinline__
void calc(const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row) const
{
reduceDescDiffCached<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN>(queryVals, trainDescs, desc_len, dist, sdiff_row);
}
...
...
@@ -496,13 +506,13 @@ namespace cv { namespace gpu { namespace bfmatcher
}
template <typename Dist, typename ReduceDescCalculator, typename Mask>
__device__ void loop(int queryIdx, Mask& m, const ReduceDescCalculator& reduceDescCalc,
__device__
__forceinline__
void loop(int queryIdx, Mask& m, const ReduceDescCalculator& reduceDescCalc,
typename Dist::ResultType& myMin, int& myBestTrainIdx, int& myBestImgIdx, typename Dist::ResultType* sdiff_row) const
{
matchDescs<Dist>(queryIdx, 0, trainDescs, m, reduceDescCalc, myMin, myBestTrainIdx, myBestImgIdx, sdiff_row);
}
__device__ int desc_len() const
__device__
__forceinline__
int desc_len() const
{
return trainDescs.cols;
}
...
...
@@ -532,7 +542,7 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}
__device__ int desc_len() const
__device__
__forceinline__
int desc_len() const
{
return desclen;
}
...
...
modules/gpu/src/cuda/calib3d.cu
View file @
1c1a61dd
...
...
@@ -56,7 +56,7 @@ namespace cv { namespace gpu
struct TransformOp
{
__device__ float3 operator()(float3 p) const
__device__
__forceinline__
float3 operator()(float3 p) const
{
return make_float3(
crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x,
...
...
@@ -89,7 +89,7 @@ namespace cv { namespace gpu
struct ProjectOp
{
__device__ float2 operator()(float3 p) const
__device__
__forceinline__
float2 operator()(float3 p) const
{
// Rotate and translate in 3D
float3 t = make_float3(
...
...
@@ -128,7 +128,7 @@ namespace cv { namespace gpu
return SOLVE_PNP_RANSAC_MAX_NUM_ITERS;
}
__device__ float sqr(float x)
__device__
__forceinline__
float sqr(float x)
{
return x * x;
}
...
...
modules/gpu/src/cuda/color.cu
View file @
1c1a61dd
...
...
@@ -59,38 +59,38 @@ namespace cv { namespace gpu { namespace color
template<> struct ColorChannel<uchar>
{
typedef float worktype_f;
static __device__ uchar max() { return UCHAR_MAX; }
static __device__ uchar half() { return (uchar)(max()/2 + 1); }
static __device__
__forceinline__
uchar max() { return UCHAR_MAX; }
static __device__
__forceinline__
uchar half() { return (uchar)(max()/2 + 1); }
};
template<> struct ColorChannel<ushort>
{
typedef float worktype_f;
static __device__ ushort max() { return USHRT_MAX; }
static __device__ ushort half() { return (ushort)(max()/2 + 1); }
static __device__
__forceinline__
ushort max() { return USHRT_MAX; }
static __device__
__forceinline__
ushort half() { return (ushort)(max()/2 + 1); }
};
template<> struct ColorChannel<float>
{
typedef float worktype_f;
static __device__ float max() { return 1.f; }
static __device__ float half() { return 0.5f; }
static __device__
__forceinline__
float max() { return 1.f; }
static __device__
__forceinline__
float half() { return 0.5f; }
};
template <typename T>
__device__ void setAlpha(typename TypeVec<T, 3>::vec_t& vec, T val)
__device__
__forceinline__
void setAlpha(typename TypeVec<T, 3>::vec_t& vec, T val)
{
}
template <typename T>
__device__ void setAlpha(typename TypeVec<T, 4>::vec_t& vec, T val)
__device__
__forceinline__
void setAlpha(typename TypeVec<T, 4>::vec_t& vec, T val)
{
vec.w = val;
}
template <typename T>
__device__ T getAlpha(const typename TypeVec<T, 3>::vec_t& vec)
__device__
__forceinline__
T getAlpha(const typename TypeVec<T, 3>::vec_t& vec)
{
return ColorChannel<T>::max();
}
template <typename T>
__device__ T getAlpha(const typename TypeVec<T, 4>::vec_t& vec)
__device__
__forceinline__
T getAlpha(const typename TypeVec<T, 4>::vec_t& vec)
{
return vec.w;
}
...
...
@@ -114,7 +114,7 @@ namespace cv { namespace gpu { namespace color
explicit RGB2RGB(int bidx) : bidx(bidx) {}
__device__ dst_t operator()(const src_t& src) const
__device__
__forceinline__
dst_t operator()(const src_t& src) const
{
dst_t dst;
...
...
@@ -179,7 +179,7 @@ namespace cv { namespace gpu { namespace color
template <> struct RGB5x52RGBConverter<5>
{
template <typename D>
static __device__ void cvt(uint src, D& dst, int bidx)
static __device__
__forceinline__
void cvt(uint src, D& dst, int bidx)
{
(&dst.x)[bidx] = (uchar)(src << 3);
dst.y = (uchar)((src >> 2) & ~7);
...
...
@@ -190,7 +190,7 @@ namespace cv { namespace gpu { namespace color
template <> struct RGB5x52RGBConverter<6>
{
template <typename D>
static __device__ void cvt(uint src, D& dst, int bidx)
static __device__
__forceinline__
void cvt(uint src, D& dst, int bidx)
{
(&dst.x)[bidx] = (uchar)(src << 3);
dst.y = (uchar)((src >> 3) & ~3);
...
...
@@ -206,7 +206,7 @@ namespace cv { namespace gpu { namespace color
explicit RGB5x52RGB(int bidx) : bidx(bidx) {}
__device__ dst_t operator()(ushort src) const
__device__
__forceinline__
dst_t operator()(ushort src) const
{
dst_t dst;
RGB5x52RGBConverter<GREEN_BITS>::cvt((uint)src, dst, bidx);
...
...
@@ -221,18 +221,18 @@ namespace cv { namespace gpu { namespace color
template<> struct RGB2RGB5x5Converter<6>
{
template <typename T>
static __device__ ushort cvt(const T& src, int bidx)
static __device__
__forceinline__
ushort cvt(const T& src, int bidx)
{
return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~3) << 3) | (((&src.x)[bidx^2] & ~7) << 8));
}
};
template<> struct RGB2RGB5x5Converter<5>
{
static __device__ ushort cvt(const uchar3& src, int bidx)
static __device__
__forceinline__
ushort cvt(const uchar3& src, int bidx)
{
return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~7) << 2) | (((&src.x)[bidx^2] & ~7) << 7));
}
static __device__ ushort cvt(const uchar4& src, int bidx)
static __device__
__forceinline__
ushort cvt(const uchar4& src, int bidx)
{
return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~7) << 2) | (((&src.x)[bidx^2] & ~7) << 7) | (src.w ? 0x8000 : 0));
}
...
...
@@ -245,7 +245,7 @@ namespace cv { namespace gpu { namespace color
explicit RGB2RGB5x5(int bidx) : bidx(bidx) {}
__device__ ushort operator()(const src_t& src)
__device__
__forceinline__
ushort operator()(const src_t& src)
{
return RGB2RGB5x5Converter<GREEN_BITS>::cvt(src, bidx);
}
...
...
@@ -299,7 +299,7 @@ namespace cv { namespace gpu { namespace color
typedef T src_t;
typedef typename TypeVec<T, DSTCN>::vec_t dst_t;
__device__ dst_t operator()(const T& src) const
__device__
__forceinline__
dst_t operator()(const T& src) const
{
dst_t dst;
...
...
@@ -313,14 +313,14 @@ namespace cv { namespace gpu { namespace color
template <int GREEN_BITS> struct Gray2RGB5x5Converter;
template<> struct Gray2RGB5x5Converter<6>
{
static __device__ ushort cvt(uint t)
static __device__
__forceinline__
ushort cvt(uint t)
{
return (ushort)((t >> 3) | ((t & ~3) << 3) | ((t & ~7) << 8));
}
};
template<> struct Gray2RGB5x5Converter<5>
{
static __device__ ushort cvt(uint t)
static __device__
__forceinline__
ushort cvt(uint t)
{
t >>= 3;
return (ushort)(t | (t << 5) | (t << 10));
...
...
@@ -332,7 +332,7 @@ namespace cv { namespace gpu { namespace color
typedef uchar src_t;
typedef ushort dst_t;
__device__ ushort operator()(uchar src) const
__device__
__forceinline__
ushort operator()(uchar src) const
{
return Gray2RGB5x5Converter<GREEN_BITS>::cvt((uint)src);
}
...
...
@@ -406,14 +406,14 @@ namespace cv { namespace gpu { namespace color
template <int GREEN_BITS> struct RGB5x52GrayConverter;
template<> struct RGB5x52GrayConverter<6>
{
static __device__ uchar cvt(uint t)
static __device__
__forceinline__
uchar cvt(uint t)
{
return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 3) & 0xfc) * G2Y + ((t >> 8) & 0xf8) * R2Y, yuv_shift);
}
};
template<> struct RGB5x52GrayConverter<5>
{
static __device__ uchar cvt(uint t)
static __device__
__forceinline__
uchar cvt(uint t)
{
return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 2) & 0xf8) * G2Y + ((t >> 7) & 0xf8) * R2Y, yuv_shift);
}
...
...
@@ -424,18 +424,18 @@ namespace cv { namespace gpu { namespace color
typedef ushort src_t;
typedef uchar dst_t;
__device__ uchar operator()(ushort src) const
__device__
__forceinline__
uchar operator()(ushort src) const
{
return RGB5x52GrayConverter<GREEN_BITS>::cvt((uint)src);
}
};
template <typename T>
__device__ T RGB2GrayConvert(const T* src, int bidx)
__device__
__forceinline__
T RGB2GrayConvert(const T* src, int bidx)
{
return (T)CV_DESCALE((unsigned)(src[bidx] * B2Y + src[1] * G2Y + src[bidx^2] * R2Y), yuv_shift);
}
__device__ float RGB2GrayConvert(const float* src, int bidx)
__device__
__forceinline__
float RGB2GrayConvert(const float* src, int bidx)
{
const float cr = 0.299f;
const float cg = 0.587f;
...
...
@@ -451,7 +451,7 @@ namespace cv { namespace gpu { namespace color
explicit RGB2Gray(int bidx) : bidx(bidx) {}
__device__ T operator()(const src_t& src)
__device__
__forceinline__
T operator()(const src_t& src)
{
return RGB2GrayConvert(&src.x, bidx);
}
...
...
@@ -515,7 +515,7 @@ namespace cv { namespace gpu { namespace color
__constant__ float cYCrCbCoeffs_f[5];
template <typename T, typename D>
__device__ void RGB2YCrCbConvert(const T* src, D& dst, int bidx)
__device__
__forceinline__
void RGB2YCrCbConvert(const T* src, D& dst, int bidx)
{
const int delta = ColorChannel<T>::half() * (1 << yuv_shift);
...
...
@@ -528,7 +528,7 @@ namespace cv { namespace gpu { namespace color
dst.z = saturate_cast<T>(Cb);
}
template <typename D>
static __device__ void RGB2YCrCbConvert(const float* src, D& dst, int bidx)
static __device__
__forceinline__
void RGB2YCrCbConvert(const float* src, D& dst, int bidx)
{
dst.x = src[0] * cYCrCbCoeffs_f[0] + src[1] * cYCrCbCoeffs_f[1] + src[2] * cYCrCbCoeffs_f[2];
dst.y = (src[bidx^2] - dst.x) * cYCrCbCoeffs_f[3] + ColorChannel<float>::half();
...
...
@@ -561,7 +561,7 @@ namespace cv { namespace gpu { namespace color
RGB2YCrCb(int bidx, const coeff_t coeffs[5]) : RGB2YCrCbBase<T>(coeffs), bidx(bidx) {}
__device__ dst_t operator()(const src_t& src) const
__device__
__forceinline__
dst_t operator()(const src_t& src) const
{
dst_t dst;
RGB2YCrCbConvert(&src.x, dst, bidx);
...
...
@@ -573,7 +573,7 @@ namespace cv { namespace gpu { namespace color
};
template <typename T, typename D>
__device__ void YCrCb2RGBConvert(const T& src, D* dst, int bidx)
__device__
__forceinline__
void YCrCb2RGBConvert(const T& src, D* dst, int bidx)
{
const int b = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * cYCrCbCoeffs_i[3], yuv_shift);
const int g = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * cYCrCbCoeffs_i[2] + (src.y - ColorChannel<D>::half()) * cYCrCbCoeffs_i[1], yuv_shift);
...
...
@@ -584,7 +584,7 @@ namespace cv { namespace gpu { namespace color
dst[bidx^2] = saturate_cast<D>(r);
}
template <typename T>
__device__ void YCrCb2RGBConvert(const T& src, float* dst, int bidx)
__device__
__forceinline__
void YCrCb2RGBConvert(const T& src, float* dst, int bidx)
{
dst[bidx] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[3];
dst[1] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[2] + (src.y - ColorChannel<float>::half()) * cYCrCbCoeffs_f[1];
...
...
@@ -617,7 +617,7 @@ namespace cv { namespace gpu { namespace color
YCrCb2RGB(int bidx, const coeff_t coeffs[4]) : YCrCb2RGBBase<T>(coeffs), bidx(bidx) {}
__device__ dst_t operator()(const src_t& src) const
__device__
__forceinline__
dst_t operator()(const src_t& src) const
{
dst_t dst;
...
...
@@ -725,14 +725,14 @@ namespace cv { namespace gpu { namespace color
__constant__ float cXYZ_D65f[9];
template <typename T, typename D>
__device__ void RGB2XYZConvert(const T* src, D& dst)
__device__
__forceinline__
void RGB2XYZConvert(const T* src, D& dst)
{
dst.x = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[0] + src[1] * cXYZ_D65i[1] + src[2] * cXYZ_D65i[2], xyz_shift));
dst.y = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[3] + src[1] * cXYZ_D65i[4] + src[2] * cXYZ_D65i[5], xyz_shift));
dst.z = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[6] + src[1] * cXYZ_D65i[7] + src[2] * cXYZ_D65i[8], xyz_shift));
}
template <typename D>
__device__ void RGB2XYZConvert(const float* src, D& dst)
__device__
__forceinline__
void RGB2XYZConvert(const float* src, D& dst)
{
dst.x = src[0] * cXYZ_D65f[0] + src[1] * cXYZ_D65f[1] + src[2] * cXYZ_D65f[2];
dst.y = src[0] * cXYZ_D65f[3] + src[1] * cXYZ_D65f[4] + src[2] * cXYZ_D65f[5];
...
...
@@ -765,7 +765,7 @@ namespace cv { namespace gpu { namespace color
explicit RGB2XYZ(const coeff_t coeffs[9]) : RGB2XYZBase<T>(coeffs) {}
__device__ dst_t operator()(const src_t& src) const
__device__
__forceinline__
dst_t operator()(const src_t& src) const
{
dst_t dst;
RGB2XYZConvert(&src.x, dst);
...
...
@@ -774,14 +774,14 @@ namespace cv { namespace gpu { namespace color
};
template <typename T, typename D>
__device__ void XYZ2RGBConvert(const T& src, D* dst)
__device__
__forceinline__
void XYZ2RGBConvert(const T& src, D* dst)
{
dst[0] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[0] + src.y * cXYZ_D65i[1] + src.z * cXYZ_D65i[2], xyz_shift));
dst[1] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[3] + src.y * cXYZ_D65i[4] + src.z * cXYZ_D65i[5], xyz_shift));
dst[2] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[6] + src.y * cXYZ_D65i[7] + src.z * cXYZ_D65i[8], xyz_shift));
}
template <typename T>
__device__ void XYZ2RGBConvert(const T& src, float* dst)
__device__
__forceinline__
void XYZ2RGBConvert(const T& src, float* dst)
{
dst[0] = src.x * cXYZ_D65f[0] + src.y * cXYZ_D65f[1] + src.z * cXYZ_D65f[2];
dst[1] = src.x * cXYZ_D65f[3] + src.y * cXYZ_D65f[4] + src.z * cXYZ_D65f[5];
...
...
@@ -814,7 +814,7 @@ namespace cv { namespace gpu { namespace color
explicit XYZ2RGB(const coeff_t coeffs[9]) : XYZ2RGBBase<T>(coeffs) {}
__device__ dst_t operator()(const src_t& src) const
__device__
__forceinline__
dst_t operator()(const src_t& src) const
{
dst_t dst;
XYZ2RGBConvert(src, &dst.x);
...
...
@@ -987,7 +987,7 @@ namespace cv { namespace gpu { namespace color
explicit RGB2HSV(int bidx) : bidx(bidx) {}
__device__ dst_t operator()(const src_t& src) const
__device__
__forceinline__
dst_t operator()(const src_t& src) const
{
dst_t dst;
RGB2HSVConvert<HR>(&src.x, dst, bidx);
...
...
@@ -1062,7 +1062,7 @@ namespace cv { namespace gpu { namespace color
explicit HSV2RGB(int bidx) : bidx(bidx) {}
__device__ dst_t operator()(const src_t& src) const
__device__
__forceinline__
dst_t operator()(const src_t& src) const
{
dst_t dst;
HSV2RGBConvert<HR>(src, &dst.x, bidx);
...
...
@@ -1214,7 +1214,7 @@ namespace cv { namespace gpu { namespace color
explicit RGB2HLS(int bidx) : bidx(bidx) {}
__device__ dst_t operator()(const src_t& src) const
__device__
__forceinline__
dst_t operator()(const src_t& src) const
{
dst_t dst;
RGB2HLSConvert<HR>(&src.x, dst, bidx);
...
...
@@ -1295,7 +1295,7 @@ namespace cv { namespace gpu { namespace color
explicit HLS2RGB(int bidx) : bidx(bidx) {}
__device__ dst_t operator()(const src_t& src) const
__device__
__forceinline__
dst_t operator()(const src_t& src) const
{
dst_t dst;
HLS2RGBConvert<HR>(src, &dst.x, bidx);
...
...
modules/gpu/src/cuda/element_operations.cu
View file @
1c1a61dd
...
...
@@ -57,7 +57,7 @@ namespace cv { namespace gpu { namespace mathfunc
template <typename T1, typename T2>
struct NotEqual
{
__device__ uchar operator()(const T1& src1, const T2& src2)
__device__
__forceinline__
uchar operator()(const T1& src1, const T2& src2)
{
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
}
...
...
@@ -91,7 +91,7 @@ namespace cv { namespace gpu { namespace mathfunc
template <typename T>
struct UnOp<T, UN_OP_NOT>
{
static __device__ T call(T v) { return ~v; }
static __device__
__forceinline__
T call(T v) { return ~v; }
};
...
...
@@ -199,20 +199,20 @@ namespace cv { namespace gpu { namespace mathfunc
template <typename T>
struct BinOp<T, BIN_OP_OR>
{
static __device__ T call(T a, T b) { return a | b; }
static __device__
__forceinline__
T call(T a, T b) { return a | b; }
};
template <typename T>
struct BinOp<T, BIN_OP_AND>
{
static __device__ T call(T a, T b) { return a & b; }
static __device__
__forceinline__
T call(T a, T b) { return a & b; }
};
template <typename T>
struct BinOp<T, BIN_OP_XOR>
{
static __device__ T call(T a, T b) { return a ^ b; }
static __device__
__forceinline__
T call(T a, T b) { return a ^ b; }
};
...
...
@@ -357,15 +357,15 @@ namespace cv { namespace gpu { namespace mathfunc
struct MinOp
{
template <typename T>
__device__ T operator()(T a, T b)
__device__
__forceinline__
T operator()(T a, T b)
{
return min(a, b);
}
__device__ float operator()(float a, float b)
__device__
__forceinline__
float operator()(float a, float b)
{
return fmin(a, b);
}
__device__ double operator()(double a, double b)
__device__
__forceinline__
double operator()(double a, double b)
{
return fmin(a, b);
}
...
...
@@ -374,15 +374,15 @@ namespace cv { namespace gpu { namespace mathfunc
struct MaxOp
{
template <typename T>
__device__ T operator()(T a, T b)
__device__
__forceinline__
T operator()(T a, T b)
{
return max(a, b);
}
__device__ float operator()(float a, float b)
__device__
__forceinline__
float operator()(float a, float b)
{
return fmax(a, b);
}
__device__ double operator()(double a, double b)
__device__
__forceinline__
double operator()(double a, double b)
{
return fmax(a, b);
}
...
...
@@ -394,7 +394,7 @@ namespace cv { namespace gpu { namespace mathfunc
explicit ScalarMinOp(T s_) : s(s_) {}
__device__ T operator()(T a)
__device__
__forceinline__
T operator()(T a)
{
return min(a, s);
}
...
...
@@ -405,7 +405,7 @@ namespace cv { namespace gpu { namespace mathfunc
explicit ScalarMinOp(float s_) : s(s_) {}
__device__ float operator()(float a)
__device__
__forceinline__
float operator()(float a)
{
return fmin(a, s);
}
...
...
@@ -416,7 +416,7 @@ namespace cv { namespace gpu { namespace mathfunc
explicit ScalarMinOp(double s_) : s(s_) {}
__device__ double operator()(double a)
__device__
__forceinline__
double operator()(double a)
{
return fmin(a, s);
}
...
...
@@ -428,7 +428,7 @@ namespace cv { namespace gpu { namespace mathfunc
explicit ScalarMaxOp(T s_) : s(s_) {}
__device__ T operator()(T a)
__device__
__forceinline__
T operator()(T a)
{
return max(a, s);
}
...
...
@@ -439,7 +439,7 @@ namespace cv { namespace gpu { namespace mathfunc
explicit ScalarMaxOp(float s_) : s(s_) {}
__device__ float operator()(float a)
__device__
__forceinline__
float operator()(float a)
{
return fmax(a, s);
}
...
...
@@ -450,7 +450,7 @@ namespace cv { namespace gpu { namespace mathfunc
explicit ScalarMaxOp(double s_) : s(s_) {}
__device__ double operator()(double a)
__device__
__forceinline__
double operator()(double a)
{
return fmax(a, s);
}
...
...
@@ -524,7 +524,7 @@ namespace cv { namespace gpu { namespace mathfunc
{
ThreshBinary(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
__device__ T operator()(const T& src) const
__device__
__forceinline__
T operator()(const T& src) const
{
return src > thresh ? maxVal : 0;
}
...
...
@@ -538,7 +538,7 @@ namespace cv { namespace gpu { namespace mathfunc
{
ThreshBinaryInv(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
__device__ T operator()(const T& src) const
__device__
__forceinline__
T operator()(const T& src) const
{
return src > thresh ? 0 : maxVal;
}
...
...
@@ -552,7 +552,7 @@ namespace cv { namespace gpu { namespace mathfunc
{
ThreshTrunc(T thresh_, T) : thresh(thresh_) {}
__device__ T operator()(const T& src) const
__device__
__forceinline__
T operator()(const T& src) const
{
return min(src, thresh);
}
...
...
@@ -564,7 +564,7 @@ namespace cv { namespace gpu { namespace mathfunc
{
ThreshTrunc(float thresh_, float) : thresh(thresh_) {}
__device__ float operator()(const float& src) const
__device__
__forceinline__
float operator()(const float& src) const
{
return fmin(src, thresh);
}
...
...
@@ -576,7 +576,7 @@ namespace cv { namespace gpu { namespace mathfunc
{
ThreshTrunc(double thresh_, double) : thresh(thresh_) {}
__device__ double operator()(const double& src) const
__device__
__forceinline__
double operator()(const double& src) const
{
return fmin(src, thresh);
}
...
...
@@ -590,7 +590,7 @@ namespace cv { namespace gpu { namespace mathfunc
public:
ThreshToZero(T thresh_, T) : thresh(thresh_) {}
__device__ T operator()(const T& src) const
__device__
__forceinline__
T operator()(const T& src) const
{
return src > thresh ? src : 0;
}
...
...
@@ -604,7 +604,7 @@ namespace cv { namespace gpu { namespace mathfunc
public:
ThreshToZeroInv(T thresh_, T) : thresh(thresh_) {}
__device__ T operator()(const T& src) const
__device__
__forceinline__
T operator()(const T& src) const
{
return src > thresh ? 0 : src;
}
...
...
modules/gpu/src/cuda/filters.cu
View file @
1c1a61dd
...
...
@@ -406,7 +406,7 @@ namespace bf_krnls
template <int channels>
struct DistRgbMax
{
static __device__ uchar calc(const uchar* a, const uchar* b)
static __device__
__forceinline__
uchar calc(const uchar* a, const uchar* b)
{
uchar x = abs(a[0] - b[0]);
uchar y = abs(a[1] - b[1]);
...
...
@@ -418,7 +418,7 @@ namespace bf_krnls
template <>
struct DistRgbMax<1>
{
static __device__ uchar calc(const uchar* a, const uchar* b)
static __device__
__forceinline__
uchar calc(const uchar* a, const uchar* b)
{
return abs(a[0] - b[0]);
}
...
...
modules/gpu/src/cuda/match_template.cu
View file @
1c1a61dd
...
...
@@ -48,35 +48,35 @@ using namespace cv::gpu::device;
namespace cv { namespace gpu { namespace imgproc {
__device__ float sum(float v) { return v; }
__device__ float sum(float2 v) { return v.x + v.y; }
__device__ float sum(float3 v) { return v.x + v.y + v.z; }
__device__ float sum(float4 v) { return v.x + v.y + v.z + v.w; }
__device__ float first(float v) { return v; }
__device__ float first(float2 v) { return v.x; }
__device__ float first(float3 v) { return v.x; }
__device__ float first(float4 v) { return v.x; }
__device__ float mul(float a, float b) { return a * b; }
__device__ float2 mul(float2 a, float2 b) { return make_float2(a.x * b.x, a.y * b.y); }
__device__ float3 mul(float3 a, float3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
__device__ float4 mul(float4 a, float4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
__device__ float mul(uchar a, uchar b) { return a * b; }
__device__ float2 mul(uchar2 a, uchar2 b) { return make_float2(a.x * b.x, a.y * b.y); }
__device__ float3 mul(uchar3 a, uchar3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
__device__ float4 mul(uchar4 a, uchar4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
__device__ float sub(float a, float b) { return a - b; }
__device__ float2 sub(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); }
__device__ float3 sub(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
__device__ float4 sub(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
__device__ float sub(uchar a, uchar b) { return a - b; }
__device__ float2 sub(uchar2 a, uchar2 b) { return make_float2(a.x - b.x, a.y - b.y); }
__device__ float3 sub(uchar3 a, uchar3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
__device__ float4 sub(uchar4 a, uchar4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
__device__
__forceinline__
float sum(float v) { return v; }
__device__
__forceinline__
float sum(float2 v) { return v.x + v.y; }
__device__
__forceinline__
float sum(float3 v) { return v.x + v.y + v.z; }
__device__
__forceinline__
float sum(float4 v) { return v.x + v.y + v.z + v.w; }
__device__
__forceinline__
float first(float v) { return v; }
__device__
__forceinline__
float first(float2 v) { return v.x; }
__device__
__forceinline__
float first(float3 v) { return v.x; }
__device__
__forceinline__
float first(float4 v) { return v.x; }
__device__
__forceinline__
float mul(float a, float b) { return a * b; }
__device__
__forceinline__
float2 mul(float2 a, float2 b) { return make_float2(a.x * b.x, a.y * b.y); }
__device__
__forceinline__
float3 mul(float3 a, float3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
__device__
__forceinline__
float4 mul(float4 a, float4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
__device__
__forceinline__
float mul(uchar a, uchar b) { return a * b; }
__device__
__forceinline__
float2 mul(uchar2 a, uchar2 b) { return make_float2(a.x * b.x, a.y * b.y); }
__device__
__forceinline__
float3 mul(uchar3 a, uchar3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
__device__
__forceinline__
float4 mul(uchar4 a, uchar4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
__device__
__forceinline__
float sub(float a, float b) { return a - b; }
__device__
__forceinline__
float2 sub(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); }
__device__
__forceinline__
float3 sub(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
__device__
__forceinline__
float4 sub(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
__device__
__forceinline__
float sub(uchar a, uchar b) { return a - b; }
__device__
__forceinline__
float2 sub(uchar2 a, uchar2 b) { return make_float2(a.x - b.x, a.y - b.y); }
__device__
__forceinline__
float3 sub(uchar3 a, uchar3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
__device__
__forceinline__
float4 sub(uchar4 a, uchar4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
template <typename T, int cn>
...
...
modules/gpu/src/cuda/mathfunc.cu
View file @
1c1a61dd
...
...
@@ -60,27 +60,27 @@ namespace cv { namespace gpu { namespace mathfunc
{
struct Nothing
{
static __device__ void calc(int, int, float, float, float*, size_t, float)
static __device__
__forceinline__
void calc(int, int, float, float, float*, size_t, float)
{
}
};
struct Magnitude
{
static __device__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
static __device__
__forceinline__
void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
{
dst[y * dst_step + x] = sqrtf(x_data * x_data + y_data * y_data);
}
};
struct MagnitudeSqr
{
static __device__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
static __device__
__forceinline__
void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
{
dst[y * dst_step + x] = x_data * x_data + y_data * y_data;
}
};
struct Atan2
{
static __device__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
static __device__
__forceinline__
void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
{
dst[y * dst_step + x] = scale * atan2f(y_data, x_data);
}
...
...
@@ -104,14 +104,14 @@ namespace cv { namespace gpu { namespace mathfunc
struct NonEmptyMag
{
static __device__ float get(const float* mag, size_t mag_step, int x, int y)
static __device__
__forceinline__
float get(const float* mag, size_t mag_step, int x, int y)
{
return mag[y * mag_step + x];
}
};
struct EmptyMag
{
static __device__ float get(const float*, size_t, int, int)
static __device__
__forceinline__
float get(const float*, size_t, int, int)
{
return 1.0f;
}
...
...
modules/gpu/src/cuda/matrix_operations.cu
View file @
1c1a61dd
...
...
@@ -123,14 +123,14 @@ namespace cv { namespace gpu { namespace matrix_operations {
__constant__ float scalar_32f[4];
__constant__ double scalar_64f[4];
template <typename T> __device__ T readScalar(int i);
template <> __device__ uchar readScalar<uchar>(int i) {return scalar_8u[i];}
template <> __device__ schar readScalar<schar>(int i) {return scalar_8s[i];}
template <> __device__ ushort readScalar<ushort>(int i) {return scalar_16u[i];}
template <> __device__ short readScalar<short>(int i) {return scalar_16s[i];}
template <> __device__ int readScalar<int>(int i) {return scalar_32s[i];}
template <> __device__ float readScalar<float>(int i) {return scalar_32f[i];}
template <> __device__ double readScalar<double>(int i) {return scalar_64f[i];}
template <typename T> __device__
__forceinline__
T readScalar(int i);
template <> __device__
__forceinline__
uchar readScalar<uchar>(int i) {return scalar_8u[i];}
template <> __device__
__forceinline__
schar readScalar<schar>(int i) {return scalar_8s[i];}
template <> __device__
__forceinline__
ushort readScalar<ushort>(int i) {return scalar_16u[i];}
template <> __device__
__forceinline__
short readScalar<short>(int i) {return scalar_16s[i];}
template <> __device__
__forceinline__
int readScalar<int>(int i) {return scalar_32s[i];}
template <> __device__
__forceinline__
float readScalar<float>(int i) {return scalar_32f[i];}
template <> __device__
__forceinline__
double readScalar<double>(int i) {return scalar_64f[i];}
void writeScalar(const uchar* vals)
{
...
...
@@ -243,7 +243,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
public:
Convertor(double alpha_, double beta_) : alpha(alpha_), beta(beta_) {}
__device__ D operator()(const T& src)
__device__
__forceinline__
D operator()(const T& src)
{
return saturate_cast<D>(alpha * src + beta);
}
...
...
modules/gpu/src/cuda/matrix_reductions.cu
View file @
1c1a61dd
...
...
@@ -78,7 +78,7 @@ namespace cv { namespace gpu { namespace mathfunc
{
explicit Mask8U(PtrStep mask): mask(mask) {}
__device__ bool operator()(int y, int x) const
__device__
__forceinline__
bool operator()(int y, int x) const
{
return mask.ptr(y)[x];
}
...
...
@@ -89,7 +89,7 @@ namespace cv { namespace gpu { namespace mathfunc
struct MaskTrue
{
__device__ bool operator()(int y, int x) const
__device__
__forceinline__
bool operator()(int y, int x) const
{
return true;
}
...
...
@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace mathfunc
// Does min and max in shared memory
template <typename T>
__device__ void merge(uint tid, uint offset, volatile T* minval, volatile T* maxval)
__device__
__forceinline__
void merge(uint tid, uint offset, volatile T* minval, volatile T* maxval)
{
minval[tid] = min(minval[tid], minval[tid + offset]);
maxval[tid] = max(maxval[tid], maxval[tid + offset]);
...
...
@@ -976,16 +976,16 @@ namespace cv { namespace gpu { namespace mathfunc
template <> struct SumType<double> { typedef double R; };
template <typename R>
struct IdentityOp { static __device__ R call(R x) { return x; } };
struct IdentityOp { static __device__
__forceinline__
R call(R x) { return x; } };
template <typename R>
struct AbsOp { static __device__ R call(R x) { return abs(x); } };
struct AbsOp { static __device__
__forceinline__
R call(R x) { return abs(x); } };
template <>
struct AbsOp<uint> { static __device__ uint call(uint x) { return x; } };
struct AbsOp<uint> { static __device__
__forceinline__
uint call(uint x) { return x; } };
template <typename R>
struct SqrOp { static __device__ R call(R x) { return x * x; } };
struct SqrOp { static __device__
__forceinline__
R call(R x) { return x * x; } };
__constant__ int ctwidth;
__constant__ int ctheight;
...
...
modules/gpu/src/cuda/stereobm.cu
View file @
1c1a61dd
...
...
@@ -68,7 +68,7 @@ __constant__ size_t cminSSD_step;
__constant__ int cwidth;
__constant__ int cheight;
__device__ int SQ(int a)
__device__
__forceinline__
int SQ(int a)
{
return a * a;
}
...
...
@@ -419,7 +419,7 @@ extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output,
texture<unsigned char, 2, cudaReadModeNormalizedFloat> texForTF;
__device__ float sobel(int x, int y)
__device__
__forceinline__
float sobel(int x, int y)
{
float conv = tex2D(texForTF, x - 1, y - 1) * (-1) + tex2D(texForTF, x + 1, y - 1) * (1) +
tex2D(texForTF, x - 1, y ) * (-2) + tex2D(texForTF, x + 1, y ) * (2) +
...
...
modules/gpu/src/cuda/stereobp.cu
View file @
1c1a61dd
...
...
@@ -76,11 +76,11 @@ namespace cv { namespace gpu { namespace bp
template <int cn> struct PixDiff;
template <> struct PixDiff<1>
{
__device__ PixDiff(const uchar* ls)
__device__
__forceinline__
PixDiff(const uchar* ls)
{
l = *ls;
}
__device__ float operator()(const uchar* rs) const
__device__
__forceinline__
float operator()(const uchar* rs) const
{
return abs((int)l - *rs);
}
...
...
@@ -88,11 +88,11 @@ namespace cv { namespace gpu { namespace bp
};
template <> struct PixDiff<3>
{
__device__ PixDiff(const uchar* ls)
__device__
__forceinline__
PixDiff(const uchar* ls)
{
l = *((uchar3*)ls);
}
__device__ float operator()(const uchar* rs) const
__device__
__forceinline__
float operator()(const uchar* rs) const
{
const float tr = 0.299f;
const float tg = 0.587f;
...
...
@@ -108,11 +108,11 @@ namespace cv { namespace gpu { namespace bp
};
template <> struct PixDiff<4>
{
__device__ PixDiff(const uchar* ls)
__device__
__forceinline__
PixDiff(const uchar* ls)
{
l = *((uchar4*)ls);
}
__device__ float operator()(const uchar* rs) const
__device__
__forceinline__
float operator()(const uchar* rs) const
{
const float tr = 0.299f;
const float tg = 0.587f;
...
...
modules/gpu/src/cuda/stereocsbp.cu
View file @
1c1a61dd
...
...
@@ -102,14 +102,14 @@ namespace cv { namespace gpu { namespace csbp
template <int channels> struct DataCostPerPixel;
template <> struct DataCostPerPixel<1>
{
static __device__ float compute(const uchar* left, const uchar* right)
static __device__
__forceinline__
float compute(const uchar* left, const uchar* right)
{
return fmin(cdata_weight * abs((int)*left - *right), cdata_weight * cmax_data_term);
}
};
template <> struct DataCostPerPixel<3>
{
static __device__ float compute(const uchar* left, const uchar* right)
static __device__
__forceinline__
float compute(const uchar* left, const uchar* right)
{
float tb = 0.114f * abs((int)left[0] - right[0]);
float tg = 0.587f * abs((int)left[1] - right[1]);
...
...
@@ -120,7 +120,7 @@ namespace cv { namespace gpu { namespace csbp
};
template <> struct DataCostPerPixel<4>
{
static __device__ float compute(const uchar* left, const uchar* right)
static __device__
__forceinline__
float compute(const uchar* left, const uchar* right)
{
uchar4 l = *((const uchar4*)left);
uchar4 r = *((const uchar4*)right);
...
...
modules/gpu/src/cuda/surf.cu
View file @
1c1a61dd
...
...
@@ -122,7 +122,7 @@ namespace cv { namespace gpu { namespace surf
__constant__ float c_DY [3][5] = { {2, 0, 7, 3, 1}, {2, 3, 7, 6, -2}, {2, 6, 7, 9, 1} };
__constant__ float c_DXY[4][5] = { {1, 1, 4, 4, 1}, {5, 1, 8, 4, -1}, {1, 5, 4, 8, -1}, {5, 5, 8, 8, 1} };
__host__ __device__ int calcSize(int octave, int layer)
__host__ __device__
__forceinline__
int calcSize(int octave, int layer)
{
/* Wavelet size at first layer of first octave. */
const int HAAR_SIZE0 = 9;
...
...
@@ -189,7 +189,7 @@ namespace cv { namespace gpu { namespace surf
struct WithOutMask
{
static __device__ bool check(int, int, int)
static __device__
__forceinline__
bool check(int, int, int)
{
return true;
}
...
...
@@ -708,7 +708,7 @@ namespace cv { namespace gpu { namespace surf
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f
};
__device__ unsigned char calcWin(int i, int j, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir)
__device__
__forceinline__
unsigned char calcWin(int i, int j, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir)
{
float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;
float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;
...
...
modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
View file @
1c1a61dd
...
...
@@ -40,208 +40,207 @@
//
//M*/
#ifndef __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
#define __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vecmath.hpp"
namespace
cv
{
namespace
gpu
namespace
cv
{
namespace
gpu
{
namespace
device
{
struct
BrdReflect101
{
explicit
BrdReflect101
(
int
len
)
:
last
(
len
-
1
)
{}
__device__
__forceinline__
int
idx_low
(
int
i
)
const
{
return
abs
(
i
);
}
__device__
__forceinline__
int
idx_high
(
int
i
)
const
{
return
last
-
abs
(
last
-
i
);
}
__device__
__forceinline__
int
idx
(
int
i
)
const
{
return
idx_low
(
idx_high
(
i
));
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
-
last
<=
mini
&&
maxi
<=
2
*
last
;
}
private
:
int
last
;
};
template
<
typename
D
>
struct
BrdRowReflect101
:
BrdReflect101
{
namespace
device
{
struct
BrdReflect101
{
explicit
BrdReflect101
(
int
len
)
:
last
(
len
-
1
)
{}
__device__
int
idx_low
(
int
i
)
const
{
return
abs
(
i
);
}
__device__
int
idx_high
(
int
i
)
const
{
return
last
-
abs
(
last
-
i
);
}
__device__
int
idx
(
int
i
)
const
{
return
idx_low
(
idx_high
(
i
));
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
-
last
<=
mini
&&
maxi
<=
2
*
last
;
}
private
:
int
last
;
};
template
<
typename
D
>
struct
BrdRowReflect101
:
BrdReflect101
{
explicit
BrdRowReflect101
(
int
len
)
:
BrdReflect101
(
len
)
{}
template
<
typename
T
>
__device__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)]);
}
template
<
typename
T
>
__device__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)]);
}
};
template
<
typename
D
>
struct
BrdColReflect101
:
BrdReflect101
{
BrdColReflect101
(
int
len
,
int
step
)
:
BrdReflect101
(
len
),
step
(
step
)
{}
template
<
typename
T
>
__device__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)
*
step
]);
}
template
<
typename
T
>
__device__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)
*
step
]);
}
private
:
int
step
;
};
struct
BrdReplicate
{
explicit
BrdReplicate
(
int
len
)
:
last
(
len
-
1
)
{}
__device__
int
idx_low
(
int
i
)
const
{
return
max
(
i
,
0
);
}
__device__
int
idx_high
(
int
i
)
const
{
return
min
(
i
,
last
);
}
__device__
int
idx
(
int
i
)
const
{
return
idx_low
(
idx_high
(
i
));
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
private
:
int
last
;
};
template
<
typename
D
>
struct
BrdRowReplicate
:
BrdReplicate
{
explicit
BrdRowReplicate
(
int
len
)
:
BrdReplicate
(
len
)
{}
template
<
typename
T
>
__device__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)]);
}
template
<
typename
T
>
__device__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)]);
}
};
template
<
typename
D
>
struct
BrdColReplicate
:
BrdReplicate
{
BrdColReplicate
(
int
len
,
int
step
)
:
BrdReplicate
(
len
),
step
(
step
)
{}
template
<
typename
T
>
__device__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)
*
step
]);
}
template
<
typename
T
>
__device__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)
*
step
]);
}
private
:
int
step
;
};
template
<
typename
D
>
struct
BrdRowConstant
{
explicit
BrdRowConstant
(
int
len_
,
const
D
&
val_
=
VecTraits
<
D
>::
all
(
0
))
:
len
(
len_
),
val
(
val_
)
{}
template
<
typename
T
>
__device__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
i
>=
0
?
saturate_cast
<
D
>
(
data
[
i
])
:
val
;
}
template
<
typename
T
>
__device__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
i
<
len
?
saturate_cast
<
D
>
(
data
[
i
])
:
val
;
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
private
:
int
len
;
D
val
;
};
template
<
typename
D
>
struct
BrdColConstant
{
BrdColConstant
(
int
len_
,
int
step_
,
const
D
&
val_
=
VecTraits
<
D
>::
all
(
0
))
:
len
(
len_
),
step
(
step_
),
val
(
val_
)
{}
template
<
typename
T
>
__device__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
i
>=
0
?
saturate_cast
<
D
>
(
data
[
i
*
step
])
:
val
;
}
template
<
typename
T
>
__device__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
i
<
len
?
saturate_cast
<
D
>
(
data
[
i
*
step
])
:
val
;
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
private
:
int
len
;
int
step
;
D
val
;
};
}
}
}
\ No newline at end of file
explicit
BrdRowReflect101
(
int
len
)
:
BrdReflect101
(
len
)
{}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)]);
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)]);
}
};
template
<
typename
D
>
struct
BrdColReflect101
:
BrdReflect101
{
BrdColReflect101
(
int
len
,
int
step
)
:
BrdReflect101
(
len
),
step
(
step
)
{}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)
*
step
]);
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)
*
step
]);
}
private
:
int
step
;
};
struct
BrdReplicate
{
explicit
BrdReplicate
(
int
len
)
:
last
(
len
-
1
)
{}
__device__
__forceinline__
int
idx_low
(
int
i
)
const
{
return
max
(
i
,
0
);
}
__device__
__forceinline__
int
idx_high
(
int
i
)
const
{
return
min
(
i
,
last
);
}
__device__
__forceinline__
int
idx
(
int
i
)
const
{
return
idx_low
(
idx_high
(
i
));
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
private
:
int
last
;
};
template
<
typename
D
>
struct
BrdRowReplicate
:
BrdReplicate
{
explicit
BrdRowReplicate
(
int
len
)
:
BrdReplicate
(
len
)
{}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)]);
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)]);
}
};
template
<
typename
D
>
struct
BrdColReplicate
:
BrdReplicate
{
BrdColReplicate
(
int
len
,
int
step
)
:
BrdReplicate
(
len
),
step
(
step
)
{}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)
*
step
]);
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)
*
step
]);
}
private
:
int
step
;
};
template
<
typename
D
>
struct
BrdRowConstant
{
explicit
BrdRowConstant
(
int
len_
,
const
D
&
val_
=
VecTraits
<
D
>::
all
(
0
))
:
len
(
len_
),
val
(
val_
)
{}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
i
>=
0
?
saturate_cast
<
D
>
(
data
[
i
])
:
val
;
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
i
<
len
?
saturate_cast
<
D
>
(
data
[
i
])
:
val
;
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
private
:
int
len
;
D
val
;
};
template
<
typename
D
>
struct
BrdColConstant
{
BrdColConstant
(
int
len_
,
int
step_
,
const
D
&
val_
=
VecTraits
<
D
>::
all
(
0
))
:
len
(
len_
),
step
(
step_
),
val
(
val_
)
{}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
{
return
i
>=
0
?
saturate_cast
<
D
>
(
data
[
i
*
step
])
:
val
;
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
{
return
i
<
len
?
saturate_cast
<
D
>
(
data
[
i
*
step
])
:
val
;
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
private
:
int
len
;
int
step
;
D
val
;
};
}}}
#endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp
View file @
1c1a61dd
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or bpied warranties, including, but not limited to, the bpied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__
#define __OPENCV_GPU_DATAMOV_UTILS_HPP__
#include "internal_shared.hpp"
#if __CUDA_ARCH__ >= 200
namespace
cv
{
namespace
gpu
{
namespace
device
{
#if __CUDA_ARCH__ >= 200
// for Fermi memory space is detected automatically
template
<
typename
T
>
struct
ForceGlobLoad
{
__device__
__forceinline__
static
void
Ld
(
T
*
ptr
,
int
offset
,
T
&
val
)
{
val
=
d_ptr
[
offset
];
}
};
#else
// for Fermi memory space is detected automatically
template
<
typename
T
>
struct
ForceGlob
{
__device__
__forceinline__
static
void
Load
(
const
T
*
ptr
,
int
offset
,
T
&
val
)
{
val
=
d_ptr
[
offset
];
}
};
#else // __CUDA_ARCH__ >= 200
#if defined(_WIN64) || defined(__LP64__)
// 64-bit register modifier for inlined asm
#define _OPENCV_ASM_PTR_ "l"
#else
// 32-bit register modifier for inlined asm
#define _OPENCV_ASM_PTR_ "r"
#endif
#if defined(_WIN64) || defined(__LP64__)
// 64-bit register modifier for inlined asm
#define _OPENCV_ASM_PTR_ "l"
#else
// 32-bit register modifier for inlined asm
#define _OPENCV_ASM_PTR_ "r"
#endif
template
<
class
T
>
struct
ForceGlob
;
template
<
class
T
>
struct
ForceGlobLoad
;
#define DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : _OPENCV_ASM_PTR_(ptr + offset)); \
} \
};
#define DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : _OPENCV_ASM_PTR_(ptr + offset)); \
} \
};
DEFINE_FORCE_GLOB_B
(
uchar
,
u8
)
DEFINE_FORCE_GLOB_B
(
schar
,
s8
)
DEFINE_FORCE_GLOB_B
(
char
,
b8
)
DEFINE_FORCE_GLOB
(
ushort
,
u16
,
h
)
DEFINE_FORCE_GLOB
(
short
,
s16
,
h
)
DEFINE_FORCE_GLOB
(
uint
,
u32
,
r
)
DEFINE_FORCE_GLOB
(
int
,
s32
,
r
)
DEFINE_FORCE_GLOB
(
float
,
f32
,
f
)
DEFINE_FORCE_GLOB
(
double
,
f64
,
d
)
#undef DEFINE_FORCE_GLOB
#undef DEFINE_FORCE_GLOB_B
#undef _OPENCV_ASM_PTR_
#endif // __CUDA_ARCH__ >= 200
}}}
#define DEFINE_FORCE_GLOB_LOAD(base_type, ptx_type, reg_mod) \
template <> struct ForceGlobLoad<base_type> \
{ \
__device__ __forceinline__ static void Ld(type* ptr, int offset, type& val) \
{
\
asm
(
"ld.global."
#
ptx_type
" %0, [%1];"
:
"="
#
reg_mod
(
val
)
:
_OPENCV_ASM_PTR_
(
d_ptr
+
offset
));
\
}
\
};
DEFINE_FORCE_GLOB_LOAD
(
int
,
s32
,
r
)
DEFINE_FORCE_GLOB_LOAD
(
float
,
f32
,
f
)
#undef DEFINE_FORCE_GLOB_LOAD
#endif
#endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__
modules/gpu/src/opencv2/gpu/device/dynamic_smem.hpp
View file @
1c1a61dd
...
...
@@ -40,44 +40,41 @@
//
//M*/
#ifndef __OPENCV_GPU_DYNAMIC_SMEM_HPP__
#define __OPENCV_GPU_DYNAMIC_SMEM_HPP__
namespace
cv
{
namespace
gpu
namespace
cv
{
namespace
gpu
{
namespace
device
{
template
<
class
T
>
struct
DynamicSharedMem
{
namespace
device
{
template
<
class
T
>
struct
DynamicSharedMem
{
__device__
operator
T
*
()
{
extern
__shared__
int
__smem
[];
return
(
T
*
)
__smem
;
}
__device__
__forceinline__
operator
T
*
()
{
extern
__shared__
int
__smem
[];
return
(
T
*
)
__smem
;
}
__devic
e__
operator
const
T
*
()
const
{
extern
__shared__
int
__smem
[];
return
(
T
*
)
__smem
;
}
};
__device__
__forceinlin
e__
operator
const
T
*
()
const
{
extern
__shared__
int
__smem
[];
return
(
T
*
)
__smem
;
}
};
// specialize for double to avoid unaligned memory access compile errors
template
<>
struct
DynamicSharedMem
<
double
>
{
__devic
e__
operator
double
*
()
{
extern
__shared__
double
__smem_d
[];
return
(
double
*
)
__smem_d
;
}
// specialize for double to avoid unaligned memory access compile errors
template
<>
struct
DynamicSharedMem
<
double
>
{
__device__
__forceinlin
e__
operator
double
*
()
{
extern
__shared__
double
__smem_d
[];
return
(
double
*
)
__smem_d
;
}
__device__
operator
const
double
*
()
const
{
extern
__shared__
double
__smem_d
[];
return
(
double
*
)
__smem_d
;
}
};
__device__
__forceinline__
operator
const
double
*
()
const
{
extern
__shared__
double
__smem_d
[];
return
(
double
*
)
__smem_d
;
}
};
}}}
}
}
\ No newline at end of file
#endif // __OPENCV_GPU_DYNAMIC_SMEM_HPP__
modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp
View file @
1c1a61dd
...
...
@@ -40,181 +40,179 @@
//
//M*/
#ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
#define __OPENCV_GPU_LIMITS_GPU_HPP__
namespace
cv
{
namespace
gpu
{
namespace
device
{
template
<
class
T
>
struct
numeric_limits_gpu
{
typedef
T
type
;
__device__
static
type
min
()
{
return
type
();
};
__device__
static
type
max
()
{
return
type
();
};
__device__
static
type
epsilon
()
{
return
type
();
}
__device__
static
type
round_error
()
{
return
type
();
}
__device__
static
type
denorm_min
()
{
return
type
();
}
__device__
static
type
infinity
()
{
return
type
();
}
__device__
static
type
quiet_NaN
()
{
return
type
();
}
__device__
static
type
signaling_NaN
()
{
return
T
();
}
static
const
bool
is_signed
;
};
namespace
cv
{
namespace
gpu
{
namespace
device
{
template
<
class
T
>
struct
numeric_limits_gpu
{
typedef
T
type
;
__device__
__forceinline__
static
type
min
()
{
return
type
();
};
__device__
__forceinline__
static
type
max
()
{
return
type
();
};
__device__
__forceinline__
static
type
epsilon
()
{
return
type
();
}
__device__
__forceinline__
static
type
round_error
()
{
return
type
();
}
__device__
__forceinline__
static
type
denorm_min
()
{
return
type
();
}
__device__
__forceinline__
static
type
infinity
()
{
return
type
();
}
__device__
__forceinline__
static
type
quiet_NaN
()
{
return
type
();
}
__device__
__forceinline__
static
type
signaling_NaN
()
{
return
T
();
}
static
const
bool
is_signed
;
};
template
<>
struct
numeric_limits_gpu
<
bool
>
{
typedef
bool
type
;
__devic
e__
static
type
min
()
{
return
false
;
};
__devic
e__
static
type
max
()
{
return
true
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<>
struct
numeric_limits_gpu
<
bool
>
{
typedef
bool
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
false
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
true
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<>
struct
numeric_limits_gpu
<
char
>
{
typedef
char
type
;
__devic
e__
static
type
min
()
{
return
CHAR_MIN
;
};
__devic
e__
static
type
max
()
{
return
CHAR_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
(
char
)
-
1
==
-
1
;
};
template
<>
struct
numeric_limits_gpu
<
char
>
{
typedef
char
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
CHAR_MIN
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
CHAR_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
(
char
)
-
1
==
-
1
;
};
template
<>
struct
numeric_limits_gpu
<
unsigned
char
>
{
typedef
unsigned
char
type
;
__devic
e__
static
type
min
()
{
return
0
;
};
__devic
e__
static
type
max
()
{
return
UCHAR_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<>
struct
numeric_limits_gpu
<
unsigned
char
>
{
typedef
unsigned
char
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
0
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
UCHAR_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<>
struct
numeric_limits_gpu
<
short
>
{
typedef
short
type
;
__devic
e__
static
type
min
()
{
return
SHRT_MIN
;
};
__devic
e__
static
type
max
()
{
return
SHRT_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<>
struct
numeric_limits_gpu
<
short
>
{
typedef
short
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
SHRT_MIN
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
SHRT_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<>
struct
numeric_limits_gpu
<
unsigned
short
>
{
typedef
unsigned
short
type
;
__devic
e__
static
type
min
()
{
return
0
;
};
__devic
e__
static
type
max
()
{
return
USHRT_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<>
struct
numeric_limits_gpu
<
unsigned
short
>
{
typedef
unsigned
short
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
0
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
USHRT_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<>
struct
numeric_limits_gpu
<
int
>
{
typedef
int
type
;
__devic
e__
static
type
min
()
{
return
INT_MIN
;
};
__devic
e__
static
type
max
()
{
return
INT_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<>
struct
numeric_limits_gpu
<
int
>
{
typedef
int
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
INT_MIN
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
INT_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<>
struct
numeric_limits_gpu
<
unsigned
int
>
{
typedef
unsigned
int
type
;
__devic
e__
static
type
min
()
{
return
0
;
};
__devic
e__
static
type
max
()
{
return
UINT_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<>
struct
numeric_limits_gpu
<
unsigned
int
>
{
typedef
unsigned
int
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
0
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
UINT_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<>
struct
numeric_limits_gpu
<
long
>
{
typedef
long
type
;
__devic
e__
static
type
min
()
{
return
LONG_MIN
;
};
__devic
e__
static
type
max
()
{
return
LONG_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<>
struct
numeric_limits_gpu
<
long
>
{
typedef
long
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
LONG_MIN
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
LONG_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<>
struct
numeric_limits_gpu
<
unsigned
long
>
{
typedef
unsigned
long
type
;
__devic
e__
static
type
min
()
{
return
0
;
};
__devic
e__
static
type
max
()
{
return
ULONG_MAX
;
};
__devic
e__
static
type
epsilon
();
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<>
struct
numeric_limits_gpu
<
float
>
{
typedef
float
type
;
__devic
e__
static
type
min
()
{
return
1.175494351e-38
f
/*FLT_MIN*/
;
};
__devic
e__
static
type
max
()
{
return
3.402823466e+38
f
/*FLT_MAX*/
;
};
__devic
e__
static
type
epsilon
()
{
return
1.192092896e-07
f
/*FLT_EPSILON*/
;
};
__devic
e__
static
type
round_error
();
__devic
e__
static
type
denorm_min
();
__devic
e__
static
type
infinity
();
__devic
e__
static
type
quiet_NaN
();
__devic
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<>
struct
numeric_limits_gpu
<
unsigned
long
>
{
typedef
unsigned
long
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
0
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
ULONG_MAX
;
};
__device__
__forceinlin
e__
static
type
epsilon
();
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
false
;
};
template
<>
struct
numeric_limits_gpu
<
float
>
{
typedef
float
type
;
__device__
__forceinlin
e__
static
type
min
()
{
return
1.175494351e-38
f
/*FLT_MIN*/
;
};
__device__
__forceinlin
e__
static
type
max
()
{
return
3.402823466e+38
f
/*FLT_MAX*/
;
};
__device__
__forceinlin
e__
static
type
epsilon
()
{
return
1.192092896e-07
f
/*FLT_EPSILON*/
;
};
__device__
__forceinlin
e__
static
type
round_error
();
__device__
__forceinlin
e__
static
type
denorm_min
();
__device__
__forceinlin
e__
static
type
infinity
();
__device__
__forceinlin
e__
static
type
quiet_NaN
();
__device__
__forceinlin
e__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
template
<>
struct
numeric_limits_gpu
<
double
>
{
typedef
double
type
;
__device__
static
type
min
()
{
return
2.2250738585072014e-308
/*DBL_MIN*/
;
};
__device__
static
type
max
()
{
return
1.7976931348623158e+308
/*DBL_MAX*/
;
};
__device__
static
type
epsilon
();
__device__
static
type
round_error
();
__device__
static
type
denorm_min
();
__device__
static
type
infinity
();
__device__
static
type
quiet_NaN
();
__device__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
}
}
}
\ No newline at end of file
template
<>
struct
numeric_limits_gpu
<
double
>
{
typedef
double
type
;
__device__
__forceinline__
static
type
min
()
{
return
2.2250738585072014e-308
/*DBL_MIN*/
;
};
__device__
__forceinline__
static
type
max
()
{
return
1.7976931348623158e+308
/*DBL_MAX*/
;
};
__device__
__forceinline__
static
type
epsilon
();
__device__
__forceinline__
static
type
round_error
();
__device__
__forceinline__
static
type
denorm_min
();
__device__
__forceinline__
static
type
infinity
();
__device__
__forceinline__
static
type
quiet_NaN
();
__device__
__forceinline__
static
type
signaling_NaN
();
static
const
bool
is_signed
=
true
;
};
}}}
#endif // __OPENCV_GPU_LIMITS_GPU_HPP__
modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp
View file @
1c1a61dd
...
...
@@ -51,29 +51,29 @@ namespace cv
{
namespace
device
{
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
uchar
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
schar
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
ushort
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
short
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
uint
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
int
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
float
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
double
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
uchar
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
schar
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
ushort
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
short
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
uint
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
int
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
float
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
double
v
)
{
return
_Tp
(
v
);
}
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
schar
v
)
template
<>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
schar
v
)
{
return
(
uchar
)
max
((
int
)
v
,
0
);
}
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
ushort
v
)
template
<>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
ushort
v
)
{
return
(
uchar
)
min
((
uint
)
v
,
(
uint
)
UCHAR_MAX
);
}
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
int
v
)
template
<>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
int
v
)
{
return
(
uchar
)((
uint
)
v
<=
UCHAR_MAX
?
v
:
v
>
0
?
UCHAR_MAX
:
0
);
}
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
uint
v
)
template
<>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
uint
v
)
{
return
(
uchar
)
min
(
v
,
(
uint
)
UCHAR_MAX
);
}
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
short
v
)
template
<>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
short
v
)
{
return
saturate_cast
<
uchar
>
((
uint
)
v
);
}
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
float
v
)
template
<>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
float
v
)
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
uchar
>
(
iv
);
}
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
double
v
)
template
<>
static
__device__
__forceinline__
uchar
saturate_cast
<
uchar
>
(
double
v
)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
uchar
>
(
iv
);
...
...
@@ -82,23 +82,23 @@ namespace cv
#endif
}
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
uchar
v
)
template
<>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
uchar
v
)
{
return
(
schar
)
min
((
int
)
v
,
SCHAR_MAX
);
}
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
ushort
v
)
template
<>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
ushort
v
)
{
return
(
schar
)
min
((
uint
)
v
,
(
uint
)
SCHAR_MAX
);
}
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
int
v
)
template
<>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
int
v
)
{
return
(
schar
)((
uint
)(
v
-
SCHAR_MIN
)
<=
(
uint
)
UCHAR_MAX
?
v
:
v
>
0
?
SCHAR_MAX
:
SCHAR_MIN
);
}
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
short
v
)
template
<>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
short
v
)
{
return
saturate_cast
<
schar
>
((
int
)
v
);
}
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
uint
v
)
template
<>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
uint
v
)
{
return
(
schar
)
min
(
v
,
(
uint
)
SCHAR_MAX
);
}
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
float
v
)
template
<>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
float
v
)
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
schar
>
(
iv
);
}
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
double
v
)
template
<>
static
__device__
__forceinline__
schar
saturate_cast
<
schar
>
(
double
v
)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
schar
>
(
iv
);
...
...
@@ -107,17 +107,17 @@ namespace cv
#endif
}
template
<>
static
__device__
ushort
saturate_cast
<
ushort
>
(
schar
v
)
template
<>
static
__device__
__forceinline__
ushort
saturate_cast
<
ushort
>
(
schar
v
)
{
return
(
ushort
)
max
((
int
)
v
,
0
);
}
template
<>
static
__device__
ushort
saturate_cast
<
ushort
>
(
short
v
)
template
<>
static
__device__
__forceinline__
ushort
saturate_cast
<
ushort
>
(
short
v
)
{
return
(
ushort
)
max
((
int
)
v
,
0
);
}
template
<>
static
__device__
ushort
saturate_cast
<
ushort
>
(
int
v
)
template
<>
static
__device__
__forceinline__
ushort
saturate_cast
<
ushort
>
(
int
v
)
{
return
(
ushort
)((
uint
)
v
<=
(
uint
)
USHRT_MAX
?
v
:
v
>
0
?
USHRT_MAX
:
0
);
}
template
<>
static
__device__
ushort
saturate_cast
<
ushort
>
(
uint
v
)
template
<>
static
__device__
__forceinline__
ushort
saturate_cast
<
ushort
>
(
uint
v
)
{
return
(
ushort
)
min
(
v
,
(
uint
)
USHRT_MAX
);
}
template
<>
static
__device__
ushort
saturate_cast
<
ushort
>
(
float
v
)
template
<>
static
__device__
__forceinline__
ushort
saturate_cast
<
ushort
>
(
float
v
)
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
ushort
>
(
iv
);
}
template
<>
static
__device__
ushort
saturate_cast
<
ushort
>
(
double
v
)
template
<>
static
__device__
__forceinline__
ushort
saturate_cast
<
ushort
>
(
double
v
)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
ushort
>
(
iv
);
...
...
@@ -126,18 +126,18 @@ namespace cv
#endif
}
template
<>
static
__device__
short
saturate_cast
<
short
>
(
ushort
v
)
template
<>
static
__device__
__forceinline__
short
saturate_cast
<
short
>
(
ushort
v
)
{
return
(
short
)
min
((
int
)
v
,
SHRT_MAX
);
}
template
<>
static
__device__
short
saturate_cast
<
short
>
(
int
v
)
template
<>
static
__device__
__forceinline__
short
saturate_cast
<
short
>
(
int
v
)
{
return
(
short
)((
uint
)(
v
-
SHRT_MIN
)
<=
(
uint
)
USHRT_MAX
?
v
:
v
>
0
?
SHRT_MAX
:
SHRT_MIN
);
}
template
<>
static
__device__
short
saturate_cast
<
short
>
(
uint
v
)
template
<>
static
__device__
__forceinline__
short
saturate_cast
<
short
>
(
uint
v
)
{
return
(
short
)
min
(
v
,
(
uint
)
SHRT_MAX
);
}
template
<>
static
__device__
short
saturate_cast
<
short
>
(
float
v
)
template
<>
static
__device__
__forceinline__
short
saturate_cast
<
short
>
(
float
v
)
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
short
>
(
iv
);
}
template
<>
static
__device__
short
saturate_cast
<
short
>
(
double
v
)
template
<>
static
__device__
__forceinline__
short
saturate_cast
<
short
>
(
double
v
)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
short
>
(
iv
);
...
...
@@ -146,8 +146,8 @@ namespace cv
#endif
}
template
<>
static
__device__
int
saturate_cast
<
int
>
(
float
v
)
{
return
__float2int_rn
(
v
);
}
template
<>
static
__device__
int
saturate_cast
<
int
>
(
double
v
)
template
<>
static
__device__
__forceinline__
int
saturate_cast
<
int
>
(
float
v
)
{
return
__float2int_rn
(
v
);
}
template
<>
static
__device__
__forceinline__
int
saturate_cast
<
int
>
(
double
v
)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
return
__double2int_rn
(
v
);
...
...
@@ -156,8 +156,8 @@ namespace cv
#endif
}
template
<>
static
__device__
uint
saturate_cast
<
uint
>
(
float
v
){
return
__float2uint_rn
(
v
);
}
template
<>
static
__device__
uint
saturate_cast
<
uint
>
(
double
v
)
template
<>
static
__device__
__forceinline__
uint
saturate_cast
<
uint
>
(
float
v
){
return
__float2uint_rn
(
v
);
}
template
<>
static
__device__
__forceinline__
uint
saturate_cast
<
uint
>
(
double
v
)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
return
__double2uint_rn
(
v
);
...
...
modules/gpu/src/opencv2/gpu/device/transform.hpp
View file @
1c1a61dd
...
...
@@ -55,7 +55,7 @@ namespace cv { namespace gpu { namespace device
public
:
explicit
MaskReader
(
const
PtrStep
&
mask_
)
:
mask
(
mask_
)
{}
__device__
bool
operator
()(
int
y
,
int
x
)
const
{
return
mask
.
ptr
(
y
)[
x
];
}
__device__
__forceinline__
bool
operator
()(
int
y
,
int
x
)
const
{
return
mask
.
ptr
(
y
)[
x
];
}
private
:
PtrStep
mask
;
...
...
@@ -63,7 +63,7 @@ namespace cv { namespace gpu { namespace device
struct
NoMask
{
__device__
bool
operator
()(
int
y
,
int
x
)
const
{
return
true
;
}
__device__
__forceinline__
bool
operator
()(
int
y
,
int
x
)
const
{
return
true
;
}
};
//! Read Write Traits
...
...
@@ -121,14 +121,14 @@ namespace cv { namespace gpu { namespace device
template
<>
struct
OpUnroller
<
1
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src
.
x
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src1
.
x
,
src2
.
x
);
...
...
@@ -137,7 +137,7 @@ namespace cv { namespace gpu { namespace device
template
<>
struct
OpUnroller
<
2
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src
.
x
);
...
...
@@ -146,7 +146,7 @@ namespace cv { namespace gpu { namespace device
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src1
.
x
,
src2
.
x
);
...
...
@@ -157,7 +157,7 @@ namespace cv { namespace gpu { namespace device
template
<>
struct
OpUnroller
<
3
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src
.
x
);
...
...
@@ -168,7 +168,7 @@ namespace cv { namespace gpu { namespace device
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src1
.
x
,
src2
.
x
);
...
...
@@ -181,7 +181,7 @@ namespace cv { namespace gpu { namespace device
template
<>
struct
OpUnroller
<
4
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src
.
x
);
...
...
@@ -194,7 +194,7 @@ namespace cv { namespace gpu { namespace device
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
static
__device__
__forceinline__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src1
.
x
,
src2
.
x
);
...
...
modules/gpu/src/opencv2/gpu/device/vecmath.hpp
View file @
1c1a61dd
...
...
@@ -136,302 +136,302 @@ namespace cv
{
typedef
uchar
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
uchar
all
(
uchar
v
)
{
return
v
;}
static
__device__
__host__
uchar
make
(
uchar
x
)
{
return
x
;}
static
__device__
__
forceinline__
__
host__
uchar
all
(
uchar
v
)
{
return
v
;}
static
__device__
__
forceinline__
__
host__
uchar
make
(
uchar
x
)
{
return
x
;}
};
template
<>
struct
VecTraits
<
uchar1
>
{
typedef
uchar
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
uchar1
all
(
uchar
v
)
{
return
make_uchar1
(
v
);}
static
__device__
__host__
uchar1
make
(
uchar
x
)
{
return
make_uchar1
(
x
);}
static
__device__
__
forceinline__
__
host__
uchar1
all
(
uchar
v
)
{
return
make_uchar1
(
v
);}
static
__device__
__
forceinline__
__
host__
uchar1
make
(
uchar
x
)
{
return
make_uchar1
(
x
);}
};
template
<>
struct
VecTraits
<
uchar2
>
{
typedef
uchar
elem_t
;
enum
{
cn
=
2
};
static
__device__
__host__
uchar2
all
(
uchar
v
)
{
return
make_uchar2
(
v
,
v
);}
static
__device__
__host__
uchar2
make
(
uchar
x
,
uchar
y
)
{
return
make_uchar2
(
x
,
y
);}
static
__device__
__
forceinline__
__
host__
uchar2
all
(
uchar
v
)
{
return
make_uchar2
(
v
,
v
);}
static
__device__
__
forceinline__
__
host__
uchar2
make
(
uchar
x
,
uchar
y
)
{
return
make_uchar2
(
x
,
y
);}
};
template
<>
struct
VecTraits
<
uchar3
>
{
typedef
uchar
elem_t
;
enum
{
cn
=
3
};
static
__device__
__host__
uchar3
all
(
uchar
v
)
{
return
make_uchar3
(
v
,
v
,
v
);}
static
__device__
__host__
uchar3
make
(
uchar
x
,
uchar
y
,
uchar
z
)
{
return
make_uchar3
(
x
,
y
,
z
);}
static
__device__
__
forceinline__
__
host__
uchar3
all
(
uchar
v
)
{
return
make_uchar3
(
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
uchar3
make
(
uchar
x
,
uchar
y
,
uchar
z
)
{
return
make_uchar3
(
x
,
y
,
z
);}
};
template
<>
struct
VecTraits
<
uchar4
>
{
typedef
uchar
elem_t
;
enum
{
cn
=
4
};
static
__device__
__host__
uchar4
all
(
uchar
v
)
{
return
make_uchar4
(
v
,
v
,
v
,
v
);}
static
__device__
__host__
uchar4
make
(
uchar
x
,
uchar
y
,
uchar
z
,
uchar
w
)
{
return
make_uchar4
(
x
,
y
,
z
,
w
);}
static
__device__
__
forceinline__
__
host__
uchar4
all
(
uchar
v
)
{
return
make_uchar4
(
v
,
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
uchar4
make
(
uchar
x
,
uchar
y
,
uchar
z
,
uchar
w
)
{
return
make_uchar4
(
x
,
y
,
z
,
w
);}
};
template
<>
struct
VecTraits
<
char
>
{
typedef
char
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
char
all
(
char
v
)
{
return
v
;}
static
__device__
__host__
char
make
(
char
x
)
{
return
x
;}
static
__device__
__
forceinline__
__
host__
char
all
(
char
v
)
{
return
v
;}
static
__device__
__
forceinline__
__
host__
char
make
(
char
x
)
{
return
x
;}
};
template
<>
struct
VecTraits
<
schar
>
{
typedef
schar
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
schar
all
(
schar
v
)
{
return
v
;}
static
__device__
__host__
schar
make
(
schar
x
)
{
return
x
;}
static
__device__
__
forceinline__
__
host__
schar
all
(
schar
v
)
{
return
v
;}
static
__device__
__
forceinline__
__
host__
schar
make
(
schar
x
)
{
return
x
;}
};
template
<>
struct
VecTraits
<
char1
>
{
typedef
schar
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
char1
all
(
schar
v
)
{
return
make_char1
(
v
);}
static
__device__
__host__
char1
make
(
schar
x
)
{
return
make_char1
(
x
);}
static
__device__
__
forceinline__
__
host__
char1
all
(
schar
v
)
{
return
make_char1
(
v
);}
static
__device__
__
forceinline__
__
host__
char1
make
(
schar
x
)
{
return
make_char1
(
x
);}
};
template
<>
struct
VecTraits
<
char2
>
{
typedef
schar
elem_t
;
enum
{
cn
=
2
};
static
__device__
__host__
char2
all
(
schar
v
)
{
return
make_char2
(
v
,
v
);}
static
__device__
__host__
char2
make
(
schar
x
,
schar
y
)
{
return
make_char2
(
x
,
y
);}
static
__device__
__
forceinline__
__
host__
char2
all
(
schar
v
)
{
return
make_char2
(
v
,
v
);}
static
__device__
__
forceinline__
__
host__
char2
make
(
schar
x
,
schar
y
)
{
return
make_char2
(
x
,
y
);}
};
template
<>
struct
VecTraits
<
char3
>
{
typedef
schar
elem_t
;
enum
{
cn
=
3
};
static
__device__
__host__
char3
all
(
schar
v
)
{
return
make_char3
(
v
,
v
,
v
);}
static
__device__
__host__
char3
make
(
schar
x
,
schar
y
,
schar
z
)
{
return
make_char3
(
x
,
y
,
z
);}
static
__device__
__
forceinline__
__
host__
char3
all
(
schar
v
)
{
return
make_char3
(
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
char3
make
(
schar
x
,
schar
y
,
schar
z
)
{
return
make_char3
(
x
,
y
,
z
);}
};
template
<>
struct
VecTraits
<
char4
>
{
typedef
schar
elem_t
;
enum
{
cn
=
4
};
static
__device__
__host__
char4
all
(
schar
v
)
{
return
make_char4
(
v
,
v
,
v
,
v
);}
static
__device__
__host__
char4
make
(
schar
x
,
schar
y
,
schar
z
,
schar
w
)
{
return
make_char4
(
x
,
y
,
z
,
w
);}
static
__device__
__
forceinline__
__
host__
char4
all
(
schar
v
)
{
return
make_char4
(
v
,
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
char4
make
(
schar
x
,
schar
y
,
schar
z
,
schar
w
)
{
return
make_char4
(
x
,
y
,
z
,
w
);}
};
template
<>
struct
VecTraits
<
ushort
>
{
typedef
ushort
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
ushort
all
(
ushort
v
)
{
return
v
;}
static
__device__
__host__
ushort
make
(
ushort
x
)
{
return
x
;}
static
__device__
__
forceinline__
__
host__
ushort
all
(
ushort
v
)
{
return
v
;}
static
__device__
__
forceinline__
__
host__
ushort
make
(
ushort
x
)
{
return
x
;}
};
template
<>
struct
VecTraits
<
ushort1
>
{
typedef
ushort
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
ushort1
all
(
ushort
v
)
{
return
make_ushort1
(
v
);}
static
__device__
__host__
ushort1
make
(
ushort
x
)
{
return
make_ushort1
(
x
);}
static
__device__
__
forceinline__
__
host__
ushort1
all
(
ushort
v
)
{
return
make_ushort1
(
v
);}
static
__device__
__
forceinline__
__
host__
ushort1
make
(
ushort
x
)
{
return
make_ushort1
(
x
);}
};
template
<>
struct
VecTraits
<
ushort2
>
{
typedef
ushort
elem_t
;
enum
{
cn
=
2
};
static
__device__
__host__
ushort2
all
(
ushort
v
)
{
return
make_ushort2
(
v
,
v
);}
static
__device__
__host__
ushort2
make
(
ushort
x
,
ushort
y
)
{
return
make_ushort2
(
x
,
y
);}
static
__device__
__
forceinline__
__
host__
ushort2
all
(
ushort
v
)
{
return
make_ushort2
(
v
,
v
);}
static
__device__
__
forceinline__
__
host__
ushort2
make
(
ushort
x
,
ushort
y
)
{
return
make_ushort2
(
x
,
y
);}
};
template
<>
struct
VecTraits
<
ushort3
>
{
typedef
ushort
elem_t
;
enum
{
cn
=
3
};
static
__device__
__host__
ushort3
all
(
ushort
v
)
{
return
make_ushort3
(
v
,
v
,
v
);}
static
__device__
__host__
ushort3
make
(
ushort
x
,
ushort
y
,
ushort
z
)
{
return
make_ushort3
(
x
,
y
,
z
);}
static
__device__
__
forceinline__
__
host__
ushort3
all
(
ushort
v
)
{
return
make_ushort3
(
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
ushort3
make
(
ushort
x
,
ushort
y
,
ushort
z
)
{
return
make_ushort3
(
x
,
y
,
z
);}
};
template
<>
struct
VecTraits
<
ushort4
>
{
typedef
ushort
elem_t
;
enum
{
cn
=
4
};
static
__device__
__host__
ushort4
all
(
ushort
v
)
{
return
make_ushort4
(
v
,
v
,
v
,
v
);}
static
__device__
__host__
ushort4
make
(
ushort
x
,
ushort
y
,
ushort
z
,
ushort
w
)
{
return
make_ushort4
(
x
,
y
,
z
,
w
);}
static
__device__
__
forceinline__
__
host__
ushort4
all
(
ushort
v
)
{
return
make_ushort4
(
v
,
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
ushort4
make
(
ushort
x
,
ushort
y
,
ushort
z
,
ushort
w
)
{
return
make_ushort4
(
x
,
y
,
z
,
w
);}
};
template
<>
struct
VecTraits
<
short
>
{
typedef
short
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
short
all
(
short
v
)
{
return
v
;}
static
__device__
__host__
short
make
(
short
x
)
{
return
x
;}
static
__device__
__
forceinline__
__
host__
short
all
(
short
v
)
{
return
v
;}
static
__device__
__
forceinline__
__
host__
short
make
(
short
x
)
{
return
x
;}
};
template
<>
struct
VecTraits
<
short1
>
{
typedef
short
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
short1
all
(
short
v
)
{
return
make_short1
(
v
);}
static
__device__
__host__
short1
make
(
short
x
)
{
return
make_short1
(
x
);}
static
__device__
__
forceinline__
__
host__
short1
all
(
short
v
)
{
return
make_short1
(
v
);}
static
__device__
__
forceinline__
__
host__
short1
make
(
short
x
)
{
return
make_short1
(
x
);}
};
template
<>
struct
VecTraits
<
short2
>
{
typedef
short
elem_t
;
enum
{
cn
=
2
};
static
__device__
__host__
short2
all
(
short
v
)
{
return
make_short2
(
v
,
v
);}
static
__device__
__host__
short2
make
(
short
x
,
short
y
)
{
return
make_short2
(
x
,
y
);}
static
__device__
__
forceinline__
__
host__
short2
all
(
short
v
)
{
return
make_short2
(
v
,
v
);}
static
__device__
__
forceinline__
__
host__
short2
make
(
short
x
,
short
y
)
{
return
make_short2
(
x
,
y
);}
};
template
<>
struct
VecTraits
<
short3
>
{
typedef
short
elem_t
;
enum
{
cn
=
3
};
static
__device__
__host__
short3
all
(
short
v
)
{
return
make_short3
(
v
,
v
,
v
);}
static
__device__
__host__
short3
make
(
short
x
,
short
y
,
short
z
)
{
return
make_short3
(
x
,
y
,
z
);}
static
__device__
__
forceinline__
__
host__
short3
all
(
short
v
)
{
return
make_short3
(
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
short3
make
(
short
x
,
short
y
,
short
z
)
{
return
make_short3
(
x
,
y
,
z
);}
};
template
<>
struct
VecTraits
<
short4
>
{
typedef
short
elem_t
;
enum
{
cn
=
4
};
static
__device__
__host__
short4
all
(
short
v
)
{
return
make_short4
(
v
,
v
,
v
,
v
);}
static
__device__
__host__
short4
make
(
short
x
,
short
y
,
short
z
,
short
w
)
{
return
make_short4
(
x
,
y
,
z
,
w
);}
static
__device__
__
forceinline__
__
host__
short4
all
(
short
v
)
{
return
make_short4
(
v
,
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
short4
make
(
short
x
,
short
y
,
short
z
,
short
w
)
{
return
make_short4
(
x
,
y
,
z
,
w
);}
};
template
<>
struct
VecTraits
<
uint
>
{
typedef
uint
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
uint
all
(
uint
v
)
{
return
v
;}
static
__device__
__host__
uint
make
(
uint
x
)
{
return
x
;}
static
__device__
__
forceinline__
__
host__
uint
all
(
uint
v
)
{
return
v
;}
static
__device__
__
forceinline__
__
host__
uint
make
(
uint
x
)
{
return
x
;}
};
template
<>
struct
VecTraits
<
uint1
>
{
typedef
uint
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
uint1
all
(
uint
v
)
{
return
make_uint1
(
v
);}
static
__device__
__host__
uint1
make
(
uint
x
)
{
return
make_uint1
(
x
);}
static
__device__
__
forceinline__
__
host__
uint1
all
(
uint
v
)
{
return
make_uint1
(
v
);}
static
__device__
__
forceinline__
__
host__
uint1
make
(
uint
x
)
{
return
make_uint1
(
x
);}
};
template
<>
struct
VecTraits
<
uint2
>
{
typedef
uint
elem_t
;
enum
{
cn
=
2
};
static
__device__
__host__
uint2
all
(
uint
v
)
{
return
make_uint2
(
v
,
v
);}
static
__device__
__host__
uint2
make
(
uint
x
,
uint
y
)
{
return
make_uint2
(
x
,
y
);}
static
__device__
__
forceinline__
__
host__
uint2
all
(
uint
v
)
{
return
make_uint2
(
v
,
v
);}
static
__device__
__
forceinline__
__
host__
uint2
make
(
uint
x
,
uint
y
)
{
return
make_uint2
(
x
,
y
);}
};
template
<>
struct
VecTraits
<
uint3
>
{
typedef
uint
elem_t
;
enum
{
cn
=
3
};
static
__device__
__host__
uint3
all
(
uint
v
)
{
return
make_uint3
(
v
,
v
,
v
);}
static
__device__
__host__
uint3
make
(
uint
x
,
uint
y
,
uint
z
)
{
return
make_uint3
(
x
,
y
,
z
);}
static
__device__
__
forceinline__
__
host__
uint3
all
(
uint
v
)
{
return
make_uint3
(
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
uint3
make
(
uint
x
,
uint
y
,
uint
z
)
{
return
make_uint3
(
x
,
y
,
z
);}
};
template
<>
struct
VecTraits
<
uint4
>
{
typedef
uint
elem_t
;
enum
{
cn
=
4
};
static
__device__
__host__
uint4
all
(
uint
v
)
{
return
make_uint4
(
v
,
v
,
v
,
v
);}
static
__device__
__host__
uint4
make
(
uint
x
,
uint
y
,
uint
z
,
uint
w
)
{
return
make_uint4
(
x
,
y
,
z
,
w
);}
static
__device__
__
forceinline__
__
host__
uint4
all
(
uint
v
)
{
return
make_uint4
(
v
,
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
uint4
make
(
uint
x
,
uint
y
,
uint
z
,
uint
w
)
{
return
make_uint4
(
x
,
y
,
z
,
w
);}
};
template
<>
struct
VecTraits
<
int
>
{
typedef
int
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
int
all
(
int
v
)
{
return
v
;}
static
__device__
__host__
int
make
(
int
x
)
{
return
x
;}
static
__device__
__
forceinline__
__
host__
int
all
(
int
v
)
{
return
v
;}
static
__device__
__
forceinline__
__
host__
int
make
(
int
x
)
{
return
x
;}
};
template
<>
struct
VecTraits
<
int1
>
{
typedef
int
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
int1
all
(
int
v
)
{
return
make_int1
(
v
);}
static
__device__
__host__
int1
make
(
int
x
)
{
return
make_int1
(
x
);}
static
__device__
__
forceinline__
__
host__
int1
all
(
int
v
)
{
return
make_int1
(
v
);}
static
__device__
__
forceinline__
__
host__
int1
make
(
int
x
)
{
return
make_int1
(
x
);}
};
template
<>
struct
VecTraits
<
int2
>
{
typedef
int
elem_t
;
enum
{
cn
=
2
};
static
__device__
__host__
int2
all
(
int
v
)
{
return
make_int2
(
v
,
v
);}
static
__device__
__host__
int2
make
(
int
x
,
int
y
)
{
return
make_int2
(
x
,
y
);}
static
__device__
__
forceinline__
__
host__
int2
all
(
int
v
)
{
return
make_int2
(
v
,
v
);}
static
__device__
__
forceinline__
__
host__
int2
make
(
int
x
,
int
y
)
{
return
make_int2
(
x
,
y
);}
};
template
<>
struct
VecTraits
<
int3
>
{
typedef
int
elem_t
;
enum
{
cn
=
3
};
static
__device__
__host__
int3
all
(
int
v
)
{
return
make_int3
(
v
,
v
,
v
);}
static
__device__
__host__
int3
make
(
int
x
,
int
y
,
int
z
)
{
return
make_int3
(
x
,
y
,
z
);}
static
__device__
__
forceinline__
__
host__
int3
all
(
int
v
)
{
return
make_int3
(
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
int3
make
(
int
x
,
int
y
,
int
z
)
{
return
make_int3
(
x
,
y
,
z
);}
};
template
<>
struct
VecTraits
<
int4
>
{
typedef
int
elem_t
;
enum
{
cn
=
4
};
static
__device__
__host__
int4
all
(
int
v
)
{
return
make_int4
(
v
,
v
,
v
,
v
);}
static
__device__
__host__
int4
make
(
int
x
,
int
y
,
int
z
,
int
w
)
{
return
make_int4
(
x
,
y
,
z
,
w
);}
static
__device__
__
forceinline__
__
host__
int4
all
(
int
v
)
{
return
make_int4
(
v
,
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
int4
make
(
int
x
,
int
y
,
int
z
,
int
w
)
{
return
make_int4
(
x
,
y
,
z
,
w
);}
};
template
<>
struct
VecTraits
<
float
>
{
typedef
float
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
float
all
(
float
v
)
{
return
v
;}
static
__device__
__host__
float
make
(
float
x
)
{
return
x
;}
static
__device__
__
forceinline__
__
host__
float
all
(
float
v
)
{
return
v
;}
static
__device__
__
forceinline__
__
host__
float
make
(
float
x
)
{
return
x
;}
};
template
<>
struct
VecTraits
<
float1
>
{
typedef
float
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
float1
all
(
float
v
)
{
return
make_float1
(
v
);}
static
__device__
__host__
float1
make
(
float
x
)
{
return
make_float1
(
x
);}
static
__device__
__
forceinline__
__
host__
float1
all
(
float
v
)
{
return
make_float1
(
v
);}
static
__device__
__
forceinline__
__
host__
float1
make
(
float
x
)
{
return
make_float1
(
x
);}
};
template
<>
struct
VecTraits
<
float2
>
{
typedef
float
elem_t
;
enum
{
cn
=
2
};
static
__device__
__host__
float2
all
(
float
v
)
{
return
make_float2
(
v
,
v
);}
static
__device__
__host__
float2
make
(
float
x
,
float
y
)
{
return
make_float2
(
x
,
y
);}
static
__device__
__
forceinline__
__
host__
float2
all
(
float
v
)
{
return
make_float2
(
v
,
v
);}
static
__device__
__
forceinline__
__
host__
float2
make
(
float
x
,
float
y
)
{
return
make_float2
(
x
,
y
);}
};
template
<>
struct
VecTraits
<
float3
>
{
typedef
float
elem_t
;
enum
{
cn
=
3
};
static
__device__
__host__
float3
all
(
float
v
)
{
return
make_float3
(
v
,
v
,
v
);}
static
__device__
__host__
float3
make
(
float
x
,
float
y
,
float
z
)
{
return
make_float3
(
x
,
y
,
z
);}
static
__device__
__
forceinline__
__
host__
float3
all
(
float
v
)
{
return
make_float3
(
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
float3
make
(
float
x
,
float
y
,
float
z
)
{
return
make_float3
(
x
,
y
,
z
);}
};
template
<>
struct
VecTraits
<
float4
>
{
typedef
float
elem_t
;
enum
{
cn
=
4
};
static
__device__
__host__
float4
all
(
float
v
)
{
return
make_float4
(
v
,
v
,
v
,
v
);}
static
__device__
__host__
float4
make
(
float
x
,
float
y
,
float
z
,
float
w
)
{
return
make_float4
(
x
,
y
,
z
,
w
);}
static
__device__
__
forceinline__
__
host__
float4
all
(
float
v
)
{
return
make_float4
(
v
,
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
float4
make
(
float
x
,
float
y
,
float
z
,
float
w
)
{
return
make_float4
(
x
,
y
,
z
,
w
);}
};
template
<>
struct
VecTraits
<
double
>
{
typedef
double
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
double
all
(
double
v
)
{
return
v
;}
static
__device__
__host__
double
make
(
double
x
)
{
return
x
;}
static
__device__
__
forceinline__
__
host__
double
all
(
double
v
)
{
return
v
;}
static
__device__
__
forceinline__
__
host__
double
make
(
double
x
)
{
return
x
;}
};
template
<>
struct
VecTraits
<
double1
>
{
typedef
double
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
double1
all
(
double
v
)
{
return
make_double1
(
v
);}
static
__device__
__host__
double1
make
(
double
x
)
{
return
make_double1
(
x
);}
static
__device__
__
forceinline__
__
host__
double1
all
(
double
v
)
{
return
make_double1
(
v
);}
static
__device__
__
forceinline__
__
host__
double1
make
(
double
x
)
{
return
make_double1
(
x
);}
};
template
<>
struct
VecTraits
<
double2
>
{
typedef
double
elem_t
;
enum
{
cn
=
2
};
static
__device__
__host__
double2
all
(
double
v
)
{
return
make_double2
(
v
,
v
);}
static
__device__
__host__
double2
make
(
double
x
,
double
y
)
{
return
make_double2
(
x
,
y
);}
static
__device__
__
forceinline__
__
host__
double2
all
(
double
v
)
{
return
make_double2
(
v
,
v
);}
static
__device__
__
forceinline__
__
host__
double2
make
(
double
x
,
double
y
)
{
return
make_double2
(
x
,
y
);}
};
template
<>
struct
VecTraits
<
double3
>
{
typedef
double
elem_t
;
enum
{
cn
=
3
};
static
__device__
__host__
double3
all
(
double
v
)
{
return
make_double3
(
v
,
v
,
v
);}
static
__device__
__host__
double3
make
(
double
x
,
double
y
,
double
z
)
{
return
make_double3
(
x
,
y
,
z
);}
static
__device__
__
forceinline__
__
host__
double3
all
(
double
v
)
{
return
make_double3
(
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
double3
make
(
double
x
,
double
y
,
double
z
)
{
return
make_double3
(
x
,
y
,
z
);}
};
template
<>
struct
VecTraits
<
double4
>
{
typedef
double
elem_t
;
enum
{
cn
=
4
};
static
__device__
__host__
double4
all
(
double
v
)
{
return
make_double4
(
v
,
v
,
v
,
v
);}
static
__device__
__host__
double4
make
(
double
x
,
double
y
,
double
z
,
double
w
)
{
return
make_double4
(
x
,
y
,
z
,
w
);}
static
__device__
__
forceinline__
__
host__
double4
all
(
double
v
)
{
return
make_double4
(
v
,
v
,
v
,
v
);}
static
__device__
__
forceinline__
__
host__
double4
make
(
double
x
,
double
y
,
double
z
,
double
w
)
{
return
make_double4
(
x
,
y
,
z
,
w
);}
};
template
<
int
cn
,
typename
VecD
>
struct
SatCast
;
template
<
typename
VecD
>
struct
SatCast
<
1
,
VecD
>
{
template
<
typename
VecS
>
static
__device__
VecD
cast
(
const
VecS
&
v
)
static
__device__
__forceinline__
VecD
cast
(
const
VecS
&
v
)
{
typedef
typename
VecTraits
<
VecD
>::
elem_t
D
;
return
VecTraits
<
VecD
>::
make
(
saturate_cast
<
D
>
(
v
.
x
));
...
...
@@ -440,7 +440,7 @@ namespace cv
template
<
typename
VecD
>
struct
SatCast
<
2
,
VecD
>
{
template
<
typename
VecS
>
static
__device__
VecD
cast
(
const
VecS
&
v
)
static
__device__
__forceinline__
VecD
cast
(
const
VecS
&
v
)
{
typedef
typename
VecTraits
<
VecD
>::
elem_t
D
;
return
VecTraits
<
VecD
>::
make
(
saturate_cast
<
D
>
(
v
.
x
),
saturate_cast
<
D
>
(
v
.
y
));
...
...
@@ -449,7 +449,7 @@ namespace cv
template
<
typename
VecD
>
struct
SatCast
<
3
,
VecD
>
{
template
<
typename
VecS
>
static
__device__
VecD
cast
(
const
VecS
&
v
)
static
__device__
__forceinline__
VecD
cast
(
const
VecS
&
v
)
{
typedef
typename
VecTraits
<
VecD
>::
elem_t
D
;
return
VecTraits
<
VecD
>::
make
(
saturate_cast
<
D
>
(
v
.
x
),
saturate_cast
<
D
>
(
v
.
y
),
saturate_cast
<
D
>
(
v
.
z
));
...
...
@@ -458,635 +458,635 @@ namespace cv
template
<
typename
VecD
>
struct
SatCast
<
4
,
VecD
>
{
template
<
typename
VecS
>
static
__device__
VecD
cast
(
const
VecS
&
v
)
static
__device__
__forceinline__
VecD
cast
(
const
VecS
&
v
)
{
typedef
typename
VecTraits
<
VecD
>::
elem_t
D
;
return
VecTraits
<
VecD
>::
make
(
saturate_cast
<
D
>
(
v
.
x
),
saturate_cast
<
D
>
(
v
.
y
),
saturate_cast
<
D
>
(
v
.
z
),
saturate_cast
<
D
>
(
v
.
w
));
}
};
template
<
typename
VecD
,
typename
VecS
>
static
__device__
VecD
saturate_cast_caller
(
const
VecS
&
v
)
template
<
typename
VecD
,
typename
VecS
>
static
__device__
__forceinline__
VecD
saturate_cast_caller
(
const
VecS
&
v
)
{
return
SatCast
<
VecTraits
<
VecD
>::
cn
,
VecD
>::
cast
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uchar1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
char1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
ushort1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
short1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uint1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
int1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
float1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
uchar1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
char1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
ushort1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
short1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
uint1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
int1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
float1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uchar2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
char2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
ushort2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
short2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uint2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
int2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
float2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
uchar2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
char2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
ushort2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
short2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
uint2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
int2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
float2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uchar3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
char3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
ushort3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
short3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uint3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
int3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
float3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
uchar3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
char3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
ushort3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
short3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
uint3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
int3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
float3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uchar4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
char4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
ushort4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
short4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uint4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
int4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
float4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
uchar4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
char4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
ushort4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
short4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
uint4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
int4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_
_forceinline__
_
Tp
saturate_cast
(
const
float4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
static
__device__
uchar1
operator
+
(
const
uchar1
&
a
,
const
uchar1
&
b
)
static
__device__
__forceinline__
uchar1
operator
+
(
const
uchar1
&
a
,
const
uchar1
&
b
)
{
return
make_uchar1
(
a
.
x
+
b
.
x
);
}
static
__device__
uchar1
operator
-
(
const
uchar1
&
a
,
const
uchar1
&
b
)
static
__device__
__forceinline__
uchar1
operator
-
(
const
uchar1
&
a
,
const
uchar1
&
b
)
{
return
make_uchar1
(
a
.
x
-
b
.
x
);
}
static
__device__
uchar1
operator
*
(
const
uchar1
&
a
,
const
uchar1
&
b
)
static
__device__
__forceinline__
uchar1
operator
*
(
const
uchar1
&
a
,
const
uchar1
&
b
)
{
return
make_uchar1
(
a
.
x
*
b
.
x
);
}
static
__device__
uchar1
operator
/
(
const
uchar1
&
a
,
const
uchar1
&
b
)
static
__device__
__forceinline__
uchar1
operator
/
(
const
uchar1
&
a
,
const
uchar1
&
b
)
{
return
make_uchar1
(
a
.
x
/
b
.
x
);
}
static
__device__
float1
operator
*
(
const
uchar1
&
a
,
float
s
)
static
__device__
__forceinline__
float1
operator
*
(
const
uchar1
&
a
,
float
s
)
{
return
make_float1
(
a
.
x
*
s
);
}
static
__device__
uchar2
operator
+
(
const
uchar2
&
a
,
const
uchar2
&
b
)
static
__device__
__forceinline__
uchar2
operator
+
(
const
uchar2
&
a
,
const
uchar2
&
b
)
{
return
make_uchar2
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
);
}
static
__device__
uchar2
operator
-
(
const
uchar2
&
a
,
const
uchar2
&
b
)
static
__device__
__forceinline__
uchar2
operator
-
(
const
uchar2
&
a
,
const
uchar2
&
b
)
{
return
make_uchar2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
static
__device__
uchar2
operator
*
(
const
uchar2
&
a
,
const
uchar2
&
b
)
static
__device__
__forceinline__
uchar2
operator
*
(
const
uchar2
&
a
,
const
uchar2
&
b
)
{
return
make_uchar2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
static
__device__
uchar2
operator
/
(
const
uchar2
&
a
,
const
uchar2
&
b
)
static
__device__
__forceinline__
uchar2
operator
/
(
const
uchar2
&
a
,
const
uchar2
&
b
)
{
return
make_uchar2
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
);
}
static
__device__
float2
operator
*
(
const
uchar2
&
a
,
float
s
)
static
__device__
__forceinline__
float2
operator
*
(
const
uchar2
&
a
,
float
s
)
{
return
make_float2
(
a
.
x
*
s
,
a
.
y
*
s
);
}
static
__device__
uchar3
operator
+
(
const
uchar3
&
a
,
const
uchar3
&
b
)
static
__device__
__forceinline__
uchar3
operator
+
(
const
uchar3
&
a
,
const
uchar3
&
b
)
{
return
make_uchar3
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
);
}
static
__device__
uchar3
operator
-
(
const
uchar3
&
a
,
const
uchar3
&
b
)
static
__device__
__forceinline__
uchar3
operator
-
(
const
uchar3
&
a
,
const
uchar3
&
b
)
{
return
make_uchar3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
static
__device__
uchar3
operator
*
(
const
uchar3
&
a
,
const
uchar3
&
b
)
static
__device__
__forceinline__
uchar3
operator
*
(
const
uchar3
&
a
,
const
uchar3
&
b
)
{
return
make_uchar3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
static
__device__
uchar3
operator
/
(
const
uchar3
&
a
,
const
uchar3
&
b
)
static
__device__
__forceinline__
uchar3
operator
/
(
const
uchar3
&
a
,
const
uchar3
&
b
)
{
return
make_uchar3
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
);
}
static
__device__
float3
operator
*
(
const
uchar3
&
a
,
float
s
)
static
__device__
__forceinline__
float3
operator
*
(
const
uchar3
&
a
,
float
s
)
{
return
make_float3
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
);
}
static
__device__
uchar4
operator
+
(
const
uchar4
&
a
,
const
uchar4
&
b
)
static
__device__
__forceinline__
uchar4
operator
+
(
const
uchar4
&
a
,
const
uchar4
&
b
)
{
return
make_uchar4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
static
__device__
uchar4
operator
-
(
const
uchar4
&
a
,
const
uchar4
&
b
)
static
__device__
__forceinline__
uchar4
operator
-
(
const
uchar4
&
a
,
const
uchar4
&
b
)
{
return
make_uchar4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
static
__device__
uchar4
operator
*
(
const
uchar4
&
a
,
const
uchar4
&
b
)
static
__device__
__forceinline__
uchar4
operator
*
(
const
uchar4
&
a
,
const
uchar4
&
b
)
{
return
make_uchar4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
static
__device__
uchar4
operator
/
(
const
uchar4
&
a
,
const
uchar4
&
b
)
static
__device__
__forceinline__
uchar4
operator
/
(
const
uchar4
&
a
,
const
uchar4
&
b
)
{
return
make_uchar4
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
,
a
.
w
/
b
.
w
);
}
static
__device__
float4
operator
*
(
const
uchar4
&
a
,
float
s
)
static
__device__
__forceinline__
float4
operator
*
(
const
uchar4
&
a
,
float
s
)
{
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
}
static
__device__
char1
operator
+
(
const
char1
&
a
,
const
char1
&
b
)
static
__device__
__forceinline__
char1
operator
+
(
const
char1
&
a
,
const
char1
&
b
)
{
return
make_char1
(
a
.
x
+
b
.
x
);
}
static
__device__
char1
operator
-
(
const
char1
&
a
,
const
char1
&
b
)
static
__device__
__forceinline__
char1
operator
-
(
const
char1
&
a
,
const
char1
&
b
)
{
return
make_char1
(
a
.
x
-
b
.
x
);
}
static
__device__
char1
operator
*
(
const
char1
&
a
,
const
char1
&
b
)
static
__device__
__forceinline__
char1
operator
*
(
const
char1
&
a
,
const
char1
&
b
)
{
return
make_char1
(
a
.
x
*
b
.
x
);
}
static
__device__
char1
operator
/
(
const
char1
&
a
,
const
char1
&
b
)
static
__device__
__forceinline__
char1
operator
/
(
const
char1
&
a
,
const
char1
&
b
)
{
return
make_char1
(
a
.
x
/
b
.
x
);
}
static
__device__
float1
operator
*
(
const
char1
&
a
,
float
s
)
static
__device__
__forceinline__
float1
operator
*
(
const
char1
&
a
,
float
s
)
{
return
make_float1
(
a
.
x
*
s
);
}
static
__device__
char2
operator
+
(
const
char2
&
a
,
const
char2
&
b
)
static
__device__
__forceinline__
char2
operator
+
(
const
char2
&
a
,
const
char2
&
b
)
{
return
make_char2
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
);
}
static
__device__
char2
operator
-
(
const
char2
&
a
,
const
char2
&
b
)
static
__device__
__forceinline__
char2
operator
-
(
const
char2
&
a
,
const
char2
&
b
)
{
return
make_char2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
static
__device__
char2
operator
*
(
const
char2
&
a
,
const
char2
&
b
)
static
__device__
__forceinline__
char2
operator
*
(
const
char2
&
a
,
const
char2
&
b
)
{
return
make_char2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
static
__device__
char2
operator
/
(
const
char2
&
a
,
const
char2
&
b
)
static
__device__
__forceinline__
char2
operator
/
(
const
char2
&
a
,
const
char2
&
b
)
{
return
make_char2
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
);
}
static
__device__
float2
operator
*
(
const
char2
&
a
,
float
s
)
static
__device__
__forceinline__
float2
operator
*
(
const
char2
&
a
,
float
s
)
{
return
make_float2
(
a
.
x
*
s
,
a
.
y
*
s
);
}
static
__device__
char3
operator
+
(
const
char3
&
a
,
const
char3
&
b
)
static
__device__
__forceinline__
char3
operator
+
(
const
char3
&
a
,
const
char3
&
b
)
{
return
make_char3
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
);
}
static
__device__
char3
operator
-
(
const
char3
&
a
,
const
char3
&
b
)
static
__device__
__forceinline__
char3
operator
-
(
const
char3
&
a
,
const
char3
&
b
)
{
return
make_char3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
static
__device__
char3
operator
*
(
const
char3
&
a
,
const
char3
&
b
)
static
__device__
__forceinline__
char3
operator
*
(
const
char3
&
a
,
const
char3
&
b
)
{
return
make_char3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
static
__device__
char3
operator
/
(
const
char3
&
a
,
const
char3
&
b
)
static
__device__
__forceinline__
char3
operator
/
(
const
char3
&
a
,
const
char3
&
b
)
{
return
make_char3
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
);
}
static
__device__
float3
operator
*
(
const
char3
&
a
,
float
s
)
static
__device__
__forceinline__
float3
operator
*
(
const
char3
&
a
,
float
s
)
{
return
make_float3
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
);
}
static
__device__
char4
operator
+
(
const
char4
&
a
,
const
char4
&
b
)
static
__device__
__forceinline__
char4
operator
+
(
const
char4
&
a
,
const
char4
&
b
)
{
return
make_char4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
static
__device__
char4
operator
-
(
const
char4
&
a
,
const
char4
&
b
)
static
__device__
__forceinline__
char4
operator
-
(
const
char4
&
a
,
const
char4
&
b
)
{
return
make_char4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
static
__device__
char4
operator
*
(
const
char4
&
a
,
const
char4
&
b
)
static
__device__
__forceinline__
char4
operator
*
(
const
char4
&
a
,
const
char4
&
b
)
{
return
make_char4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
static
__device__
char4
operator
/
(
const
char4
&
a
,
const
char4
&
b
)
static
__device__
__forceinline__
char4
operator
/
(
const
char4
&
a
,
const
char4
&
b
)
{
return
make_char4
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
,
a
.
w
/
b
.
w
);
}
static
__device__
float4
operator
*
(
const
char4
&
a
,
float
s
)
static
__device__
__forceinline__
float4
operator
*
(
const
char4
&
a
,
float
s
)
{
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
}
static
__device__
ushort1
operator
+
(
const
ushort1
&
a
,
const
ushort1
&
b
)
static
__device__
__forceinline__
ushort1
operator
+
(
const
ushort1
&
a
,
const
ushort1
&
b
)
{
return
make_ushort1
(
a
.
x
+
b
.
x
);
}
static
__device__
ushort1
operator
-
(
const
ushort1
&
a
,
const
ushort1
&
b
)
static
__device__
__forceinline__
ushort1
operator
-
(
const
ushort1
&
a
,
const
ushort1
&
b
)
{
return
make_ushort1
(
a
.
x
-
b
.
x
);
}
static
__device__
ushort1
operator
*
(
const
ushort1
&
a
,
const
ushort1
&
b
)
static
__device__
__forceinline__
ushort1
operator
*
(
const
ushort1
&
a
,
const
ushort1
&
b
)
{
return
make_ushort1
(
a
.
x
*
b
.
x
);
}
static
__device__
ushort1
operator
/
(
const
ushort1
&
a
,
const
ushort1
&
b
)
static
__device__
__forceinline__
ushort1
operator
/
(
const
ushort1
&
a
,
const
ushort1
&
b
)
{
return
make_ushort1
(
a
.
x
/
b
.
x
);
}
static
__device__
float1
operator
*
(
const
ushort1
&
a
,
float
s
)
static
__device__
__forceinline__
float1
operator
*
(
const
ushort1
&
a
,
float
s
)
{
return
make_float1
(
a
.
x
*
s
);
}
static
__device__
ushort2
operator
+
(
const
ushort2
&
a
,
const
ushort2
&
b
)
static
__device__
__forceinline__
ushort2
operator
+
(
const
ushort2
&
a
,
const
ushort2
&
b
)
{
return
make_ushort2
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
);
}
static
__device__
ushort2
operator
-
(
const
ushort2
&
a
,
const
ushort2
&
b
)
static
__device__
__forceinline__
ushort2
operator
-
(
const
ushort2
&
a
,
const
ushort2
&
b
)
{
return
make_ushort2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
static
__device__
ushort2
operator
*
(
const
ushort2
&
a
,
const
ushort2
&
b
)
static
__device__
__forceinline__
ushort2
operator
*
(
const
ushort2
&
a
,
const
ushort2
&
b
)
{
return
make_ushort2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
static
__device__
ushort2
operator
/
(
const
ushort2
&
a
,
const
ushort2
&
b
)
static
__device__
__forceinline__
ushort2
operator
/
(
const
ushort2
&
a
,
const
ushort2
&
b
)
{
return
make_ushort2
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
);
}
static
__device__
float2
operator
*
(
const
ushort2
&
a
,
float
s
)
static
__device__
__forceinline__
float2
operator
*
(
const
ushort2
&
a
,
float
s
)
{
return
make_float2
(
a
.
x
*
s
,
a
.
y
*
s
);
}
static
__device__
ushort3
operator
+
(
const
ushort3
&
a
,
const
ushort3
&
b
)
static
__device__
__forceinline__
ushort3
operator
+
(
const
ushort3
&
a
,
const
ushort3
&
b
)
{
return
make_ushort3
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
);
}
static
__device__
ushort3
operator
-
(
const
ushort3
&
a
,
const
ushort3
&
b
)
static
__device__
__forceinline__
ushort3
operator
-
(
const
ushort3
&
a
,
const
ushort3
&
b
)
{
return
make_ushort3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
static
__device__
ushort3
operator
*
(
const
ushort3
&
a
,
const
ushort3
&
b
)
static
__device__
__forceinline__
ushort3
operator
*
(
const
ushort3
&
a
,
const
ushort3
&
b
)
{
return
make_ushort3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
static
__device__
ushort3
operator
/
(
const
ushort3
&
a
,
const
ushort3
&
b
)
static
__device__
__forceinline__
ushort3
operator
/
(
const
ushort3
&
a
,
const
ushort3
&
b
)
{
return
make_ushort3
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
);
}
static
__device__
float3
operator
*
(
const
ushort3
&
a
,
float
s
)
static
__device__
__forceinline__
float3
operator
*
(
const
ushort3
&
a
,
float
s
)
{
return
make_float3
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
);
}
static
__device__
ushort4
operator
+
(
const
ushort4
&
a
,
const
ushort4
&
b
)
static
__device__
__forceinline__
ushort4
operator
+
(
const
ushort4
&
a
,
const
ushort4
&
b
)
{
return
make_ushort4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
static
__device__
ushort4
operator
-
(
const
ushort4
&
a
,
const
ushort4
&
b
)
static
__device__
__forceinline__
ushort4
operator
-
(
const
ushort4
&
a
,
const
ushort4
&
b
)
{
return
make_ushort4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
static
__device__
ushort4
operator
*
(
const
ushort4
&
a
,
const
ushort4
&
b
)
static
__device__
__forceinline__
ushort4
operator
*
(
const
ushort4
&
a
,
const
ushort4
&
b
)
{
return
make_ushort4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
static
__device__
ushort4
operator
/
(
const
ushort4
&
a
,
const
ushort4
&
b
)
static
__device__
__forceinline__
ushort4
operator
/
(
const
ushort4
&
a
,
const
ushort4
&
b
)
{
return
make_ushort4
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
,
a
.
w
/
b
.
w
);
}
static
__device__
float4
operator
*
(
const
ushort4
&
a
,
float
s
)
static
__device__
__forceinline__
float4
operator
*
(
const
ushort4
&
a
,
float
s
)
{
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
}
static
__device__
short1
operator
+
(
const
short1
&
a
,
const
short1
&
b
)
static
__device__
__forceinline__
short1
operator
+
(
const
short1
&
a
,
const
short1
&
b
)
{
return
make_short1
(
a
.
x
+
b
.
x
);
}
static
__device__
short1
operator
-
(
const
short1
&
a
,
const
short1
&
b
)
static
__device__
__forceinline__
short1
operator
-
(
const
short1
&
a
,
const
short1
&
b
)
{
return
make_short1
(
a
.
x
-
b
.
x
);
}
static
__device__
short1
operator
*
(
const
short1
&
a
,
const
short1
&
b
)
static
__device__
__forceinline__
short1
operator
*
(
const
short1
&
a
,
const
short1
&
b
)
{
return
make_short1
(
a
.
x
*
b
.
x
);
}
static
__device__
short1
operator
/
(
const
short1
&
a
,
const
short1
&
b
)
static
__device__
__forceinline__
short1
operator
/
(
const
short1
&
a
,
const
short1
&
b
)
{
return
make_short1
(
a
.
x
/
b
.
x
);
}
static
__device__
float1
operator
*
(
const
short1
&
a
,
float
s
)
static
__device__
__forceinline__
float1
operator
*
(
const
short1
&
a
,
float
s
)
{
return
make_float1
(
a
.
x
*
s
);
}
static
__device__
short2
operator
+
(
const
short2
&
a
,
const
short2
&
b
)
static
__device__
__forceinline__
short2
operator
+
(
const
short2
&
a
,
const
short2
&
b
)
{
return
make_short2
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
);
}
static
__device__
short2
operator
-
(
const
short2
&
a
,
const
short2
&
b
)
static
__device__
__forceinline__
short2
operator
-
(
const
short2
&
a
,
const
short2
&
b
)
{
return
make_short2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
static
__device__
short2
operator
*
(
const
short2
&
a
,
const
short2
&
b
)
static
__device__
__forceinline__
short2
operator
*
(
const
short2
&
a
,
const
short2
&
b
)
{
return
make_short2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
static
__device__
short2
operator
/
(
const
short2
&
a
,
const
short2
&
b
)
static
__device__
__forceinline__
short2
operator
/
(
const
short2
&
a
,
const
short2
&
b
)
{
return
make_short2
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
);
}
static
__device__
float2
operator
*
(
const
short2
&
a
,
float
s
)
static
__device__
__forceinline__
float2
operator
*
(
const
short2
&
a
,
float
s
)
{
return
make_float2
(
a
.
x
*
s
,
a
.
y
*
s
);
}
static
__device__
short3
operator
+
(
const
short3
&
a
,
const
short3
&
b
)
static
__device__
__forceinline__
short3
operator
+
(
const
short3
&
a
,
const
short3
&
b
)
{
return
make_short3
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
);
}
static
__device__
short3
operator
-
(
const
short3
&
a
,
const
short3
&
b
)
static
__device__
__forceinline__
short3
operator
-
(
const
short3
&
a
,
const
short3
&
b
)
{
return
make_short3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
static
__device__
short3
operator
*
(
const
short3
&
a
,
const
short3
&
b
)
static
__device__
__forceinline__
short3
operator
*
(
const
short3
&
a
,
const
short3
&
b
)
{
return
make_short3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
static
__device__
short3
operator
/
(
const
short3
&
a
,
const
short3
&
b
)
static
__device__
__forceinline__
short3
operator
/
(
const
short3
&
a
,
const
short3
&
b
)
{
return
make_short3
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
);
}
static
__device__
float3
operator
*
(
const
short3
&
a
,
float
s
)
static
__device__
__forceinline__
float3
operator
*
(
const
short3
&
a
,
float
s
)
{
return
make_float3
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
);
}
static
__device__
short4
operator
+
(
const
short4
&
a
,
const
short4
&
b
)
static
__device__
__forceinline__
short4
operator
+
(
const
short4
&
a
,
const
short4
&
b
)
{
return
make_short4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
static
__device__
short4
operator
-
(
const
short4
&
a
,
const
short4
&
b
)
static
__device__
__forceinline__
short4
operator
-
(
const
short4
&
a
,
const
short4
&
b
)
{
return
make_short4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
static
__device__
short4
operator
*
(
const
short4
&
a
,
const
short4
&
b
)
static
__device__
__forceinline__
short4
operator
*
(
const
short4
&
a
,
const
short4
&
b
)
{
return
make_short4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
static
__device__
short4
operator
/
(
const
short4
&
a
,
const
short4
&
b
)
static
__device__
__forceinline__
short4
operator
/
(
const
short4
&
a
,
const
short4
&
b
)
{
return
make_short4
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
,
a
.
w
/
b
.
w
);
}
static
__device__
float4
operator
*
(
const
short4
&
a
,
float
s
)
static
__device__
__forceinline__
float4
operator
*
(
const
short4
&
a
,
float
s
)
{
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
}
static
__device__
int1
operator
+
(
const
int1
&
a
,
const
int1
&
b
)
static
__device__
__forceinline__
int1
operator
+
(
const
int1
&
a
,
const
int1
&
b
)
{
return
make_int1
(
a
.
x
+
b
.
x
);
}
static
__device__
int1
operator
-
(
const
int1
&
a
,
const
int1
&
b
)
static
__device__
__forceinline__
int1
operator
-
(
const
int1
&
a
,
const
int1
&
b
)
{
return
make_int1
(
a
.
x
-
b
.
x
);
}
static
__device__
int1
operator
*
(
const
int1
&
a
,
const
int1
&
b
)
static
__device__
__forceinline__
int1
operator
*
(
const
int1
&
a
,
const
int1
&
b
)
{
return
make_int1
(
a
.
x
*
b
.
x
);
}
static
__device__
int1
operator
/
(
const
int1
&
a
,
const
int1
&
b
)
static
__device__
__forceinline__
int1
operator
/
(
const
int1
&
a
,
const
int1
&
b
)
{
return
make_int1
(
a
.
x
/
b
.
x
);
}
static
__device__
float1
operator
*
(
const
int1
&
a
,
float
s
)
static
__device__
__forceinline__
float1
operator
*
(
const
int1
&
a
,
float
s
)
{
return
make_float1
(
a
.
x
*
s
);
}
static
__device__
int2
operator
+
(
const
int2
&
a
,
const
int2
&
b
)
static
__device__
__forceinline__
int2
operator
+
(
const
int2
&
a
,
const
int2
&
b
)
{
return
make_int2
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
);
}
static
__device__
int2
operator
-
(
const
int2
&
a
,
const
int2
&
b
)
static
__device__
__forceinline__
int2
operator
-
(
const
int2
&
a
,
const
int2
&
b
)
{
return
make_int2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
static
__device__
int2
operator
*
(
const
int2
&
a
,
const
int2
&
b
)
static
__device__
__forceinline__
int2
operator
*
(
const
int2
&
a
,
const
int2
&
b
)
{
return
make_int2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
static
__device__
int2
operator
/
(
const
int2
&
a
,
const
int2
&
b
)
static
__device__
__forceinline__
int2
operator
/
(
const
int2
&
a
,
const
int2
&
b
)
{
return
make_int2
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
);
}
static
__device__
float2
operator
*
(
const
int2
&
a
,
float
s
)
static
__device__
__forceinline__
float2
operator
*
(
const
int2
&
a
,
float
s
)
{
return
make_float2
(
a
.
x
*
s
,
a
.
y
*
s
);
}
static
__device__
int3
operator
+
(
const
int3
&
a
,
const
int3
&
b
)
static
__device__
__forceinline__
int3
operator
+
(
const
int3
&
a
,
const
int3
&
b
)
{
return
make_int3
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
);
}
static
__device__
int3
operator
-
(
const
int3
&
a
,
const
int3
&
b
)
static
__device__
__forceinline__
int3
operator
-
(
const
int3
&
a
,
const
int3
&
b
)
{
return
make_int3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
static
__device__
int3
operator
*
(
const
int3
&
a
,
const
int3
&
b
)
static
__device__
__forceinline__
int3
operator
*
(
const
int3
&
a
,
const
int3
&
b
)
{
return
make_int3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
static
__device__
int3
operator
/
(
const
int3
&
a
,
const
int3
&
b
)
static
__device__
__forceinline__
int3
operator
/
(
const
int3
&
a
,
const
int3
&
b
)
{
return
make_int3
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
);
}
static
__device__
float3
operator
*
(
const
int3
&
a
,
float
s
)
static
__device__
__forceinline__
float3
operator
*
(
const
int3
&
a
,
float
s
)
{
return
make_float3
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
);
}
static
__device__
int4
operator
+
(
const
int4
&
a
,
const
int4
&
b
)
static
__device__
__forceinline__
int4
operator
+
(
const
int4
&
a
,
const
int4
&
b
)
{
return
make_int4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
static
__device__
int4
operator
-
(
const
int4
&
a
,
const
int4
&
b
)
static
__device__
__forceinline__
int4
operator
-
(
const
int4
&
a
,
const
int4
&
b
)
{
return
make_int4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
static
__device__
int4
operator
*
(
const
int4
&
a
,
const
int4
&
b
)
static
__device__
__forceinline__
int4
operator
*
(
const
int4
&
a
,
const
int4
&
b
)
{
return
make_int4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
static
__device__
int4
operator
/
(
const
int4
&
a
,
const
int4
&
b
)
static
__device__
__forceinline__
int4
operator
/
(
const
int4
&
a
,
const
int4
&
b
)
{
return
make_int4
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
,
a
.
w
/
b
.
w
);
}
static
__device__
float4
operator
*
(
const
int4
&
a
,
float
s
)
static
__device__
__forceinline__
float4
operator
*
(
const
int4
&
a
,
float
s
)
{
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
}
static
__device__
uint1
operator
+
(
const
uint1
&
a
,
const
uint1
&
b
)
static
__device__
__forceinline__
uint1
operator
+
(
const
uint1
&
a
,
const
uint1
&
b
)
{
return
make_uint1
(
a
.
x
+
b
.
x
);
}
static
__device__
uint1
operator
-
(
const
uint1
&
a
,
const
uint1
&
b
)
static
__device__
__forceinline__
uint1
operator
-
(
const
uint1
&
a
,
const
uint1
&
b
)
{
return
make_uint1
(
a
.
x
-
b
.
x
);
}
static
__device__
uint1
operator
*
(
const
uint1
&
a
,
const
uint1
&
b
)
static
__device__
__forceinline__
uint1
operator
*
(
const
uint1
&
a
,
const
uint1
&
b
)
{
return
make_uint1
(
a
.
x
*
b
.
x
);
}
static
__device__
uint1
operator
/
(
const
uint1
&
a
,
const
uint1
&
b
)
static
__device__
__forceinline__
uint1
operator
/
(
const
uint1
&
a
,
const
uint1
&
b
)
{
return
make_uint1
(
a
.
x
/
b
.
x
);
}
static
__device__
float1
operator
*
(
const
uint1
&
a
,
float
s
)
static
__device__
__forceinline__
float1
operator
*
(
const
uint1
&
a
,
float
s
)
{
return
make_float1
(
a
.
x
*
s
);
}
static
__device__
uint2
operator
+
(
const
uint2
&
a
,
const
uint2
&
b
)
static
__device__
__forceinline__
uint2
operator
+
(
const
uint2
&
a
,
const
uint2
&
b
)
{
return
make_uint2
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
);
}
static
__device__
uint2
operator
-
(
const
uint2
&
a
,
const
uint2
&
b
)
static
__device__
__forceinline__
uint2
operator
-
(
const
uint2
&
a
,
const
uint2
&
b
)
{
return
make_uint2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
static
__device__
uint2
operator
*
(
const
uint2
&
a
,
const
uint2
&
b
)
static
__device__
__forceinline__
uint2
operator
*
(
const
uint2
&
a
,
const
uint2
&
b
)
{
return
make_uint2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
static
__device__
uint2
operator
/
(
const
uint2
&
a
,
const
uint2
&
b
)
static
__device__
__forceinline__
uint2
operator
/
(
const
uint2
&
a
,
const
uint2
&
b
)
{
return
make_uint2
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
);
}
static
__device__
float2
operator
*
(
const
uint2
&
a
,
float
s
)
static
__device__
__forceinline__
float2
operator
*
(
const
uint2
&
a
,
float
s
)
{
return
make_float2
(
a
.
x
*
s
,
a
.
y
*
s
);
}
static
__device__
uint3
operator
+
(
const
uint3
&
a
,
const
uint3
&
b
)
static
__device__
__forceinline__
uint3
operator
+
(
const
uint3
&
a
,
const
uint3
&
b
)
{
return
make_uint3
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
);
}
static
__device__
uint3
operator
-
(
const
uint3
&
a
,
const
uint3
&
b
)
static
__device__
__forceinline__
uint3
operator
-
(
const
uint3
&
a
,
const
uint3
&
b
)
{
return
make_uint3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
static
__device__
uint3
operator
*
(
const
uint3
&
a
,
const
uint3
&
b
)
static
__device__
__forceinline__
uint3
operator
*
(
const
uint3
&
a
,
const
uint3
&
b
)
{
return
make_uint3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
static
__device__
uint3
operator
/
(
const
uint3
&
a
,
const
uint3
&
b
)
static
__device__
__forceinline__
uint3
operator
/
(
const
uint3
&
a
,
const
uint3
&
b
)
{
return
make_uint3
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
);
}
static
__device__
float3
operator
*
(
const
uint3
&
a
,
float
s
)
static
__device__
__forceinline__
float3
operator
*
(
const
uint3
&
a
,
float
s
)
{
return
make_float3
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
);
}
static
__device__
uint4
operator
+
(
const
uint4
&
a
,
const
uint4
&
b
)
static
__device__
__forceinline__
uint4
operator
+
(
const
uint4
&
a
,
const
uint4
&
b
)
{
return
make_uint4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
static
__device__
uint4
operator
-
(
const
uint4
&
a
,
const
uint4
&
b
)
static
__device__
__forceinline__
uint4
operator
-
(
const
uint4
&
a
,
const
uint4
&
b
)
{
return
make_uint4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
static
__device__
uint4
operator
*
(
const
uint4
&
a
,
const
uint4
&
b
)
static
__device__
__forceinline__
uint4
operator
*
(
const
uint4
&
a
,
const
uint4
&
b
)
{
return
make_uint4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
static
__device__
uint4
operator
/
(
const
uint4
&
a
,
const
uint4
&
b
)
static
__device__
__forceinline__
uint4
operator
/
(
const
uint4
&
a
,
const
uint4
&
b
)
{
return
make_uint4
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
,
a
.
w
/
b
.
w
);
}
static
__device__
float4
operator
*
(
const
uint4
&
a
,
float
s
)
static
__device__
__forceinline__
float4
operator
*
(
const
uint4
&
a
,
float
s
)
{
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
}
static
__device__
float1
operator
+
(
const
float1
&
a
,
const
float1
&
b
)
static
__device__
__forceinline__
float1
operator
+
(
const
float1
&
a
,
const
float1
&
b
)
{
return
make_float1
(
a
.
x
+
b
.
x
);
}
static
__device__
float1
operator
-
(
const
float1
&
a
,
const
float1
&
b
)
static
__device__
__forceinline__
float1
operator
-
(
const
float1
&
a
,
const
float1
&
b
)
{
return
make_float1
(
a
.
x
-
b
.
x
);
}
static
__device__
float1
operator
*
(
const
float1
&
a
,
const
float1
&
b
)
static
__device__
__forceinline__
float1
operator
*
(
const
float1
&
a
,
const
float1
&
b
)
{
return
make_float1
(
a
.
x
*
b
.
x
);
}
static
__device__
float1
operator
/
(
const
float1
&
a
,
const
float1
&
b
)
static
__device__
__forceinline__
float1
operator
/
(
const
float1
&
a
,
const
float1
&
b
)
{
return
make_float1
(
a
.
x
/
b
.
x
);
}
static
__device__
float1
operator
*
(
const
float1
&
a
,
float
s
)
static
__device__
__forceinline__
float1
operator
*
(
const
float1
&
a
,
float
s
)
{
return
make_float1
(
a
.
x
*
s
);
}
static
__device__
float2
operator
+
(
const
float2
&
a
,
const
float2
&
b
)
static
__device__
__forceinline__
float2
operator
+
(
const
float2
&
a
,
const
float2
&
b
)
{
return
make_float2
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
);
}
static
__device__
float2
operator
-
(
const
float2
&
a
,
const
float2
&
b
)
static
__device__
__forceinline__
float2
operator
-
(
const
float2
&
a
,
const
float2
&
b
)
{
return
make_float2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
static
__device__
float2
operator
*
(
const
float2
&
a
,
const
float2
&
b
)
static
__device__
__forceinline__
float2
operator
*
(
const
float2
&
a
,
const
float2
&
b
)
{
return
make_float2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
static
__device__
float2
operator
/
(
const
float2
&
a
,
const
float2
&
b
)
static
__device__
__forceinline__
float2
operator
/
(
const
float2
&
a
,
const
float2
&
b
)
{
return
make_float2
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
);
}
static
__device__
float2
operator
*
(
const
float2
&
a
,
float
s
)
static
__device__
__forceinline__
float2
operator
*
(
const
float2
&
a
,
float
s
)
{
return
make_float2
(
a
.
x
*
s
,
a
.
y
*
s
);
}
static
__device__
float3
operator
+
(
const
float3
&
a
,
const
float3
&
b
)
static
__device__
__forceinline__
float3
operator
+
(
const
float3
&
a
,
const
float3
&
b
)
{
return
make_float3
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
);
}
static
__device__
float3
operator
-
(
const
float3
&
a
,
const
float3
&
b
)
static
__device__
__forceinline__
float3
operator
-
(
const
float3
&
a
,
const
float3
&
b
)
{
return
make_float3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
static
__device__
float3
operator
*
(
const
float3
&
a
,
const
float3
&
b
)
static
__device__
__forceinline__
float3
operator
*
(
const
float3
&
a
,
const
float3
&
b
)
{
return
make_float3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
static
__device__
float3
operator
/
(
const
float3
&
a
,
const
float3
&
b
)
static
__device__
__forceinline__
float3
operator
/
(
const
float3
&
a
,
const
float3
&
b
)
{
return
make_float3
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
);
}
static
__device__
float3
operator
*
(
const
float3
&
a
,
float
s
)
static
__device__
__forceinline__
float3
operator
*
(
const
float3
&
a
,
float
s
)
{
return
make_float3
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
);
}
static
__device__
float4
operator
+
(
const
float4
&
a
,
const
float4
&
b
)
static
__device__
__forceinline__
float4
operator
+
(
const
float4
&
a
,
const
float4
&
b
)
{
return
make_float4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
static
__device__
float4
operator
-
(
const
float4
&
a
,
const
float4
&
b
)
static
__device__
__forceinline__
float4
operator
-
(
const
float4
&
a
,
const
float4
&
b
)
{
return
make_float4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
static
__device__
float4
operator
*
(
const
float4
&
a
,
const
float4
&
b
)
static
__device__
__forceinline__
float4
operator
*
(
const
float4
&
a
,
const
float4
&
b
)
{
return
make_float4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
static
__device__
float4
operator
/
(
const
float4
&
a
,
const
float4
&
b
)
static
__device__
__forceinline__
float4
operator
/
(
const
float4
&
a
,
const
float4
&
b
)
{
return
make_float4
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
,
a
.
w
/
b
.
w
);
}
static
__device__
float4
operator
*
(
const
float4
&
a
,
float
s
)
static
__device__
__forceinline__
float4
operator
*
(
const
float4
&
a
,
float
s
)
{
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment