Commit c8a54f67 authored by marina.kolpakova

minor warning fix

parent d395bf15
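The warning being fixed comes from preprocessing the CUDA sources for the host pass: __CUDA_ARCH__ is defined only while nvcc compiles device code, so a bare "#if __CUDA_ARCH__ >= 110" evaluates an undefined macro and pedantic compilers complain. Every hunk below applies the same guard, testing defined(__CUDA_ARCH__) before comparing its value. A minimal sketch of the pattern, using a hypothetical kernel name that is not part of this commit:

// Minimal sketch of the guard pattern applied in this commit.
// "example_kernel" is a hypothetical name, not taken from the diff.
__global__ void example_kernel(int* out)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
    // Device-only body: built only when compiling for compute capability >= 1.1.
    out[threadIdx.x] = threadIdx.x;
#else
    // The host preprocessing pass (and older architectures) sees an empty body,
    // and no "__CUDA_ARCH__ is not defined" warning is emitted.
    (void)out;
#endif
}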
@@ -56,7 +56,7 @@ namespace cv { namespace gpu { namespace device
__global__ void matchUnrolled(const DevMem2D_<T> query, int imgIdx, const DevMem2D_<T> train, float maxDistance, const Mask mask,
PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
{
-#if __CUDA_ARCH__ >= 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
extern __shared__ int smem[];
@@ -168,7 +168,7 @@ namespace cv { namespace gpu { namespace device
__global__ void match(const DevMem2D_<T> query, int imgIdx, const DevMem2D_<T> train, float maxDistance, const Mask mask,
PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
{
-#if __CUDA_ARCH__ >= 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
extern __shared__ int smem[];
...
@@ -261,7 +261,7 @@ namespace cv { namespace gpu { namespace device
__global__ void edgesHysteresisLocal(PtrStepi map, ushort2* st, int rows, int cols)
{
-#if __CUDA_ARCH__ >= 120
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 120)
__shared__ int smem[18][18];
@@ -358,7 +358,7 @@ namespace cv { namespace gpu { namespace device
__global__ void edgesHysteresisGlobal(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols, int count)
{
-#if __CUDA_ARCH__ >= 120
+#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 120
const int stack_size = 512;
...
@@ -64,7 +64,7 @@ namespace cv { namespace gpu { namespace device
template <int KSIZE, typename T, typename D, typename B>
__global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, const int anchor, const B brd)
{
-#if __CUDA_ARCH__ >= 200
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
const int BLOCK_DIM_X = 16;
const int BLOCK_DIM_Y = 16;
const int PATCH_PER_BLOCK = 4;
...
@@ -223,7 +223,7 @@ namespace cv { namespace gpu { namespace device
template <bool calcScore, class Mask>
__global__ void calcKeypoints(const DevMem2Db img, const Mask mask, short2* kpLoc, const unsigned int maxKeypoints, PtrStepi score, const int threshold)
{
-#if __CUDA_ARCH__ >= 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
const int j = threadIdx.x + blockIdx.x * blockDim.x + 3;
const int i = threadIdx.y + blockIdx.y * blockDim.y + 3;
@@ -325,7 +325,7 @@ namespace cv { namespace gpu { namespace device
__global__ void nonmaxSupression(const short2* kpLoc, int count, const DevMem2Di scoreMat, short2* locFinal, float* responseFinal)
{
-#if __CUDA_ARCH__ >= 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
const int kpIdx = threadIdx.x + blockIdx.x * blockDim.x;
...
@@ -63,7 +63,7 @@ namespace cv { namespace gpu { namespace device
#define MERGE_THREADBLOCK_SIZE 256
-#define USE_SMEM_ATOMICS (__CUDA_ARCH__ >= 120)
+#define USE_SMEM_ATOMICS (defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 120))
namespace hist
{
...
@@ -59,7 +59,7 @@ namespace cv { namespace gpu { namespace device
__global__ void shfl_integral_horizontal(const PtrStep_<uint4> img, PtrStep_<uint4> integral)
{
-#if __CUDA_ARCH__ >= 300
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
__shared__ int sums[128];
const int id = threadIdx.x;
@@ -299,7 +299,7 @@ namespace cv { namespace gpu { namespace device
// block sums.
__global__ void shfl_integral_vertical(DevMem2D_<unsigned int> integral)
{
-#if __CUDA_ARCH__ >= 300
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
__shared__ unsigned int sums[32][9];
const int tidx = blockIdx.x * blockDim.x + threadIdx.x;
...
@@ -215,7 +215,7 @@ namespace cv { namespace gpu { namespace device
maxval[blockIdx.y * gridDim.x + blockIdx.x] = (T)smaxval[0];
}
-#if __CUDA_ARCH__ >= 110
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
__shared__ bool is_last;
if (tid == 0)
@@ -535,7 +535,7 @@ namespace cv { namespace gpu { namespace device
findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);
-#if __CUDA_ARCH__ >= 110
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
__shared__ bool is_last;
if (tid == 0)
@@ -841,7 +841,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, uint>(scount, tid);
-#if __CUDA_ARCH__ >= 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
__shared__ bool is_last;
if (tid == 0)
@@ -1034,7 +1034,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem, tid);
-#if __CUDA_ARCH__ >= 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
__shared__ bool is_last;
if (tid == 0)
@@ -1115,7 +1115,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem, tid);
sumInSmem<nthreads, R>(smem + nthreads, tid);
-#if __CUDA_ARCH__ >= 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
__shared__ bool is_last;
if (tid == 0)
@@ -1222,7 +1222,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem + nthreads, tid);
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);
-#if __CUDA_ARCH__ >= 110
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
__shared__ bool is_last;
if (tid == 0)
@@ -1339,7 +1339,7 @@ namespace cv { namespace gpu { namespace device
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);
sumInSmem<nthreads, R>(smem + 3 * nthreads, tid);
-#if __CUDA_ARCH__ >= 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
__shared__ bool is_last;
if (tid == 0)
@@ -1975,7 +1975,7 @@ namespace cv { namespace gpu { namespace device
for (int c = 0; c < cn; ++c)
myVal[c] = op.startValue();
-#if __CUDA_ARCH__ >= 200
+#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 200
// For cc >= 2.0 prefer L1 cache
for (int x = threadIdx.x; x < src.cols; x += 256)
...
@@ -82,7 +82,7 @@ namespace cv { namespace gpu { namespace device
smem3[tid] = val3;
__syncthreads();
-#if __CUDA_ARCH__ > 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 110)
if (tid < 128)
{
smem1[tid] = val1 += smem1[tid + 128];
@@ -138,7 +138,7 @@ namespace cv { namespace gpu { namespace device
smem2[tid] = val2;
__syncthreads();
-#if __CUDA_ARCH__ > 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 110)
if (tid < 128)
{
smem1[tid] = val1 += smem1[tid + 128];
@@ -184,7 +184,7 @@ namespace cv { namespace gpu { namespace device
smem1[tid] = val1;
__syncthreads();
-#if __CUDA_ARCH__ > 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 110)
if (tid < 128)
{
smem1[tid] = val1 += smem1[tid + 128];
@@ -271,7 +271,7 @@ namespace cv { namespace gpu { namespace device
template <int cn, int PATCH_X, int PATCH_Y, bool calcErr>
__global__ void lkSparse(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
{
-#if __CUDA_ARCH__ <= 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ <= 110)
__shared__ float smem1[128];
__shared__ float smem2[128];
__shared__ float smem3[128];
...
@@ -64,7 +64,7 @@ namespace cv { namespace gpu { namespace device
template <int KSIZE, typename T, typename D, typename B>
__global__ void linearRowFilter(const DevMem2D_<T> src, PtrStep<D> dst, const int anchor, const B brd)
{
-#if __CUDA_ARCH__ >= 200
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
const int BLOCK_DIM_X = 32;
const int BLOCK_DIM_Y = 8;
const int PATCH_PER_BLOCK = 4;
...
@@ -2070,7 +2070,7 @@ NCVStatus nppiStInterpolateFrames(const NppStInterpolationState *pState)
//==============================================================================
-#if __CUDA_ARCH__ < 200
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
// FP32 atomic add
static __forceinline__ __device__ float _atomicAdd(float *addr, float val)
...