Commit e69c6fde authored by Marina Kolpakova

minor formatting changes

parent 7c160cdc
......@@ -44,9 +44,9 @@
#include <algorithm>
#include "internal_shared.hpp"
namespace cv { namespace gpu { namespace device
{
namespace canny
{
__global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
{
......@@ -99,7 +99,7 @@ namespace cv { namespace gpu { namespace device
}
};
template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, const PtrStepi dy_buf,
PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
{
__shared__ int sdx[18][16];
......@@ -175,7 +175,7 @@ namespace cv { namespace gpu { namespace device
}
//////////////////////////////////////////////////////////////////////////////////////////
#define CANNY_SHIFT 15
#define TG22 (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)
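// [Editor's sketch, not part of the patch] TG22 stores tan(22.5 deg) ~= 0.41421356
// (i.e. sqrt(2) - 1) in Q15 fixed point, about 13573. That keeps the gradient
// direction test in integer arithmetic; assuming dx, dy are the Sobel components:
//     const int tg22x = dx * TG22;               // Q15 product
//     if ((dy << CANNY_SHIFT) < tg22x) { ... }   // |angle| < 22.5 deg: horizontal bin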
......@@ -236,7 +236,7 @@ namespace cv { namespace gpu { namespace device
edge_type = 1 + (int)(m > high_thresh);
}
}
map.ptr(i + 1)[j + 1] = edge_type;
}
}
......@@ -270,7 +270,7 @@ namespace cv { namespace gpu { namespace device
const int tid = threadIdx.y * 16 + threadIdx.x;
const int lx = tid % 18;
const int ly = tid / 18;
if (ly < 14)
smem[ly][lx] = map.ptr(blockIdx.y * 16 + ly)[blockIdx.x * 16 + lx];
......@@ -294,10 +294,10 @@ namespace cv { namespace gpu { namespace device
n += smem[threadIdx.y ][threadIdx.x ] == 2;
n += smem[threadIdx.y ][threadIdx.x + 1] == 2;
n += smem[threadIdx.y ][threadIdx.x + 2] == 2;
n += smem[threadIdx.y + 1][threadIdx.x ] == 2;
n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2;
n += smem[threadIdx.y + 2][threadIdx.x ] == 2;
n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2;
n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2;
......@@ -318,10 +318,10 @@ namespace cv { namespace gpu { namespace device
n += smem[threadIdx.y ][threadIdx.x ] == 1;
n += smem[threadIdx.y ][threadIdx.x + 1] == 1;
n += smem[threadIdx.y ][threadIdx.x + 2] == 1;
n += smem[threadIdx.y + 1][threadIdx.x ] == 1;
n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1;
n += smem[threadIdx.y + 2][threadIdx.x ] == 1;
n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1;
n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1;
......@@ -361,7 +361,7 @@ namespace cv { namespace gpu { namespace device
#if __CUDA_ARCH__ >= 120
const int stack_size = 512;
__shared__ unsigned int s_counter;
__shared__ unsigned int s_ind;
__shared__ ushort2 s_st[stack_size];
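// [Editor's sketch, not part of the patch] s_counter and s_st form a per-block
// stack of edge candidates; a push needs a shared-memory atomic, which is why
// the code is guarded for __CUDA_ARCH__ >= 120 (sm_12 added shared atomics):
//     const unsigned int ind = atomicAdd(&s_counter, 1);   // reserve a slot
//     if (ind < stack_size) s_st[ind] = make_ushort2(x, y);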
......@@ -404,11 +404,11 @@ namespace cv { namespace gpu { namespace device
if (subTaskIdx < portion)
pos = s_st[s_counter - 1 - subTaskIdx];
__syncthreads();
if (threadIdx.x == 0)
s_counter -= portion;
__syncthreads();
if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
{
pos.x += c_dx[threadIdx.x & 7];
......@@ -452,7 +452,7 @@ namespace cv { namespace gpu { namespace device
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
unsigned int count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
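// [Editor's sketch, not part of the patch] This readback completes the usual
// device-counter idiom: kernels append results and bump the __device__ symbol
// `counter` atomically, e.g.
//     const unsigned int ind = atomicInc(&counter, (unsigned int)-1);
// and the host then resolves the symbol address (as above) and copies the final
// value back to learn how many items were written.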
......
......@@ -45,7 +45,7 @@
#include <opencv2/gpu/device/color.hpp>
#include <cvt_colot_internal.h>
namespace cv { namespace gpu { namespace device
{
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
{
......@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace device
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits<uchar>::functor_type)
{
......
......@@ -48,9 +48,9 @@
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/static_check.hpp"
namespace cv { namespace gpu { namespace device
{
namespace column_filter
{
#define MAX_KERNEL_SIZE 32
......@@ -146,7 +146,7 @@ namespace cv { namespace gpu { namespace device
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y * PATCH_PER_BLOCK));
B<T> brd(src.rows);
linearColumnFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
......@@ -162,7 +162,7 @@ namespace cv { namespace gpu { namespace device
{
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, int cc, cudaStream_t stream);
static const caller_t callers[5][33] =
{
{
0,
......@@ -338,9 +338,9 @@ namespace cv { namespace gpu { namespace device
linearColumnFilter_caller<30, T, D, BrdColWrap>,
linearColumnFilter_caller<31, T, D, BrdColWrap>,
linearColumnFilter_caller<32, T, D, BrdColWrap>
}
};
loadKernel(kernel, ksize);
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, cc, stream);
......
......@@ -43,9 +43,9 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, DevMem2D_<T> dst, int top, int left)
{
......@@ -58,9 +58,9 @@ namespace cv { namespace gpu { namespace device
template <template <typename> class B, typename T> struct CopyMakeBorderDispatcher
{
static void call(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, int top, int left,
const typename VecTraits<T>::elem_type* borderValue, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
......@@ -75,20 +75,20 @@ namespace cv { namespace gpu { namespace device
}
};
template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode,
const T* borderValue, cudaStream_t stream)
{
typedef typename TypeVec<T, cn>::vec_type vec_type;
typedef void (*caller_t)(const DevMem2D_<vec_type>& src, const DevMem2D_<vec_type>& dst, int top, int left, const T* borderValue, cudaStream_t stream);
static const caller_t callers[5] =
{
CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call,
CopyMakeBorderDispatcher<BrdReplicate, vec_type>::call,
CopyMakeBorderDispatcher<BrdConstant, vec_type>::call,
CopyMakeBorderDispatcher<BrdReflect, vec_type>::call,
CopyMakeBorderDispatcher<BrdWrap, vec_type>::call
};
callers[borderMode](DevMem2D_<vec_type>(src), DevMem2D_<vec_type>(dst), top, left, borderValue, stream);
......
......@@ -40,7 +40,7 @@
//
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
//
// The original code was written by Paul Furgale and Chi Hay Tong
// and later optimized and prepared for integration into OpenCV by Itseez.
//
//M*/
......@@ -48,9 +48,9 @@
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/utility.hpp"
namespace cv { namespace gpu { namespace device
{
namespace fast
{
__device__ unsigned int g_counter = 0;
......@@ -78,14 +78,14 @@ namespace cv { namespace gpu { namespace device
d1 = diffType(v, C[0] & 0xff, th);
d2 = diffType(v, C[2] & 0xff, th);
if ((d1 | d2) == 0)
return;
mask1 |= (d1 & 1) << 0;
mask2 |= ((d1 & 2) >> 1) << 0;
mask1 |= (d2 & 1) << 8;
mask2 |= ((d2 & 2) >> 1) << 8;
......@@ -141,7 +141,7 @@ namespace cv { namespace gpu { namespace device
return;*/
mask1 |= (d1 & 1) << 1;
mask2 |= ((d1 & 2) >> 1) << 1;
mask1 |= (d2 & 1) << 9;
mask2 |= ((d2 & 2) >> 1) << 9;
......@@ -169,7 +169,7 @@ namespace cv { namespace gpu { namespace device
return;*/
mask1 |= (d1 & 1) << 5;
mask2 |= ((d1 & 2) >> 1) << 5;
mask1 |= (d2 & 1) << 13;
mask2 |= ((d2 & 2) >> 1) << 13;
......@@ -191,7 +191,7 @@ namespace cv { namespace gpu { namespace device
// 0 -> not a keypoint
__device__ __forceinline__ bool isKeyPoint(int mask1, int mask2)
{
return (__popc(mask1) > 8 && (c_table[(mask1 >> 3) - 63] & (1 << (mask1 & 7)))) ||
(__popc(mask2) > 8 && (c_table[(mask2 >> 3) - 63] & (1 << (mask2 & 7))));
}
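// [Editor's sketch, not part of the patch] mask1/mask2 are 16-bit "brighter" /
// "darker" masks over the 16-pixel Bresenham circle. __popc(mask) > 8 requires
// at least 9 set bits, so mask >= 511 and (mask >> 3) - 63 is a valid c_table
// index; the table bit then confirms the set bits form one contiguous arc of 9.
//     e.g. mask1 == 0x01FF  ->  nine contiguous "brighter" pixels: a FAST corner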
......@@ -212,14 +212,14 @@ namespace cv { namespace gpu { namespace device
calcMask(C, v, mid, mask1, mask2);
int isKp = static_cast<int>(isKeyPoint(mask1, mask2));
min = isKp * (mid + 1) + (isKp ^ 1) * min;
max = (isKp ^ 1) * (mid - 1) + isKp * max;
}
return min - 1;
}
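// [Editor's sketch, not part of the patch] The multiply-by-isKp lines above are
// a branchless select; the loop is a plain bisection over the threshold, equivalent to:
//     if (isKp) min = mid + 1;   // still a keypoint at threshold mid: go higher
//     else      max = mid - 1;   // no longer a keypoint: go lower
// so `return min - 1` yields the largest threshold at which the pixel survives.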
template <bool calcScore, class Mask>
__global__ void calcKeypoints(const DevMem2Db img, const Mask mask, short2* kpLoc, const unsigned int maxKeypoints, PtrStepi score, const int threshold)
{
......@@ -243,7 +243,7 @@ namespace cv { namespace gpu { namespace device
C[2] |= static_cast<uint>(img(i - 1, j - 3)) << (3 * 8);
C[1] |= static_cast<uint>(img(i - 1, j + 3)) << 8;
C[3] |= static_cast<uint>(img(i, j - 3));
v = static_cast<int>(img(i, j));
C[1] |= static_cast<uint>(img(i, j + 3));
......@@ -313,7 +313,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
unsigned int count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
......@@ -335,14 +335,14 @@ namespace cv { namespace gpu { namespace device
int score = scoreMat(loc.y, loc.x);
bool ismax =
score > scoreMat(loc.y - 1, loc.x - 1) &&
score > scoreMat(loc.y - 1, loc.x ) &&
score > scoreMat(loc.y - 1, loc.x + 1) &&
score > scoreMat(loc.y , loc.x - 1) &&
score > scoreMat(loc.y , loc.x + 1) &&
score > scoreMat(loc.y + 1, loc.x - 1) &&
score > scoreMat(loc.y + 1, loc.x ) &&
score > scoreMat(loc.y + 1, loc.x + 1);
......@@ -375,7 +375,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
unsigned int new_count;
cudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
......
......@@ -40,7 +40,7 @@
//
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
//
// The original code was written by Paul Furgale and Chi Hay Tong
// and later optimized and prepared for integration into OpenCV by Itseez.
//
//M*/
......@@ -50,9 +50,9 @@
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/utility.hpp"
namespace cv { namespace gpu { namespace device
{
namespace gfft
{
texture<float, cudaTextureType2D, cudaReadModeElementType> eigTex(0, cudaFilterModePoint, cudaAddressModeClamp);
......@@ -117,7 +117,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
uint count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(uint), cudaMemcpyDeviceToHost) );
......@@ -126,9 +126,9 @@ namespace cv { namespace gpu { namespace device
class EigGreater
{
public:
__device__ __forceinline__ bool operator()(float2 a, float2 b) const
{
return tex2D(eigTex, a.x, a.y) > tex2D(eigTex, b.x, b.y);
}
};
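// [Editor's sketch, not part of the patch] A comparator like EigGreater is
// typically consumed by thrust to rank corner candidates by response, e.g.
// (hypothetical buffer/count names; needs <thrust/device_ptr.h>, <thrust/sort.h>):
//     thrust::device_ptr<float2> ptr(corners_buf);
//     thrust::sort(ptr, ptr + count, EigGreater());   // strongest corners first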
......
......@@ -45,7 +45,7 @@
#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
namespace cv { namespace gpu { namespace device
{
#define UINT_BITS 32U
......@@ -65,7 +65,7 @@ namespace cv { namespace gpu { namespace device
#define USE_SMEM_ATOMICS (__CUDA_ARCH__ >= 120)
namespace hist
{
#if (!USE_SMEM_ATOMICS)
......@@ -173,7 +173,7 @@ namespace cv { namespace gpu { namespace device
{
histogram256<<<PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_THREADBLOCK_SIZE, 0, stream>>>(
DevMem2D_<uint>(src),
buf,
static_cast<uint>(src.rows * src.step / sizeof(uint)),
src.cols);
......
This diff is collapsed.
......@@ -970,12 +970,12 @@ namespace cv { namespace gpu { namespace device
#undef IMPLEMENT_FILTER2D_TEX_READER
template <typename T, typename D>
void filter2D_gpu(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst,
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
int borderMode, const float* borderValue, cudaStream_t stream)
{
typedef void (*func_t)(const DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
static const func_t funcs[] =
{
Filter2DCaller<T, D, BrdReflect101>::call,
Filter2DCaller<T, D, BrdReplicate>::call,
......
......@@ -50,9 +50,9 @@
#include "safe_call.hpp"
#include "opencv2/gpu/device/common.hpp"
namespace cv { namespace gpu
{
enum
{
BORDER_REFLECT101_GPU = 0,
BORDER_REPLICATE_GPU,
......@@ -60,7 +60,7 @@ namespace cv { namespace gpu
BORDER_REFLECT_GPU,
BORDER_WRAP_GPU
};
// Converts CPU border extrapolation mode into GPU internal analogue.
// Returns true if the GPU analogue exists, false otherwise.
bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
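// [Editor's sketch, not part of the patch] A plausible shape for the declared
// mapping, assuming the standard cv::BORDER_* constants on the CPU side:
//     bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
//     {
//         switch (cpuBorderType)
//         {
//         case cv::BORDER_REFLECT_101: gpuBorderType = BORDER_REFLECT101_GPU; return true;
//         case cv::BORDER_REPLICATE:   gpuBorderType = BORDER_REPLICATE_GPU;  return true;
//         case cv::BORDER_CONSTANT:    gpuBorderType = BORDER_CONSTANT_GPU;   return true;
//         case cv::BORDER_REFLECT:     gpuBorderType = BORDER_REFLECT_GPU;    return true;
//         case cv::BORDER_WRAP:        gpuBorderType = BORDER_WRAP_GPU;       return true;
//         default: return false;
//         }
//     }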
......
This diff is collapsed.
......@@ -42,9 +42,9 @@
#include "internal_shared.hpp"
namespace cv { namespace gpu { namespace device
{
namespace mathfunc
{
//////////////////////////////////////////////////////////////////////////////////////
// Cart <-> Polar
......@@ -79,7 +79,7 @@ namespace cv { namespace gpu { namespace device
}
};
template <typename Mag, typename Angle>
__global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, size_t y_step,
float* mag, size_t mag_step, float* angle, size_t angle_step, float scale, int width, int height)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -137,11 +137,11 @@ namespace cv { namespace gpu { namespace device
grid.x = divUp(x.cols, threads.x);
grid.y = divUp(x.rows, threads.y);
const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;
cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(),
mag.data, mag.step/mag.elemSize(), angle.data, angle.step/angle.elemSize(), scale, x.cols, x.rows);
cudaSafeCall( cudaGetLastError() );
......@@ -152,7 +152,7 @@ namespace cv { namespace gpu { namespace device
void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2][2][2] =
{
{
{
......@@ -187,10 +187,10 @@ namespace cv { namespace gpu { namespace device
grid.x = divUp(mag.cols, threads.x);
grid.y = divUp(mag.rows, threads.y);
const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;
polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.data, mag.step/mag.elemSize(),
angle.data, angle.step/angle.elemSize(), scale, x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(), mag.cols, mag.rows);
cudaSafeCall( cudaGetLastError() );
......@@ -201,7 +201,7 @@ namespace cv { namespace gpu { namespace device
void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2] =
{
polarToCart_caller<NonEmptyMag>,
polarToCart_caller<EmptyMag>
......
This diff is collapsed.
......@@ -42,7 +42,7 @@
#include "opencv2/gpu/device/common.hpp"
namespace cv { namespace gpu { namespace device
{
namespace optical_flow
{
......@@ -50,7 +50,7 @@ namespace cv { namespace gpu { namespace device
#define NUM_VERTS_PER_ARROW 6
__global__ void NeedleMapAverageKernel(const DevMem2Df u, const PtrStepf v, PtrStepf u_avg, PtrStepf v_avg)
{
__shared__ float smem[2 * NEEDLE_MAP_SCALE];
volatile float* u_col_sum = smem;
......@@ -70,7 +70,7 @@ namespace cv { namespace gpu { namespace device
}
if (threadIdx.x < 8)
{
// now add the column sums
const uint X = threadIdx.x;
......@@ -80,8 +80,8 @@ namespace cv { namespace gpu { namespace device
v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 1];
}
if ((X & 0x3) == 0) // bits 0 & 1 == 0
{
u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 2];
v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 2];
}
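// [Editor's sketch, not part of the patch] The guarded additions (+1 above,
// +2 here, with the larger strides in the elided context) form a shared-memory
// tree reduction over the 16 column sums, leaving the totals in element 0; the
// canonical stride pattern is:
//     if ((X & 0x1) == 0) u_col_sum[X] += u_col_sum[X + 1];
//     if ((X & 0x3) == 0) u_col_sum[X] += u_col_sum[X + 2];
//     if ((X & 0x7) == 0) u_col_sum[X] += u_col_sum[X + 4];
//     if ((X & 0xf) == 0) u_col_sum[X] += u_col_sum[X + 8];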
......@@ -110,7 +110,7 @@ namespace cv { namespace gpu { namespace device
v_avg(blockIdx.y, blockIdx.x) = v_col_sum[0];
}
}
void NeedleMapAverage_gpu(DevMem2Df u, DevMem2Df v, DevMem2Df u_avg, DevMem2Df v_avg)
{
const dim3 block(NEEDLE_MAP_SCALE);
......
......@@ -40,7 +40,7 @@
//
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
//
// The original code was written by Paul Furgale and Chi Hay Tong
// and later optimized and prepared for integration into OpenCV by Itseez.
//
//M*/
......@@ -51,7 +51,7 @@
#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/functional.hpp"
namespace cv { namespace gpu { namespace device
{
namespace orb
{
......@@ -59,7 +59,7 @@ namespace cv { namespace gpu { namespace device
// cull
int cull_gpu(int* loc, float* response, int size, int n_points)
{
thrust::device_ptr<int> loc_ptr(loc);
thrust::device_ptr<float> response_ptr(response);
......@@ -83,10 +83,10 @@ namespace cv { namespace gpu { namespace device
{
const short2 loc = loc_[ptidx];
const int r = blockSize / 2;
const int x0 = loc.x - r;
const int y0 = loc.y - r;
int a = 0, b = 0, c = 0;
for (int ind = threadIdx.x; ind < blockSize * blockSize; ind += blockDim.x)
......@@ -94,12 +94,12 @@ namespace cv { namespace gpu { namespace device
const int i = ind / blockSize;
const int j = ind % blockSize;
int Ix = (img(y0 + i, x0 + j + 1) - img(y0 + i, x0 + j - 1)) * 2 +
(img(y0 + i - 1, x0 + j + 1) - img(y0 + i - 1, x0 + j - 1)) +
(img(y0 + i + 1, x0 + j + 1) - img(y0 + i + 1, x0 + j - 1));
int Iy = (img(y0 + i + 1, x0 + j) - img(y0 + i - 1, x0 + j)) * 2 +
(img(y0 + i + 1, x0 + j - 1) - img(y0 + i - 1, x0 + j - 1)) +
(img(y0 + i + 1, x0 + j + 1) - img(y0 + i - 1, x0 + j + 1));
a += Ix * Ix;
......@@ -160,7 +160,7 @@ namespace cv { namespace gpu { namespace device
int m_01 = 0, m_10 = 0;
const short2 loc = loc_[ptidx];
// Treat the center line differently, v=0
for (int u = threadIdx.x - half_k; u <= half_k; u += blockDim.x)
m_10 += u * image(loc.y, loc.x + u);
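// [Editor's sketch, not part of the patch] m_10 and m_01 are first-order
// intensity moments over the circular patch; once reduced across the block,
// the keypoint orientation follows the intensity-centroid rule:
//     angle = atan2f((float)m_01, (float)m_10);   // radians; callers convert to degrees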
......@@ -173,7 +173,7 @@ namespace cv { namespace gpu { namespace device
int v_sum = 0;
int m_sum = 0;
const int d = c_u_max[v];
for (int u = threadIdx.x - d; u <= d; u += blockDim.x)
{
int val_plus = image(loc.y + v, loc.x + u);
......@@ -229,7 +229,7 @@ namespace cv { namespace gpu { namespace device
{
__device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
{
pattern_x += 16 * i;
pattern_y += 16 * i;
int t0, t1, val;
......@@ -257,7 +257,7 @@ namespace cv { namespace gpu { namespace device
t0 = GET_VALUE(14); t1 = GET_VALUE(15);
val |= (t0 < t1) << 7;
return val;
}
};
......@@ -266,23 +266,23 @@ namespace cv { namespace gpu { namespace device
{
__device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
{
pattern_x += 12 * i;
pattern_y += 12 * i;
int t0, t1, t2, val;
t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2);
val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0);
t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 2;
t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4;
t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6;
return val;
}
};
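// [Editor's worked example, not part of the patch] Each
//     t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)
// term encodes the index of the brightest of three sampled points in 2 bits.
// With hypothetical intensities t0 = 40, t1 = 75, t2 = 60: t2 > t1 is false and
// t1 > t0 is true, giving code 1 (point 1 wins). Four such 2-bit codes are
// packed per descriptor byte, consuming 12 pattern points each.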
......@@ -291,9 +291,9 @@ namespace cv { namespace gpu { namespace device
{
__device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
{
pattern_x += 16 * i;
pattern_y += 16 * i;
int t0, t1, t2, t3, k, val;
int a, b;
......@@ -304,7 +304,7 @@ namespace cv { namespace gpu { namespace device
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val = k;
t0 = GET_VALUE(4); t1 = GET_VALUE(5);
t2 = GET_VALUE(6); t3 = GET_VALUE(7);
a = 0, b = 2;
......@@ -312,7 +312,7 @@ namespace cv { namespace gpu { namespace device
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val |= k << 2;
t0 = GET_VALUE(8); t1 = GET_VALUE(9);
t2 = GET_VALUE(10); t3 = GET_VALUE(11);
a = 0, b = 2;
......@@ -320,7 +320,7 @@ namespace cv { namespace gpu { namespace device
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val |= k << 4;
t0 = GET_VALUE(12); t1 = GET_VALUE(13);
t2 = GET_VALUE(14); t3 = GET_VALUE(15);
a = 0, b = 2;
......@@ -328,7 +328,7 @@ namespace cv { namespace gpu { namespace device
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val |= k << 6;
return val;
}
};
......@@ -399,7 +399,7 @@ namespace cv { namespace gpu { namespace device
y[ptidx] = loc.y * scale;
}
}
void mergeLocation_gpu(const short2* loc, float* x, float* y, int npoints, float scale, cudaStream_t stream)
{
dim3 block(256);
......
......@@ -69,7 +69,7 @@ namespace cv { namespace gpu { namespace device
{
static void call(DevMem2D_<T> src, DevMem2Df mapx, DevMem2Df mapy, DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
......@@ -159,7 +159,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};
OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar4)
......@@ -188,7 +188,7 @@ namespace cv { namespace gpu { namespace device
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
{
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy,
DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc)
{
if (stream == 0)
......@@ -198,13 +198,13 @@ namespace cv { namespace gpu { namespace device
}
};
template <typename T> void remap_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap,
DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc)
{
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap,
DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc);
static const caller_t callers[3][5] =
{
{
RemapDispatcher<PointFilter, BrdReflect101, T>::call,
......@@ -229,7 +229,7 @@ namespace cv { namespace gpu { namespace device
}
};
callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, xmap, ymap,
static_cast< DevMem2D_<T> >(dst), borderValue, stream, cc);
}
......
......@@ -228,7 +228,7 @@ namespace cv { namespace gpu { namespace device
}
};
template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy,
DevMem2Db dst, int interpolation, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream);
......@@ -244,7 +244,7 @@ namespace cv { namespace gpu { namespace device
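// [Editor's note, an assumption about the elided caller table] `interpolation`
// indexes `callers` with the cv::INTER_* values (1 = linear, 3 = area); area
// resampling only differs from linear when both axes shrink, so the guard below
// demotes it to linear otherwise.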
if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
interpolation = 1;
callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy,
static_cast< DevMem2D_<T> >(dst), stream);
}
......
......@@ -43,7 +43,7 @@
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
namespace cv { namespace gpu { namespace device
{
namespace video_encoding
{
......@@ -159,12 +159,12 @@ namespace cv { namespace gpu { namespace device
void YV12_gpu(const DevMem2Db src, int cn, DevMem2Db dst)
{
typedef void (*func_t)(const DevMem2Db src, PtrStepb dst);
static const func_t funcs[] =
{
0, Gray_to_YV12_caller, 0, BGR_to_YV12_caller<3>, BGR_to_YV12_caller<4>
};
funcs[cn](src, dst);
}
}
......
......@@ -48,9 +48,9 @@
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/static_check.hpp"
namespace cv { namespace gpu { namespace device
{
namespace row_filter
{
#define MAX_KERNEL_SIZE 32
......@@ -79,7 +79,7 @@ namespace cv { namespace gpu { namespace device
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
__shared__ sum_t smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X];
const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
if (y >= src.rows)
......@@ -161,7 +161,7 @@ namespace cv { namespace gpu { namespace device
{
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, int cc, cudaStream_t stream);
static const caller_t callers[5][33] =
{
{
0,
......@@ -337,9 +337,9 @@ namespace cv { namespace gpu { namespace device
linearRowFilter_caller<30, T, D, BrdRowWrap>,
linearRowFilter_caller<31, T, D, BrdRowWrap>,
linearRowFilter_caller<32, T, D, BrdRowWrap>
}
};
loadKernel(kernel, ksize);
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, cc, stream);
......
......@@ -60,7 +60,7 @@
#define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__)
#endif
namespace cv { namespace gpu
{
void nppError(int err, const char *file, const int line, const char *func = "");
void ncvError(int err, const char *file, const int line, const char *func = "");
......
......@@ -42,12 +42,12 @@
#include "internal_shared.hpp"
namespace cv { namespace gpu { namespace device
{
namespace split_merge
{
template <typename T, size_t elem_size = sizeof(T)>
struct TypeTraits
{
typedef T type;
typedef T type2;
......@@ -74,7 +74,7 @@ namespace cv { namespace gpu { namespace device
};
template <typename T>
struct TypeTraits<T, 4>
{
typedef int type;
typedef int2 type2;
......@@ -83,7 +83,7 @@ namespace cv { namespace gpu { namespace device
};
template <typename T>
struct TypeTraits<T, 8>
{
typedef double type;
typedef double2 type2;
......@@ -95,11 +95,11 @@ namespace cv { namespace gpu { namespace device
typedef void (*SplitFunction)(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream);
//------------------------------------------------------------
// Merge
template <typename T>
__global__ void mergeC2_(const uchar* src0, size_t src0_step,
const uchar* src1, size_t src1_step,
int rows, int cols, uchar* dst, size_t dst_step)
{
typedef typename TypeTraits<T>::type2 dst_type;
......@@ -111,8 +111,8 @@ namespace cv { namespace gpu { namespace device
const T* src1_y = (const T*)(src1 + y * src1_step);
dst_type* dst_y = (dst_type*)(dst + y * dst_step);
if (x < cols && y < rows)
{
dst_type dst_elem;
dst_elem.x = src0_y[x];
dst_elem.y = src1_y[x];
......@@ -122,9 +122,9 @@ namespace cv { namespace gpu { namespace device
template <typename T>
__global__ void mergeC3_(const uchar* src0, size_t src0_step,
const uchar* src1, size_t src1_step,
const uchar* src2, size_t src2_step,
int rows, int cols, uchar* dst, size_t dst_step)
{
typedef typename TypeTraits<T>::type3 dst_type;
......@@ -137,8 +137,8 @@ namespace cv { namespace gpu { namespace device
const T* src2_y = (const T*)(src2 + y * src2_step);
dst_type* dst_y = (dst_type*)(dst + y * dst_step);
if (x < cols && y < rows)
{
dst_type dst_elem;
dst_elem.x = src0_y[x];
dst_elem.y = src1_y[x];
......@@ -149,9 +149,9 @@ namespace cv { namespace gpu { namespace device
template <>
__global__ void mergeC3_<double>(const uchar* src0, size_t src0_step,
const uchar* src1, size_t src1_step,
const uchar* src2, size_t src2_step,
int rows, int cols, uchar* dst, size_t dst_step)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
......@@ -162,8 +162,8 @@ namespace cv { namespace gpu { namespace device
const double* src2_y = (const double*)(src2 + y * src2_step);
double* dst_y = (double*)(dst + y * dst_step);
if (x < cols && y < rows)
{
dst_y[3 * x] = src0_y[x];
dst_y[3 * x + 1] = src1_y[x];
dst_y[3 * x + 2] = src2_y[x];
......@@ -172,10 +172,10 @@ namespace cv { namespace gpu { namespace device
template <typename T>
__global__ void mergeC4_(const uchar* src0, size_t src0_step,
const uchar* src1, size_t src1_step,
const uchar* src2, size_t src2_step,
const uchar* src3, size_t src3_step,
int rows, int cols, uchar* dst, size_t dst_step)
{
typedef typename TypeTraits<T>::type4 dst_type;
......@@ -189,8 +189,8 @@ namespace cv { namespace gpu { namespace device
const T* src3_y = (const T*)(src3 + y * src3_step);
dst_type* dst_y = (dst_type*)(dst + y * dst_step);
if (x < cols && y < rows)
{
dst_type dst_elem;
dst_elem.x = src0_y[x];
dst_elem.y = src1_y[x];
......@@ -202,10 +202,10 @@ namespace cv { namespace gpu { namespace device
template <>
__global__ void mergeC4_<double>(const uchar* src0, size_t src0_step,
const uchar* src1, size_t src1_step,
const uchar* src2, size_t src2_step,
const uchar* src3, size_t src3_step,
int rows, int cols, uchar* dst, size_t dst_step)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
......@@ -217,8 +217,8 @@ namespace cv { namespace gpu { namespace device
const double* src3_y = (const double*)(src3 + y * src3_step);
double2* dst_y = (double2*)(dst + y * dst_step);
if (x < cols && y < rows)
{
dst_y[2 * x] = make_double2(src0_y[x], src1_y[x]);
dst_y[2 * x + 1] = make_double2(src2_y[x], src3_y[x]);
}
......@@ -303,7 +303,7 @@ namespace cv { namespace gpu { namespace device
template <typename T>
__global__ void splitC2_(const uchar* src, size_t src_step,
int rows, int cols,
uchar* dst0, size_t dst0_step,
uchar* dst1, size_t dst1_step)
......@@ -317,7 +317,7 @@ namespace cv { namespace gpu { namespace device
T* dst0_y = (T*)(dst0 + y * dst0_step);
T* dst1_y = (T*)(dst1 + y * dst1_step);
if (x < cols && y < rows)
{
src_type src_elem = src_y[x];
dst0_y[x] = src_elem.x;
......@@ -327,7 +327,7 @@ namespace cv { namespace gpu { namespace device
template <typename T>
__global__ void splitC3_(const uchar* src, size_t src_step,
int rows, int cols,
uchar* dst0, size_t dst0_step,
uchar* dst1, size_t dst1_step,
......@@ -343,7 +343,7 @@ namespace cv { namespace gpu { namespace device
T* dst1_y = (T*)(dst1 + y * dst1_step);
T* dst2_y = (T*)(dst2 + y * dst2_step);
if (x < cols && y < rows)
{
src_type src_elem = src_y[x];
dst0_y[x] = src_elem.x;
......@@ -368,7 +368,7 @@ namespace cv { namespace gpu { namespace device
double* dst1_y = (double*)(dst1 + y * dst1_step);
double* dst2_y = (double*)(dst2 + y * dst2_step);
if (x < cols && y < rows)
{
dst0_y[x] = src_y[3 * x];
dst1_y[x] = src_y[3 * x + 1];
......@@ -395,7 +395,7 @@ namespace cv { namespace gpu { namespace device
T* dst2_y = (T*)(dst2 + y * dst2_step);
T* dst3_y = (T*)(dst3 + y * dst3_step);
if (x < cols && y < rows)
{
src_type src_elem = src_y[x];
dst0_y[x] = src_elem.x;
......@@ -423,7 +423,7 @@ namespace cv { namespace gpu { namespace device
double* dst2_y = (double*)(dst2 + y * dst2_step);
double* dst3_y = (double*)(dst3 + y * dst3_step);
if (x < cols && y < rows)
{
double2 src_elem1 = src_y[2 * x];
double2 src_elem2 = src_y[2 * x + 1];
......
......@@ -42,9 +42,9 @@
#include "internal_shared.hpp"
namespace cv { namespace gpu { namespace device
{
namespace stereobm
{
//////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////// Stereo BM ////////////////////////////////////////////////
......@@ -70,7 +70,7 @@ namespace cv { namespace gpu { namespace device
template<int RADIUS>
__device__ unsigned int CalcSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd)
{
unsigned int cache = 0;
unsigned int cache2 = 0;
......@@ -401,8 +401,8 @@ namespace cv { namespace gpu { namespace device
prefilter_kernel<<<grid, threads, 0, stream>>>(output, prefilterCap);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaUnbindTexture (texForSobel ) );
}
......
......@@ -44,9 +44,9 @@
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/limits.hpp"
namespace cv { namespace gpu { namespace device
{
namespace stereobp
{
///////////////////////////////////////////////////////////////
/////////////////////// load constants ////////////////////////
......
......@@ -44,9 +44,9 @@
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/limits.hpp"
namespace cv { namespace gpu { namespace device
{
namespace stereocsbp
{
///////////////////////////////////////////////////////////////
/////////////////////// load constants ////////////////////////
......@@ -62,7 +62,7 @@ namespace cv { namespace gpu { namespace device
__constant__ int cth;
__constant__ size_t cimg_step;
__constant__ size_t cmsg_step;
__constant__ size_t cdisp_step1;
__constant__ size_t cdisp_step2;
......@@ -392,7 +392,7 @@ namespace cv { namespace gpu { namespace device
get_first_k_initial_local<<<grid, threads, 0, stream>>> (data_cost_selected, disp_selected_pyr, h, w, nr_plane);
else
get_first_k_initial_global<<<grid, threads, 0, stream>>>(data_cost_selected, disp_selected_pyr, h, w, nr_plane);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
......@@ -575,7 +575,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
callers[level](disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream);
cudaSafeCall( cudaGetLastError() );
......@@ -588,13 +588,13 @@ namespace cv { namespace gpu { namespace device
template void compute_data_cost(const float* disp_selected_pyr, float* data_cost, size_t msg_step,
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream);
///////////////////////////////////////////////////////////////
//////////////////////// init message /////////////////////////
///////////////////////////////////////////////////////////////
template <typename T>
__device__ void get_first_k_element_increase(T* u_new, T* d_new, T* l_new, T* r_new,
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
......@@ -691,7 +691,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
......@@ -720,7 +720,7 @@ namespace cv { namespace gpu { namespace device
const float* u_cur, const float* d_cur, const float* l_cur, const float* r_cur,
float* selected_disp_pyr_new, const float* selected_disp_pyr_cur,
float* data_cost_selected, const float* data_cost, size_t msg_step,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream);
///////////////////////////////////////////////////////////////
//////////////////// calc all iterations /////////////////////
......@@ -805,7 +805,7 @@ namespace cv { namespace gpu { namespace device
for(int t = 0; t < iters; ++t)
{
compute_message<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1);
cudaSafeCall( cudaGetLastError() );
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
......@@ -814,7 +814,7 @@ namespace cv { namespace gpu { namespace device
template void calc_all_iterations(short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step,
int h, int w, int nr_plane, int iters, cudaStream_t stream);
template void calc_all_iterations(float* u, float* d, float* l, float* r, const float* data_cost_selected, const float* selected_disp_pyr_cur, size_t msg_step,
int h, int w, int nr_plane, int iters, cudaStream_t stream);
......@@ -879,7 +879,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
}
template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step,
const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);
template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step,
......
......@@ -98,7 +98,7 @@ namespace cv { namespace gpu { namespace device
{
dim3 block(32, 8);
dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));
buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
cudaSafeCall( cudaGetLastError() );
......@@ -158,7 +158,7 @@ namespace cv { namespace gpu { namespace device
{
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<T> dst, const float* borderValue, int)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
......@@ -256,7 +256,7 @@ namespace cv { namespace gpu { namespace device
#undef OPENCV_GPU_IMPLEMENT_WARP_TEX
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
{
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc)
{
if (stream == 0)
......@@ -266,7 +266,7 @@ namespace cv { namespace gpu { namespace device
}
};
template <class Transform, typename T>
void warp_caller(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Db dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
{
......