Commit 1f1e24be authored Nov 12, 2012 by Vladislav Vinogradov
PyrLKOpticalFlow
parent 1b571bde
Showing 2 changed files with 116 additions and 239 deletions

    modules/gpu/src/cuda/pyrlk.cu   +106  -222
    modules/gpu/src/pyrlk.cpp        +10   -17
modules/gpu/src/cuda/pyrlk.cu
@@ -52,167 +52,19 @@
 #include "opencv2/gpu/device/functional.hpp"
 #include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/vec_math.hpp"
+#include "opencv2/gpu/device/reduce.hpp"
 
-namespace cv { namespace gpu { namespace device
+using namespace cv::gpu;
+using namespace cv::gpu::device;
+
+namespace
 {
-    namespace pyrlk
-    {
     __constant__ int c_winSize_x;
     __constant__ int c_winSize_y;
     __constant__ int c_halfWin_x;
     __constant__ int c_halfWin_y;
     __constant__ int c_iters;
 
-    void loadConstants(int2 winSize, int iters)
-    {
-        cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
-        cudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) );
-
-        int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
-        cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) );
-        cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) );
-
-        cudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) );
-    }
-
-    __device__ void reduce(float& val1, float& val2, float& val3, float* smem1, float* smem2, float* smem3, int tid)
-    {
-        smem1[tid] = val1;
-        smem2[tid] = val2;
-        smem3[tid] = val3;
-        __syncthreads();
-
-#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 110)
-        if (tid < 128)
-        {
-            smem1[tid] = val1 += smem1[tid + 128];
-            smem2[tid] = val2 += smem2[tid + 128];
-            smem3[tid] = val3 += smem3[tid + 128];
-        }
-        __syncthreads();
-#endif
-
-        if (tid < 64)
-        {
-            smem1[tid] = val1 += smem1[tid + 64];
-            smem2[tid] = val2 += smem2[tid + 64];
-            smem3[tid] = val3 += smem3[tid + 64];
-        }
-        __syncthreads();
-
-        if (tid < 32)
-        {
-            volatile float* vmem1 = smem1;
-            volatile float* vmem2 = smem2;
-            volatile float* vmem3 = smem3;
-
-            vmem1[tid] = val1 += vmem1[tid + 32];
-            vmem2[tid] = val2 += vmem2[tid + 32];
-            vmem3[tid] = val3 += vmem3[tid + 32];
-
-            vmem1[tid] = val1 += vmem1[tid + 16];
-            vmem2[tid] = val2 += vmem2[tid + 16];
-            vmem3[tid] = val3 += vmem3[tid + 16];
-
-            vmem1[tid] = val1 += vmem1[tid + 8];
-            vmem2[tid] = val2 += vmem2[tid + 8];
-            vmem3[tid] = val3 += vmem3[tid + 8];
-
-            vmem1[tid] = val1 += vmem1[tid + 4];
-            vmem2[tid] = val2 += vmem2[tid + 4];
-            vmem3[tid] = val3 += vmem3[tid + 4];
-
-            vmem1[tid] = val1 += vmem1[tid + 2];
-            vmem2[tid] = val2 += vmem2[tid + 2];
-            vmem3[tid] = val3 += vmem3[tid + 2];
-
-            vmem1[tid] = val1 += vmem1[tid + 1];
-            vmem2[tid] = val2 += vmem2[tid + 1];
-            vmem3[tid] = val3 += vmem3[tid + 1];
-        }
-    }
-
-    __device__ void reduce(float& val1, float& val2, float* smem1, float* smem2, int tid)
-    {
-        smem1[tid] = val1;
-        smem2[tid] = val2;
-        __syncthreads();
-
-#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 110)
-        if (tid < 128)
-        {
-            smem1[tid] = val1 += smem1[tid + 128];
-            smem2[tid] = val2 += smem2[tid + 128];
-        }
-        __syncthreads();
-#endif
-
-        if (tid < 64)
-        {
-            smem1[tid] = val1 += smem1[tid + 64];
-            smem2[tid] = val2 += smem2[tid + 64];
-        }
-        __syncthreads();
-
-        if (tid < 32)
-        {
-            volatile float* vmem1 = smem1;
-            volatile float* vmem2 = smem2;
-
-            vmem1[tid] = val1 += vmem1[tid + 32];
-            vmem2[tid] = val2 += vmem2[tid + 32];
-
-            vmem1[tid] = val1 += vmem1[tid + 16];
-            vmem2[tid] = val2 += vmem2[tid + 16];
-
-            vmem1[tid] = val1 += vmem1[tid + 8];
-            vmem2[tid] = val2 += vmem2[tid + 8];
-
-            vmem1[tid] = val1 += vmem1[tid + 4];
-            vmem2[tid] = val2 += vmem2[tid + 4];
-
-            vmem1[tid] = val1 += vmem1[tid + 2];
-            vmem2[tid] = val2 += vmem2[tid + 2];
-
-            vmem1[tid] = val1 += vmem1[tid + 1];
-            vmem2[tid] = val2 += vmem2[tid + 1];
-        }
-    }
-
-    __device__ void reduce(float& val1, float* smem1, int tid)
-    {
-        smem1[tid] = val1;
-        __syncthreads();
-
-#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 110)
-        if (tid < 128)
-        {
-            smem1[tid] = val1 += smem1[tid + 128];
-        }
-        __syncthreads();
-#endif
-
-        if (tid < 64)
-        {
-            smem1[tid] = val1 += smem1[tid + 64];
-        }
-        __syncthreads();
-
-        if (tid < 32)
-        {
-            volatile float* vmem1 = smem1;
-
-            vmem1[tid] = val1 += vmem1[tid + 32];
-            vmem1[tid] = val1 += vmem1[tid + 16];
-            vmem1[tid] = val1 += vmem1[tid + 8];
-            vmem1[tid] = val1 += vmem1[tid + 4];
-            vmem1[tid] = val1 += vmem1[tid + 2];
-            vmem1[tid] = val1 += vmem1[tid + 1];
-        }
-    }
-
     texture<float, cudaTextureType2D, cudaReadModeElementType> tex_If(false, cudaFilterModeLinear, cudaAddressModeClamp);
     texture<float4, cudaTextureType2D, cudaReadModeElementType> tex_If4(false, cudaFilterModeLinear, cudaAddressModeClamp);
     texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_Ib(false, cudaFilterModePoint, cudaAddressModeClamp);
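
The three hand-rolled tree reductions deleted above are superseded by the generic block reduce from the newly included opencv2/gpu/device/reduce.hpp; loadConstants is not removed but moves into the pyrlk namespace near the end of this file. For reference, the __constant__ upload pattern that loadConstants wraps looks like this in isolation (a minimal sketch with demo-only names, not code from this commit):

#include <cstdio>
#include <cuda_runtime.h>

// Demo-only symbols mirroring the c_winSize_x / c_iters pattern above.
__constant__ int c_demo_win;
__constant__ int c_demo_iters;

__global__ void show()
{
    // Every thread reads the same cached constant-memory values.
    printf("win=%d iters=%d\n", c_demo_win, c_demo_iters);
}

int main()
{
    int win = 21, iters = 30;
    // Host-side upload; the commit wraps these calls in cudaSafeCall().
    cudaMemcpyToSymbol(c_demo_win, &win, sizeof(int));
    cudaMemcpyToSymbol(c_demo_iters, &iters, sizeof(int));
    show<<<1, 1>>>();
    cudaDeviceSynchronize();
    return 0;
}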
@@ -263,7 +115,7 @@ namespace cv { namespace gpu { namespace device
     __device__ __forceinline__ float abs_(float a)
     {
-        return ::fabs(a);
+        return ::fabsf(a);
     }
 
     __device__ __forceinline__ float4 abs_(const float4& a)
     {
@@ -271,19 +123,19 @@ namespace cv { namespace gpu { namespace device
     }
 
     template <int cn, int PATCH_X, int PATCH_Y, bool calcErr>
-    __global__ void lkSparse(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
+    __global__ void sparse(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
     {
-    #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ <= 110)
-        __shared__ float smem1[128];
-        __shared__ float smem2[128];
-        __shared__ float smem3[128];
-    #else
-        __shared__ float smem1[256];
-        __shared__ float smem2[256];
-        __shared__ float smem3[256];
-    #endif
+    #if __CUDA_ARCH__ <= 110
+        const int BLOCK_SIZE = 128;
+    #else
+        const int BLOCK_SIZE = 256;
+    #endif
+
+        __shared__ float smem1[BLOCK_SIZE];
+        __shared__ float smem2[BLOCK_SIZE];
+        __shared__ float smem3[BLOCK_SIZE];
 
-        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
+        const unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x;
 
         float2 prevPt = prevPts[blockIdx.x];
         prevPt.x *= (1.0f / (1 << level));
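
The last two context lines map a tracked point into the current pyramid level: a coordinate at level `level` is the full-resolution coordinate divided by 2^level, computed as a multiply by 1.0f / (1 << level) rather than a divide. A host-side sketch of the same arithmetic, with illustrative values:

#include <cstdio>

int main()
{
    float x = 640.0f;  // full-resolution coordinate
    for (int level = 0; level <= 3; ++level)
        std::printf("level %d: x = %g\n", level, x * (1.0f / (1 << level)));
    return 0;
}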
@@ -338,7 +190,17 @@ namespace cv { namespace gpu { namespace device
             }
         }
 
-        reduce(A11, A12, A22, smem1, smem2, smem3, tid);
+        reduce<BLOCK_SIZE>(smem_tuple(smem1, smem2, smem3), thrust::tie(A11, A12, A22), tid, thrust::make_tuple(plus<float>(), plus<float>(), plus<float>()));
+
+    #if __CUDA_ARCH__ >= 300
+        if (tid == 0)
+        {
+            smem1[0] = A11;
+            smem2[0] = A12;
+            smem3[0] = A22;
+        }
+    #endif
+
         __syncthreads();
 
         A11 = smem1[0];
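
The new #if __CUDA_ARCH__ >= 300 write-back exists because the reduce utility from reduce.hpp can take a warp-shuffle path on sm_30+, and shuffles leave the final sums in thread 0's registers rather than in shared memory; thread 0 therefore stores them before the __syncthreads() broadcast that follows. A minimal sketch of why a shuffle reduce ends in a register (one warp, using the pre-CUDA-9 __shfl_down intrinsic current for this 2012 code base; demo-only names, not code from this commit):

#include <cuda_runtime.h>

// Each step pulls a partial sum from a lane `offset` positions down the
// warp; after the loop only lane 0 holds the complete sum, so it must be
// stored explicitly before other threads can see it.
__global__ void warpSumDemo(const float* in, float* out)
{
    float val = in[threadIdx.x];
    for (int offset = 16; offset > 0; offset /= 2)
        val += __shfl_down(val, offset);
    if (threadIdx.x == 0)
        *out = val;  // only lane 0 has the reduced value
}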
@@ -395,7 +257,16 @@ namespace cv { namespace gpu { namespace device
             }
         }
 
-        reduce(b1, b2, smem1, smem2, tid);
+        reduce<BLOCK_SIZE>(smem_tuple(smem1, smem2), thrust::tie(b1, b2), tid, thrust::make_tuple(plus<float>(), plus<float>()));
+
+    #if __CUDA_ARCH__ >= 300
+        if (tid == 0)
+        {
+            smem1[0] = b1;
+            smem2[0] = b2;
+        }
+    #endif
+
         __syncthreads();
 
         b1 = smem1[0];
@@ -428,7 +299,7 @@ namespace cv { namespace gpu { namespace device
             }
         }
 
-        reduce(errval, smem1, tid);
+        reduce<BLOCK_SIZE>(smem1, errval, tid, plus<float>());
     }
 
     if (tid == 0)
@@ -444,15 +315,15 @@ namespace cv { namespace gpu { namespace device
     }
 
     template <int cn, int PATCH_X, int PATCH_Y>
-    void lkSparse_caller(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                         int level, dim3 block, cudaStream_t stream)
+    void sparse_caller(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+                       int level, dim3 block, cudaStream_t stream)
     {
         dim3 grid(ptcount);
 
         if (level == 0 && err)
-            lkSparse<cn, PATCH_X, PATCH_Y, true><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
+            sparse<cn, PATCH_X, PATCH_Y, true><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
         else
-            lkSparse<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
+            sparse<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
 
         cudaSafeCall( cudaGetLastError() );
@@ -460,52 +331,8 @@ namespace cv { namespace gpu { namespace device
             cudaSafeCall( cudaDeviceSynchronize() );
     }
 
-    void lkSparse1_gpu(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                       int level, dim3 block, dim3 patch, cudaStream_t stream)
-    {
-        typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                               int level, dim3 block, cudaStream_t stream);
-
-        static const func_t funcs[5][5] =
-        {
-            {lkSparse_caller<1, 1, 1>, lkSparse_caller<1, 2, 1>, lkSparse_caller<1, 3, 1>, lkSparse_caller<1, 4, 1>, lkSparse_caller<1, 5, 1>},
-            {lkSparse_caller<1, 1, 2>, lkSparse_caller<1, 2, 2>, lkSparse_caller<1, 3, 2>, lkSparse_caller<1, 4, 2>, lkSparse_caller<1, 5, 2>},
-            {lkSparse_caller<1, 1, 3>, lkSparse_caller<1, 2, 3>, lkSparse_caller<1, 3, 3>, lkSparse_caller<1, 4, 3>, lkSparse_caller<1, 5, 3>},
-            {lkSparse_caller<1, 1, 4>, lkSparse_caller<1, 2, 4>, lkSparse_caller<1, 3, 4>, lkSparse_caller<1, 4, 4>, lkSparse_caller<1, 5, 4>},
-            {lkSparse_caller<1, 1, 5>, lkSparse_caller<1, 2, 5>, lkSparse_caller<1, 3, 5>, lkSparse_caller<1, 4, 5>, lkSparse_caller<1, 5, 5>}
-        };
-
-        bindTexture(&tex_If, I);
-        bindTexture(&tex_Jf, J);
-
-        funcs[patch.y - 1][patch.x - 1](I.rows, I.cols, prevPts, nextPts, status, err, ptcount,
-                                        level, block, stream);
-    }
-
-    void lkSparse4_gpu(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                       int level, dim3 block, dim3 patch, cudaStream_t stream)
-    {
-        typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                               int level, dim3 block, cudaStream_t stream);
-
-        static const func_t funcs[5][5] =
-        {
-            {lkSparse_caller<4, 1, 1>, lkSparse_caller<4, 2, 1>, lkSparse_caller<4, 3, 1>, lkSparse_caller<4, 4, 1>, lkSparse_caller<4, 5, 1>},
-            {lkSparse_caller<4, 1, 2>, lkSparse_caller<4, 2, 2>, lkSparse_caller<4, 3, 2>, lkSparse_caller<4, 4, 2>, lkSparse_caller<4, 5, 2>},
-            {lkSparse_caller<4, 1, 3>, lkSparse_caller<4, 2, 3>, lkSparse_caller<4, 3, 3>, lkSparse_caller<4, 4, 3>, lkSparse_caller<4, 5, 3>},
-            {lkSparse_caller<4, 1, 4>, lkSparse_caller<4, 2, 4>, lkSparse_caller<4, 3, 4>, lkSparse_caller<4, 4, 4>, lkSparse_caller<4, 5, 4>},
-            {lkSparse_caller<4, 1, 5>, lkSparse_caller<4, 2, 5>, lkSparse_caller<4, 3, 5>, lkSparse_caller<4, 4, 5>, lkSparse_caller<4, 5, 5>}
-        };
-
-        bindTexture(&tex_If4, I);
-        bindTexture(&tex_Jf4, J);
-
-        funcs[patch.y - 1][patch.x - 1](I.rows, I.cols, prevPts, nextPts, status, err, ptcount,
-                                        level, block, stream);
-    }
-
     template <bool calcErr>
-    __global__ void lkDense(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols)
+    __global__ void dense(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols)
     {
         extern __shared__ int smem[];
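
The two wrappers deleted here reappear below as pyrlk::sparse1 and pyrlk::sparse4; their funcs[5][5] table maps the runtime patch size onto one of 25 compile-time instantiations of the caller template. A condensed, compilable sketch of that dispatch pattern (illustrative names, 2x2 instead of 5x5, not code from this commit):

#include <cstdio>

template <int PATCH_X, int PATCH_Y>
void process() { std::printf("patch %dx%d\n", PATCH_X, PATCH_Y); }

typedef void (*func_t)();

void dispatch(int px, int py)
{
    // Runtime indices select a compile-time instantiation, with the same
    // funcs[patch.y - 1][patch.x - 1] indexing used in this file.
    static const func_t funcs[2][2] =
    {
        { process<1, 1>, process<2, 1> },
        { process<1, 2>, process<2, 2> }
    };
    funcs[py - 1][px - 1]();
}

int main()
{
    dispatch(2, 1);  // calls process<2, 1>
    return 0;
}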
@@ -649,9 +476,67 @@ namespace cv { namespace gpu { namespace device
                 err(y, x) = static_cast<float>(errval) / (c_winSize_x * c_winSize_y);
             }
         }
     }
+}
 
-    void lkDense_gpu(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV,
-                     PtrStepSzf err, int2 winSize, cudaStream_t stream)
+namespace pyrlk
+{
+    void loadConstants(int2 winSize, int iters)
+    {
+        cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) );
+
+        int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
+        cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) );
+
+        cudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) );
+    }
+
+    void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+                 int level, dim3 block, dim3 patch, cudaStream_t stream)
+    {
+        typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+                               int level, dim3 block, cudaStream_t stream);
+
+        static const func_t funcs[5][5] =
+        {
+            {::sparse_caller<1, 1, 1>, ::sparse_caller<1, 2, 1>, ::sparse_caller<1, 3, 1>, ::sparse_caller<1, 4, 1>, ::sparse_caller<1, 5, 1>},
+            {::sparse_caller<1, 1, 2>, ::sparse_caller<1, 2, 2>, ::sparse_caller<1, 3, 2>, ::sparse_caller<1, 4, 2>, ::sparse_caller<1, 5, 2>},
+            {::sparse_caller<1, 1, 3>, ::sparse_caller<1, 2, 3>, ::sparse_caller<1, 3, 3>, ::sparse_caller<1, 4, 3>, ::sparse_caller<1, 5, 3>},
+            {::sparse_caller<1, 1, 4>, ::sparse_caller<1, 2, 4>, ::sparse_caller<1, 3, 4>, ::sparse_caller<1, 4, 4>, ::sparse_caller<1, 5, 4>},
+            {::sparse_caller<1, 1, 5>, ::sparse_caller<1, 2, 5>, ::sparse_caller<1, 3, 5>, ::sparse_caller<1, 4, 5>, ::sparse_caller<1, 5, 5>}
+        };
+
+        bindTexture(&tex_If, I);
+        bindTexture(&tex_Jf, J);
+
+        funcs[patch.y - 1][patch.x - 1](I.rows, I.cols, prevPts, nextPts, status, err, ptcount,
+                                        level, block, stream);
+    }
+
+    void sparse4(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+                 int level, dim3 block, dim3 patch, cudaStream_t stream)
+    {
+        typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+                               int level, dim3 block, cudaStream_t stream);
+
+        static const func_t funcs[5][5] =
+        {
+            {::sparse_caller<4, 1, 1>, ::sparse_caller<4, 2, 1>, ::sparse_caller<4, 3, 1>, ::sparse_caller<4, 4, 1>, ::sparse_caller<4, 5, 1>},
+            {::sparse_caller<4, 1, 2>, ::sparse_caller<4, 2, 2>, ::sparse_caller<4, 3, 2>, ::sparse_caller<4, 4, 2>, ::sparse_caller<4, 5, 2>},
+            {::sparse_caller<4, 1, 3>, ::sparse_caller<4, 2, 3>, ::sparse_caller<4, 3, 3>, ::sparse_caller<4, 4, 3>, ::sparse_caller<4, 5, 3>},
+            {::sparse_caller<4, 1, 4>, ::sparse_caller<4, 2, 4>, ::sparse_caller<4, 3, 4>, ::sparse_caller<4, 4, 4>, ::sparse_caller<4, 5, 4>},
+            {::sparse_caller<4, 1, 5>, ::sparse_caller<4, 2, 5>, ::sparse_caller<4, 3, 5>, ::sparse_caller<4, 4, 5>, ::sparse_caller<4, 5, 5>}
+        };
+
+        bindTexture(&tex_If4, I);
+        bindTexture(&tex_Jf4, J);
+
+        funcs[patch.y - 1][patch.x - 1](I.rows, I.cols, prevPts, nextPts, status, err, ptcount,
+                                        level, block, stream);
+    }
+
+    void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, PtrStepSzf err, int2 winSize, cudaStream_t stream)
     {
         dim3 block(16, 16);
         dim3 grid(divUp(I.cols, block.x), divUp(I.rows, block.y));
@@ -666,19 +551,18 @@ namespace cv { namespace gpu { namespace device
         if (err.data)
         {
-            lkDense<true><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
+            ::dense<true><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
             cudaSafeCall( cudaGetLastError() );
         }
         else
         {
-            lkDense<false><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);
+            ::dense<false><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);
             cudaSafeCall( cudaGetLastError() );
         }
 
         if (stream == 0)
             cudaSafeCall( cudaDeviceSynchronize() );
     }
-    }
-}}}
+}
 
 #endif /* CUDA_DISABLER */
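
Taken together, the pyrlk.cu changes flatten the old cv::gpu::device::pyrlk nesting: kernels and their launch helpers become file-local in an anonymous namespace, and only the thin pyrlk:: wrappers keep external linkage for pyrlk.cpp to call. A plain-C++ miniature of that layout (illustrative bodies only, not code from this commit):

#include <cstdio>

namespace  // internal linkage: invisible outside this translation unit
{
    void sparse_impl() { std::printf("file-local kernel launcher\n"); }
}

namespace pyrlk  // the only symbols the host file links against
{
    // Public wrapper; the :: qualification mirrors ::sparse_caller above.
    void sparse1() { ::sparse_impl(); }
}

int main()
{
    pyrlk::sparse1();
    return 0;
}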
modules/gpu/src/pyrlk.cpp
@@ -55,21 +55,18 @@ void cv::gpu::PyrLKOpticalFlow::releaseMemory() {}
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace device
+namespace pyrlk
 {
-    namespace pyrlk
-    {
     void loadConstants(int2 winSize, int iters);
 
-    void lkSparse1_gpu(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                       int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
+    void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+                 int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
 
-    void lkSparse4_gpu(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
-                       int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
+    void sparse4(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+                 int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
 
-    void lkDense_gpu(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV,
-                     PtrStepSzf err, int2 winSize, cudaStream_t stream = 0);
-    }
-}}}
+    void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV,
+               PtrStepSzf err, int2 winSize, cudaStream_t stream = 0);
+}
 
 cv::gpu::PyrLKOpticalFlow::PyrLKOpticalFlow()
 {
@@ -104,8 +101,6 @@ namespace
 
 void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err)
 {
-    using namespace cv::gpu::device::pyrlk;
-
     if (prevPts.empty())
     {
         nextPts.release();
@@ -166,19 +161,19 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
         pyrDown(nextPyr_[level - 1], nextPyr_[level]);
     }
 
-    loadConstants(make_int2(winSize.width, winSize.height), iters);
+    pyrlk::loadConstants(make_int2(winSize.width, winSize.height), iters);
 
     for (int level = maxLevel; level >= 0; level--)
     {
         if (cn == 1)
         {
-            lkSparse1_gpu(prevPyr_[level], nextPyr_[level],
+            pyrlk::sparse1(prevPyr_[level], nextPyr_[level],
                 prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(),
                 level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
                 level, block, patch);
         }
         else
         {
-            lkSparse4_gpu(prevPyr_[level], nextPyr_[level],
+            pyrlk::sparse4(prevPyr_[level], nextPyr_[level],
                 prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(),
                 level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
                 level, block, patch);
         }
@@ -187,8 +182,6 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
 
 void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err)
 {
-    using namespace cv::gpu::device::pyrlk;
-
     CV_Assert(prevImg.type() == CV_8UC1);
    CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
     CV_Assert(maxLevel >= 0);
@@ -219,7 +212,7 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
     vPyr_[1].setTo(Scalar::all(0));
 
     int2 winSize2i = make_int2(winSize.width, winSize.height);
-    loadConstants(winSize2i, iters);
+    pyrlk::loadConstants(winSize2i, iters);
 
     PtrStepSzf derr = err ? *err : PtrStepSzf();
@@ -229,7 +222,7 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
     {
         int idx2 = (idx + 1) & 1;
 
-        lkDense_gpu(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
+        pyrlk::dense(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
             level == 0 ? derr : PtrStepSzf(), winSize2i);
 
         if (level > 0)
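
The idx/idx2 pair in this hunk ping-pongs between the two uPyr_/vPyr_ buffers: (idx + 1) & 1 flips between 0 and 1, so each pyramid level writes its flow into one buffer while reading the previous level's flow from the other. The swap itself sits inside the if (level > 0) branch that this view truncates, so its placement below is an assumption. A host-side sketch of the indexing:

#include <cstdio>

int main()
{
    int idx = 0;
    for (int level = 3; level >= 0; --level)
    {
        int idx2 = (idx + 1) & 1;          // the other buffer
        std::printf("level %d: write %d, read %d\n", level, idx, idx2);
        if (level > 0)
            idx = idx2;                    // assumed swap for the next level
    }
    return 0;
}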