Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
2e2bd557
Commit
2e2bd557
authored
Jun 15, 2012
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
new implementation of gpu::PyrLKOpticalFlow::dense (1.5 - 2x faster)
parent
af6b2e4e
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
243 additions
and
234 deletions
+243
-234
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+0
-2
perf_video.cpp
modules/gpu/perf/perf_video.cpp
+19
-4
pyrlk.cu
modules/gpu/src/cuda/pyrlk.cu
+193
-162
pyrlk.cpp
modules/gpu/src/pyrlk.cpp
+31
-63
pyrlk_optical_flow.cpp
samples/gpu/pyrlk_optical_flow.cpp
+0
-3
No files found.
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
2e2bd557
...
@@ -1754,7 +1754,6 @@ public:
...
@@ -1754,7 +1754,6 @@ public:
winSize
=
Size
(
21
,
21
);
winSize
=
Size
(
21
,
21
);
maxLevel
=
3
;
maxLevel
=
3
;
iters
=
30
;
iters
=
30
;
derivLambda
=
0.5
;
useInitialFlow
=
false
;
useInitialFlow
=
false
;
minEigThreshold
=
1e-4
f
;
minEigThreshold
=
1e-4
f
;
getMinEigenVals
=
false
;
getMinEigenVals
=
false
;
...
@@ -1769,7 +1768,6 @@ public:
...
@@ -1769,7 +1768,6 @@ public:
Size
winSize
;
Size
winSize
;
int
maxLevel
;
int
maxLevel
;
int
iters
;
int
iters
;
double
derivLambda
;
bool
useInitialFlow
;
bool
useInitialFlow
;
float
minEigThreshold
;
float
minEigThreshold
;
bool
getMinEigenVals
;
bool
getMinEigenVals
;
...
...
modules/gpu/perf/perf_video.cpp
View file @
2e2bd557
...
@@ -208,11 +208,18 @@ INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine(
...
@@ -208,11 +208,18 @@ INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine(
//////////////////////////////////////////////////////
//////////////////////////////////////////////////////
// PyrLKOpticalFlowDense
// PyrLKOpticalFlowDense
GPU_PERF_TEST_1
(
PyrLKOpticalFlowDense
,
cv
::
gpu
::
DeviceInfo
)
IMPLEMENT_PARAM_CLASS
(
Levels
,
int
)
IMPLEMENT_PARAM_CLASS
(
Iters
,
int
)
GPU_PERF_TEST
(
PyrLKOpticalFlowDense
,
cv
::
gpu
::
DeviceInfo
,
WinSize
,
Levels
,
Iters
)
{
{
cv
::
gpu
::
DeviceInfo
devInfo
=
G
etParam
(
);
cv
::
gpu
::
DeviceInfo
devInfo
=
G
ET_PARAM
(
0
);
cv
::
gpu
::
setDevice
(
devInfo
.
deviceID
());
cv
::
gpu
::
setDevice
(
devInfo
.
deviceID
());
int
winSize
=
GET_PARAM
(
1
);
int
levels
=
GET_PARAM
(
2
);
int
iters
=
GET_PARAM
(
3
);
cv
::
Mat
frame0_host
=
readImage
(
"gpu/opticalflow/frame0.png"
,
cv
::
IMREAD_GRAYSCALE
);
cv
::
Mat
frame0_host
=
readImage
(
"gpu/opticalflow/frame0.png"
,
cv
::
IMREAD_GRAYSCALE
);
ASSERT_FALSE
(
frame0_host
.
empty
());
ASSERT_FALSE
(
frame0_host
.
empty
());
...
@@ -226,9 +233,13 @@ GPU_PERF_TEST_1(PyrLKOpticalFlowDense, cv::gpu::DeviceInfo)
...
@@ -226,9 +233,13 @@ GPU_PERF_TEST_1(PyrLKOpticalFlowDense, cv::gpu::DeviceInfo)
cv
::
gpu
::
PyrLKOpticalFlow
pyrLK
;
cv
::
gpu
::
PyrLKOpticalFlow
pyrLK
;
pyrLK
.
winSize
=
cv
::
Size
(
winSize
,
winSize
);
pyrLK
.
maxLevel
=
levels
-
1
;
pyrLK
.
iters
=
iters
;
pyrLK
.
dense
(
frame0
,
frame1
,
u
,
v
);
pyrLK
.
dense
(
frame0
,
frame1
,
u
,
v
);
declare
.
time
(
1
0
);
declare
.
time
(
3
0
);
TEST_CYCLE
()
TEST_CYCLE
()
{
{
...
@@ -236,7 +247,11 @@ GPU_PERF_TEST_1(PyrLKOpticalFlowDense, cv::gpu::DeviceInfo)
...
@@ -236,7 +247,11 @@ GPU_PERF_TEST_1(PyrLKOpticalFlowDense, cv::gpu::DeviceInfo)
}
}
}
}
INSTANTIATE_TEST_CASE_P
(
Video
,
PyrLKOpticalFlowDense
,
ALL_DEVICES
);
INSTANTIATE_TEST_CASE_P
(
Video
,
PyrLKOpticalFlowDense
,
testing
::
Combine
(
ALL_DEVICES
,
testing
::
Values
(
WinSize
(
3
),
WinSize
(
5
),
WinSize
(
7
),
WinSize
(
9
),
WinSize
(
13
),
WinSize
(
17
),
WinSize
(
21
)),
testing
::
Values
(
Levels
(
1
),
Levels
(
2
),
Levels
(
3
)),
testing
::
Values
(
Iters
(
1
),
Iters
(
10
))));
//////////////////////////////////////////////////////
//////////////////////////////////////////////////////
// FarnebackOpticalFlowTest
// FarnebackOpticalFlowTest
...
...
modules/gpu/src/cuda/pyrlk.cu
View file @
2e2bd557
...
@@ -40,7 +40,7 @@
...
@@ -40,7 +40,7 @@
//
//
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
//
//
// The original code was written by Paul Furgale and Chi Hay Tong
// The original code was written by Paul Furgale and Chi Hay Tong
// and later optimized and prepared for integration into OpenCV by Itseez.
// and later optimized and prepared for integration into OpenCV by Itseez.
//
//
//M*/
//M*/
...
@@ -50,9 +50,9 @@
...
@@ -50,9 +50,9 @@
#include "opencv2/gpu/device/functional.hpp"
#include "opencv2/gpu/device/functional.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/limits.hpp"
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
{
namespace pyrlk
namespace pyrlk
{
{
__constant__ int c_cn;
__constant__ int c_cn;
__constant__ float c_minEigThreshold;
__constant__ float c_minEigThreshold;
...
@@ -65,7 +65,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -65,7 +65,7 @@ namespace cv { namespace gpu { namespace device
void loadConstants(int cn, float minEigThreshold, int2 winSize, int iters)
void loadConstants(int cn, float minEigThreshold, int2 winSize, int iters)
{
{
int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
cudaSafeCall( cudaMemcpyToSymbol(c_cn, &cn, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_cn, &cn, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_minEigThreshold, &minEigThreshold, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(c_minEigThreshold, &minEigThreshold, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
...
@@ -87,7 +87,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -87,7 +87,7 @@ namespace cv { namespace gpu { namespace device
const uchar src_val0 = src(y > 0 ? y - 1 : 1, x);
const uchar src_val0 = src(y > 0 ? y - 1 : 1, x);
const uchar src_val1 = src(y, x);
const uchar src_val1 = src(y, x);
const uchar src_val2 = src(y < rows - 1 ? y + 1 : rows - 2, x);
const uchar src_val2 = src(y < rows - 1 ? y + 1 : rows - 2, x);
dx_buf(y, x) = (src_val0 + src_val2) * 3 + src_val1 * 10;
dx_buf(y, x) = (src_val0 + src_val2) * 3 + src_val1 * 10;
dy_buf(y, x) = src_val2 - src_val0;
dy_buf(y, x) = src_val2 - src_val0;
}
}
...
@@ -113,7 +113,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -113,7 +113,7 @@ namespace cv { namespace gpu { namespace device
}
}
}
}
void calcSharrDeriv_gpu(DevMem2Db src, DevMem2D_<short> dx_buf, DevMem2D_<short> dy_buf, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy, int cn,
void calcSharrDeriv_gpu(DevMem2Db src, DevMem2D_<short> dx_buf, DevMem2D_<short> dy_buf, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy, int cn,
cudaStream_t stream)
cudaStream_t stream)
{
{
dim3 block(32, 8);
dim3 block(32, 8);
...
@@ -182,21 +182,21 @@ namespace cv { namespace gpu { namespace device
...
@@ -182,21 +182,21 @@ namespace cv { namespace gpu { namespace device
__syncthreads();
__syncthreads();
#if __CUDA_ARCH__ > 110
#if __CUDA_ARCH__ > 110
if (tid < 128)
if (tid < 128)
{
{
smem1[tid] = val1 += smem1[tid + 128];
smem1[tid] = val1 += smem1[tid + 128];
smem2[tid] = val2 += smem2[tid + 128];
smem2[tid] = val2 += smem2[tid + 128];
smem3[tid] = val3 += smem3[tid + 128];
smem3[tid] = val3 += smem3[tid + 128];
}
}
__syncthreads();
__syncthreads();
#endif
#endif
if (tid < 64)
if (tid < 64)
{
{
smem1[tid] = val1 += smem1[tid + 64];
smem1[tid] = val1 += smem1[tid + 64];
smem2[tid] = val2 += smem2[tid + 64];
smem2[tid] = val2 += smem2[tid + 64];
smem3[tid] = val3 += smem3[tid + 64];
smem3[tid] = val3 += smem3[tid + 64];
}
}
__syncthreads();
__syncthreads();
if (tid < 32)
if (tid < 32)
...
@@ -205,28 +205,28 @@ namespace cv { namespace gpu { namespace device
...
@@ -205,28 +205,28 @@ namespace cv { namespace gpu { namespace device
volatile float* vmem2 = smem2;
volatile float* vmem2 = smem2;
volatile float* vmem3 = smem3;
volatile float* vmem3 = smem3;
vmem1[tid] = val1 += vmem1[tid + 32];
vmem1[tid] = val1 += vmem1[tid + 32];
vmem2[tid] = val2 += vmem2[tid + 32];
vmem2[tid] = val2 += vmem2[tid + 32];
vmem3[tid] = val3 += vmem3[tid + 32];
vmem3[tid] = val3 += vmem3[tid + 32];
vmem1[tid] = val1 += vmem1[tid + 16];
vmem1[tid] = val1 += vmem1[tid + 16];
vmem2[tid] = val2 += vmem2[tid + 16];
vmem2[tid] = val2 += vmem2[tid + 16];
vmem3[tid] = val3 += vmem3[tid + 16];
vmem3[tid] = val3 += vmem3[tid + 16];
vmem1[tid] = val1 += vmem1[tid + 8];
vmem1[tid] = val1 += vmem1[tid + 8];
vmem2[tid] = val2 += vmem2[tid + 8];
vmem2[tid] = val2 += vmem2[tid + 8];
vmem3[tid] = val3 += vmem3[tid + 8];
vmem3[tid] = val3 += vmem3[tid + 8];
vmem1[tid] = val1 += vmem1[tid + 4];
vmem1[tid] = val1 += vmem1[tid + 4];
vmem2[tid] = val2 += vmem2[tid + 4];
vmem2[tid] = val2 += vmem2[tid + 4];
vmem3[tid] = val3 += vmem3[tid + 4];
vmem3[tid] = val3 += vmem3[tid + 4];
vmem1[tid] = val1 += vmem1[tid + 2];
vmem1[tid] = val1 += vmem1[tid + 2];
vmem2[tid] = val2 += vmem2[tid + 2];
vmem2[tid] = val2 += vmem2[tid + 2];
vmem3[tid] = val3 += vmem3[tid + 2];
vmem3[tid] = val3 += vmem3[tid + 2];
vmem1[tid] = val1 += vmem1[tid + 1];
vmem1[tid] = val1 += vmem1[tid + 1];
vmem2[tid] = val2 += vmem2[tid + 1];
vmem2[tid] = val2 += vmem2[tid + 1];
vmem3[tid] = val3 += vmem3[tid + 1];
vmem3[tid] = val3 += vmem3[tid + 1];
}
}
}
}
...
@@ -238,19 +238,19 @@ namespace cv { namespace gpu { namespace device
...
@@ -238,19 +238,19 @@ namespace cv { namespace gpu { namespace device
__syncthreads();
__syncthreads();
#if __CUDA_ARCH__ > 110
#if __CUDA_ARCH__ > 110
if (tid < 128)
if (tid < 128)
{
{
smem1[tid] = val1 += smem1[tid + 128];
smem1[tid] = val1 += smem1[tid + 128];
smem2[tid] = val2 += smem2[tid + 128];
smem2[tid] = val2 += smem2[tid + 128];
}
}
__syncthreads();
__syncthreads();
#endif
#endif
if (tid < 64)
if (tid < 64)
{
{
smem1[tid] = val1 += smem1[tid + 64];
smem1[tid] = val1 += smem1[tid + 64];
smem2[tid] = val2 += smem2[tid + 64];
smem2[tid] = val2 += smem2[tid + 64];
}
}
__syncthreads();
__syncthreads();
if (tid < 32)
if (tid < 32)
...
@@ -258,23 +258,23 @@ namespace cv { namespace gpu { namespace device
...
@@ -258,23 +258,23 @@ namespace cv { namespace gpu { namespace device
volatile float* vmem1 = smem1;
volatile float* vmem1 = smem1;
volatile float* vmem2 = smem2;
volatile float* vmem2 = smem2;
vmem1[tid] = val1 += vmem1[tid + 32];
vmem1[tid] = val1 += vmem1[tid + 32];
vmem2[tid] = val2 += vmem2[tid + 32];
vmem2[tid] = val2 += vmem2[tid + 32];
vmem1[tid] = val1 += vmem1[tid + 16];
vmem1[tid] = val1 += vmem1[tid + 16];
vmem2[tid] = val2 += vmem2[tid + 16];
vmem2[tid] = val2 += vmem2[tid + 16];
vmem1[tid] = val1 += vmem1[tid + 8];
vmem1[tid] = val1 += vmem1[tid + 8];
vmem2[tid] = val2 += vmem2[tid + 8];
vmem2[tid] = val2 += vmem2[tid + 8];
vmem1[tid] = val1 += vmem1[tid + 4];
vmem1[tid] = val1 += vmem1[tid + 4];
vmem2[tid] = val2 += vmem2[tid + 4];
vmem2[tid] = val2 += vmem2[tid + 4];
vmem1[tid] = val1 += vmem1[tid + 2];
vmem1[tid] = val1 += vmem1[tid + 2];
vmem2[tid] = val2 += vmem2[tid + 2];
vmem2[tid] = val2 += vmem2[tid + 2];
vmem1[tid] = val1 += vmem1[tid + 1];
vmem1[tid] = val1 += vmem1[tid + 1];
vmem2[tid] = val2 += vmem2[tid + 1];
vmem2[tid] = val2 += vmem2[tid + 1];
}
}
}
}
...
@@ -284,29 +284,29 @@ namespace cv { namespace gpu { namespace device
...
@@ -284,29 +284,29 @@ namespace cv { namespace gpu { namespace device
__syncthreads();
__syncthreads();
#if __CUDA_ARCH__ > 110
#if __CUDA_ARCH__ > 110
if (tid < 128)
if (tid < 128)
{
{
smem1[tid] = val1 += smem1[tid + 128];
smem1[tid] = val1 += smem1[tid + 128];
}
}
__syncthreads();
__syncthreads();
#endif
#endif
if (tid < 64)
if (tid < 64)
{
{
smem1[tid] = val1 += smem1[tid + 64];
smem1[tid] = val1 += smem1[tid + 64];
}
}
__syncthreads();
__syncthreads();
if (tid < 32)
if (tid < 32)
{
{
volatile float* vmem1 = smem1;
volatile float* vmem1 = smem1;
vmem1[tid] = val1 += vmem1[tid + 32];
vmem1[tid] = val1 += vmem1[tid + 32];
vmem1[tid] = val1 += vmem1[tid + 16];
vmem1[tid] = val1 += vmem1[tid + 16];
vmem1[tid] = val1 += vmem1[tid + 8];
vmem1[tid] = val1 += vmem1[tid + 8];
vmem1[tid] = val1 += vmem1[tid + 4];
vmem1[tid] = val1 += vmem1[tid + 4];
vmem1[tid] = val1 += vmem1[tid + 2];
vmem1[tid] = val1 += vmem1[tid + 2];
vmem1[tid] = val1 += vmem1[tid + 1];
vmem1[tid] = val1 += vmem1[tid + 1];
}
}
}
}
...
@@ -341,7 +341,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -341,7 +341,7 @@ namespace cv { namespace gpu { namespace device
{
{
status[blockIdx.x] = 0;
status[blockIdx.x] = 0;
if (calcErr)
if (calcErr)
err[blockIdx.x] = 0;
err[blockIdx.x] = 0;
}
}
...
@@ -349,7 +349,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -349,7 +349,7 @@ namespace cv { namespace gpu { namespace device
}
}
// extract the patch from the first image, compute covariation matrix of derivatives
// extract the patch from the first image, compute covariation matrix of derivatives
float A11 = 0;
float A11 = 0;
float A12 = 0;
float A12 = 0;
float A22 = 0;
float A22 = 0;
...
@@ -359,7 +359,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -359,7 +359,7 @@ namespace cv { namespace gpu { namespace device
int dIdy_patch[PATCH_Y][PATCH_X];
int dIdy_patch[PATCH_Y][PATCH_X];
for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
{
{
for (int x = threadIdx.x, j = 0; x < c_winSize_x_cn; x += blockDim.x, ++j)
for (int x = threadIdx.x, j = 0; x < c_winSize_x_cn; x += blockDim.x, ++j)
{
{
I_patch[i][j] = linearFilter(I, prevPt, x, y);
I_patch[i][j] = linearFilter(I, prevPt, x, y);
...
@@ -369,7 +369,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -369,7 +369,7 @@ namespace cv { namespace gpu { namespace device
dIdx_patch[i][j] = ixval;
dIdx_patch[i][j] = ixval;
dIdy_patch[i][j] = iyval;
dIdy_patch[i][j] = iyval;
A11 += ixval * ixval;
A11 += ixval * ixval;
A12 += ixval * iyval;
A12 += ixval * iyval;
A22 += iyval * iyval;
A22 += iyval * iyval;
...
@@ -382,7 +382,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -382,7 +382,7 @@ namespace cv { namespace gpu { namespace device
A11 = smem1[0];
A11 = smem1[0];
A12 = smem2[0];
A12 = smem2[0];
A22 = smem3[0];
A22 = smem3[0];
A11 *= SCALE;
A11 *= SCALE;
A12 *= SCALE;
A12 *= SCALE;
A22 *= SCALE;
A22 *= SCALE;
...
@@ -390,8 +390,8 @@ namespace cv { namespace gpu { namespace device
...
@@ -390,8 +390,8 @@ namespace cv { namespace gpu { namespace device
{
{
float D = A11 * A22 - A12 * A12;
float D = A11 * A22 - A12 * A12;
float minEig = (A22 + A11 - ::sqrtf((A11 - A22) * (A11 - A22) + 4.f * A12 * A12)) / (2 * c_winSize_x * c_winSize_y);
float minEig = (A22 + A11 - ::sqrtf((A11 - A22) * (A11 - A22) + 4.f * A12 * A12)) / (2 * c_winSize_x * c_winSize_y);
if (calcErr && GET_MIN_EIGENVALS && tid == 0)
if (calcErr && GET_MIN_EIGENVALS && tid == 0)
err[blockIdx.x] = minEig;
err[blockIdx.x] = minEig;
if (minEig < c_minEigThreshold || D < numeric_limits<float>::epsilon())
if (minEig < c_minEigThreshold || D < numeric_limits<float>::epsilon())
...
@@ -403,7 +403,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -403,7 +403,7 @@ namespace cv { namespace gpu { namespace device
}
}
D = 1.f / D;
D = 1.f / D;
A11 *= D;
A11 *= D;
A12 *= D;
A12 *= D;
A22 *= D;
A22 *= D;
...
@@ -411,8 +411,8 @@ namespace cv { namespace gpu { namespace device
...
@@ -411,8 +411,8 @@ namespace cv { namespace gpu { namespace device
float2 nextPt = nextPts[blockIdx.x];
float2 nextPt = nextPts[blockIdx.x];
nextPt.x *= 2.f;
nextPt.x *= 2.f;
nextPt.y *= 2.f;
nextPt.y *= 2.f;
nextPt.x -= c_halfWin_x;
nextPt.x -= c_halfWin_x;
nextPt.y -= c_halfWin_y;
nextPt.y -= c_halfWin_y;
...
@@ -428,7 +428,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -428,7 +428,7 @@ namespace cv { namespace gpu { namespace device
float b1 = 0;
float b1 = 0;
float b2 = 0;
float b2 = 0;
for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
{
{
for (int x = threadIdx.x, j = 0; x < c_winSize_x_cn; x += blockDim.x, ++j)
for (int x = threadIdx.x, j = 0; x < c_winSize_x_cn; x += blockDim.x, ++j)
...
@@ -439,7 +439,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -439,7 +439,7 @@ namespace cv { namespace gpu { namespace device
b2 += diff * dIdy_patch[i][j];
b2 += diff * dIdy_patch[i][j];
}
}
}
}
reduce(b1, b2, smem1, smem2, tid);
reduce(b1, b2, smem1, smem2, tid);
__syncthreads();
__syncthreads();
...
@@ -448,11 +448,11 @@ namespace cv { namespace gpu { namespace device
...
@@ -448,11 +448,11 @@ namespace cv { namespace gpu { namespace device
b1 *= SCALE;
b1 *= SCALE;
b2 *= SCALE;
b2 *= SCALE;
float2 delta;
float2 delta;
delta.x = A12 * b2 - A22 * b1;
delta.x = A12 * b2 - A22 * b1;
delta.y = A12 * b1 - A11 * b2;
delta.y = A12 * b1 - A11 * b2;
nextPt.x += delta.x;
nextPt.x += delta.x;
nextPt.y += delta.y;
nextPt.y += delta.y;
...
@@ -495,7 +495,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -495,7 +495,7 @@ namespace cv { namespace gpu { namespace device
template <int PATCH_X, int PATCH_Y>
template <int PATCH_X, int PATCH_Y>
void lkSparse_caller(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
void lkSparse_caller(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
const float2* prevPts, float2* nextPts, uchar* status, float* err, bool GET_MIN_EIGENVALS, int ptcount,
const float2* prevPts, float2* nextPts, uchar* status, float* err, bool GET_MIN_EIGENVALS, int ptcount,
int level, dim3 block, cudaStream_t stream)
int level, dim3 block, cudaStream_t stream)
{
{
dim3 grid(ptcount);
dim3 grid(ptcount);
...
@@ -532,109 +532,147 @@ namespace cv { namespace gpu { namespace device
...
@@ -532,109 +532,147 @@ namespace cv { namespace gpu { namespace device
}
}
void lkSparse_gpu(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
void lkSparse_gpu(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
const float2* prevPts, float2* nextPts, uchar* status, float* err, bool GET_MIN_EIGENVALS, int ptcount,
const float2* prevPts, float2* nextPts, uchar* status, float* err, bool GET_MIN_EIGENVALS, int ptcount,
int level, dim3 block, dim3 patch, cudaStream_t stream)
int level, dim3 block, dim3 patch, cudaStream_t stream)
{
{
typedef void (*func_t)(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
typedef void (*func_t)(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
const float2* prevPts, float2* nextPts, uchar* status, float* err, bool GET_MIN_EIGENVALS, int ptcount,
const float2* prevPts, float2* nextPts, uchar* status, float* err, bool GET_MIN_EIGENVALS, int ptcount,
int level, dim3 block, cudaStream_t stream);
int level, dim3 block, cudaStream_t stream);
static const func_t funcs[5][5] =
static const func_t funcs[5][5] =
{
{
{lkSparse_caller<1, 1>, lkSparse_caller<2, 1>, lkSparse_caller<3, 1>, lkSparse_caller<4, 1>, lkSparse_caller<5, 1>},
{lkSparse_caller<1, 1>, lkSparse_caller<2, 1>, lkSparse_caller<3, 1>, lkSparse_caller<4, 1>, lkSparse_caller<5, 1>},
{lkSparse_caller<1, 2>, lkSparse_caller<2, 2>, lkSparse_caller<3, 2>, lkSparse_caller<4, 2>, lkSparse_caller<5, 2>},
{lkSparse_caller<1, 2>, lkSparse_caller<2, 2>, lkSparse_caller<3, 2>, lkSparse_caller<4, 2>, lkSparse_caller<5, 2>},
{lkSparse_caller<1, 3>, lkSparse_caller<2, 3>, lkSparse_caller<3, 3>, lkSparse_caller<4, 3>, lkSparse_caller<5, 3>},
{lkSparse_caller<1, 3>, lkSparse_caller<2, 3>, lkSparse_caller<3, 3>, lkSparse_caller<4, 3>, lkSparse_caller<5, 3>},
{lkSparse_caller<1, 4>, lkSparse_caller<2, 4>, lkSparse_caller<3, 4>, lkSparse_caller<4, 4>, lkSparse_caller<5, 4>},
{lkSparse_caller<1, 4>, lkSparse_caller<2, 4>, lkSparse_caller<3, 4>, lkSparse_caller<4, 4>, lkSparse_caller<5, 4>},
{lkSparse_caller<1, 5>, lkSparse_caller<2, 5>, lkSparse_caller<3, 5>, lkSparse_caller<4, 5>, lkSparse_caller<5, 5>}
{lkSparse_caller<1, 5>, lkSparse_caller<2, 5>, lkSparse_caller<3, 5>, lkSparse_caller<4, 5>, lkSparse_caller<5, 5>}
};
};
funcs[patch.y - 1][patch.x - 1](I, J, dIdx, dIdy,
funcs[patch.y - 1][patch.x - 1](I, J, dIdx, dIdy,
prevPts, nextPts, status, err, GET_MIN_EIGENVALS, ptcount,
prevPts, nextPts, status, err, GET_MIN_EIGENVALS, ptcount,
level, block, stream);
level, block, stream);
}
}
template <bool calcErr, bool GET_MIN_EIGENVALS>
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_I(false, cudaFilterModePoint, cudaAddressModeClamp);
__global__ void lkDense(const PtrStepb I, const PtrStepb J, const PtrStep<short> dIdx, const PtrStep<short> dIdy,
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_J(false, cudaFilterModeLinear, cudaAddressModeClamp);
PtrStepf u, PtrStepf v, PtrStepf err, const int rows, const int cols)
template <bool calcErr>
__global__ void lkDense(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols)
{
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
extern __shared__ int smem[];
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int patchWidth = blockDim.x + 2 * c_halfWin_x;
const int patchHeight = blockDim.y + 2 * c_halfWin_y;
int* I_patch = smem;
int* dIdx_patch = I_patch + patchWidth * patchHeight;
int* dIdy_patch = dIdx_patch + patchWidth * patchHeight;
const int xBase = blockIdx.x * blockDim.x;
const int yBase = blockIdx.y * blockDim.y;
for (int i = threadIdx.y; i < patchHeight; i += blockDim.y)
{
for (int j = threadIdx.x; j < patchWidth; j += blockDim.x)
{
float x = xBase - c_halfWin_x + j + 0.5f;
float y = yBase - c_halfWin_y + i + 0.5f;
I_patch[i * patchWidth + j] = tex2D(tex_I, x, y);
// Sharr Deriv
dIdx_patch[i * patchWidth + j] = 3 * tex2D(tex_I, x+1, y-1) + 10 * tex2D(tex_I, x+1, y) + 3 * tex2D(tex_I, x+1, y+1) -
(3 * tex2D(tex_I, x-1, y-1) + 10 * tex2D(tex_I, x-1, y) + 3 * tex2D(tex_I, x-1, y+1));
dIdy_patch[i * patchWidth + j] = 3 * tex2D(tex_I, x-1, y+1) + 10 * tex2D(tex_I, x, y+1) + 3 * tex2D(tex_I, x+1, y+1) -
(3 * tex2D(tex_I, x-1, y-1) + 10 * tex2D(tex_I, x, y-1) + 3 * tex2D(tex_I, x+1, y-1));
}
}
__syncthreads();
const int x = xBase + threadIdx.x;
const int y = yBase + threadIdx.y;
if (x >= cols || y >= rows)
if (x >= cols || y >= rows)
return;
return;
// extract the patch from the first image, compute covariation matrix of derivatives
int A11i = 0;
int A12i = 0;
float A11 = 0;
int A22i = 0;
float A12 = 0;
float A22 = 0;
for (int i = 0; i < c_winSize_y; ++i)
for (int i = 0; i < c_winSize_y; ++i)
{
{
for (int j = 0; j < c_winSize_x; ++j)
for (int j = 0; j < c_winSize_x; ++j)
{
{
int
ixval = dIdx(y - c_halfWin_y + i, x - c_halfWin_x + j)
;
int
dIdx = dIdx_patch[(threadIdx.y + i) * patchWidth + (threadIdx.x + j)]
;
int
iyval = dIdy(y - c_halfWin_y + i, x - c_halfWin_x + j)
;
int
dIdy = dIdy_patch[(threadIdx.y + i) * patchWidth + (threadIdx.x + j)]
;
A11
+= ixval * ixval
;
A11
i += dIdx * dIdx
;
A12
+= ixval * iyval
;
A12
i += dIdx * dIdy
;
A22
+= iyval * iyval
;
A22
i += dIdy * dIdy
;
}
}
}
}
A11 *= SCALE;
A12 *= SCALE;
A22 *= SCALE;
{
float A11 = A11i;
float D = A11 * A22 - A12 * A12
;
float A12 = A12i
;
float minEig = (A22 + A11 - ::sqrtf((A11 - A22) * (A11 - A22) + 4.f * A12 * A12)) / (2 * c_winSize_x * c_winSize_y)
;
float A22 = A22i
;
if (calcErr && GET_MIN_EIGENVALS)
float D = A11 * A22 - A12 * A12;
err(y, x) = minEig;
if (minEig < c_minEigThreshold || D < numeric_limits<float>::epsilon())
return;
D = 1.f / D;
if (D < numeric_limits<float>::epsilon())
{
A11 *= D;
if (calcErr)
A12 *= D;
err(y, x) = numeric_limits<float>::max();
A22 *= D;
return;
}
}
D = 1.f / D;
A11 *= D;
A12 *= D;
A22 *= D;
float2 nextPt;
float2 nextPt;
nextPt.x = x
- c_halfWin_x + u(y, x)
;
nextPt.x = x
+ prevU(y/2, x/2) * 2.0f
;
nextPt.y = y
- c_halfWin_y + v(y, x)
;
nextPt.y = y
+ prevV(y/2, x/2) * 2.0f
;
for (int k = 0; k < c_iters; ++k)
for (int k = 0; k < c_iters; ++k)
{
{
if (nextPt.x < -c_winSize_x || nextPt.x >= cols || nextPt.y < -c_winSize_y || nextPt.y >= rows)
if (nextPt.x < 0 || nextPt.x >= cols || nextPt.y < 0 || nextPt.y >= rows)
{
if (calcErr)
err(y, x) = numeric_limits<float>::max();
return;
return;
}
int b1 = 0;
int b2 = 0;
float b1 = 0;
float b2 = 0;
for (int i = 0; i < c_winSize_y; ++i)
for (int i = 0; i < c_winSize_y; ++i)
{
{
for (int j = 0; j < c_winSize_x; ++j)
for (int j = 0; j < c_winSize_x; ++j)
{
{
int I_val = I(y - c_halfWin_y + i, x - c_halfWin_x + j);
int I = I_patch[(threadIdx.y + i) * patchWidth + threadIdx.x + j];
int J = tex2D(tex_J, nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f);
int diff = linearFilter(J, nextPt, j, i) - CV_DESCALE(I_val * (1 << W_BITS), W_BITS1 - 5);
int diff = (J - I) * 32;
b1 += diff * dIdx(y - c_halfWin_y + i, x - c_halfWin_x + j);
int dIdx = dIdx_patch[(threadIdx.y + i) * patchWidth + (threadIdx.x + j)];
b2 += diff * dIdy(y - c_halfWin_y + i, x - c_halfWin_x + j);
int dIdy = dIdy_patch[(threadIdx.y + i) * patchWidth + (threadIdx.x + j)];
b1 += diff * dIdx;
b2 += diff * dIdy;
}
}
}
}
b1 *= SCALE;
b2 *= SCALE;
float2 delta;
float2 delta;
delta.x = A12 * b2 - A22 * b1;
delta.x = A12 * b2 - A22 * b1;
delta.y = A12 * b1 - A11 * b2;
delta.y = A12 * b1 - A11 * b2;
nextPt.x += delta.x;
nextPt.x += delta.x;
nextPt.y += delta.y;
nextPt.y += delta.y;
...
@@ -642,57 +680,50 @@ namespace cv { namespace gpu { namespace device
...
@@ -642,57 +680,50 @@ namespace cv { namespace gpu { namespace device
break;
break;
}
}
u(y, x) = nextPt.x - x
+ c_halfWin_x
;
u(y, x) = nextPt.x - x;
v(y, x) = nextPt.y - y
+ c_halfWin_y;
v(y, x) = nextPt.y - y
;
if (calcErr
&& !GET_MIN_EIGENVALS
)
if (calcErr)
{
{
float errval = 0.0f
;
int errval = 0
;
for (int i = 0; i < c_winSize_y; ++i)
for (int i = 0; i < c_winSize_y; ++i)
{
{
for (int j = 0; j < c_winSize_x; ++j)
for (int j = 0; j < c_winSize_x; ++j)
{
{
int I_val = I(y - c_halfWin_y + i, x - c_halfWin_x + j);
int I = I_patch[(threadIdx.y + i) * patchWidth + threadIdx.x + j];
int diff = linearFilter(J, nextPt, j, i) - CV_DESCALE(I_val * (1 << W_BITS), W_BITS1 - 5);
int J = tex2D(tex_J, nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f);
errval += ::fabsf((float)diff);
errval += ::abs(J - I);
}
}
}
}
errval /= 32 * c_winSize_x_cn * c_winSize_y;
err(y, x) = static_cast<float>(errval) / (c_winSize_x * c_winSize_y);
err(y, x) = errval;
}
}
}
}
void lkDense_gpu(DevMem2Db I, DevMem2D
b J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
void lkDense_gpu(DevMem2Db I, DevMem2D
f J, DevMem2Df u, DevMem2Df v, DevMem2Df prevU, DevMem2Df prevV,
DevMem2Df u, DevMem2Df v, DevMem2Df* err, bool GET_MIN_EIGENVALS
, cudaStream_t stream)
DevMem2Df err, int2 winSize
, cudaStream_t stream)
{
{
dim3 block(
32, 8
);
dim3 block(
16, 16
);
dim3 grid(divUp(I.cols, block.x), divUp(I.rows, block.y));
dim3 grid(divUp(I.cols, block.x), divUp(I.rows, block.y));
if (err)
bindTexture(&tex_I, I);
{
bindTexture(&tex_J, J);
if (GET_MIN_EIGENVALS)
{
cudaSafeCall( cudaFuncSetCacheConfig(lkDense<true, true>, cudaFuncCachePreferL1) );
lkDense<true, true><<<grid, block, 0, stream>>>(I, J, dIdx, dIdy, u, v, *err, I.rows, I.cols);
int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
cudaSafeCall( cudaGetLastError() );
const int patchWidth = block.x + 2 * halfWin.x;
}
const int patchHeight = block.y + 2 * halfWin.y;
else
size_t smem_size = 3 * patchWidth * patchHeight * sizeof(int);
{
cudaSafeCall( cudaFuncSetCacheConfig(lkDense<true, false>, cudaFuncCachePreferL1) );
lkDense<true, false><<<grid, block, 0, stream>>>(I, J, dIdx, dIdy, u, v, *err, I.rows, I.cols);
if (err.data)
cudaSafeCall( cudaGetLastError() );
{
}
lkDense<true><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
cudaSafeCall( cudaGetLastError() );
}
}
else
else
{
{
cudaSafeCall( cudaFuncSetCacheConfig(lkDense<false, false>, cudaFuncCachePreferL1) );
lkDense<false><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);
lkDense<false, false><<<grid, block, 0, stream>>>(I, J, dIdx, dIdy, u, v, PtrStepf(), I.rows, I.cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}
}
...
...
modules/gpu/src/pyrlk.cpp
View file @
2e2bd557
...
@@ -66,8 +66,8 @@ namespace cv { namespace gpu { namespace device
...
@@ -66,8 +66,8 @@ namespace cv { namespace gpu { namespace device
const
float2
*
prevPts
,
float2
*
nextPts
,
uchar
*
status
,
float
*
err
,
bool
GET_MIN_EIGENVALS
,
int
ptcount
,
const
float2
*
prevPts
,
float2
*
nextPts
,
uchar
*
status
,
float
*
err
,
bool
GET_MIN_EIGENVALS
,
int
ptcount
,
int
level
,
dim3
block
,
dim3
patch
,
cudaStream_t
stream
=
0
);
int
level
,
dim3
block
,
dim3
patch
,
cudaStream_t
stream
=
0
);
void
lkDense_gpu
(
DevMem2Db
I
,
DevMem2D
b
J
,
DevMem2D_
<
short
>
dIdx
,
DevMem2D_
<
short
>
dIdy
,
void
lkDense_gpu
(
DevMem2Db
I
,
DevMem2D
f
J
,
DevMem2Df
u
,
DevMem2Df
v
,
DevMem2Df
prevU
,
DevMem2Df
prevV
,
DevMem2Df
u
,
DevMem2Df
v
,
DevMem2Df
*
err
,
bool
GET_MIN_EIGENVALS
,
cudaStream_t
stream
=
0
);
DevMem2Df
err
,
int2
winSize
,
cudaStream_t
stream
=
0
);
}
}
}}}
}}}
...
@@ -160,16 +160,11 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
...
@@ -160,16 +160,11 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
return
;
return
;
}
}
derivLambda
=
std
::
min
(
std
::
max
(
derivLambda
,
0.0
),
1.0
);
iters
=
std
::
min
(
std
::
max
(
iters
,
0
),
100
);
const
int
cn
=
prevImg
.
channels
();
const
int
cn
=
prevImg
.
channels
();
dim3
block
,
patch
;
dim3
block
,
patch
;
calcPatchSize
(
winSize
,
cn
,
block
,
patch
,
isDeviceArch11_
);
calcPatchSize
(
winSize
,
cn
,
block
,
patch
,
isDeviceArch11_
);
CV_Assert
(
derivLambda
>=
0
);
CV_Assert
(
maxLevel
>=
0
&&
winSize
.
width
>
2
&&
winSize
.
height
>
2
);
CV_Assert
(
maxLevel
>=
0
&&
winSize
.
width
>
2
&&
winSize
.
height
>
2
);
CV_Assert
(
prevImg
.
size
()
==
nextImg
.
size
()
&&
prevImg
.
type
()
==
nextImg
.
type
());
CV_Assert
(
prevImg
.
size
()
==
nextImg
.
size
()
&&
prevImg
.
type
()
==
nextImg
.
type
());
CV_Assert
(
patch
.
x
>
0
&&
patch
.
x
<
6
&&
patch
.
y
>
0
&&
patch
.
y
<
6
);
CV_Assert
(
patch
.
x
>
0
&&
patch
.
x
<
6
&&
patch
.
y
>
0
&&
patch
.
y
<
6
);
...
@@ -227,80 +222,53 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
...
@@ -227,80 +222,53 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
{
{
using
namespace
cv
::
gpu
::
device
::
pyrlk
;
using
namespace
cv
::
gpu
::
device
::
pyrlk
;
derivLambda
=
std
::
min
(
std
::
max
(
derivLambda
,
0.0
),
1.0
);
iters
=
std
::
min
(
std
::
max
(
iters
,
0
),
100
);
CV_Assert
(
prevImg
.
type
()
==
CV_8UC1
);
CV_Assert
(
prevImg
.
type
()
==
CV_8UC1
);
CV_Assert
(
prevImg
.
size
()
==
nextImg
.
size
()
&&
prevImg
.
type
()
==
nextImg
.
type
());
CV_Assert
(
prevImg
.
size
()
==
nextImg
.
size
()
&&
prevImg
.
type
()
==
nextImg
.
type
());
CV_Assert
(
derivLambda
>=
0
);
CV_Assert
(
maxLevel
>=
0
);
CV_Assert
(
maxLevel
>=
0
&&
winSize
.
width
>
2
&&
winSize
.
height
>
2
);
CV_Assert
(
winSize
.
width
>
2
&&
winSize
.
height
>
2
);
if
(
useInitialFlow
)
{
CV_Assert
(
u
.
size
()
==
prevImg
.
size
()
&&
u
.
type
()
==
CV_32FC1
);
CV_Assert
(
v
.
size
()
==
prevImg
.
size
()
&&
v
.
type
()
==
CV_32FC1
);
}
else
{
u
.
create
(
prevImg
.
size
(),
CV_32FC1
);
v
.
create
(
prevImg
.
size
(),
CV_32FC1
);
u
.
setTo
(
Scalar
::
all
(
0
));
v
.
setTo
(
Scalar
::
all
(
0
));
}
if
(
err
)
if
(
err
)
err
->
create
(
prevImg
.
size
(),
CV_32FC1
);
err
->
create
(
prevImg
.
size
(),
CV_32FC1
);
// build the image pyramids.
// build the image pyramids.
// we pad each level with +/-winSize.{width|height}
// pixels to simplify the further patch extraction.
buildImagePyramid
(
prevImg
,
prevPyr_
,
true
);
buildImagePyramid
(
prevImg
,
prevPyr_
,
false
);
buildImagePyramid
(
nextImg
,
nextPyr_
,
true
);
buildImagePyramid
(
u
,
uPyr_
,
false
);
buildImagePyramid
(
v
,
vPyr_
,
false
);
// dI/dx ~ Ix, dI/dy ~ Iy
nextPyr_
.
resize
(
maxLevel
+
1
);
nextImg
.
convertTo
(
nextPyr_
[
0
],
CV_32F
);
for
(
int
level
=
1
;
level
<=
maxLevel
;
++
level
)
pyrDown
(
nextPyr_
[
level
-
1
],
nextPyr_
[
level
]);
uPyr_
.
resize
(
2
);
vPyr_
.
resize
(
2
);
ensureSizeIsEnough
(
prevImg
.
rows
+
winSize
.
height
*
2
,
prevImg
.
cols
+
winSize
.
width
*
2
,
CV_16SC1
,
dx_buf_
);
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
uPyr_
[
0
]);
ensureSizeIsEnough
(
prevImg
.
rows
+
winSize
.
height
*
2
,
prevImg
.
cols
+
winSize
.
width
*
2
,
CV_16SC1
,
dy_buf_
);
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
vPyr_
[
0
]);
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
uPyr_
[
1
]);
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
vPyr_
[
1
]);
uPyr_
[
1
].
setTo
(
Scalar
::
all
(
0
));
vPyr_
[
1
].
setTo
(
Scalar
::
all
(
0
));
loadConstants
(
1
,
minEigThreshold
,
make_int2
(
winSize
.
width
,
winSize
.
height
),
iters
);
int2
winSize2i
=
make_int2
(
winSize
.
width
,
winSize
.
height
);
loadConstants
(
1
,
minEigThreshold
,
winSize2i
,
iters
);
DevMem2Df
derr
=
err
?
*
err
:
DevMem2Df
();
DevMem2Df
derr
=
err
?
*
err
:
DevMem2Df
();
int
idx
=
0
;
for
(
int
level
=
maxLevel
;
level
>=
0
;
level
--
)
for
(
int
level
=
maxLevel
;
level
>=
0
;
level
--
)
{
{
Size
imgSize
=
prevPyr_
[
level
].
size
();
int
idx2
=
(
idx
+
1
)
&
1
;
GpuMat
dxWhole
(
imgSize
.
height
+
winSize
.
height
*
2
,
imgSize
.
width
+
winSize
.
width
*
2
,
dx_buf_
.
type
(),
dx_buf_
.
data
,
dx_buf_
.
step
);
GpuMat
dyWhole
(
imgSize
.
height
+
winSize
.
height
*
2
,
imgSize
.
width
+
winSize
.
width
*
2
,
dy_buf_
.
type
(),
dy_buf_
.
data
,
dy_buf_
.
step
);
dxWhole
.
setTo
(
Scalar
::
all
(
0
));
dyWhole
.
setTo
(
Scalar
::
all
(
0
));
GpuMat
dIdx
=
dxWhole
(
Rect
(
winSize
.
width
,
winSize
.
height
,
imgSize
.
width
,
imgSize
.
height
));
GpuMat
dIdy
=
dyWhole
(
Rect
(
winSize
.
width
,
winSize
.
height
,
imgSize
.
width
,
imgSize
.
height
));
calcSharrDeriv
(
prevPyr_
[
level
],
dIdx
,
dIdy
);
lkDense_gpu
(
prevPyr_
[
level
],
nextPyr_
[
level
],
uPyr_
[
idx
],
vPyr_
[
idx
],
uPyr_
[
idx2
],
vPyr_
[
idx2
],
level
==
0
?
derr
:
DevMem2Df
(),
winSize2i
);
lkDense_gpu
(
prevPyr_
[
level
],
nextPyr_
[
level
],
dIdx
,
dIdy
,
uPyr_
[
level
],
vPyr_
[
level
],
if
(
level
>
0
)
level
==
0
&&
err
?
&
derr
:
0
,
getMinEigenVals
);
idx
=
idx2
;
if
(
level
==
0
)
{
uPyr_
[
0
].
copyTo
(
u
);
vPyr_
[
0
].
copyTo
(
v
);
}
else
{
resize
(
uPyr_
[
level
],
uPyr_
[
level
-
1
],
uPyr_
[
level
-
1
].
size
());
resize
(
vPyr_
[
level
],
vPyr_
[
level
-
1
],
vPyr_
[
level
-
1
].
size
());
multiply
(
uPyr_
[
level
-
1
],
Scalar
::
all
(
2
),
uPyr_
[
level
-
1
]);
multiply
(
vPyr_
[
level
-
1
],
Scalar
::
all
(
2
),
vPyr_
[
level
-
1
]);
}
}
}
uPyr_
[
idx
].
copyTo
(
u
);
vPyr_
[
idx
].
copyTo
(
v
);
}
}
#endif
/* !defined (HAVE_CUDA) */
#endif
/* !defined (HAVE_CUDA) */
samples/gpu/pyrlk_optical_flow.cpp
View file @
2e2bd557
...
@@ -159,7 +159,6 @@ int main(int argc, const char* argv[])
...
@@ -159,7 +159,6 @@ int main(int argc, const char* argv[])
"{ win_size | win_size | 21 | specify windows size [PyrLK] }"
"{ win_size | win_size | 21 | specify windows size [PyrLK] }"
"{ max_level | max_level | 3 | specify max level [PyrLK] }"
"{ max_level | max_level | 3 | specify max level [PyrLK] }"
"{ iters | iters | 30 | specify iterations count [PyrLK] }"
"{ iters | iters | 30 | specify iterations count [PyrLK] }"
"{ deriv_lambda | deriv_lambda | 0.5 | specify deriv lambda [PyrLK] }"
"{ points | points | 4000 | specify points count [GoodFeatureToTrack] }"
"{ points | points | 4000 | specify points count [GoodFeatureToTrack] }"
"{ min_dist | min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }"
;
"{ min_dist | min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }"
;
...
@@ -186,7 +185,6 @@ int main(int argc, const char* argv[])
...
@@ -186,7 +185,6 @@ int main(int argc, const char* argv[])
int
winSize
=
cmd
.
get
<
int
>
(
"win_size"
);
int
winSize
=
cmd
.
get
<
int
>
(
"win_size"
);
int
maxLevel
=
cmd
.
get
<
int
>
(
"max_level"
);
int
maxLevel
=
cmd
.
get
<
int
>
(
"max_level"
);
int
iters
=
cmd
.
get
<
int
>
(
"iters"
);
int
iters
=
cmd
.
get
<
int
>
(
"iters"
);
double
derivLambda
=
cmd
.
get
<
double
>
(
"deriv_lambda"
);
int
points
=
cmd
.
get
<
int
>
(
"points"
);
int
points
=
cmd
.
get
<
int
>
(
"points"
);
double
minDist
=
cmd
.
get
<
double
>
(
"min_dist"
);
double
minDist
=
cmd
.
get
<
double
>
(
"min_dist"
);
...
@@ -235,7 +233,6 @@ int main(int argc, const char* argv[])
...
@@ -235,7 +233,6 @@ int main(int argc, const char* argv[])
d_pyrLK
.
winSize
.
height
=
winSize
;
d_pyrLK
.
winSize
.
height
=
winSize
;
d_pyrLK
.
maxLevel
=
maxLevel
;
d_pyrLK
.
maxLevel
=
maxLevel
;
d_pyrLK
.
iters
=
iters
;
d_pyrLK
.
iters
=
iters
;
d_pyrLK
.
derivLambda
=
derivLambda
;
GpuMat
d_frame0
(
frame0
);
GpuMat
d_frame0
(
frame0
);
GpuMat
d_frame1
(
frame1
);
GpuMat
d_frame1
(
frame1
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment