Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
f392ab0e
Commit
f392ab0e
authored
Jun 18, 2012
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
new implementation of gpu::PyrLKOpticalFlow::sparse (1.5 - 2x faster)
parent
33d9e235
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
286 additions
and
382 deletions
+286
-382
video.rst
modules/gpu/doc/video.rst
+0
-3
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+6
-38
perf_video.cpp
modules/gpu/perf/perf_video.cpp
+12
-7
perf_video.cpp
modules/gpu/perf_cpu/perf_video.cpp
+17
-4
pyrlk.cu
modules/gpu/src/cuda/pyrlk.cu
+171
-225
pyrlk.cpp
modules/gpu/src/pyrlk.cpp
+73
-90
test_video.cpp
modules/gpu/test/test_video.cpp
+7
-15
No files found.
modules/gpu/doc/video.rst
View file @
f392ab0e
...
...
@@ -204,10 +204,7 @@ Class used for calculating an optical flow. ::
Size winSize;
int maxLevel;
int iters;
double derivLambda;
bool useInitialFlow;
float minEigThreshold;
bool getMinEigenVals;
void releaseMemory();
};
...
...
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
f392ab0e
...
...
@@ -1749,60 +1749,28 @@ inline GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU(int maxC
class
CV_EXPORTS
PyrLKOpticalFlow
{
public
:
PyrLKOpticalFlow
()
{
winSize
=
Size
(
21
,
21
);
maxLevel
=
3
;
iters
=
30
;
useInitialFlow
=
false
;
minEigThreshold
=
1e-4
f
;
getMinEigenVals
=
false
;
isDeviceArch11_
=
!
DeviceInfo
().
supports
(
FEATURE_SET_COMPUTE_12
);
}
PyrLKOpticalFlow
();
void
sparse
(
const
GpuMat
&
prevImg
,
const
GpuMat
&
nextImg
,
const
GpuMat
&
prevPts
,
GpuMat
&
nextPts
,
GpuMat
&
status
,
GpuMat
*
err
=
0
);
void
dense
(
const
GpuMat
&
prevImg
,
const
GpuMat
&
nextImg
,
GpuMat
&
u
,
GpuMat
&
v
,
GpuMat
*
err
=
0
);
void
releaseMemory
();
Size
winSize
;
int
maxLevel
;
int
iters
;
bool
useInitialFlow
;
float
minEigThreshold
;
bool
getMinEigenVals
;
void
releaseMemory
()
{
dx_calcBuf_
.
release
();
dy_calcBuf_
.
release
();
prevPyr_
.
clear
();
nextPyr_
.
clear
();
dx_buf_
.
release
();
dy_buf_
.
release
();
uPyr_
.
clear
();
vPyr_
.
clear
();
}
private
:
void
calcSharrDeriv
(
const
GpuMat
&
src
,
GpuMat
&
dx
,
GpuMat
&
dy
);
void
buildImagePyramid
(
const
GpuMat
&
img0
,
vector
<
GpuMat
>&
pyr
,
bool
withBorder
);
GpuMat
dx_calcBuf_
;
GpuMat
dy_calcBuf_
;
vector
<
GpuMat
>
prevPyr_
;
vector
<
GpuMat
>
nextPyr_
;
GpuMat
dx_buf_
;
GpuMat
dy_buf_
;
GpuMat
buf_
;
vector
<
GpuMat
>
uPyr_
;
vector
<
GpuMat
>
vPyr_
;
GpuMat
uPyr_
[
2
]
;
GpuMat
vPyr_
[
2
]
;
bool
isDeviceArch11_
;
};
...
...
modules/gpu/perf/perf_video.cpp
View file @
f392ab0e
...
...
@@ -156,15 +156,19 @@ INSTANTIATE_TEST_CASE_P(Video, GoodFeaturesToTrack, testing::Combine(
IMPLEMENT_PARAM_CLASS
(
GraySource
,
bool
)
IMPLEMENT_PARAM_CLASS
(
Points
,
int
)
IMPLEMENT_PARAM_CLASS
(
WinSize
,
int
)
IMPLEMENT_PARAM_CLASS
(
Levels
,
int
)
IMPLEMENT_PARAM_CLASS
(
Iters
,
int
)
GPU_PERF_TEST
(
PyrLKOpticalFlowSparse
,
cv
::
gpu
::
DeviceInfo
,
GraySource
,
Points
,
WinSize
)
GPU_PERF_TEST
(
PyrLKOpticalFlowSparse
,
cv
::
gpu
::
DeviceInfo
,
GraySource
,
Points
,
WinSize
,
Levels
,
Iters
)
{
cv
::
gpu
::
DeviceInfo
devInfo
=
GET_PARAM
(
0
);
cv
::
gpu
::
setDevice
(
devInfo
.
deviceID
());
bool
useGray
=
GET_PARAM
(
1
);
int
points
=
GET_PARAM
(
2
);
int
win_size
=
GET_PARAM
(
3
);
int
winSize
=
GET_PARAM
(
3
);
int
levels
=
GET_PARAM
(
4
);
int
iters
=
GET_PARAM
(
5
);
cv
::
Mat
frame0_host
=
readImage
(
"gpu/opticalflow/frame0.png"
,
useGray
?
cv
::
IMREAD_GRAYSCALE
:
cv
::
IMREAD_COLOR
);
ASSERT_FALSE
(
frame0_host
.
empty
());
...
...
@@ -184,7 +188,9 @@ GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, GraySource, Points, W
detector
(
cv
::
gpu
::
GpuMat
(
gray_frame
),
pts
);
cv
::
gpu
::
PyrLKOpticalFlow
pyrLK
;
pyrLK
.
winSize
=
cv
::
Size
(
win_size
,
win_size
);
pyrLK
.
winSize
=
cv
::
Size
(
winSize
,
winSize
);
pyrLK
.
maxLevel
=
levels
-
1
;
pyrLK
.
iters
=
iters
;
cv
::
gpu
::
GpuMat
frame0
(
frame0_host
);
cv
::
gpu
::
GpuMat
frame1
(
frame1_host
);
...
...
@@ -203,14 +209,13 @@ INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine(
ALL_DEVICES
,
testing
::
Values
(
GraySource
(
true
),
GraySource
(
false
)),
testing
::
Values
(
Points
(
1000
),
Points
(
2000
),
Points
(
4000
),
Points
(
8000
)),
testing
::
Values
(
WinSize
(
17
),
WinSize
(
21
))));
testing
::
Values
(
WinSize
(
9
),
WinSize
(
13
),
WinSize
(
17
),
WinSize
(
21
)),
testing
::
Values
(
Levels
(
1
),
Levels
(
2
),
Levels
(
3
)),
testing
::
Values
(
Iters
(
1
),
Iters
(
10
),
Iters
(
30
))));
//////////////////////////////////////////////////////
// PyrLKOpticalFlowDense
IMPLEMENT_PARAM_CLASS
(
Levels
,
int
)
IMPLEMENT_PARAM_CLASS
(
Iters
,
int
)
GPU_PERF_TEST
(
PyrLKOpticalFlowDense
,
cv
::
gpu
::
DeviceInfo
,
WinSize
,
Levels
,
Iters
)
{
cv
::
gpu
::
DeviceInfo
devInfo
=
GET_PARAM
(
0
);
...
...
modules/gpu/perf_cpu/perf_video.cpp
View file @
f392ab0e
...
...
@@ -34,12 +34,16 @@ INSTANTIATE_TEST_CASE_P(Video, GoodFeaturesToTrack, testing::Combine(
IMPLEMENT_PARAM_CLASS
(
GraySource
,
bool
)
IMPLEMENT_PARAM_CLASS
(
Points
,
int
)
IMPLEMENT_PARAM_CLASS
(
WinSize
,
int
)
IMPLEMENT_PARAM_CLASS
(
Levels
,
int
)
IMPLEMENT_PARAM_CLASS
(
Iters
,
int
)
GPU_PERF_TEST
(
PyrLKOpticalFlowSparse
,
cv
::
gpu
::
DeviceInfo
,
GraySource
,
Points
,
WinSize
)
GPU_PERF_TEST
(
PyrLKOpticalFlowSparse
,
cv
::
gpu
::
DeviceInfo
,
GraySource
,
Points
,
WinSize
,
Levels
,
Iters
)
{
bool
useGray
=
GET_PARAM
(
1
);
int
points
=
GET_PARAM
(
2
);
int
win_size
=
GET_PARAM
(
3
);
int
levels
=
GET_PARAM
(
4
);
int
iters
=
GET_PARAM
(
5
);
cv
::
Mat
frame0
=
readImage
(
"gpu/opticalflow/frame0.png"
,
useGray
?
cv
::
IMREAD_GRAYSCALE
:
cv
::
IMREAD_COLOR
);
ASSERT_FALSE
(
frame0
.
empty
());
...
...
@@ -59,11 +63,17 @@ GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, GraySource, Points, W
cv
::
Mat
nextPts
;
cv
::
Mat
status
;
cv
::
calcOpticalFlowPyrLK
(
frame0
,
frame1
,
pts
,
nextPts
,
status
,
cv
::
noArray
(),
cv
::
Size
(
win_size
,
win_size
));
cv
::
calcOpticalFlowPyrLK
(
frame0
,
frame1
,
pts
,
nextPts
,
status
,
cv
::
noArray
(),
cv
::
Size
(
win_size
,
win_size
),
levels
-
1
,
cv
::
TermCriteria
(
cv
::
TermCriteria
::
COUNT
+
cv
::
TermCriteria
::
EPS
,
iters
,
0.01
));
declare
.
time
(
20.0
);
TEST_CYCLE
()
{
cv
::
calcOpticalFlowPyrLK
(
frame0
,
frame1
,
pts
,
nextPts
,
status
,
cv
::
noArray
(),
cv
::
Size
(
win_size
,
win_size
));
cv
::
calcOpticalFlowPyrLK
(
frame0
,
frame1
,
pts
,
nextPts
,
status
,
cv
::
noArray
(),
cv
::
Size
(
win_size
,
win_size
),
levels
-
1
,
cv
::
TermCriteria
(
cv
::
TermCriteria
::
COUNT
+
cv
::
TermCriteria
::
EPS
,
iters
,
0.01
));
}
}
...
...
@@ -71,7 +81,9 @@ INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine(
ALL_DEVICES
,
testing
::
Values
(
GraySource
(
true
),
GraySource
(
false
)),
testing
::
Values
(
Points
(
1000
),
Points
(
2000
),
Points
(
4000
),
Points
(
8000
)),
testing
::
Values
(
WinSize
(
17
),
WinSize
(
21
))));
testing
::
Values
(
WinSize
(
9
),
WinSize
(
13
),
WinSize
(
17
),
WinSize
(
21
)),
testing
::
Values
(
Levels
(
1
),
Levels
(
2
),
Levels
(
3
)),
testing
::
Values
(
Iters
(
1
),
Iters
(
10
),
Iters
(
30
))));
//////////////////////////////////////////////////////
// FarnebackOpticalFlowTest
...
...
@@ -100,6 +112,7 @@ GPU_PERF_TEST_1(FarnebackOpticalFlowTest, cv::gpu::DeviceInfo)
TEST_CYCLE
()
{
cv
::
calcOpticalFlowFarneback
(
frame0
,
frame1
,
flow
,
pyrScale
,
numLevels
,
winSize
,
numIters
,
polyN
,
polySigma
,
flags
);
}
}
...
...
modules/gpu/src/cuda/pyrlk.cu
View file @
f392ab0e
...
...
@@ -49,129 +49,30 @@
#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/functional.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
namespace cv { namespace gpu { namespace device
{
namespace pyrlk
{
__constant__ int c_cn;
__constant__ float c_minEigThreshold;
__constant__ int c_winSize_x;
__constant__ int c_winSize_y;
__constant__ int c_winSize_x_cn;
__constant__ int c_halfWin_x;
__constant__ int c_halfWin_y;
__constant__ int c_iters;
void loadConstants(int
cn, float minEigThreshold, int
2 winSize, int iters)
void loadConstants(int2 winSize, int iters)
{
int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
cudaSafeCall( cudaMemcpyToSymbol(c_cn, &cn, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_minEigThreshold, &minEigThreshold, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) );
winSize.x *= cn;
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x_cn, &winSize.x, sizeof(int))
);
int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2
);
cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) );
}
__global__ void calcSharrDeriv_vertical(const PtrStepb src, PtrStep<short> dx_buf, PtrStep<short> dy_buf, int rows, int colsn)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < rows && x < colsn)
{
const uchar src_val0 = src(y > 0 ? y - 1 : 1, x);
const uchar src_val1 = src(y, x);
const uchar src_val2 = src(y < rows - 1 ? y + 1 : rows - 2, x);
dx_buf(y, x) = (src_val0 + src_val2) * 3 + src_val1 * 10;
dy_buf(y, x) = src_val2 - src_val0;
}
}
__global__ void calcSharrDeriv_horizontal(const PtrStep<short> dx_buf, const PtrStep<short> dy_buf, PtrStep<short> dIdx, PtrStep<short> dIdy, int rows, int cols)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int colsn = cols * c_cn;
if (y < rows && x < colsn)
{
const short* dx_buf_row = dx_buf.ptr(y);
const short* dy_buf_row = dy_buf.ptr(y);
const int xr = x + c_cn < colsn ? x + c_cn : (cols - 2) * c_cn + x + c_cn - colsn;
const int xl = x - c_cn >= 0 ? x - c_cn : c_cn + x;
dIdx(y, x) = dx_buf_row[xr] - dx_buf_row[xl];
dIdy(y, x) = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10;
}
}
void calcSharrDeriv_gpu(DevMem2Db src, DevMem2D_<short> dx_buf, DevMem2D_<short> dy_buf, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy, int cn,
cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(src.cols * cn, block.x), divUp(src.rows, block.y));
calcSharrDeriv_vertical<<<grid, block, 0, stream>>>(src, dx_buf, dy_buf, src.rows, src.cols * cn);
cudaSafeCall( cudaGetLastError() );
calcSharrDeriv_horizontal<<<grid, block, 0, stream>>>(dx_buf, dy_buf, dIdx, dIdy, src.rows, src.cols);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
#define W_BITS 14
#define W_BITS1 14
#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
__device__ int linearFilter(const PtrStepb& src, float2 pt, int x, int y)
{
int2 ipt;
ipt.x = __float2int_rd(pt.x);
ipt.y = __float2int_rd(pt.y);
float a = pt.x - ipt.x;
float b = pt.y - ipt.y;
int iw00 = __float2int_rn((1.0f - a) * (1.0f - b) * (1 << W_BITS));
int iw01 = __float2int_rn(a * (1.0f - b) * (1 << W_BITS));
int iw10 = __float2int_rn((1.0f - a) * b * (1 << W_BITS));
int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
const uchar* src_row = src.ptr(ipt.y + y) + ipt.x * c_cn;
const uchar* src_row1 = src.ptr(ipt.y + y + 1) + ipt.x * c_cn;
return CV_DESCALE(src_row[x] * iw00 + src_row[x + c_cn] * iw01 + src_row1[x] * iw10 + src_row1[x + c_cn] * iw11, W_BITS1 - 5);
}
__device__ int linearFilter(const PtrStep<short>& src, float2 pt, int x, int y)
{
int2 ipt;
ipt.x = __float2int_rd(pt.x);
ipt.y = __float2int_rd(pt.y);
float a = pt.x - ipt.x;
float b = pt.y - ipt.y;
int iw00 = __float2int_rn((1.0f - a) * (1.0f - b) * (1 << W_BITS));
int iw01 = __float2int_rn(a * (1.0f - b) * (1 << W_BITS));
int iw10 = __float2int_rn((1.0f - a) * b * (1 << W_BITS));
int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
const short* src_row = src.ptr(ipt.y + y) + ipt.x * c_cn;
const short* src_row1 = src.ptr(ipt.y + y + 1) + ipt.x * c_cn;
return CV_DESCALE(src_row[x] * iw00 + src_row[x + c_cn] * iw01 + src_row1[x] * iw10 + src_row1[x + c_cn] * iw11, W_BITS1);
cudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) );
}
__device__ void reduce(float& val1, float& val2, float& val3, float* smem1, float* smem2, float* smem3, int tid)
...
...
@@ -310,11 +211,65 @@ namespace cv { namespace gpu { namespace device
}
}
#define SCALE (1.0f / (1 << 20))
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_If(false, cudaFilterModeLinear, cudaAddressModeClamp);
texture<float4, cudaTextureType2D, cudaReadModeElementType> tex_If4(false, cudaFilterModeLinear, cudaAddressModeClamp);
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_Ib(false, cudaFilterModePoint, cudaAddressModeClamp);
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_Jf(false, cudaFilterModeLinear, cudaAddressModeClamp);
texture<float4, cudaTextureType2D, cudaReadModeElementType> tex_Jf4(false, cudaFilterModeLinear, cudaAddressModeClamp);
template <int cn> struct Tex_I;
template <> struct Tex_I<1>
{
static __device__ __forceinline__ float read(float x, float y)
{
return tex2D(tex_If, x, y);
}
};
template <> struct Tex_I<4>
{
static __device__ __forceinline__ float4 read(float x, float y)
{
return tex2D(tex_If4, x, y);
}
};
template <int cn> struct Tex_J;
template <> struct Tex_J<1>
{
static __device__ __forceinline__ float read(float x, float y)
{
return tex2D(tex_Jf, x, y);
}
};
template <> struct Tex_J<4>
{
static __device__ __forceinline__ float4 read(float x, float y)
{
return tex2D(tex_Jf4, x, y);
}
};
__device__ __forceinline__ void accum(float& dst, float val)
{
dst += val;
}
__device__ __forceinline__ void accum(float& dst, const float4& val)
{
dst += val.x + val.y + val.z;
}
__device__ __forceinline__ float abs_(float a)
{
return ::fabs(a);
}
__device__ __forceinline__ float4 abs_(const float4& a)
{
return fabs(a);
}
template <int PATCH_X, int PATCH_Y, bool calcErr, bool GET_MIN_EIGENVALS>
__global__ void lkSparse(const PtrStepb I, const PtrStepb J, const PtrStep<short> dIdx, const PtrStep<short> dIdy,
const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
template <int cn, int PATCH_X, int PATCH_Y, bool calcErr>
__global__ void lkSparse(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
{
#if __CUDA_ARCH__ <= 110
__shared__ float smem1[128];
...
...
@@ -332,47 +287,52 @@ namespace cv { namespace gpu { namespace device
prevPt.x *= (1.0f / (1 << level));
prevPt.y *= (1.0f / (1 << level));
prevPt.x -= c_halfWin_x;
prevPt.y -= c_halfWin_y;
if (prevPt.x < -c_winSize_x || prevPt.x >= cols || prevPt.y < -c_winSize_y || prevPt.y >= rows)
if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
{
if (level == 0 && tid == 0)
{
if (tid == 0 && level == 0)
status[blockIdx.x] = 0;
if (calcErr)
err[blockIdx.x] = 0;
}
return;
}
prevPt.x -= c_halfWin_x;
prevPt.y -= c_halfWin_y;
// extract the patch from the first image, compute covariation matrix of derivatives
float A11 = 0;
float A12 = 0;
float A22 = 0;
int I_patch[PATCH_Y][PATCH_X];
int dIdx_patch[PATCH_Y][PATCH_X];
int dIdy_patch[PATCH_Y][PATCH_X];
typedef typename TypeVec<float, cn>::vec_type work_type;
work_type I_patch [PATCH_Y][PATCH_X];
work_type dIdx_patch[PATCH_Y][PATCH_X];
work_type dIdy_patch[PATCH_Y][PATCH_X];
for (int y
= threadIdx.y, i = 0; y < c_winSize_y; y
+= blockDim.y, ++i)
for (int y
Base = threadIdx.y, i = 0; yBase < c_winSize_y; yBase
+= blockDim.y, ++i)
{
for (int x
= threadIdx.x, j = 0; x < c_winSize_x_cn; x
+= blockDim.x, ++j)
for (int x
Base = threadIdx.x, j = 0; xBase < c_winSize_x; xBase
+= blockDim.x, ++j)
{
I_patch[i][j] = linearFilter(I, prevPt, x, y);
float x = prevPt.x + xBase + 0.5f;
float y = prevPt.y + yBase + 0.5f;
I_patch[i][j] = Tex_I<cn>::read(x, y);
// Sharr Deriv
work_type dIdx = 3.0f * Tex_I<cn>::read(x+1, y-1) + 10.0f * Tex_I<cn>::read(x+1, y) + 3.0f * Tex_I<cn>::read(x+1, y+1) -
(3.0f * Tex_I<cn>::read(x-1, y-1) + 10.0f * Tex_I<cn>::read(x-1, y) + 3.0f * Tex_I<cn>::read(x-1, y+1));
int ixval = linearFilter(dIdx, prevPt, x, y);
int iyval = linearFilter(dIdy, prevPt, x, y
);
work_type dIdy = 3.0f * Tex_I<cn>::read(x-1, y+1) + 10.0f * Tex_I<cn>::read(x, y+1) + 3.0f * Tex_I<cn>::read(x+1, y+1) -
(3.0f * Tex_I<cn>::read(x-1, y-1) + 10.0f * Tex_I<cn>::read(x, y-1) + 3.0f * Tex_I<cn>::read(x+1, y-1)
);
dIdx_patch[i][j] =
ixval
;
dIdy_patch[i][j] =
iyval
;
dIdx_patch[i][j] =
dIdx
;
dIdy_patch[i][j] =
dIdy
;
A11 += ixval * ixval
;
A12 += ixval * iyval
;
A22 += iyval * iyval
;
accum(A11, dIdx * dIdx)
;
accum(A12, dIdx * dIdy)
;
accum(A22, dIdy * dIdy)
;
}
}
...
...
@@ -383,31 +343,21 @@ namespace cv { namespace gpu { namespace device
A12 = smem2[0];
A22 = smem3[0];
A11 *= SCALE;
A12 *= SCALE;
A22 *= SCALE;
float D = A11 * A22 - A12 * A12;
if (D < numeric_limits<float>::epsilon())
{
float D = A11 * A22 - A12 * A12;
float minEig = (A22 + A11 - ::sqrtf((A11 - A22) * (A11 - A22) + 4.f * A12 * A12)) / (2 * c_winSize_x * c_winSize_y);
if (calcErr && GET_MIN_EIGENVALS && tid == 0)
err[blockIdx.x] = minEig;
if (minEig < c_minEigThreshold || D < numeric_limits<float>::epsilon())
{
if (level == 0 && tid == 0)
status[blockIdx.x] = 0;
if (tid == 0 && level == 0)
status[blockIdx.x] = 0;
return;
}
return;
}
D = 1.f / D;
D = 1.f / D;
A11 *= D;
A12 *= D;
A22 *= D;
}
A11 *= D;
A12 *= D;
A22 *= D;
float2 nextPt = nextPts[blockIdx.x];
nextPt.x *= 2.f;
...
...
@@ -416,14 +366,14 @@ namespace cv { namespace gpu { namespace device
nextPt.x -= c_halfWin_x;
nextPt.y -= c_halfWin_y;
bool status_ = true;
for (int k = 0; k < c_iters; ++k)
{
if (nextPt.x < -c_
winSize_x || nextPt.x >= cols || nextPt.y < -c_winSize
_y || nextPt.y >= rows)
if (nextPt.x < -c_
halfWin_x || nextPt.x >= cols || nextPt.y < -c_halfWin
_y || nextPt.y >= rows)
{
status_ = false;
break;
if (tid == 0 && level == 0)
status[blockIdx.x] = 0;
return;
}
float b1 = 0;
...
...
@@ -431,12 +381,15 @@ namespace cv { namespace gpu { namespace device
for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
{
for (int x = threadIdx.x, j = 0; x < c_winSize_x
_cn
; x += blockDim.x, ++j)
for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j)
{
int diff = linearFilter(J, nextPt, x, y) - I_patch[i][j];
work_type I_val = I_patch[i][j];
work_type J_val = Tex_J<cn>::read(nextPt.x + x + 0.5f, nextPt.y + y + 0.5f);
work_type diff = (J_val - I_val) * 32.0f;
b1 += diff * dIdx_patch[i][j]
;
b2 += diff * dIdy_patch[i][j]
;
accum(b1, diff * dIdx_patch[i][j])
;
accum(b2, diff * dIdy_patch[i][j])
;
}
}
...
...
@@ -446,9 +399,6 @@ namespace cv { namespace gpu { namespace device
b1 = smem1[0];
b2 = smem2[0];
b1 *= SCALE;
b2 *= SCALE;
float2 delta;
delta.x = A12 * b2 - A22 * b1;
delta.y = A12 * b1 - A11 * b2;
...
...
@@ -460,24 +410,23 @@ namespace cv { namespace gpu { namespace device
break;
}
if (nextPt.x < -c_winSize_x || nextPt.x >= cols || nextPt.y < -c_winSize_y || nextPt.y >= rows)
status_ = false;
float errval = 0.f;
if (calcErr && !GET_MIN_EIGENVALS && status_)
float errval = 0;
if (calcErr)
{
for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
{
for (int x = threadIdx.x, j = 0; x < c_winSize_x
_cn
; x += blockDim.x, ++j)
for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j)
{
int diff = linearFilter(J, nextPt, x, y) - I_patch[i][j];
errval += ::fabsf((float)diff);
work_type I_val = I_patch[i][j];
work_type J_val = Tex_J<cn>::read(nextPt.x + x + 0.5f, nextPt.y + y + 0.5f);
work_type diff = J_val - I_val;
accum(errval, abs_(diff));
}
}
reduce(errval, smem1, tid);
errval /= 32 * c_winSize_x_cn * c_winSize_y;
}
if (tid == 0)
...
...
@@ -485,45 +434,23 @@ namespace cv { namespace gpu { namespace device
nextPt.x += c_halfWin_x;
nextPt.y += c_halfWin_y;
status[blockIdx.x] = status_;
nextPts[blockIdx.x] = nextPt;
if (calcErr
&& !GET_MIN_EIGENVALS
)
err[blockIdx.x] =
errval
;
if (calcErr)
err[blockIdx.x] =
static_cast<float>(errval) / (cn * c_winSize_x * c_winSize_y)
;
}
}
template <int PATCH_X, int PATCH_Y>
void lkSparse_caller(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
const float2* prevPts, float2* nextPts, uchar* status, float* err, bool GET_MIN_EIGENVALS, int ptcount,
template <int cn, int PATCH_X, int PATCH_Y>
void lkSparse_caller(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, cudaStream_t stream)
{
dim3 grid(ptcount);
if (level == 0 && err)
{
if (GET_MIN_EIGENVALS)
{
cudaSafeCall( cudaFuncSetCacheConfig(lkSparse<PATCH_X, PATCH_Y, true, true>, cudaFuncCachePreferL1) );
lkSparse<PATCH_X, PATCH_Y, true, true><<<grid, block>>>(I, J, dIdx, dIdy,
prevPts, nextPts, status, err, level, I.rows, I.cols);
}
else
{
cudaSafeCall( cudaFuncSetCacheConfig(lkSparse<PATCH_X, PATCH_Y, true, false>, cudaFuncCachePreferL1) );
lkSparse<PATCH_X, PATCH_Y, true, false><<<grid, block>>>(I, J, dIdx, dIdy,
prevPts, nextPts, status, err, level, I.rows, I.cols);
}
}
lkSparse<cn, PATCH_X, PATCH_Y, true><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
else
{
cudaSafeCall( cudaFuncSetCacheConfig(lkSparse<PATCH_X, PATCH_Y, false, false>, cudaFuncCachePreferL1) );
lkSparse<PATCH_X, PATCH_Y, false, false><<<grid, block>>>(I, J, dIdx, dIdy,
prevPts, nextPts, status, err, level, I.rows, I.cols);
}
lkSparse<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
cudaSafeCall( cudaGetLastError() );
...
...
@@ -531,30 +458,49 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
}
void lkSparse_gpu(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
const float2* prevPts, float2* nextPts, uchar* status, float* err, bool GET_MIN_EIGENVALS, int ptcount,
void lkSparse1_gpu(DevMem2Df I, DevMem2Df J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, dim3 patch, cudaStream_t stream)
{
typedef void (*func_t)(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
const float2* prevPts, float2* nextPts, uchar* status, float* err, bool GET_MIN_EIGENVALS, int ptcount,
typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, cudaStream_t stream);
static const func_t funcs[5][5] =
{
{lkSparse_caller<1, 1
>, lkSparse_caller<2, 1>, lkSparse_caller<3, 1>, lkSparse_caller<4, 1>, lkSparse_caller<
5, 1>},
{lkSparse_caller<1,
2>, lkSparse_caller<2, 2>, lkSparse_caller<3, 2>, lkSparse_caller<4, 2>, lkSparse_caller<
5, 2>},
{lkSparse_caller<1,
3>, lkSparse_caller<2, 3>, lkSparse_caller<3, 3>, lkSparse_caller<4, 3>, lkSparse_caller<
5, 3>},
{lkSparse_caller<1,
4>, lkSparse_caller<2, 4>, lkSparse_caller<3, 4>, lkSparse_caller<4, 4>, lkSparse_caller<
5, 4>},
{lkSparse_caller<1,
5>, lkSparse_caller<2, 5>, lkSparse_caller<3, 5>, lkSparse_caller<4, 5>, lkSparse_caller<
5, 5>}
{lkSparse_caller<1, 1
, 1>, lkSparse_caller<1, 2, 1>, lkSparse_caller<1, 3, 1>, lkSparse_caller<1, 4, 1>, lkSparse_caller<1,
5, 1>},
{lkSparse_caller<1,
1, 2>, lkSparse_caller<1, 2, 2>, lkSparse_caller<1, 3, 2>, lkSparse_caller<1, 4, 2>, lkSparse_caller<1,
5, 2>},
{lkSparse_caller<1,
1, 3>, lkSparse_caller<1, 2, 3>, lkSparse_caller<1, 3, 3>, lkSparse_caller<1, 4, 3>, lkSparse_caller<1,
5, 3>},
{lkSparse_caller<1,
1, 4>, lkSparse_caller<1, 2, 4>, lkSparse_caller<1, 3, 4>, lkSparse_caller<1, 4, 4>, lkSparse_caller<1,
5, 4>},
{lkSparse_caller<1,
1, 5>, lkSparse_caller<1, 2, 5>, lkSparse_caller<1, 3, 5>, lkSparse_caller<1, 4, 5>, lkSparse_caller<1,
5, 5>}
};
funcs[patch.y - 1][patch.x - 1](I, J, dIdx, dIdy,
prevPts, nextPts, status, err, GET_MIN_EIGENVALS, ptcount,
bindTexture(&tex_If, I);
bindTexture(&tex_Jf, J);
funcs[patch.y - 1][patch.x - 1](I.rows, I.cols, prevPts, nextPts, status, err, ptcount,
level, block, stream);
}
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_I(false, cudaFilterModePoint, cudaAddressModeClamp);
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_J(false, cudaFilterModeLinear, cudaAddressModeClamp);
void lkSparse4_gpu(DevMem2D_<float4> I, DevMem2D_<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, dim3 patch, cudaStream_t stream)
{
typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, cudaStream_t stream);
static const func_t funcs[5][5] =
{
{lkSparse_caller<4, 1, 1>, lkSparse_caller<4, 2, 1>, lkSparse_caller<4, 3, 1>, lkSparse_caller<4, 4, 1>, lkSparse_caller<4, 5, 1>},
{lkSparse_caller<4, 1, 2>, lkSparse_caller<4, 2, 2>, lkSparse_caller<4, 3, 2>, lkSparse_caller<4, 4, 2>, lkSparse_caller<4, 5, 2>},
{lkSparse_caller<4, 1, 3>, lkSparse_caller<4, 2, 3>, lkSparse_caller<4, 3, 3>, lkSparse_caller<4, 4, 3>, lkSparse_caller<4, 5, 3>},
{lkSparse_caller<4, 1, 4>, lkSparse_caller<4, 2, 4>, lkSparse_caller<4, 3, 4>, lkSparse_caller<4, 4, 4>, lkSparse_caller<4, 5, 4>},
{lkSparse_caller<4, 1, 5>, lkSparse_caller<4, 2, 5>, lkSparse_caller<4, 3, 5>, lkSparse_caller<4, 4, 5>, lkSparse_caller<4, 5, 5>}
};
bindTexture(&tex_If4, I);
bindTexture(&tex_Jf4, J);
funcs[patch.y - 1][patch.x - 1](I.rows, I.cols, prevPts, nextPts, status, err, ptcount,
level, block, stream);
}
template <bool calcErr>
__global__ void lkDense(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols)
...
...
@@ -578,15 +524,15 @@ namespace cv { namespace gpu { namespace device
float x = xBase - c_halfWin_x + j + 0.5f;
float y = yBase - c_halfWin_y + i + 0.5f;
I_patch[i * patchWidth + j] = tex2D(tex_I, x, y);
I_patch[i * patchWidth + j] = tex2D(tex_I
b
, x, y);
// Sharr Deriv
dIdx_patch[i * patchWidth + j] = 3 * tex2D(tex_I
, x+1, y-1) + 10 * tex2D(tex_I, x+1, y) + 3 * tex2D(tex_I
, x+1, y+1) -
(3 * tex2D(tex_I
, x-1, y-1) + 10 * tex2D(tex_I, x-1, y) + 3 * tex2D(tex_I
, x-1, y+1));
dIdx_patch[i * patchWidth + j] = 3 * tex2D(tex_I
b, x+1, y-1) + 10 * tex2D(tex_Ib, x+1, y) + 3 * tex2D(tex_Ib
, x+1, y+1) -
(3 * tex2D(tex_I
b, x-1, y-1) + 10 * tex2D(tex_Ib, x-1, y) + 3 * tex2D(tex_Ib
, x-1, y+1));
dIdy_patch[i * patchWidth + j] = 3 * tex2D(tex_I
, x-1, y+1) + 10 * tex2D(tex_I, x, y+1) + 3 * tex2D(tex_I
, x+1, y+1) -
(3 * tex2D(tex_I
, x-1, y-1) + 10 * tex2D(tex_I, x, y-1) + 3 * tex2D(tex_I
, x+1, y-1));
dIdy_patch[i * patchWidth + j] = 3 * tex2D(tex_I
b, x-1, y+1) + 10 * tex2D(tex_Ib, x, y+1) + 3 * tex2D(tex_Ib
, x+1, y+1) -
(3 * tex2D(tex_I
b, x-1, y-1) + 10 * tex2D(tex_Ib, x, y-1) + 3 * tex2D(tex_Ib
, x+1, y-1));
}
}
...
...
@@ -657,7 +603,7 @@ namespace cv { namespace gpu { namespace device
for (int j = 0; j < c_winSize_x; ++j)
{
int I = I_patch[(threadIdx.y + i) * patchWidth + threadIdx.x + j];
int J = tex2D(tex_J, nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f);
int J = tex2D(tex_J
f
, nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f);
int diff = (J - I) * 32;
...
...
@@ -692,7 +638,7 @@ namespace cv { namespace gpu { namespace device
for (int j = 0; j < c_winSize_x; ++j)
{
int I = I_patch[(threadIdx.y + i) * patchWidth + threadIdx.x + j];
int J = tex2D(tex_J, nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f);
int J = tex2D(tex_J
f
, nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f);
errval += ::abs(J - I);
}
...
...
@@ -708,8 +654,8 @@ namespace cv { namespace gpu { namespace device
dim3 block(16, 16);
dim3 grid(divUp(I.cols, block.x), divUp(I.rows, block.y));
bindTexture(&tex_I, I);
bindTexture(&tex_J, J);
bindTexture(&tex_I
b
, I);
bindTexture(&tex_J
f
, J);
int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
const int patchWidth = block.x + 2 * halfWin.x;
...
...
modules/gpu/src/pyrlk.cpp
View file @
f392ab0e
...
...
@@ -48,8 +48,10 @@ using namespace cv::gpu;
#if !defined (HAVE_CUDA)
cv
::
gpu
::
PyrLKOpticalFlow
::
PyrLKOpticalFlow
()
{
throw_nogpu
();
}
void
cv
::
gpu
::
PyrLKOpticalFlow
::
sparse
(
const
GpuMat
&
,
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
GpuMat
*
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
PyrLKOpticalFlow
::
dense
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
GpuMat
*
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
PyrLKOpticalFlow
::
releaseMemory
()
{}
#else
/* !defined (HAVE_CUDA) */
...
...
@@ -57,13 +59,11 @@ namespace cv { namespace gpu { namespace device
{
namespace
pyrlk
{
void
loadConstants
(
int
cn
,
float
minEigThreshold
,
int
2
winSize
,
int
iters
);
void
loadConstants
(
int2
winSize
,
int
iters
);
void
calcSharrDeriv_gpu
(
DevMem2Db
src
,
DevMem2D_
<
short
>
dx_buf
,
DevMem2D_
<
short
>
dy_buf
,
DevMem2D_
<
short
>
dIdx
,
DevMem2D_
<
short
>
dIdy
,
int
cn
,
cudaStream_t
stream
=
0
);
void
lkSparse_gpu
(
DevMem2Db
I
,
DevMem2Db
J
,
DevMem2D_
<
short
>
dIdx
,
DevMem2D_
<
short
>
dIdy
,
const
float2
*
prevPts
,
float2
*
nextPts
,
uchar
*
status
,
float
*
err
,
bool
GET_MIN_EIGENVALS
,
int
ptcount
,
void
lkSparse1_gpu
(
DevMem2Df
I
,
DevMem2Df
J
,
const
float2
*
prevPts
,
float2
*
nextPts
,
uchar
*
status
,
float
*
err
,
int
ptcount
,
int
level
,
dim3
block
,
dim3
patch
,
cudaStream_t
stream
=
0
);
void
lkSparse4_gpu
(
DevMem2D_
<
float4
>
I
,
DevMem2D_
<
float4
>
J
,
const
float2
*
prevPts
,
float2
*
nextPts
,
uchar
*
status
,
float
*
err
,
int
ptcount
,
int
level
,
dim3
block
,
dim3
patch
,
cudaStream_t
stream
=
0
);
void
lkDense_gpu
(
DevMem2Db
I
,
DevMem2Df
J
,
DevMem2Df
u
,
DevMem2Df
v
,
DevMem2Df
prevU
,
DevMem2Df
prevV
,
...
...
@@ -71,65 +71,19 @@ namespace cv { namespace gpu { namespace device
}
}}}
void
cv
::
gpu
::
PyrLKOpticalFlow
::
calcSharrDeriv
(
const
GpuMat
&
src
,
GpuMat
&
dIdx
,
GpuMat
&
dIdy
)
cv
::
gpu
::
PyrLKOpticalFlow
::
PyrLKOpticalFlow
(
)
{
using
namespace
cv
::
gpu
::
device
::
pyrlk
;
CV_Assert
(
src
.
rows
>
1
&&
src
.
cols
>
1
);
CV_Assert
(
src
.
depth
()
==
CV_8U
);
const
int
cn
=
src
.
channels
();
ensureSizeIsEnough
(
src
.
size
(),
CV_MAKETYPE
(
CV_16S
,
cn
),
dx_calcBuf_
);
ensureSizeIsEnough
(
src
.
size
(),
CV_MAKETYPE
(
CV_16S
,
cn
),
dy_calcBuf_
);
calcSharrDeriv_gpu
(
src
,
dx_calcBuf_
,
dy_calcBuf_
,
dIdx
,
dIdy
,
cn
);
}
void
cv
::
gpu
::
PyrLKOpticalFlow
::
buildImagePyramid
(
const
GpuMat
&
img0
,
vector
<
GpuMat
>&
pyr
,
bool
withBorder
)
{
pyr
.
resize
(
maxLevel
+
1
);
Size
sz
=
img0
.
size
();
for
(
int
level
=
0
;
level
<=
maxLevel
;
++
level
)
{
GpuMat
temp
;
if
(
withBorder
)
{
temp
.
create
(
sz
.
height
+
winSize
.
height
*
2
,
sz
.
width
+
winSize
.
width
*
2
,
img0
.
type
());
pyr
[
level
]
=
temp
(
Rect
(
winSize
.
width
,
winSize
.
height
,
sz
.
width
,
sz
.
height
));
}
else
{
ensureSizeIsEnough
(
sz
,
img0
.
type
(),
pyr
[
level
]);
}
if
(
level
==
0
)
img0
.
copyTo
(
pyr
[
level
]);
else
pyrDown
(
pyr
[
level
-
1
],
pyr
[
level
]);
if
(
withBorder
)
copyMakeBorder
(
pyr
[
level
],
temp
,
winSize
.
height
,
winSize
.
height
,
winSize
.
width
,
winSize
.
width
,
BORDER_REFLECT_101
);
sz
=
Size
((
sz
.
width
+
1
)
/
2
,
(
sz
.
height
+
1
)
/
2
);
if
(
sz
.
width
<=
winSize
.
width
||
sz
.
height
<=
winSize
.
height
)
{
maxLevel
=
level
;
break
;
}
}
winSize
=
Size
(
21
,
21
);
maxLevel
=
3
;
iters
=
30
;
useInitialFlow
=
false
;
isDeviceArch11_
=
!
DeviceInfo
().
supports
(
FEATURE_SET_COMPUTE_12
);
}
namespace
{
void
calcPatchSize
(
cv
::
Size
winSize
,
int
cn
,
dim3
&
block
,
dim3
&
patch
,
bool
isDeviceArch11
)
void
calcPatchSize
(
cv
::
Size
winSize
,
dim3
&
block
,
dim3
&
patch
,
bool
isDeviceArch11
)
{
winSize
.
width
*=
cn
;
if
(
winSize
.
width
>
32
&&
winSize
.
width
>
2
*
winSize
.
height
)
{
block
.
x
=
isDeviceArch11
?
16
:
32
;
...
...
@@ -160,13 +114,13 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
return
;
}
const
int
cn
=
prevImg
.
channels
();
dim3
block
,
patch
;
calcPatchSize
(
winSize
,
cn
,
block
,
patch
,
isDeviceArch11_
);
calcPatchSize
(
winSize
,
block
,
patch
,
isDeviceArch11_
);
CV_Assert
(
maxLevel
>=
0
&&
winSize
.
width
>
2
&&
winSize
.
height
>
2
);
CV_Assert
(
prevImg
.
type
()
==
CV_8UC1
||
prevImg
.
type
()
==
CV_8UC3
||
prevImg
.
type
()
==
CV_8UC4
);
CV_Assert
(
prevImg
.
size
()
==
nextImg
.
size
()
&&
prevImg
.
type
()
==
nextImg
.
type
());
CV_Assert
(
maxLevel
>=
0
);
CV_Assert
(
winSize
.
width
>
2
&&
winSize
.
height
>
2
);
CV_Assert
(
patch
.
x
>
0
&&
patch
.
x
<
6
&&
patch
.
y
>
0
&&
patch
.
y
<
6
);
CV_Assert
(
prevPts
.
rows
==
1
&&
prevPts
.
type
()
==
CV_32FC2
);
...
...
@@ -186,35 +140,48 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
ensureSizeIsEnough
(
1
,
prevPts
.
cols
,
CV_32FC1
,
*
err
);
// build the image pyramids.
// we pad each level with +/-winSize.{width|height}
// pixels to simplify the further patch extraction.
buildImagePyramid
(
prevImg
,
prevPyr_
,
true
);
buildImagePyramid
(
nextImg
,
nextPyr_
,
true
);
prevPyr_
.
resize
(
maxLevel
+
1
);
nextPyr_
.
resize
(
maxLevel
+
1
);
// dI/dx ~ Ix, dI/dy ~ Iy
int
cn
=
prevImg
.
channels
();
ensureSizeIsEnough
(
prevImg
.
rows
+
winSize
.
height
*
2
,
prevImg
.
cols
+
winSize
.
width
*
2
,
CV_MAKETYPE
(
CV_16S
,
cn
),
dx_buf_
);
ensureSizeIsEnough
(
prevImg
.
rows
+
winSize
.
height
*
2
,
prevImg
.
cols
+
winSize
.
width
*
2
,
CV_MAKETYPE
(
CV_16S
,
cn
),
dy_buf_
);
if
(
cn
==
1
||
cn
==
4
)
{
prevImg
.
convertTo
(
prevPyr_
[
0
],
CV_32F
);
nextImg
.
convertTo
(
nextPyr_
[
0
],
CV_32F
);
}
else
{
cvtColor
(
prevImg
,
buf_
,
COLOR_BGR2BGRA
);
buf_
.
convertTo
(
prevPyr_
[
0
],
CV_32F
);
loadConstants
(
cn
,
minEigThreshold
,
make_int2
(
winSize
.
width
,
winSize
.
height
),
iters
);
cvtColor
(
nextImg
,
buf_
,
COLOR_BGR2BGRA
);
buf_
.
convertTo
(
nextPyr_
[
0
],
CV_32F
);
}
for
(
int
level
=
maxLevel
;
level
>=
0
;
level
--
)
for
(
int
level
=
1
;
level
<=
maxLevel
;
++
level
)
{
Size
imgSize
=
prevPyr_
[
level
].
size
();
GpuMat
dxWhole
(
imgSize
.
height
+
winSize
.
height
*
2
,
imgSize
.
width
+
winSize
.
width
*
2
,
dx_buf_
.
type
(),
dx_buf_
.
data
,
dx_buf_
.
step
);
GpuMat
dyWhole
(
imgSize
.
height
+
winSize
.
height
*
2
,
imgSize
.
width
+
winSize
.
width
*
2
,
dy_buf_
.
type
(),
dy_buf_
.
data
,
dy_buf_
.
step
);
dxWhole
.
setTo
(
Scalar
::
all
(
0
));
dyWhole
.
setTo
(
Scalar
::
all
(
0
));
GpuMat
dIdx
=
dxWhole
(
Rect
(
winSize
.
width
,
winSize
.
height
,
imgSize
.
width
,
imgSize
.
height
));
GpuMat
dIdy
=
dyWhole
(
Rect
(
winSize
.
width
,
winSize
.
height
,
imgSize
.
width
,
imgSize
.
height
));
pyrDown
(
prevPyr_
[
level
-
1
],
prevPyr_
[
level
]);
pyrDown
(
nextPyr_
[
level
-
1
],
nextPyr_
[
level
]);
}
calcSharrDeriv
(
prevPyr_
[
level
],
dIdx
,
dIdy
);
loadConstants
(
make_int2
(
winSize
.
width
,
winSize
.
height
),
iters
);
lkSparse_gpu
(
prevPyr_
[
level
],
nextPyr_
[
level
],
dIdx
,
dIdy
,
prevPts
.
ptr
<
float2
>
(),
nextPts
.
ptr
<
float2
>
(),
status
.
ptr
(),
level
==
0
&&
err
?
err
->
ptr
<
float
>
()
:
0
,
getMinEigenVals
,
prevPts
.
cols
,
level
,
block
,
patch
);
for
(
int
level
=
maxLevel
;
level
>=
0
;
level
--
)
{
if
(
cn
==
1
)
{
lkSparse1_gpu
(
prevPyr_
[
level
],
nextPyr_
[
level
],
prevPts
.
ptr
<
float2
>
(),
nextPts
.
ptr
<
float2
>
(),
status
.
ptr
(),
level
==
0
&&
err
?
err
->
ptr
<
float
>
()
:
0
,
prevPts
.
cols
,
level
,
block
,
patch
);
}
else
{
lkSparse4_gpu
(
prevPyr_
[
level
],
nextPyr_
[
level
],
prevPts
.
ptr
<
float2
>
(),
nextPts
.
ptr
<
float2
>
(),
status
.
ptr
(),
level
==
0
&&
err
?
err
->
ptr
<
float
>
()
:
0
,
prevPts
.
cols
,
level
,
block
,
patch
);
}
}
}
...
...
@@ -232,15 +199,17 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
// build the image pyramids.
buildImagePyramid
(
prevImg
,
prevPyr_
,
false
);
prevPyr_
.
resize
(
maxLevel
+
1
);
nextPyr_
.
resize
(
maxLevel
+
1
);
prevPyr_
[
0
]
=
prevImg
;
nextImg
.
convertTo
(
nextPyr_
[
0
],
CV_32F
);
for
(
int
level
=
1
;
level
<=
maxLevel
;
++
level
)
{
pyrDown
(
prevPyr_
[
level
-
1
],
prevPyr_
[
level
]);
pyrDown
(
nextPyr_
[
level
-
1
],
nextPyr_
[
level
]);
uPyr_
.
resize
(
2
);
vPyr_
.
resize
(
2
);
}
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
uPyr_
[
0
]);
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
vPyr_
[
0
]);
...
...
@@ -250,7 +219,7 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
vPyr_
[
1
].
setTo
(
Scalar
::
all
(
0
));
int2
winSize2i
=
make_int2
(
winSize
.
width
,
winSize
.
height
);
loadConstants
(
1
,
minEigThreshold
,
winSize2i
,
iters
);
loadConstants
(
winSize2i
,
iters
);
DevMem2Df
derr
=
err
?
*
err
:
DevMem2Df
();
...
...
@@ -271,4 +240,18 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
vPyr_
[
idx
].
copyTo
(
v
);
}
void
cv
::
gpu
::
PyrLKOpticalFlow
::
releaseMemory
()
{
prevPyr_
.
clear
();
nextPyr_
.
clear
();
buf_
.
release
();
uPyr_
[
0
].
release
();
vPyr_
[
0
].
release
();
uPyr_
[
1
].
release
();
vPyr_
[
1
].
release
();
}
#endif
/* !defined (HAVE_CUDA) */
modules/gpu/test/test_video.cpp
View file @
f392ab0e
...
...
@@ -249,8 +249,7 @@ TEST_P(PyrLKOpticalFlow, Sparse)
cv
::
gpu
::
GpuMat
d_nextPts
;
cv
::
gpu
::
GpuMat
d_status
;
cv
::
gpu
::
GpuMat
d_err
;
pyrLK
.
sparse
(
loadMat
(
frame0
),
loadMat
(
frame1
),
d_pts
,
d_nextPts
,
d_status
,
&
d_err
);
pyrLK
.
sparse
(
loadMat
(
frame0
),
loadMat
(
frame1
),
d_pts
,
d_nextPts
,
d_status
);
std
::
vector
<
cv
::
Point2f
>
nextPts
(
d_nextPts
.
cols
);
cv
::
Mat
nextPts_mat
(
1
,
d_nextPts
.
cols
,
CV_32FC2
,
(
void
*
)
&
nextPts
[
0
]);
...
...
@@ -260,22 +259,19 @@ TEST_P(PyrLKOpticalFlow, Sparse)
cv
::
Mat
status_mat
(
1
,
d_status
.
cols
,
CV_8UC1
,
(
void
*
)
&
status
[
0
]);
d_status
.
download
(
status_mat
);
std
::
vector
<
float
>
err
(
d_err
.
cols
);
cv
::
Mat
err_mat
(
1
,
d_err
.
cols
,
CV_32FC1
,
(
void
*
)
&
err
[
0
]);
d_err
.
download
(
err_mat
);
std
::
vector
<
cv
::
Point2f
>
nextPts_gold
;
std
::
vector
<
unsigned
char
>
status_gold
;
std
::
vector
<
float
>
err_gold
;
cv
::
calcOpticalFlowPyrLK
(
frame0
,
frame1
,
pts
,
nextPts_gold
,
status_gold
,
err_gold
);
cv
::
calcOpticalFlowPyrLK
(
frame0
,
frame1
,
pts
,
nextPts_gold
,
status_gold
,
cv
::
noArray
());
ASSERT_EQ
(
nextPts_gold
.
size
(),
nextPts
.
size
());
ASSERT_EQ
(
status_gold
.
size
(),
status
.
size
());
ASSERT_EQ
(
err_gold
.
size
(),
err
.
size
());
size_t
mistmatch
=
0
;
for
(
size_t
i
=
0
;
i
<
nextPts
.
size
();
++
i
)
{
cv
::
Point2i
a
=
nextPts
[
i
];
cv
::
Point2i
b
=
nextPts_gold
[
i
];
if
(
status
[
i
]
!=
status_gold
[
i
])
{
++
mistmatch
;
...
...
@@ -284,13 +280,9 @@ TEST_P(PyrLKOpticalFlow, Sparse)
if
(
status
[
i
])
{
cv
::
Point2i
a
=
nextPts
[
i
];
cv
::
Point2i
b
=
nextPts_gold
[
i
];
bool
eq
=
std
::
abs
(
a
.
x
-
b
.
x
)
<=
1
&&
std
::
abs
(
a
.
y
-
b
.
y
)
<=
1
;
bool
eq
=
std
::
abs
(
a
.
x
-
b
.
x
)
<
1
&&
std
::
abs
(
a
.
y
-
b
.
y
)
<
1
;
float
errdiff
=
std
::
abs
(
err
[
i
]
-
err_gold
[
i
]);
if
(
!
eq
||
errdiff
>
1e-1
)
if
(
!
eq
)
++
mistmatch
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment