opencv / Commits

Commit f392ab0e
authored Jun 18, 2012 by Vladislav Vinogradov

new implementation of gpu::PyrLKOpticalFlow::sparse (1.5 - 2x faster)

parent 33d9e235

Showing 7 changed files with 277 additions and 373 deletions
modules/gpu/doc/video.rst                +0    -3
modules/gpu/include/opencv2/gpu/gpu.hpp  +6    -38
modules/gpu/perf/perf_video.cpp          +12   -7
modules/gpu/perf_cpu/perf_video.cpp      +17   -4
modules/gpu/src/cuda/pyrlk.cu            +163  -217
modules/gpu/src/pyrlk.cpp                +72   -89
modules/gpu/test/test_video.cpp          +7    -15
modules/gpu/doc/video.rst
@@ -204,10 +204,7 @@ Class used for calculating an optical flow. ::

         Size winSize;
         int maxLevel;
         int iters;
-        double derivLambda;
         bool useInitialFlow;
-        float minEigThreshold;
-        bool getMinEigenVals;

         void releaseMemory();
     };
modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -1749,60 +1749,28 @@ inline GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU(int maxC
 class CV_EXPORTS PyrLKOpticalFlow
 {
 public:
-    PyrLKOpticalFlow()
-    {
-        winSize = Size(21, 21);
-        maxLevel = 3;
-        iters = 30;
-        useInitialFlow = false;
-        minEigThreshold = 1e-4f;
-        getMinEigenVals = false;
-
-        isDeviceArch11_ = !DeviceInfo().supports(FEATURE_SET_COMPUTE_12);
-    }
+    PyrLKOpticalFlow();

     void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
         GpuMat& status, GpuMat* err = 0);

     void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);

+    void releaseMemory();
+
     Size winSize;
     int maxLevel;
     int iters;
     bool useInitialFlow;
-    float minEigThreshold;
-    bool getMinEigenVals;
-
-    void releaseMemory()
-    {
-        dx_calcBuf_.release();
-        dy_calcBuf_.release();
-
-        prevPyr_.clear();
-        nextPyr_.clear();
-
-        dx_buf_.release();
-        dy_buf_.release();
-
-        uPyr_.clear();
-        vPyr_.clear();
-    }

 private:
-    void calcSharrDeriv(const GpuMat& src, GpuMat& dx, GpuMat& dy);
-
-    void buildImagePyramid(const GpuMat& img0, vector<GpuMat>& pyr, bool withBorder);
-
-    GpuMat dx_calcBuf_;
-    GpuMat dy_calcBuf_;
-
     vector<GpuMat> prevPyr_;
     vector<GpuMat> nextPyr_;

-    GpuMat dx_buf_;
-    GpuMat dy_buf_;
+    GpuMat buf_;

-    vector<GpuMat> uPyr_;
-    vector<GpuMat> vPyr_;
+    GpuMat uPyr_[2];
+    GpuMat vPyr_[2];

     bool isDeviceArch11_;
 };
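For orientation, a minimal usage sketch of the class as it is declared after this change follows. It is not part of the commit; the image file names, the GoodFeaturesToTrackDetector_GPU settings and the surrounding scaffolding are illustrative assumptions, and error handling is omitted.

// Minimal sketch of calling the reworked sparse() interface (OpenCV 2.4-era gpu module).
// File names and detector parameters are illustrative only.
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::Mat frame0 = cv::imread("frame0.png", cv::IMREAD_GRAYSCALE);
    cv::Mat frame1 = cv::imread("frame1.png", cv::IMREAD_GRAYSCALE);

    cv::gpu::GpuMat d_frame0(frame0), d_frame1(frame1);

    // Pick points to track on the first frame.
    cv::gpu::GoodFeaturesToTrackDetector_GPU detector(4000, 0.01, 0.0);
    cv::gpu::GpuMat d_prevPts;
    detector(d_frame0, d_prevPts);

    // Configure the tracker through the fields that remain after this commit.
    cv::gpu::PyrLKOpticalFlow pyrLK;
    pyrLK.winSize  = cv::Size(21, 21);
    pyrLK.maxLevel = 3;
    pyrLK.iters    = 30;

    cv::gpu::GpuMat d_nextPts, d_status, d_err;
    pyrLK.sparse(d_frame0, d_frame1, d_prevPts, d_nextPts, d_status, &d_err);

    return 0;
}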
modules/gpu/perf/perf_video.cpp
@@ -156,15 +156,19 @@ INSTANTIATE_TEST_CASE_P(Video, GoodFeaturesToTrack, testing::Combine(

 IMPLEMENT_PARAM_CLASS(GraySource, bool)
 IMPLEMENT_PARAM_CLASS(Points, int)
 IMPLEMENT_PARAM_CLASS(WinSize, int)
+IMPLEMENT_PARAM_CLASS(Levels, int)
+IMPLEMENT_PARAM_CLASS(Iters, int)

-GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, GraySource, Points, WinSize)
+GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, GraySource, Points, WinSize, Levels, Iters)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());

     bool useGray = GET_PARAM(1);
     int points = GET_PARAM(2);
-    int win_size = GET_PARAM(3);
+    int winSize = GET_PARAM(3);
+    int levels = GET_PARAM(4);
+    int iters = GET_PARAM(5);

     cv::Mat frame0_host = readImage("gpu/opticalflow/frame0.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
     ASSERT_FALSE(frame0_host.empty());

@@ -184,7 +188,9 @@ GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, GraySource, Points, W
     detector(cv::gpu::GpuMat(gray_frame), pts);

     cv::gpu::PyrLKOpticalFlow pyrLK;
-    pyrLK.winSize = cv::Size(win_size, win_size);
+    pyrLK.winSize = cv::Size(winSize, winSize);
+    pyrLK.maxLevel = levels - 1;
+    pyrLK.iters = iters;

     cv::gpu::GpuMat frame0(frame0_host);
     cv::gpu::GpuMat frame1(frame1_host);

@@ -203,14 +209,13 @@ INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine(
     ALL_DEVICES,
     testing::Values(GraySource(true), GraySource(false)),
     testing::Values(Points(1000), Points(2000), Points(4000), Points(8000)),
-    testing::Values(WinSize(17), WinSize(21))));
+    testing::Values(WinSize(9), WinSize(13), WinSize(17), WinSize(21)),
+    testing::Values(Levels(1), Levels(2), Levels(3)),
+    testing::Values(Iters(1), Iters(10), Iters(30))));

 //////////////////////////////////////////////////////
 // PyrLKOpticalFlowDense

-IMPLEMENT_PARAM_CLASS(Levels, int)
-IMPLEMENT_PARAM_CLASS(Iters, int)
-
 GPU_PERF_TEST(PyrLKOpticalFlowDense, cv::gpu::DeviceInfo, WinSize, Levels, Iters)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
modules/gpu/perf_cpu/perf_video.cpp
@@ -34,12 +34,16 @@ INSTANTIATE_TEST_CASE_P(Video, GoodFeaturesToTrack, testing::Combine(

 IMPLEMENT_PARAM_CLASS(GraySource, bool)
 IMPLEMENT_PARAM_CLASS(Points, int)
 IMPLEMENT_PARAM_CLASS(WinSize, int)
+IMPLEMENT_PARAM_CLASS(Levels, int)
+IMPLEMENT_PARAM_CLASS(Iters, int)

-GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, GraySource, Points, WinSize)
+GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, GraySource, Points, WinSize, Levels, Iters)
 {
     bool useGray = GET_PARAM(1);
     int points = GET_PARAM(2);
     int win_size = GET_PARAM(3);
+    int levels = GET_PARAM(4);
+    int iters = GET_PARAM(5);

     cv::Mat frame0 = readImage("gpu/opticalflow/frame0.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
     ASSERT_FALSE(frame0.empty());

@@ -59,11 +63,17 @@ GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, GraySource, Points, W
     cv::Mat nextPts;
     cv::Mat status;

-    cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(), cv::Size(win_size, win_size));
+    cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(), cv::Size(win_size, win_size), levels - 1,
+                             cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, iters, 0.01));
+
+    declare.time(20.0);

     TEST_CYCLE()
     {
-        cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(), cv::Size(win_size, win_size));
+        cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(), cv::Size(win_size, win_size), levels - 1,
+                                 cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, iters, 0.01));
     }
 }

@@ -71,7 +81,9 @@ INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine(
     ALL_DEVICES,
     testing::Values(GraySource(true), GraySource(false)),
     testing::Values(Points(1000), Points(2000), Points(4000), Points(8000)),
-    testing::Values(WinSize(17), WinSize(21))));
+    testing::Values(WinSize(9), WinSize(13), WinSize(17), WinSize(21)),
+    testing::Values(Levels(1), Levels(2), Levels(3)),
+    testing::Values(Iters(1), Iters(10), Iters(30))));

 //////////////////////////////////////////////////////
 // FarnebackOpticalFlowTest

@@ -100,6 +112,7 @@ GPU_PERF_TEST_1(FarnebackOpticalFlowTest, cv::gpu::DeviceInfo)
     TEST_CYCLE()
     {
+        cv::calcOpticalFlowFarneback(frame0, frame1, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
     }
 }
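For reference, the CPU baseline that the updated perf test times can be reproduced in isolation as below. This is a sketch, not code from the commit: the wrapper name runCpuBaseline and its parameters are assumptions, while the calcOpticalFlowPyrLK call itself mirrors the one added in the diff above (explicit pyramid depth and an iteration-count termination criterion).

// Sketch of the CPU baseline exercised by the updated perf test.
#include <opencv2/core/core.hpp>
#include <opencv2/video/tracking.hpp>
#include <vector>

void runCpuBaseline(const cv::Mat& frame0, const cv::Mat& frame1,
                    const std::vector<cv::Point2f>& pts,
                    int win_size, int levels, int iters)
{
    std::vector<cv::Point2f> nextPts;
    std::vector<unsigned char> status;

    // Same call shape as the new perf test: window size, maxLevel = levels - 1,
    // and COUNT+EPS termination after `iters` iterations.
    cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(),
                             cv::Size(win_size, win_size), levels - 1,
                             cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, iters, 0.01));
}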
modules/gpu/src/cuda/pyrlk.cu
@@ -49,129 +49,30 @@
 #include "opencv2/gpu/device/utility.hpp"
 #include "opencv2/gpu/device/functional.hpp"
 #include "opencv2/gpu/device/limits.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"

 namespace cv { namespace gpu { namespace device
 {
     namespace pyrlk
     {
-        __constant__ int c_cn;
-        __constant__ float c_minEigThreshold;
         __constant__ int c_winSize_x;
         __constant__ int c_winSize_y;
-        __constant__ int c_winSize_x_cn;

         __constant__ int c_halfWin_x;
         __constant__ int c_halfWin_y;

         __constant__ int c_iters;

-        void loadConstants(int cn, float minEigThreshold, int2 winSize, int iters)
+        void loadConstants(int2 winSize, int iters)
         {
-            cudaSafeCall( cudaMemcpyToSymbol(c_cn, &cn, sizeof(int)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_minEigThreshold, &minEigThreshold, sizeof(float)) );
+            int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
+
             cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
             cudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) );

-            winSize.x *= cn;
-            cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x_cn, &winSize.x, sizeof(int)) );
-
-            int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
             cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) );
             cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) );

             cudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) );
         }
-
-        __global__ void calcSharrDeriv_vertical(const PtrStepb src, PtrStep<short> dx_buf, PtrStep<short> dy_buf, int rows, int colsn)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (y < rows && x < colsn)
-            {
-                const uchar src_val0 = src(y > 0 ? y - 1 : 1, x);
-                const uchar src_val1 = src(y, x);
-                const uchar src_val2 = src(y < rows - 1 ? y + 1 : rows - 2, x);
-
-                dx_buf(y, x) = (src_val0 + src_val2) * 3 + src_val1 * 10;
-                dy_buf(y, x) = src_val2 - src_val0;
-            }
-        }
-
-        __global__ void calcSharrDeriv_horizontal(const PtrStep<short> dx_buf, const PtrStep<short> dy_buf, PtrStep<short> dIdx, PtrStep<short> dIdy, int rows, int cols)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            const int colsn = cols * c_cn;
-
-            if (y < rows && x < colsn)
-            {
-                const short* dx_buf_row = dx_buf.ptr(y);
-                const short* dy_buf_row = dy_buf.ptr(y);
-
-                const int xr = x + c_cn < colsn ? x + c_cn : (cols - 2) * c_cn + x + c_cn - colsn;
-                const int xl = x - c_cn >= 0 ? x - c_cn : c_cn + x;
-
-                dIdx(y, x) = dx_buf_row[xr] - dx_buf_row[xl];
-                dIdy(y, x) = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10;
-            }
-        }
-
-        void calcSharrDeriv_gpu(DevMem2Db src, DevMem2D_<short> dx_buf, DevMem2D_<short> dy_buf, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy, int cn,
-            cudaStream_t stream)
-        {
-            dim3 block(32, 8);
-            dim3 grid(divUp(src.cols * cn, block.x), divUp(src.rows, block.y));
-
-            calcSharrDeriv_vertical<<<grid, block, 0, stream>>>(src, dx_buf, dy_buf, src.rows, src.cols * cn);
-            cudaSafeCall( cudaGetLastError() );
-
-            calcSharrDeriv_horizontal<<<grid, block, 0, stream>>>(dx_buf, dy_buf, dIdx, dIdy, src.rows, src.cols);
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        #define W_BITS 14
-        #define W_BITS1 14
-
-        #define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
-
-        __device__ int linearFilter(const PtrStepb& src, float2 pt, int x, int y)
-        {
-            int2 ipt;
-            ipt.x = __float2int_rd(pt.x);
-            ipt.y = __float2int_rd(pt.y);
-
-            float a = pt.x - ipt.x;
-            float b = pt.y - ipt.y;
-
-            int iw00 = __float2int_rn((1.0f - a) * (1.0f - b) * (1 << W_BITS));
-            int iw01 = __float2int_rn(a * (1.0f - b) * (1 << W_BITS));
-            int iw10 = __float2int_rn((1.0f - a) * b * (1 << W_BITS));
-            int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
-
-            const uchar* src_row = src.ptr(ipt.y + y) + ipt.x * c_cn;
-            const uchar* src_row1 = src.ptr(ipt.y + y + 1) + ipt.x * c_cn;
-
-            return CV_DESCALE(src_row[x] * iw00 + src_row[x + c_cn] * iw01 + src_row1[x] * iw10 + src_row1[x + c_cn] * iw11, W_BITS1 - 5);
-        }
-
-        __device__ int linearFilter(const PtrStep<short>& src, float2 pt, int x, int y)
-        {
-            int2 ipt;
-            ipt.x = __float2int_rd(pt.x);
-            ipt.y = __float2int_rd(pt.y);
-
-            float a = pt.x - ipt.x;
-            float b = pt.y - ipt.y;
-
-            int iw00 = __float2int_rn((1.0f - a) * (1.0f - b) * (1 << W_BITS));
-            int iw01 = __float2int_rn(a * (1.0f - b) * (1 << W_BITS));
-            int iw10 = __float2int_rn((1.0f - a) * b * (1 << W_BITS));
-            int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
-
-            const short* src_row = src.ptr(ipt.y + y) + ipt.x * c_cn;
-            const short* src_row1 = src.ptr(ipt.y + y + 1) + ipt.x * c_cn;
-
-            return CV_DESCALE(src_row[x] * iw00 + src_row[x + c_cn] * iw01 + src_row1[x] * iw10 + src_row1[x + c_cn] * iw11, W_BITS1);
-        }

         __device__ void reduce(float& val1, float& val2, float& val3, float* smem1, float* smem2, float* smem3, int tid)

@@ -310,11 +211,65 @@ namespace cv { namespace gpu { namespace device
             }
         }

-        #define SCALE (1.0f / (1 << 20))
-
-        template <int PATCH_X, int PATCH_Y, bool calcErr, bool GET_MIN_EIGENVALS>
-        __global__ void lkSparse(const PtrStepb I, const PtrStepb J, const PtrStep<short> dIdx, const PtrStep<short> dIdy,
-            const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
+        texture<float, cudaTextureType2D, cudaReadModeElementType> tex_If(false, cudaFilterModeLinear, cudaAddressModeClamp);
+        texture<float4, cudaTextureType2D, cudaReadModeElementType> tex_If4(false, cudaFilterModeLinear, cudaAddressModeClamp);
+        texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_Ib(false, cudaFilterModePoint, cudaAddressModeClamp);
+
+        texture<float, cudaTextureType2D, cudaReadModeElementType> tex_Jf(false, cudaFilterModeLinear, cudaAddressModeClamp);
+        texture<float4, cudaTextureType2D, cudaReadModeElementType> tex_Jf4(false, cudaFilterModeLinear, cudaAddressModeClamp);
+
+        template <int cn> struct Tex_I;
+        template <> struct Tex_I<1>
+        {
+            static __device__ __forceinline__ float read(float x, float y)
+            {
+                return tex2D(tex_If, x, y);
+            }
+        };
+        template <> struct Tex_I<4>
+        {
+            static __device__ __forceinline__ float4 read(float x, float y)
+            {
+                return tex2D(tex_If4, x, y);
+            }
+        };
+
+        template <int cn> struct Tex_J;
+        template <> struct Tex_J<1>
+        {
+            static __device__ __forceinline__ float read(float x, float y)
+            {
+                return tex2D(tex_Jf, x, y);
+            }
+        };
+        template <> struct Tex_J<4>
+        {
+            static __device__ __forceinline__ float4 read(float x, float y)
+            {
+                return tex2D(tex_Jf4, x, y);
+            }
+        };
+
+        __device__ __forceinline__ void accum(float& dst, float val)
+        {
+            dst += val;
+        }
+        __device__ __forceinline__ void accum(float& dst, const float4& val)
+        {
+            dst += val.x + val.y + val.z;
+        }
+
+        __device__ __forceinline__ float abs_(float a)
+        {
+            return ::fabs(a);
+        }
+        __device__ __forceinline__ float4 abs_(const float4& a)
+        {
+            return fabs(a);
+        }
+
+        template <int cn, int PATCH_X, int PATCH_Y, bool calcErr>
+        __global__ void lkSparse(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
         {
 #if __CUDA_ARCH__ <= 110
             __shared__ float smem1[128];

@@ -332,47 +287,52 @@ namespace cv { namespace gpu { namespace device
             prevPt.x *= (1.0f / (1 << level));
             prevPt.y *= (1.0f / (1 << level));

-            prevPt.x -= c_halfWin_x;
-            prevPt.y -= c_halfWin_y;
-
-            if (prevPt.x < -c_winSize_x || prevPt.x >= cols || prevPt.y < -c_winSize_y || prevPt.y >= rows)
+            if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
             {
-                if (level == 0 && tid == 0)
+                if (tid == 0 && level == 0)
+                {
                     status[blockIdx.x] = 0;

+                    if (calcErr)
+                        err[blockIdx.x] = 0;
+                }
+
                 return;
             }

+            prevPt.x -= c_halfWin_x;
+            prevPt.y -= c_halfWin_y;
+
             // extract the patch from the first image, compute covariation matrix of derivatives

             float A11 = 0;
             float A12 = 0;
             float A22 = 0;

-            int I_patch[PATCH_Y][PATCH_X];
-            int dIdx_patch[PATCH_Y][PATCH_X];
-            int dIdy_patch[PATCH_Y][PATCH_X];
+            typedef typename TypeVec<float, cn>::vec_type work_type;
+
+            work_type I_patch   [PATCH_Y][PATCH_X];
+            work_type dIdx_patch[PATCH_Y][PATCH_X];
+            work_type dIdy_patch[PATCH_Y][PATCH_X];

-            for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
+            for (int yBase = threadIdx.y, i = 0; yBase < c_winSize_y; yBase += blockDim.y, ++i)
             {
-                for (int x = threadIdx.x, j = 0; x < c_winSize_x_cn; x += blockDim.x, ++j)
+                for (int xBase = threadIdx.x, j = 0; xBase < c_winSize_x; xBase += blockDim.x, ++j)
                 {
-                    I_patch[i][j] = linearFilter(I, prevPt, x, y);
-
-                    int ixval = linearFilter(dIdx, prevPt, x, y);
-                    int iyval = linearFilter(dIdy, prevPt, x, y);
-
-                    dIdx_patch[i][j] = ixval;
-                    dIdy_patch[i][j] = iyval;
-
-                    A11 += ixval * ixval;
-                    A12 += ixval * iyval;
-                    A22 += iyval * iyval;
+                    float x = prevPt.x + xBase + 0.5f;
+                    float y = prevPt.y + yBase + 0.5f;
+
+                    I_patch[i][j] = Tex_I<cn>::read(x, y);
+
+                    // Sharr Deriv
+
+                    work_type dIdx = 3.0f * Tex_I<cn>::read(x+1, y-1) + 10.0f * Tex_I<cn>::read(x+1, y) + 3.0f * Tex_I<cn>::read(x+1, y+1) -
+                                    (3.0f * Tex_I<cn>::read(x-1, y-1) + 10.0f * Tex_I<cn>::read(x-1, y) + 3.0f * Tex_I<cn>::read(x-1, y+1));
+
+                    work_type dIdy = 3.0f * Tex_I<cn>::read(x-1, y+1) + 10.0f * Tex_I<cn>::read(x, y+1) + 3.0f * Tex_I<cn>::read(x+1, y+1) -
+                                    (3.0f * Tex_I<cn>::read(x-1, y-1) + 10.0f * Tex_I<cn>::read(x, y-1) + 3.0f * Tex_I<cn>::read(x+1, y-1));
+
+                    dIdx_patch[i][j] = dIdx;
+                    dIdy_patch[i][j] = dIdy;
+
+                    accum(A11, dIdx * dIdx);
+                    accum(A12, dIdx * dIdy);
+                    accum(A22, dIdy * dIdy);
                 }
             }

@@ -383,20 +343,11 @@ namespace cv { namespace gpu { namespace device
             A12 = smem2[0];
             A22 = smem3[0];

-            A11 *= SCALE;
-            A12 *= SCALE;
-            A22 *= SCALE;
-
-            {
             float D = A11 * A22 - A12 * A12;
-            float minEig = (A22 + A11 - ::sqrtf((A11 - A22) * (A11 - A22) + 4.f * A12 * A12)) / (2 * c_winSize_x * c_winSize_y);
-
-            if (calcErr && GET_MIN_EIGENVALS && tid == 0)
-                err[blockIdx.x] = minEig;

-            if (minEig < c_minEigThreshold || D < numeric_limits<float>::epsilon())
+            if (D < numeric_limits<float>::epsilon())
             {
-                if (level == 0 && tid == 0)
+                if (tid == 0 && level == 0)
                     status[blockIdx.x] = 0;

                 return;

@@ -407,7 +358,6 @@ namespace cv { namespace gpu { namespace device
             A11 *= D;
             A12 *= D;
             A22 *= D;
-            }

             float2 nextPt = nextPts[blockIdx.x];
             nextPt.x *= 2.f;

@@ -416,14 +366,14 @@ namespace cv { namespace gpu { namespace device
             nextPt.x -= c_halfWin_x;
             nextPt.y -= c_halfWin_y;

-            bool status_ = true;
-
             for (int k = 0; k < c_iters; ++k)
             {
-                if (nextPt.x < -c_winSize_x || nextPt.x >= cols || nextPt.y < -c_winSize_y || nextPt.y >= rows)
+                if (nextPt.x < -c_halfWin_x || nextPt.x >= cols || nextPt.y < -c_halfWin_y || nextPt.y >= rows)
                 {
-                    status_ = false;
-                    break;
+                    if (tid == 0 && level == 0)
+                        status[blockIdx.x] = 0;
+
+                    return;
                 }

                 float b1 = 0;

@@ -431,12 +381,15 @@ namespace cv { namespace gpu { namespace device
                 for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
                 {
-                    for (int x = threadIdx.x, j = 0; x < c_winSize_x_cn; x += blockDim.x, ++j)
+                    for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j)
                     {
-                        int diff = linearFilter(J, nextPt, x, y) - I_patch[i][j];
-
-                        b1 += diff * dIdx_patch[i][j];
-                        b2 += diff * dIdy_patch[i][j];
+                        work_type I_val = I_patch[i][j];
+                        work_type J_val = Tex_J<cn>::read(nextPt.x + x + 0.5f, nextPt.y + y + 0.5f);
+
+                        work_type diff = (J_val - I_val) * 32.0f;
+
+                        accum(b1, diff * dIdx_patch[i][j]);
+                        accum(b2, diff * dIdy_patch[i][j]);
                     }
                 }

@@ -446,9 +399,6 @@ namespace cv { namespace gpu { namespace device
                 b1 = smem1[0];
                 b2 = smem2[0];

-                b1 *= SCALE;
-                b2 *= SCALE;
-
                 float2 delta;
                 delta.x = A12 * b2 - A22 * b1;
                 delta.y = A12 * b1 - A11 * b2;

@@ -460,24 +410,23 @@ namespace cv { namespace gpu { namespace device
                     break;
             }

-            if (nextPt.x < -c_winSize_x || nextPt.x >= cols || nextPt.y < -c_winSize_y || nextPt.y >= rows)
-                status_ = false;
-
-            float errval = 0.f;
-            if (calcErr && !GET_MIN_EIGENVALS && status_)
+            float errval = 0;
+            if (calcErr)
             {
                 for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
                 {
-                    for (int x = threadIdx.x, j = 0; x < c_winSize_x_cn; x += blockDim.x, ++j)
+                    for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j)
                     {
-                        int diff = linearFilter(J, nextPt, x, y) - I_patch[i][j];
-
-                        errval += ::fabsf((float)diff);
+                        work_type I_val = I_patch[i][j];
+                        work_type J_val = Tex_J<cn>::read(nextPt.x + x + 0.5f, nextPt.y + y + 0.5f);
+
+                        work_type diff = J_val - I_val;
+
+                        accum(errval, abs_(diff));
                     }
                 }

                 reduce(errval, smem1, tid);
-
-                errval /= 32 * c_winSize_x_cn * c_winSize_y;
             }

             if (tid == 0)

@@ -485,45 +434,23 @@ namespace cv { namespace gpu { namespace device
                 nextPt.x += c_halfWin_x;
                 nextPt.y += c_halfWin_y;

-                status[blockIdx.x] = status_;
                 nextPts[blockIdx.x] = nextPt;

-                if (calcErr && !GET_MIN_EIGENVALS)
-                    err[blockIdx.x] = errval;
+                if (calcErr)
+                    err[blockIdx.x] = static_cast<float>(errval) / (cn * c_winSize_x * c_winSize_y);
             }
         }

-        template <int PATCH_X, int PATCH_Y>
-        void lkSparse_caller(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
-            const float2* prevPts, float2* nextPts, uchar* status, float* err, bool GET_MIN_EIGENVALS, int ptcount,
+        template <int cn, int PATCH_X, int PATCH_Y>
+        void lkSparse_caller(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
             int level, dim3 block, cudaStream_t stream)
         {
             dim3 grid(ptcount);

             if (level == 0 && err)
-            {
-                if (GET_MIN_EIGENVALS)
-                {
-                    cudaSafeCall( cudaFuncSetCacheConfig(lkSparse<PATCH_X, PATCH_Y, true, true>, cudaFuncCachePreferL1) );
-
-                    lkSparse<PATCH_X, PATCH_Y, true, true><<<grid, block>>>(I, J, dIdx, dIdy,
-                        prevPts, nextPts, status, err, level, I.rows, I.cols);
-                }
-                else
-                {
-                    cudaSafeCall( cudaFuncSetCacheConfig(lkSparse<PATCH_X, PATCH_Y, true, false>, cudaFuncCachePreferL1) );
-
-                    lkSparse<PATCH_X, PATCH_Y, true, false><<<grid, block>>>(I, J, dIdx, dIdy,
-                        prevPts, nextPts, status, err, level, I.rows, I.cols);
-                }
-            }
+                lkSparse<cn, PATCH_X, PATCH_Y, true><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
             else
-            {
-                cudaSafeCall( cudaFuncSetCacheConfig(lkSparse<PATCH_X, PATCH_Y, false, false>, cudaFuncCachePreferL1) );
-
-                lkSparse<PATCH_X, PATCH_Y, false, false><<<grid, block>>>(I, J, dIdx, dIdy,
-                    prevPts, nextPts, status, err, level, I.rows, I.cols);
-            }
+                lkSparse<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);

             cudaSafeCall( cudaGetLastError() );

@@ -531,30 +458,49 @@ namespace cv { namespace gpu { namespace device
                 cudaSafeCall( cudaDeviceSynchronize() );
         }

-        void lkSparse_gpu(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
-            const float2* prevPts, float2* nextPts, uchar* status, float* err, bool GET_MIN_EIGENVALS, int ptcount,
+        void lkSparse1_gpu(DevMem2Df I, DevMem2Df J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
             int level, dim3 block, dim3 patch, cudaStream_t stream)
         {
-            typedef void (*func_t)(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
-                const float2* prevPts, float2* nextPts, uchar* status, float* err, bool GET_MIN_EIGENVALS, int ptcount,
+            typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
                 int level, dim3 block, cudaStream_t stream);

             static const func_t funcs[5][5] =
             {
-                {lkSparse_caller<1, 1>, lkSparse_caller<2, 1>, lkSparse_caller<3, 1>, lkSparse_caller<4, 1>, lkSparse_caller<5, 1>},
-                {lkSparse_caller<1, 2>, lkSparse_caller<2, 2>, lkSparse_caller<3, 2>, lkSparse_caller<4, 2>, lkSparse_caller<5, 2>},
-                {lkSparse_caller<1, 3>, lkSparse_caller<2, 3>, lkSparse_caller<3, 3>, lkSparse_caller<4, 3>, lkSparse_caller<5, 3>},
-                {lkSparse_caller<1, 4>, lkSparse_caller<2, 4>, lkSparse_caller<3, 4>, lkSparse_caller<4, 4>, lkSparse_caller<5, 4>},
-                {lkSparse_caller<1, 5>, lkSparse_caller<2, 5>, lkSparse_caller<3, 5>, lkSparse_caller<4, 5>, lkSparse_caller<5, 5>}
+                {lkSparse_caller<1, 1, 1>, lkSparse_caller<1, 2, 1>, lkSparse_caller<1, 3, 1>, lkSparse_caller<1, 4, 1>, lkSparse_caller<1, 5, 1>},
+                {lkSparse_caller<1, 1, 2>, lkSparse_caller<1, 2, 2>, lkSparse_caller<1, 3, 2>, lkSparse_caller<1, 4, 2>, lkSparse_caller<1, 5, 2>},
+                {lkSparse_caller<1, 1, 3>, lkSparse_caller<1, 2, 3>, lkSparse_caller<1, 3, 3>, lkSparse_caller<1, 4, 3>, lkSparse_caller<1, 5, 3>},
+                {lkSparse_caller<1, 1, 4>, lkSparse_caller<1, 2, 4>, lkSparse_caller<1, 3, 4>, lkSparse_caller<1, 4, 4>, lkSparse_caller<1, 5, 4>},
+                {lkSparse_caller<1, 1, 5>, lkSparse_caller<1, 2, 5>, lkSparse_caller<1, 3, 5>, lkSparse_caller<1, 4, 5>, lkSparse_caller<1, 5, 5>}
             };

-            funcs[patch.y - 1][patch.x - 1](I, J, dIdx, dIdy,
-                prevPts, nextPts, status, err, GET_MIN_EIGENVALS, ptcount,
+            bindTexture(&tex_If, I);
+            bindTexture(&tex_Jf, J);
+
+            funcs[patch.y - 1][patch.x - 1](I.rows, I.cols, prevPts, nextPts, status, err, ptcount,
                 level, block, stream);
         }

-        texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_I(false, cudaFilterModePoint, cudaAddressModeClamp);
-        texture<float, cudaTextureType2D, cudaReadModeElementType> tex_J(false, cudaFilterModeLinear, cudaAddressModeClamp);
+        void lkSparse4_gpu(DevMem2D_<float4> I, DevMem2D_<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+            int level, dim3 block, dim3 patch, cudaStream_t stream)
+        {
+            typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+                int level, dim3 block, cudaStream_t stream);
+
+            static const func_t funcs[5][5] =
+            {
+                {lkSparse_caller<4, 1, 1>, lkSparse_caller<4, 2, 1>, lkSparse_caller<4, 3, 1>, lkSparse_caller<4, 4, 1>, lkSparse_caller<4, 5, 1>},
+                {lkSparse_caller<4, 1, 2>, lkSparse_caller<4, 2, 2>, lkSparse_caller<4, 3, 2>, lkSparse_caller<4, 4, 2>, lkSparse_caller<4, 5, 2>},
+                {lkSparse_caller<4, 1, 3>, lkSparse_caller<4, 2, 3>, lkSparse_caller<4, 3, 3>, lkSparse_caller<4, 4, 3>, lkSparse_caller<4, 5, 3>},
+                {lkSparse_caller<4, 1, 4>, lkSparse_caller<4, 2, 4>, lkSparse_caller<4, 3, 4>, lkSparse_caller<4, 4, 4>, lkSparse_caller<4, 5, 4>},
+                {lkSparse_caller<4, 1, 5>, lkSparse_caller<4, 2, 5>, lkSparse_caller<4, 3, 5>, lkSparse_caller<4, 4, 5>, lkSparse_caller<4, 5, 5>}
+            };
+
+            bindTexture(&tex_If4, I);
+            bindTexture(&tex_Jf4, J);
+
+            funcs[patch.y - 1][patch.x - 1](I.rows, I.cols, prevPts, nextPts, status, err, ptcount,
+                level, block, stream);
+        }

         template <bool calcErr>
         __global__ void lkDense(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols)

@@ -578,15 +524,15 @@ namespace cv { namespace gpu { namespace device
                     float x = xBase - c_halfWin_x + j + 0.5f;
                     float y = yBase - c_halfWin_y + i + 0.5f;

-                    I_patch[i * patchWidth + j] = tex2D(tex_I, x, y);
+                    I_patch[i * patchWidth + j] = tex2D(tex_Ib, x, y);

                     // Sharr Deriv

-                    dIdx_patch[i * patchWidth + j] = 3 * tex2D(tex_I, x+1, y-1) + 10 * tex2D(tex_I, x+1, y) + 3 * tex2D(tex_I, x+1, y+1) -
-                                                    (3 * tex2D(tex_I, x-1, y-1) + 10 * tex2D(tex_I, x-1, y) + 3 * tex2D(tex_I, x-1, y+1));
+                    dIdx_patch[i * patchWidth + j] = 3 * tex2D(tex_Ib, x+1, y-1) + 10 * tex2D(tex_Ib, x+1, y) + 3 * tex2D(tex_Ib, x+1, y+1) -
+                                                    (3 * tex2D(tex_Ib, x-1, y-1) + 10 * tex2D(tex_Ib, x-1, y) + 3 * tex2D(tex_Ib, x-1, y+1));

-                    dIdy_patch[i * patchWidth + j] = 3 * tex2D(tex_I, x-1, y+1) + 10 * tex2D(tex_I, x, y+1) + 3 * tex2D(tex_I, x+1, y+1) -
-                                                    (3 * tex2D(tex_I, x-1, y-1) + 10 * tex2D(tex_I, x, y-1) + 3 * tex2D(tex_I, x+1, y-1));
+                    dIdy_patch[i * patchWidth + j] = 3 * tex2D(tex_Ib, x-1, y+1) + 10 * tex2D(tex_Ib, x, y+1) + 3 * tex2D(tex_Ib, x+1, y+1) -
+                                                    (3 * tex2D(tex_Ib, x-1, y-1) + 10 * tex2D(tex_Ib, x, y-1) + 3 * tex2D(tex_Ib, x+1, y-1));
                 }
             }

@@ -657,7 +603,7 @@ namespace cv { namespace gpu { namespace device
                     for (int j = 0; j < c_winSize_x; ++j)
                     {
                         int I = I_patch[(threadIdx.y + i) * patchWidth + threadIdx.x + j];
-                        int J = tex2D(tex_J, nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f);
+                        int J = tex2D(tex_Jf, nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f);

                         int diff = (J - I) * 32;

@@ -692,7 +638,7 @@ namespace cv { namespace gpu { namespace device
                     for (int j = 0; j < c_winSize_x; ++j)
                     {
                         int I = I_patch[(threadIdx.y + i) * patchWidth + threadIdx.x + j];
-                        int J = tex2D(tex_J, nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f);
+                        int J = tex2D(tex_Jf, nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f);

                         errval += ::abs(J - I);
                     }

@@ -708,8 +654,8 @@ namespace cv { namespace gpu { namespace device
             dim3 block(16, 16);
             dim3 grid(divUp(I.cols, block.x), divUp(I.rows, block.y));

-            bindTexture(&tex_I, I);
-            bindTexture(&tex_J, J);
+            bindTexture(&tex_Ib, I);
+            bindTexture(&tex_Jf, J);

             int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
             const int patchWidth = block.x + 2 * halfWin.x;
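The core of the rewrite is visible in the lkSparse hunks above: instead of reading precomputed 16-bit dIdx/dIdy maps through the fixed-point linearFilter helper, the kernel now samples the float image through a bilinearly filtered texture and forms the Scharr derivatives on the fly with 3/10/3 weights. The following CPU-side sketch (not part of the commit; sampleLinear and scharrDeriv are hypothetical helper names) spells out that arithmetic for a single-channel float image. The kernel adds 0.5f to its coordinates only because CUDA textures place texel centers at half-integer positions; the sketch addresses pixel centers directly.

// CPU illustration of the per-element work in the new lkSparse kernel.
#include <algorithm>
#include <cmath>
#include <vector>

// Bilinear sample with clamped addressing, mimicking cudaFilterModeLinear +
// cudaAddressModeClamp on a single-channel float image stored row-major.
static float sampleLinear(const std::vector<float>& img, int rows, int cols, float x, float y)
{
    x = std::max(0.0f, std::min(x, cols - 1.0f));
    y = std::max(0.0f, std::min(y, rows - 1.0f));

    const int x0 = static_cast<int>(std::floor(x)), y0 = static_cast<int>(std::floor(y));
    const int x1 = std::min(x0 + 1, cols - 1),      y1 = std::min(y0 + 1, rows - 1);
    const float a = x - x0, b = y - y0;

    return (1 - a) * (1 - b) * img[y0 * cols + x0] + a * (1 - b) * img[y0 * cols + x1]
         + (1 - a) * b       * img[y1 * cols + x0] + a * b       * img[y1 * cols + x1];
}

// Scharr derivatives at a (possibly fractional) position, built from bilinear
// samples with 3/10/3 weights, as in the rewritten kernel body.
static void scharrDeriv(const std::vector<float>& img, int rows, int cols,
                        float x, float y, float& dIdx, float& dIdy)
{
    const float tl = sampleLinear(img, rows, cols, x - 1, y - 1), tr = sampleLinear(img, rows, cols, x + 1, y - 1);
    const float bl = sampleLinear(img, rows, cols, x - 1, y + 1), br = sampleLinear(img, rows, cols, x + 1, y + 1);

    dIdx = 3.0f * tr + 10.0f * sampleLinear(img, rows, cols, x + 1, y) + 3.0f * br
         - (3.0f * tl + 10.0f * sampleLinear(img, rows, cols, x - 1, y) + 3.0f * bl);
    dIdy = 3.0f * bl + 10.0f * sampleLinear(img, rows, cols, x, y + 1) + 3.0f * br
         - (3.0f * tl + 10.0f * sampleLinear(img, rows, cols, x, y - 1) + 3.0f * tr);
}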
modules/gpu/src/pyrlk.cpp
@@ -48,8 +48,10 @@ using namespace cv::gpu;

 #if !defined (HAVE_CUDA)

+cv::gpu::PyrLKOpticalFlow::PyrLKOpticalFlow() { throw_nogpu(); }
 void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_nogpu(); }
 void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_nogpu(); }
+void cv::gpu::PyrLKOpticalFlow::releaseMemory() {}

 #else /* !defined (HAVE_CUDA) */

@@ -57,13 +59,11 @@ namespace cv { namespace gpu { namespace device
 {
     namespace pyrlk
     {
-        void loadConstants(int cn, float minEigThreshold, int2 winSize, int iters);
+        void loadConstants(int2 winSize, int iters);

-        void calcSharrDeriv_gpu(DevMem2Db src, DevMem2D_<short> dx_buf, DevMem2D_<short> dy_buf, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy, int cn,
-            cudaStream_t stream = 0);
-
-        void lkSparse_gpu(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
-            const float2* prevPts, float2* nextPts, uchar* status, float* err, bool GET_MIN_EIGENVALS, int ptcount,
+        void lkSparse1_gpu(DevMem2Df I, DevMem2Df J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
+            int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
+        void lkSparse4_gpu(DevMem2D_<float4> I, DevMem2D_<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
             int level, dim3 block, dim3 patch, cudaStream_t stream = 0);

         void lkDense_gpu(DevMem2Db I, DevMem2Df J, DevMem2Df u, DevMem2Df v, DevMem2Df prevU, DevMem2Df prevV,

@@ -71,65 +71,19 @@ namespace cv { namespace gpu { namespace device
     }
 }}}

-void cv::gpu::PyrLKOpticalFlow::calcSharrDeriv(const GpuMat& src, GpuMat& dIdx, GpuMat& dIdy)
+cv::gpu::PyrLKOpticalFlow::PyrLKOpticalFlow()
 {
-    using namespace cv::gpu::device::pyrlk;
-
-    CV_Assert(src.rows > 1 && src.cols > 1);
-    CV_Assert(src.depth() == CV_8U);
-
-    const int cn = src.channels();
-
-    ensureSizeIsEnough(src.size(), CV_MAKETYPE(CV_16S, cn), dx_calcBuf_);
-    ensureSizeIsEnough(src.size(), CV_MAKETYPE(CV_16S, cn), dy_calcBuf_);
-
-    calcSharrDeriv_gpu(src, dx_calcBuf_, dy_calcBuf_, dIdx, dIdy, cn);
-}
-
-void cv::gpu::PyrLKOpticalFlow::buildImagePyramid(const GpuMat& img0, vector<GpuMat>& pyr, bool withBorder)
-{
-    pyr.resize(maxLevel + 1);
-
-    Size sz = img0.size();
-
-    for (int level = 0; level <= maxLevel; ++level)
-    {
-        GpuMat temp;
-
-        if (withBorder)
-        {
-            temp.create(sz.height + winSize.height * 2, sz.width + winSize.width * 2, img0.type());
-            pyr[level] = temp(Rect(winSize.width, winSize.height, sz.width, sz.height));
-        }
-        else
-        {
-            ensureSizeIsEnough(sz, img0.type(), pyr[level]);
-        }
-
-        if (level == 0)
-            img0.copyTo(pyr[level]);
-        else
-            pyrDown(pyr[level - 1], pyr[level]);
-
-        if (withBorder)
-            copyMakeBorder(pyr[level], temp, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_REFLECT_101);
-
-        sz = Size((sz.width + 1) / 2, (sz.height + 1) / 2);
-
-        if (sz.width <= winSize.width || sz.height <= winSize.height)
-        {
-            maxLevel = level;
-            break;
-        }
-    }
+    winSize = Size(21, 21);
+    maxLevel = 3;
+    iters = 30;
+    useInitialFlow = false;
+
+    isDeviceArch11_ = !DeviceInfo().supports(FEATURE_SET_COMPUTE_12);
 }

 namespace
 {
-    void calcPatchSize(cv::Size winSize, int cn, dim3& block, dim3& patch, bool isDeviceArch11)
+    void calcPatchSize(cv::Size winSize, dim3& block, dim3& patch, bool isDeviceArch11)
     {
-        winSize.width *= cn;
-
         if (winSize.width > 32 && winSize.width > 2 * winSize.height)
         {
             block.x = isDeviceArch11 ? 16 : 32;

@@ -160,13 +114,13 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
         return;
     }

-    const int cn = prevImg.channels();
-
     dim3 block, patch;
-    calcPatchSize(winSize, cn, block, patch, isDeviceArch11_);
+    calcPatchSize(winSize, block, patch, isDeviceArch11_);

-    CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2);
+    CV_Assert(prevImg.type() == CV_8UC1 || prevImg.type() == CV_8UC3 || prevImg.type() == CV_8UC4);
     CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
+    CV_Assert(maxLevel >= 0);
+    CV_Assert(winSize.width > 2 && winSize.height > 2);
     CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6);
     CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2);

@@ -186,36 +140,49 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
         ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);

     // build the image pyramids.
-    // we pad each level with +/-winSize.{width|height}
-    // pixels to simplify the further patch extraction.
-    buildImagePyramid(prevImg, prevPyr_, true);
-    buildImagePyramid(nextImg, nextPyr_, true);
-
-    // dI/dx ~ Ix, dI/dy ~ Iy
-    ensureSizeIsEnough(prevImg.rows + winSize.height * 2, prevImg.cols + winSize.width * 2, CV_MAKETYPE(CV_16S, cn), dx_buf_);
-    ensureSizeIsEnough(prevImg.rows + winSize.height * 2, prevImg.cols + winSize.width * 2, CV_MAKETYPE(CV_16S, cn), dy_buf_);
-
-    loadConstants(cn, minEigThreshold, make_int2(winSize.width, winSize.height), iters);
-
-    for (int level = maxLevel; level >= 0; level--)
-    {
-        Size imgSize = prevPyr_[level].size();
-
-        GpuMat dxWhole(imgSize.height + winSize.height * 2, imgSize.width + winSize.width * 2, dx_buf_.type(), dx_buf_.data, dx_buf_.step);
-        GpuMat dyWhole(imgSize.height + winSize.height * 2, imgSize.width + winSize.width * 2, dy_buf_.type(), dy_buf_.data, dy_buf_.step);
-        dxWhole.setTo(Scalar::all(0));
-        dyWhole.setTo(Scalar::all(0));
-        GpuMat dIdx = dxWhole(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height));
-        GpuMat dIdy = dyWhole(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height));
-
-        calcSharrDeriv(prevPyr_[level], dIdx, dIdy);
-
-        lkSparse_gpu(prevPyr_[level], nextPyr_[level], dIdx, dIdy,
-            prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(), level == 0 && err ? err->ptr<float>() : 0, getMinEigenVals, prevPts.cols,
-            level, block, patch);
+    prevPyr_.resize(maxLevel + 1);
+    nextPyr_.resize(maxLevel + 1);
+
+    int cn = prevImg.channels();
+
+    if (cn == 1 || cn == 4)
+    {
+        prevImg.convertTo(prevPyr_[0], CV_32F);
+        nextImg.convertTo(nextPyr_[0], CV_32F);
+    }
+    else
+    {
+        cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
+        buf_.convertTo(prevPyr_[0], CV_32F);
+
+        cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
+        buf_.convertTo(nextPyr_[0], CV_32F);
+    }
+
+    for (int level = 1; level <= maxLevel; ++level)
+    {
+        pyrDown(prevPyr_[level - 1], prevPyr_[level]);
+        pyrDown(nextPyr_[level - 1], nextPyr_[level]);
+    }
+
+    loadConstants(make_int2(winSize.width, winSize.height), iters);
+
+    for (int level = maxLevel; level >= 0; level--)
+    {
+        if (cn == 1)
+        {
+            lkSparse1_gpu(prevPyr_[level], nextPyr_[level],
+                prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(), level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
+                level, block, patch);
+        }
+        else
+        {
+            lkSparse4_gpu(prevPyr_[level], nextPyr_[level],
+                prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(), level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
+                level, block, patch);
+        }
     }
 }

 void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err)

@@ -232,15 +199,17 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
     // build the image pyramids.

-    buildImagePyramid(prevImg, prevPyr_, false);
-
+    prevPyr_.resize(maxLevel + 1);
     nextPyr_.resize(maxLevel + 1);
+
+    prevPyr_[0] = prevImg;
     nextImg.convertTo(nextPyr_[0], CV_32F);
+
     for (int level = 1; level <= maxLevel; ++level)
+    {
+        pyrDown(prevPyr_[level - 1], prevPyr_[level]);
         pyrDown(nextPyr_[level - 1], nextPyr_[level]);
+    }

-    uPyr_.resize(2);
-    vPyr_.resize(2);
-
     ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
     ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);

@@ -250,7 +219,7 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
     vPyr_[1].setTo(Scalar::all(0));

     int2 winSize2i = make_int2(winSize.width, winSize.height);
-    loadConstants(1, minEigThreshold, winSize2i, iters);
+    loadConstants(winSize2i, iters);

     DevMem2Df derr = err ? *err : DevMem2Df();

@@ -271,4 +240,18 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
     vPyr_[idx].copyTo(v);
 }

+void cv::gpu::PyrLKOpticalFlow::releaseMemory()
+{
+    prevPyr_.clear();
+    nextPyr_.clear();
+
+    buf_.release();
+
+    uPyr_[0].release();
+    vPyr_[0].release();
+    uPyr_[1].release();
+    vPyr_[1].release();
+}
+
 #endif /* !defined (HAVE_CUDA) */
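On the host side, sparse() no longer builds padded uchar pyramids plus derivative buffers; it converts the input to CV_32F (expanding 3-channel input to BGRA so it fits the float4 texture path) and runs pyrDown per level. A hedged sketch of that preparation, written with cv::Mat rather than GpuMat so it compiles without a CUDA device, is below; buildFloatPyramid is an illustrative name, not a function from the commit.

// CPU-side illustration of the pyramid preparation used by the new sparse() path.
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <vector>

void buildFloatPyramid(const cv::Mat& img, int maxLevel, std::vector<cv::Mat>& pyr)
{
    pyr.resize(maxLevel + 1);

    const int cn = img.channels();
    if (cn == 1 || cn == 4)
    {
        // 1- and 4-channel inputs map directly onto the float/float4 kernels.
        img.convertTo(pyr[0], CV_32F);
    }
    else
    {
        // 3-channel input is expanded to BGRA first, as the commit does with buf_.
        cv::Mat bgra;
        cv::cvtColor(img, bgra, cv::COLOR_BGR2BGRA);
        bgra.convertTo(pyr[0], CV_32F);
    }

    // One pyrDown per level replaces the old padded buildImagePyramid helper.
    for (int level = 1; level <= maxLevel; ++level)
        cv::pyrDown(pyr[level - 1], pyr[level]);
}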
modules/gpu/test/test_video.cpp
@@ -249,8 +249,7 @@ TEST_P(PyrLKOpticalFlow, Sparse)
     cv::gpu::GpuMat d_nextPts;
     cv::gpu::GpuMat d_status;
-    cv::gpu::GpuMat d_err;

-    pyrLK.sparse(loadMat(frame0), loadMat(frame1), d_pts, d_nextPts, d_status, &d_err);
+    pyrLK.sparse(loadMat(frame0), loadMat(frame1), d_pts, d_nextPts, d_status);

     std::vector<cv::Point2f> nextPts(d_nextPts.cols);
     cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void*)&nextPts[0]);

@@ -260,22 +259,19 @@ TEST_P(PyrLKOpticalFlow, Sparse)
     cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void*)&status[0]);
     d_status.download(status_mat);

-    std::vector<float> err(d_err.cols);
-    cv::Mat err_mat(1, d_err.cols, CV_32FC1, (void*)&err[0]);
-    d_err.download(err_mat);
-
     std::vector<cv::Point2f> nextPts_gold;
     std::vector<unsigned char> status_gold;
-    std::vector<float> err_gold;
-    cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts_gold, status_gold, err_gold);
+    cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts_gold, status_gold, cv::noArray());

     ASSERT_EQ(nextPts_gold.size(), nextPts.size());
     ASSERT_EQ(status_gold.size(), status.size());
-    ASSERT_EQ(err_gold.size(), err.size());

     size_t mistmatch = 0;
     for (size_t i = 0; i < nextPts.size(); ++i)
     {
+        cv::Point2i a = nextPts[i];
+        cv::Point2i b = nextPts_gold[i];
+
         if (status[i] != status_gold[i])
         {
             ++mistmatch;

@@ -284,13 +280,9 @@ TEST_P(PyrLKOpticalFlow, Sparse)
         if (status[i])
         {
-            cv::Point2i a = nextPts[i];
-            cv::Point2i b = nextPts_gold[i];
-
-            bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1;
-            float errdiff = std::abs(err[i] - err_gold[i]);
-
-            if (!eq || errdiff > 1e-1)
+            bool eq = std::abs(a.x - b.x) <= 1 && std::abs(a.y - b.y) <= 1;
+
+            if (!eq)
                 ++mistmatch;
         }
     }
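The updated test compares the GPU result against cv::calcOpticalFlowPyrLK by counting points whose status flags disagree or whose rounded positions differ by more than one pixel, and no longer compares the err values. A standalone sketch of that comparison follows; countMismatches is an illustrative helper, the early continue is a structural guess, and how many mismatches are tolerated is left to the caller rather than taken from the test.

// Standalone sketch of the GPU-vs-CPU result comparison performed by the test.
#include <opencv2/core/core.hpp>
#include <cstdlib>
#include <vector>

size_t countMismatches(const std::vector<cv::Point2f>& nextPts,
                       const std::vector<unsigned char>& status,
                       const std::vector<cv::Point2f>& nextPts_gold,
                       const std::vector<unsigned char>& status_gold)
{
    size_t mismatch = 0;

    for (size_t i = 0; i < nextPts.size(); ++i)
    {
        // Round both results to integer pixel positions before comparing.
        cv::Point2i a = nextPts[i];
        cv::Point2i b = nextPts_gold[i];

        if (status[i] != status_gold[i])
        {
            ++mismatch;
            continue;
        }

        if (status[i])
        {
            // Tracked points agree if they land within one pixel of each other.
            bool eq = std::abs(a.x - b.x) <= 1 && std::abs(a.y - b.y) <= 1;
            if (!eq)
                ++mismatch;
        }
    }

    return mismatch;
}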