Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ec5bdc7d
Commit
ec5bdc7d
authored
Mar 05, 2012
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added patch error calculation to gpu::PyrLKOpticalFlow
parent
27ecc999
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
138 additions
and
33 deletions
+138
-33
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+2
-0
pyrlk.cu
modules/gpu/src/cuda/pyrlk.cu
+124
-23
pyrlk.cpp
modules/gpu/src/pyrlk.cpp
+4
-4
test_video.cpp
modules/gpu/test/test_video.cpp
+2
-2
tests.cpp
samples/gpu/performance/tests.cpp
+6
-4
No files found.
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
ec5bdc7d
...
...
@@ -1817,6 +1817,7 @@ public:
derivLambda
=
0.5
;
useInitialFlow
=
false
;
minEigThreshold
=
1e-4
f
;
getMinEigenVals
=
false
;
}
void
sparse
(
const
GpuMat
&
prevImg
,
const
GpuMat
&
nextImg
,
const
GpuMat
&
prevPts
,
GpuMat
&
nextPts
,
...
...
@@ -1830,6 +1831,7 @@ public:
double
derivLambda
;
bool
useInitialFlow
;
float
minEigThreshold
;
bool
getMinEigenVals
;
void
releaseMemory
()
{
...
...
modules/gpu/src/cuda/pyrlk.cu
View file @
ec5bdc7d
...
...
@@ -274,9 +274,39 @@ namespace cv { namespace gpu { namespace device
}
}
__device__ void reduce(float& val1, float* smem1, int tid)
{
smem1[tid] = val1;
__syncthreads();
if (tid < 128)
{
smem1[tid] = val1 += smem1[tid + 128];
}
__syncthreads();
if (tid < 64)
{
smem1[tid] = val1 += smem1[tid + 64];
}
__syncthreads();
if (tid < 32)
{
volatile float* vmem1 = smem1;
vmem1[tid] = val1 += vmem1[tid + 32];
vmem1[tid] = val1 += vmem1[tid + 16];
vmem1[tid] = val1 += vmem1[tid + 8];
vmem1[tid] = val1 += vmem1[tid + 4];
vmem1[tid] = val1 += vmem1[tid + 2];
vmem1[tid] = val1 += vmem1[tid + 1];
}
}
#define SCALE (1.0f / (1 << 20))
template <int PATCH_X, int PATCH_Y, bool calcErr>
template <int PATCH_X, int PATCH_Y, bool calcErr
, bool GET_MIN_EIGENVALS
>
__global__ void lkSparse(const PtrStepb I, const PtrStepb J, const PtrStep<short> dIdx, const PtrStep<short> dIdy,
const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
{
...
...
@@ -349,7 +379,7 @@ namespace cv { namespace gpu { namespace device
float D = A11 * A22 - A12 * A12;
float minEig = (A22 + A11 - ::sqrtf((A11 - A22) * (A11 - A22) + 4.f * A12 * A12)) / (2 * c_winSize_x * c_winSize_y);
if (calcErr && tid == 0)
if (calcErr &&
GET_MIN_EIGENVALS &&
tid == 0)
err[blockIdx.x] = minEig;
if (minEig < c_minEigThreshold || D < numeric_limits<float>::epsilon())
...
...
@@ -415,38 +445,76 @@ namespace cv { namespace gpu { namespace device
nextPt.y += delta.y;
if (::fabs(delta.x) < 0.01f && ::fabs(delta.y) < 0.01f)
{
nextPt.x -= delta.x * 0.5f;
nextPt.y -= delta.y * 0.5f;
break;
}
}
if (tid == 0)
{
if (nextPt.x < -c_winSize_x || nextPt.x >= cols || nextPt.y < -c_winSize_y || nextPt.y >= rows)
status_ = false;
// TODO : Why do we compute patch error in shifted window?
nextPt.x += c_halfWin_x;
nextPt.y += c_halfWin_y;
nextPts[blockIdx.x] = nextPt;
float errval = 0.f;
if (calcErr && !GET_MIN_EIGENVALS && status_)
{
for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
{
for (int x = threadIdx.x, j = 0; x < c_winSize_x_cn; x += blockDim.x, ++j)
{
int diff = linearFilter(J, nextPt, x, y) - I_patch[i][j];
errval += ::fabsf((float)diff);
}
}
reduce(errval, smem1, tid);
errval /= 32 * c_winSize_x_cn * c_winSize_y;
}
if (tid == 0)
{
status[blockIdx.x] = status_;
nextPts[blockIdx.x] = nextPt;
if (calcErr && !GET_MIN_EIGENVALS)
err[blockIdx.x] = errval;
}
}
template <int PATCH_X, int PATCH_Y>
void lkSparse_caller(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
const float2* prevPts, float2* nextPts, uchar* status, float* err,
bool GET_MIN_EIGENVALS,
int ptcount,
int level, dim3 block, cudaStream_t stream)
{
dim3 grid(ptcount);
if (err)
if (
level == 0 &&
err)
{
cudaSafeCall( cudaFuncSetCacheConfig(lkSparse<PATCH_X, PATCH_Y, true>, cudaFuncCachePreferL1) );
if (GET_MIN_EIGENVALS)
{
cudaSafeCall( cudaFuncSetCacheConfig(lkSparse<PATCH_X, PATCH_Y, true, true>, cudaFuncCachePreferL1) );
lkSparse<PATCH_X, PATCH_Y
, true><<<grid, block>>>(I, J, dIdx, dIdy,
lkSparse<PATCH_X, PATCH_Y, true
, true><<<grid, block>>>(I, J, dIdx, dIdy,
prevPts, nextPts, status, err, level, I.rows, I.cols);
}
else
{
cudaSafeCall( cudaFuncSetCacheConfig(lkSparse<PATCH_X, PATCH_Y
, false>, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(lkSparse<PATCH_X, PATCH_Y, true
, false>, cudaFuncCachePreferL1) );
lkSparse<PATCH_X, PATCH_Y, false><<<grid, block>>>(I, J, dIdx, dIdy,
lkSparse<PATCH_X, PATCH_Y, true, false><<<grid, block>>>(I, J, dIdx, dIdy,
prevPts, nextPts, status, err, level, I.rows, I.cols);
}
}
else
{
cudaSafeCall( cudaFuncSetCacheConfig(lkSparse<PATCH_X, PATCH_Y, false, false>, cudaFuncCachePreferL1) );
lkSparse<PATCH_X, PATCH_Y, false, false><<<grid, block>>>(I, J, dIdx, dIdy,
prevPts, nextPts, status, err, level, I.rows, I.cols);
}
...
...
@@ -457,11 +525,11 @@ namespace cv { namespace gpu { namespace device
}
void lkSparse_gpu(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
const float2* prevPts, float2* nextPts, uchar* status, float* err,
bool GET_MIN_EIGENVALS,
int ptcount,
int level, dim3 block, dim3 patch, cudaStream_t stream)
{
typedef void (*func_t)(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
const float2* prevPts, float2* nextPts, uchar* status, float* err,
bool GET_MIN_EIGENVALS,
int ptcount,
int level, dim3 block, cudaStream_t stream);
static const func_t funcs[5][5] =
...
...
@@ -474,11 +542,11 @@ namespace cv { namespace gpu { namespace device
};
funcs[patch.y - 1][patch.x - 1](I, J, dIdx, dIdy,
prevPts, nextPts, status, err, ptcount,
prevPts, nextPts, status, err,
GET_MIN_EIGENVALS,
ptcount,
level, block, stream);
}
template <bool calcErr>
template <bool calcErr
, bool GET_MIN_EIGENVALS
>
__global__ void lkDense(const PtrStepb I, const PtrStepb J, const PtrStep<short> dIdx, const PtrStep<short> dIdy,
PtrStepf u, PtrStepf v, PtrStepf err, const int rows, const int cols)
{
...
...
@@ -515,7 +583,7 @@ namespace cv { namespace gpu { namespace device
float D = A11 * A22 - A12 * A12;
float minEig = (A22 + A11 - ::sqrtf((A11 - A22) * (A11 - A22) + 4.f * A12 * A12)) / (2 * c_winSize_x * c_winSize_y);
if (calcErr)
if (calcErr
&& GET_MIN_EIGENVALS
)
err(y, x) = minEig;
if (minEig < c_minEigThreshold || D < numeric_limits<float>::epsilon())
...
...
@@ -567,28 +635,61 @@ namespace cv { namespace gpu { namespace device
break;
}
u(y, x) = nextPt.x - x + c_halfWin_x;
v(y, x) = nextPt.y - y + c_halfWin_y;
// TODO : Why do we compute patch error in shifted window?
nextPt.x += c_halfWin_x;
nextPt.y += c_halfWin_y;
u(y, x) = nextPt.x - x;
v(y, x) = nextPt.y - y;
if (calcErr && !GET_MIN_EIGENVALS)
{
float errval = 0.0f;
for (int i = 0; i < c_winSize_y; ++i)
{
for (int j = 0; j < c_winSize_x; ++j)
{
int I_val = I(y - c_halfWin_y + i, x - c_halfWin_x + j);
int diff = linearFilter(J, nextPt, j, i) - CV_DESCALE(I_val * (1 << W_BITS), W_BITS1 - 5);
errval += ::fabsf((float)diff);
}
}
errval /= 32 * c_winSize_x_cn * c_winSize_y;
err(y, x) = errval;
}
}
void lkDense_gpu(DevMem2Db I, DevMem2Db J, DevMem2D_<short> dIdx, DevMem2D_<short> dIdy,
DevMem2Df u, DevMem2Df v, DevMem2Df* err, cudaStream_t stream)
DevMem2Df u, DevMem2Df v, DevMem2Df* err,
bool GET_MIN_EIGENVALS,
cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(I.cols, block.x), divUp(I.rows, block.y));
if (err)
{
cudaSafeCall( cudaFuncSetCacheConfig(lkDense<true>, cudaFuncCachePreferL1) );
if (GET_MIN_EIGENVALS)
{
cudaSafeCall( cudaFuncSetCacheConfig(lkDense<true, true>, cudaFuncCachePreferL1) );
lkDense<true, true><<<grid, block, 0, stream>>>(I, J, dIdx, dIdy, u, v, *err, I.rows, I.cols);
cudaSafeCall( cudaGetLastError() );
}
else
{
cudaSafeCall( cudaFuncSetCacheConfig(lkDense<true, false>, cudaFuncCachePreferL1) );
lkDense<tru
e><<<grid, block, 0, stream>>>(I, J, dIdx, dIdy, u, v, *err, I.rows, I.cols);
lkDense<true, fals
e><<<grid, block, 0, stream>>>(I, J, dIdx, dIdy, u, v, *err, I.rows, I.cols);
cudaSafeCall( cudaGetLastError() );
}
}
else
{
cudaSafeCall( cudaFuncSetCacheConfig(lkDense<false>, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(lkDense<false
, false
>, cudaFuncCachePreferL1) );
lkDense<false><<<grid, block, 0, stream>>>(I, J, dIdx, dIdy, u, v, PtrStepf(), I.rows, I.cols);
lkDense<false
, false
><<<grid, block, 0, stream>>>(I, J, dIdx, dIdy, u, v, PtrStepf(), I.rows, I.cols);
cudaSafeCall( cudaGetLastError() );
}
...
...
modules/gpu/src/pyrlk.cpp
View file @
ec5bdc7d
...
...
@@ -63,11 +63,11 @@ namespace cv { namespace gpu { namespace device
cudaStream_t
stream
=
0
);
void
lkSparse_gpu
(
DevMem2Db
I
,
DevMem2Db
J
,
DevMem2D_
<
short
>
dIdx
,
DevMem2D_
<
short
>
dIdy
,
const
float2
*
prevPts
,
float2
*
nextPts
,
uchar
*
status
,
float
*
err
,
int
ptcount
,
const
float2
*
prevPts
,
float2
*
nextPts
,
uchar
*
status
,
float
*
err
,
bool
GET_MIN_EIGENVALS
,
int
ptcount
,
int
level
,
dim3
block
,
dim3
patch
,
cudaStream_t
stream
=
0
);
void
lkDense_gpu
(
DevMem2Db
I
,
DevMem2Db
J
,
DevMem2D_
<
short
>
dIdx
,
DevMem2D_
<
short
>
dIdy
,
DevMem2Df
u
,
DevMem2Df
v
,
DevMem2Df
*
err
,
cudaStream_t
stream
=
0
);
DevMem2Df
u
,
DevMem2Df
v
,
DevMem2Df
*
err
,
bool
GET_MIN_EIGENVALS
,
cudaStream_t
stream
=
0
);
}
}}}
...
...
@@ -205,7 +205,7 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
calcSharrDeriv
(
prevPyr_
[
level
],
dIdx
,
dIdy
);
lkSparse_gpu
(
prevPyr_
[
level
],
nextPyr_
[
level
],
dIdx
,
dIdy
,
prevPts
.
ptr
<
float2
>
(),
nextPts
.
ptr
<
float2
>
(),
status
.
ptr
(),
level
==
0
&&
err
?
err
->
ptr
<
float
>
()
:
0
,
prevPts
.
cols
,
prevPts
.
ptr
<
float2
>
(),
nextPts
.
ptr
<
float2
>
(),
status
.
ptr
(),
level
==
0
&&
err
?
err
->
ptr
<
float
>
()
:
0
,
getMinEigenVals
,
prevPts
.
cols
,
level
,
block
,
patch
);
}
}
...
...
@@ -272,7 +272,7 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
calcSharrDeriv
(
prevPyr_
[
level
],
dIdx
,
dIdy
);
lkDense_gpu
(
prevPyr_
[
level
],
nextPyr_
[
level
],
dIdx
,
dIdy
,
uPyr_
[
level
],
vPyr_
[
level
],
level
==
0
&&
err
?
&
derr
:
0
);
level
==
0
&&
err
?
&
derr
:
0
,
getMinEigenVals
);
if
(
level
==
0
)
{
...
...
modules/gpu/test/test_video.cpp
View file @
ec5bdc7d
...
...
@@ -358,7 +358,7 @@ PARAM_TEST_CASE(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, bool)
cv
::
goodFeaturesToTrack
(
gray_frame
,
pts
,
1000
,
0.01
,
0.0
);
cv
::
calcOpticalFlowPyrLK
(
frame0
,
frame1
,
pts
,
nextPts_gold
,
status_gold
,
err_gold
,
cv
::
Size
(
21
,
21
),
3
,
cv
::
TermCriteria
(
cv
::
TermCriteria
::
COUNT
+
cv
::
TermCriteria
::
EPS
,
30
,
0.01
),
0.5
,
CV_LKFLOW_GET_MIN_EIGENVALS
);
cv
::
TermCriteria
(
cv
::
TermCriteria
::
COUNT
+
cv
::
TermCriteria
::
EPS
,
30
,
0.01
),
0.5
);
}
};
...
...
@@ -410,7 +410,7 @@ TEST_P(PyrLKOpticalFlowSparse, Accuracy)
bool
eq
=
std
::
abs
(
a
.
x
-
b
.
x
)
<
1
&&
std
::
abs
(
a
.
y
-
b
.
y
)
<
1
;
float
errdiff
=
std
::
abs
(
err
[
i
]
-
err_gold
[
i
]);
if
(
!
eq
||
errdiff
>
1e-
4
)
if
(
!
eq
||
errdiff
>
1e-
1
)
++
mistmatch
;
}
}
...
...
samples/gpu/performance/tests.cpp
View file @
ec5bdc7d
...
...
@@ -1157,10 +1157,12 @@ TEST(PyrLKOpticalFlow)
vector
<
Point2f
>
nextPts
;
vector
<
unsigned
char
>
status
;
calcOpticalFlowPyrLK
(
frame0
,
frame1
,
pts
,
nextPts
,
status
,
noArray
());
vector
<
float
>
err
;
calcOpticalFlowPyrLK
(
frame0
,
frame1
,
pts
,
nextPts
,
status
,
err
);
CPU_ON
;
calcOpticalFlowPyrLK
(
frame0
,
frame1
,
pts
,
nextPts
,
status
,
noArray
()
);
calcOpticalFlowPyrLK
(
frame0
,
frame1
,
pts
,
nextPts
,
status
,
err
);
CPU_OFF
;
gpu
::
PyrLKOpticalFlow
d_pyrLK
;
...
...
@@ -1176,10 +1178,10 @@ TEST(PyrLKOpticalFlow)
gpu
::
GpuMat
d_status
;
gpu
::
GpuMat
d_err
;
d_pyrLK
.
sparse
(
d_frame0
,
d_frame1
,
d_pts
,
d_nextPts
,
d_status
);
d_pyrLK
.
sparse
(
d_frame0
,
d_frame1
,
d_pts
,
d_nextPts
,
d_status
,
&
d_err
);
GPU_ON
;
d_pyrLK
.
sparse
(
d_frame0
,
d_frame1
,
d_pts
,
d_nextPts
,
d_status
);
d_pyrLK
.
sparse
(
d_frame0
,
d_frame1
,
d_pts
,
d_nextPts
,
d_status
,
&
d_err
);
GPU_OFF
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment