Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
381216aa
Commit
381216aa
authored
Dec 31, 2014
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactor cudaoptflow public API:
* use opaque algorithm interfaces * add stream support
parent
19c6bbe7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
1164 additions
and
831 deletions
+1164
-831
cudaoptflow.hpp
modules/cudaoptflow/include/opencv2/cudaoptflow.hpp
+196
-201
brox.cpp
modules/cudaoptflow/src/brox.cpp
+116
-52
pyrlk.cu
modules/cudaoptflow/src/cuda/pyrlk.cu
+6
-6
tvl1flow.cu
modules/cudaoptflow/src/cuda/tvl1flow.cu
+23
-12
farneback.cpp
modules/cudaoptflow/src/farneback.cpp
+320
-239
pyrlk.cpp
modules/cudaoptflow/src/pyrlk.cpp
+225
-125
tvl1flow.cpp
modules/cudaoptflow/src/tvl1flow.cpp
+278
-196
No files found.
modules/cudaoptflow/include/opencv2/cudaoptflow.hpp
View file @
381216aa
...
...
@@ -61,49 +61,94 @@ namespace cv { namespace cuda {
//! @addtogroup cudaoptflow
//! @{
/** @brief Class computing the optical flow for two images using Brox et al Optical Flow algorithm
(@cite Brox2004). :
//
// Interface
//
/** @brief Base interface for dense optical flow algorithms.
*/
class
CV_EXPORTS
BroxOpticalFlow
class
CV_EXPORTS
DenseOpticalFlow
:
public
Algorithm
{
public
:
BroxOpticalFlow
(
float
alpha_
,
float
gamma_
,
float
scale_factor_
,
int
inner_iterations_
,
int
outer_iterations_
,
int
solver_iterations_
)
:
alpha
(
alpha_
),
gamma
(
gamma_
),
scale_factor
(
scale_factor_
),
inner_iterations
(
inner_iterations_
),
outer_iterations
(
outer_iterations_
),
solver_iterations
(
solver_iterations_
)
{
}
/** @brief Calculates a dense optical flow.
//! Compute optical flow
//! frame0 - source frame (supports only CV_32FC1 type)
//! frame1 - frame to track (with the same size and type as frame0)
//! u - flow horizontal component (along x axis)
//! v - flow vertical component (along y axis)
void
operator
()(
const
GpuMat
&
frame0
,
const
GpuMat
&
frame1
,
GpuMat
&
u
,
GpuMat
&
v
,
Stream
&
stream
=
Stream
::
Null
());
@param I0 first input image.
@param I1 second input image of the same size and the same type as I0.
@param flow computed flow image that has the same size as I0 and type CV_32FC2.
@param stream Stream for the asynchronous version.
*/
virtual
void
calc
(
InputArray
I0
,
InputArray
I1
,
InputOutputArray
flow
,
Stream
&
stream
=
Stream
::
Null
())
=
0
;
};
//! flow smoothness
float
alpha
;
/** @brief Base interface for sparse optical flow algorithms.
*/
class
CV_EXPORTS
SparseOpticalFlow
:
public
Algorithm
{
public
:
/** @brief Calculates a sparse optical flow.
@param prevImg First input image.
@param nextImg Second input image of the same size and the same type as prevImg.
@param prevPts Vector of 2D points for which the flow needs to be found.
@param nextPts Output vector of 2D points containing the calculated new positions of input features in the second image.
@param status Output status vector. Each element of the vector is set to 1 if the
flow for the corresponding features has been found. Otherwise, it is set to 0.
@param err Optional output vector that contains error response for each point (inverse confidence).
@param stream Stream for the asynchronous version.
*/
virtual
void
calc
(
InputArray
prevImg
,
InputArray
nextImg
,
InputArray
prevPts
,
InputOutputArray
nextPts
,
OutputArray
status
,
OutputArray
err
=
cv
::
noArray
(),
Stream
&
stream
=
Stream
::
Null
())
=
0
;
};
//! gradient constancy importance
float
gamma
;
//
// BroxOpticalFlow
//
//! pyramid scale factor
float
scale_factor
;
/** @brief Class computing the optical flow for two images using Brox et al Optical Flow algorithm (@cite Brox2004).
*/
class
CV_EXPORTS
BroxOpticalFlow
:
public
DenseOpticalFlow
{
public
:
virtual
double
getFlowSmoothness
()
const
=
0
;
virtual
void
setFlowSmoothness
(
double
alpha
)
=
0
;
virtual
double
getGradientConstancyImportance
()
const
=
0
;
virtual
void
setGradientConstancyImportance
(
double
gamma
)
=
0
;
virtual
double
getPyramidScaleFactor
()
const
=
0
;
virtual
void
setPyramidScaleFactor
(
double
scale_factor
)
=
0
;
//! number of lagged non-linearity iterations (inner loop)
int
inner_iterations
;
virtual
int
getInnerIterations
()
const
=
0
;
virtual
void
setInnerIterations
(
int
inner_iterations
)
=
0
;
//! number of warping iterations (number of pyramid levels)
int
outer_iterations
;
virtual
int
getOuterIterations
()
const
=
0
;
virtual
void
setOuterIterations
(
int
outer_iterations
)
=
0
;
//! number of linear system solver iterations
int
solver_iterations
;
GpuMat
buf
;
virtual
int
getSolverIterations
()
const
=
0
;
virtual
void
setSolverIterations
(
int
solver_iterations
)
=
0
;
static
Ptr
<
BroxOpticalFlow
>
create
(
double
alpha
=
0.197
,
double
gamma
=
50.0
,
double
scale_factor
=
0.8
,
int
inner_iterations
=
5
,
int
outer_iterations
=
150
,
int
solver_iterations
=
10
);
};
/** @brief Class used for calculating an optical flow.
//
// PyrLKOpticalFlow
//
/** @brief Class used for calculating a sparse optical flow.
The class can calculate an optical flow for a sparse feature set
or dense optical flow
using the
The class can calculate an optical flow for a sparse feature set using the
iterative Lucas-Kanade method with pyramids.
@sa calcOpticalFlowPyrLK
...
...
@@ -112,158 +157,116 @@ iterative Lucas-Kanade method with pyramids.
- An example of the Lucas Kanade optical flow algorithm can be found at
opencv_source_code/samples/gpu/pyrlk_optical_flow.cpp
*/
class
CV_EXPORTS
PyrLK
OpticalFlow
class
CV_EXPORTS
SparsePyrLKOpticalFlow
:
public
Sparse
OpticalFlow
{
public
:
PyrLKOpticalFlow
();
/** @brief Calculate an optical flow for a sparse feature set.
@param prevImg First 8-bit input image (supports both grayscale and color images).
@param nextImg Second input image of the same size and the same type as prevImg .
@param prevPts Vector of 2D points for which the flow needs to be found. It must be one row matrix
with CV_32FC2 type.
@param nextPts Output vector of 2D points (with single-precision floating-point coordinates)
containing the calculated new positions of input features in the second image. When useInitialFlow
is true, the vector must have the same size as in the input.
@param status Output status vector (CV_8UC1 type). Each element of the vector is set to 1 if the
flow for the corresponding features has been found. Otherwise, it is set to 0.
@param err Output vector (CV_32FC1 type) that contains the difference between patches around the
original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not
needed.
@sa calcOpticalFlowPyrLK
*/
void
sparse
(
const
GpuMat
&
prevImg
,
const
GpuMat
&
nextImg
,
const
GpuMat
&
prevPts
,
GpuMat
&
nextPts
,
GpuMat
&
status
,
GpuMat
*
err
=
0
);
/** @brief Calculate dense optical flow.
@param prevImg First 8-bit grayscale input image.
@param nextImg Second input image of the same size and the same type as prevImg .
@param u Horizontal component of the optical flow of the same size as input images, 32-bit
floating-point, single-channel
@param v Vertical component of the optical flow of the same size as input images, 32-bit
floating-point, single-channel
@param err Output vector (CV_32FC1 type) that contains the difference between patches around the
original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not
needed.
*/
void
dense
(
const
GpuMat
&
prevImg
,
const
GpuMat
&
nextImg
,
GpuMat
&
u
,
GpuMat
&
v
,
GpuMat
*
err
=
0
);
/** @brief Releases inner buffers memory.
*/
void
releaseMemory
();
virtual
Size
getWinSize
()
const
=
0
;
virtual
void
setWinSize
(
Size
winSize
)
=
0
;
Size
winSize
;
int
maxLevel
;
int
iters
;
bool
useInitialFlow
;
virtual
int
getMaxLevel
()
const
=
0
;
virtual
void
setMaxLevel
(
int
maxLevel
)
=
0
;
private
:
std
::
vector
<
GpuMat
>
prevPyr_
;
std
::
vector
<
GpuMat
>
nextPyr_
;
virtual
int
getNumIters
()
const
=
0
;
virtual
void
setNumIters
(
int
iters
)
=
0
;
GpuMat
buf_
;
virtual
bool
getUseInitialFlow
()
const
=
0
;
virtual
void
setUseInitialFlow
(
bool
useInitialFlow
)
=
0
;
GpuMat
uPyr_
[
2
];
GpuMat
vPyr_
[
2
];
static
Ptr
<
SparsePyrLKOpticalFlow
>
create
(
Size
winSize
=
Size
(
21
,
21
),
int
maxLevel
=
3
,
int
iters
=
30
,
bool
useInitialFlow
=
false
);
};
/** @brief Class computing a dense optical flow using the Gunnar Farneback’s algorithm. :
/** @brief Class used for calculating a dense optical flow.
The class can calculate a dense optical flow using the
iterative Lucas-Kanade method with pyramids.
*/
class
CV_EXPORTS
Farneback
OpticalFlow
class
CV_EXPORTS
DensePyrLKOpticalFlow
:
public
Dense
OpticalFlow
{
public
:
FarnebackOpticalFlow
()
{
numLevels
=
5
;
pyrScale
=
0.5
;
fastPyramids
=
false
;
winSize
=
13
;
numIters
=
10
;
polyN
=
5
;
polySigma
=
1.1
;
flags
=
0
;
}
int
numLevels
;
double
pyrScale
;
bool
fastPyramids
;
int
winSize
;
int
numIters
;
int
polyN
;
double
polySigma
;
int
flags
;
/** @brief Computes a dense optical flow using the Gunnar Farneback’s algorithm.
@param frame0 First 8-bit gray-scale input image
@param frame1 Second 8-bit gray-scale input image
@param flowx Flow horizontal component
@param flowy Flow vertical component
@param s Stream
@sa calcOpticalFlowFarneback
*/
void
operator
()(
const
GpuMat
&
frame0
,
const
GpuMat
&
frame1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
Stream
&
s
=
Stream
::
Null
());
virtual
Size
getWinSize
()
const
=
0
;
virtual
void
setWinSize
(
Size
winSize
)
=
0
;
/** @brief Releases unused auxiliary memory buffers.
*/
void
releaseMemory
()
{
frames_
[
0
].
release
();
frames_
[
1
].
release
();
pyrLevel_
[
0
].
release
();
pyrLevel_
[
1
].
release
();
M_
.
release
();
bufM_
.
release
();
R_
[
0
].
release
();
R_
[
1
].
release
();
blurredFrame_
[
0
].
release
();
blurredFrame_
[
1
].
release
();
pyramid0_
.
clear
();
pyramid1_
.
clear
();
}
private
:
void
prepareGaussian
(
int
n
,
double
sigma
,
float
*
g
,
float
*
xg
,
float
*
xxg
,
double
&
ig11
,
double
&
ig03
,
double
&
ig33
,
double
&
ig55
);
void
setPolynomialExpansionConsts
(
int
n
,
double
sigma
);
void
updateFlow_boxFilter
(
const
GpuMat
&
R0
,
const
GpuMat
&
R1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
GpuMat
&
M
,
GpuMat
&
bufM
,
int
blockSize
,
bool
updateMatrices
,
Stream
streams
[]);
void
updateFlow_gaussianBlur
(
const
GpuMat
&
R0
,
const
GpuMat
&
R1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
GpuMat
&
M
,
GpuMat
&
bufM
,
int
blockSize
,
bool
updateMatrices
,
Stream
streams
[]);
GpuMat
frames_
[
2
];
GpuMat
pyrLevel_
[
2
],
M_
,
bufM_
,
R_
[
2
],
blurredFrame_
[
2
];
std
::
vector
<
GpuMat
>
pyramid0_
,
pyramid1_
;
virtual
int
getMaxLevel
()
const
=
0
;
virtual
void
setMaxLevel
(
int
maxLevel
)
=
0
;
virtual
int
getNumIters
()
const
=
0
;
virtual
void
setNumIters
(
int
iters
)
=
0
;
virtual
bool
getUseInitialFlow
()
const
=
0
;
virtual
void
setUseInitialFlow
(
bool
useInitialFlow
)
=
0
;
static
Ptr
<
DensePyrLKOpticalFlow
>
create
(
Size
winSize
=
Size
(
13
,
13
),
int
maxLevel
=
3
,
int
iters
=
30
,
bool
useInitialFlow
=
false
);
};
// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
//
// see reference:
// [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
// [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
class
CV_EXPORTS
OpticalFlowDual_TVL1_CUDA
// FarnebackOpticalFlow
//
/** @brief Class computing a dense optical flow using Gunnar Farneback's algorithm.
*/
class
CV_EXPORTS
FarnebackOpticalFlow
:
public
DenseOpticalFlow
{
public
:
OpticalFlowDual_TVL1_CUDA
();
virtual
int
getNumLevels
()
const
=
0
;
virtual
void
setNumLevels
(
int
numLevels
)
=
0
;
virtual
double
getPyrScale
()
const
=
0
;
virtual
void
setPyrScale
(
double
pyrScale
)
=
0
;
virtual
bool
getFastPyramids
()
const
=
0
;
virtual
void
setFastPyramids
(
bool
fastPyramids
)
=
0
;
void
operator
()(
const
GpuMat
&
I0
,
const
GpuMat
&
I1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
);
virtual
int
getWinSize
()
const
=
0
;
virtual
void
setWinSize
(
int
winSize
)
=
0
;
void
collectGarbage
();
virtual
int
getNumIters
()
const
=
0
;
virtual
void
setNumIters
(
int
numIters
)
=
0
;
virtual
int
getPolyN
()
const
=
0
;
virtual
void
setPolyN
(
int
polyN
)
=
0
;
virtual
double
getPolySigma
()
const
=
0
;
virtual
void
setPolySigma
(
double
polySigma
)
=
0
;
virtual
int
getFlags
()
const
=
0
;
virtual
void
setFlags
(
int
flags
)
=
0
;
static
Ptr
<
FarnebackOpticalFlow
>
create
(
int
numLevels
=
5
,
double
pyrScale
=
0.5
,
bool
fastPyramids
=
false
,
int
winSize
=
13
,
int
numIters
=
10
,
int
polyN
=
5
,
double
polySigma
=
1.1
,
int
flags
=
0
);
};
//
// OpticalFlowDual_TVL1
//
/** @brief Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method.
*
* @sa C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
* @sa Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
*/
class
CV_EXPORTS
OpticalFlowDual_TVL1
:
public
DenseOpticalFlow
{
public
:
/**
* Time step of the numerical scheme.
*/
double
tau
;
virtual
double
getTau
()
const
=
0
;
virtual
void
setTau
(
double
tau
)
=
0
;
/**
* Weight parameter for the data term, attachment parameter.
...
...
@@ -271,7 +274,8 @@ public:
* The smaller this parameter is, the smoother the solutions we obtain.
* It depends on the range of motions of the images, so its value should be adapted to each image sequence.
*/
double
lambda
;
virtual
double
getLambda
()
const
=
0
;
virtual
void
setLambda
(
double
lambda
)
=
0
;
/**
* Weight parameter for (u - v)^2, tightness parameter.
...
...
@@ -279,20 +283,23 @@ public:
* In theory, it should have a small value in order to maintain both parts in correspondence.
* The method is stable for a large range of values of this parameter.
*/
virtual
double
getGamma
()
const
=
0
;
virtual
void
setGamma
(
double
gamma
)
=
0
;
double
gamma
;
/**
* parameter used for motion estimation. It adds a variable allowing for illumination variations
* Set this parameter to 1. if you have varying illumination.
* See: Chambolle et al, A First-Order Primal-Dual Algorithm for Convex Problems with Applications to Imaging
* Journal of Mathematical imaging and vision, may 2011 Vol 40 issue 1, pp 120-145
*/
double
theta
;
* parameter used for motion estimation. It adds a variable allowing for illumination variations
* Set this parameter to 1. if you have varying illumination.
* See: Chambolle et al, A First-Order Primal-Dual Algorithm for Convex Problems with Applications to Imaging
* Journal of Mathematical imaging and vision, may 2011 Vol 40 issue 1, pp 120-145
*/
virtual
double
getTheta
()
const
=
0
;
virtual
void
setTheta
(
double
theta
)
=
0
;
/**
* Number of scales used to create the pyramid of images.
*/
int
nscales
;
virtual
int
getNumScales
()
const
=
0
;
virtual
void
setNumScales
(
int
nscales
)
=
0
;
/**
* Number of warpings per scale.
...
...
@@ -300,51 +307,39 @@ public:
* This is a parameter that assures the stability of the method.
* It also affects the running time, so it is a compromise between speed and accuracy.
*/
int
warps
;
virtual
int
getNumWarps
()
const
=
0
;
virtual
void
setNumWarps
(
int
warps
)
=
0
;
/**
* Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
* A small value will yield more accurate solutions at the expense of a slower convergence.
*/
double
epsilon
;
virtual
double
getEpsilon
()
const
=
0
;
virtual
void
setEpsilon
(
double
epsilon
)
=
0
;
/**
* Stopping criterion iterations number used in the numerical scheme.
*/
int
iterations
;
double
scaleStep
;
bool
useInitialFlow
;
private
:
void
procOneScale
(
const
GpuMat
&
I0
,
const
GpuMat
&
I1
,
GpuMat
&
u1
,
GpuMat
&
u2
,
GpuMat
&
u3
);
std
::
vector
<
GpuMat
>
I0s
;
std
::
vector
<
GpuMat
>
I1s
;
std
::
vector
<
GpuMat
>
u1s
;
std
::
vector
<
GpuMat
>
u2s
;
std
::
vector
<
GpuMat
>
u3s
;
GpuMat
I1x_buf
;
GpuMat
I1y_buf
;
GpuMat
I1w_buf
;
GpuMat
I1wx_buf
;
GpuMat
I1wy_buf
;
GpuMat
grad_buf
;
GpuMat
rho_c_buf
;
GpuMat
p11_buf
;
GpuMat
p12_buf
;
GpuMat
p21_buf
;
GpuMat
p22_buf
;
GpuMat
p31_buf
;
GpuMat
p32_buf
;
GpuMat
diff_buf
;
GpuMat
norm_buf
;
virtual
int
getNumIterations
()
const
=
0
;
virtual
void
setNumIterations
(
int
iterations
)
=
0
;
virtual
double
getScaleStep
()
const
=
0
;
virtual
void
setScaleStep
(
double
scaleStep
)
=
0
;
virtual
bool
getUseInitialFlow
()
const
=
0
;
virtual
void
setUseInitialFlow
(
bool
useInitialFlow
)
=
0
;
static
Ptr
<
OpticalFlowDual_TVL1
>
create
(
double
tau
=
0.25
,
double
lambda
=
0.15
,
double
theta
=
0.3
,
int
nscales
=
5
,
int
warps
=
5
,
double
epsilon
=
0.01
,
int
iterations
=
300
,
double
scaleStep
=
0.8
,
double
gamma
=
0.0
,
bool
useInitialFlow
=
false
);
};
//! @}
...
...
modules/cudaoptflow/src/brox.cpp
View file @
381216aa
...
...
@@ -47,84 +47,148 @@ using namespace cv::cuda;
#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_CUDALEGACY) || defined (CUDA_DISABLER)
void
cv
::
cuda
::
BroxOpticalFlow
::
operator
()(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
Stream
&
)
{
throw_no_cuda
();
}
// Fallback factory compiled when the surrounding
// "#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_CUDALEGACY) || defined (CUDA_DISABLER)"
// branch is active. All arguments are ignored: the function calls
// throw_no_cuda() and, should that call return, hands back an empty pointer.
Ptr<BroxOpticalFlow> cv::cuda::BroxOpticalFlow::create(double /*alpha*/, double /*gamma*/, double /*scale_factor*/,
                                                       int /*inner_iterations*/, int /*outer_iterations*/, int /*solver_iterations*/)
{
    throw_no_cuda();

    Ptr<BroxOpticalFlow> empty;
    return empty;
}
#else
namespace
{
size_t
getBufSize
(
const
NCVBroxOpticalFlowDescriptor
&
desc
,
const
NCVMatrix
<
Ncv32f
>&
frame0
,
const
NCVMatrix
<
Ncv32f
>&
frame1
,
NCVMatrix
<
Ncv32f
>&
u
,
NCVMatrix
<
Ncv32f
>&
v
,
const
cudaDeviceProp
&
devProp
)
namespace
{
class
BroxOpticalFlowImpl
:
public
BroxOpticalFlow
{
NCVMemStackAllocator
gpuCounter
(
static_cast
<
Ncv32u
>
(
devProp
.
textureAlignment
));
public
:
BroxOpticalFlowImpl
(
double
alpha
,
double
gamma
,
double
scale_factor
,
int
inner_iterations
,
int
outer_iterations
,
int
solver_iterations
)
:
alpha_
(
alpha
),
gamma_
(
gamma
),
scale_factor_
(
scale_factor
),
inner_iterations_
(
inner_iterations
),
outer_iterations_
(
outer_iterations
),
solver_iterations_
(
solver_iterations
)
{
}
virtual
void
calc
(
InputArray
I0
,
InputArray
I1
,
InputOutputArray
flow
,
Stream
&
stream
);
virtual
double
getFlowSmoothness
()
const
{
return
alpha_
;
}
virtual
void
setFlowSmoothness
(
double
alpha
)
{
alpha_
=
static_cast
<
float
>
(
alpha
);
}
virtual
double
getGradientConstancyImportance
()
const
{
return
gamma_
;
}
virtual
void
setGradientConstancyImportance
(
double
gamma
)
{
gamma_
=
static_cast
<
float
>
(
gamma
);
}
virtual
double
getPyramidScaleFactor
()
const
{
return
scale_factor_
;
}
virtual
void
setPyramidScaleFactor
(
double
scale_factor
)
{
scale_factor_
=
static_cast
<
float
>
(
scale_factor
);
}
//! number of lagged non-linearity iterations (inner loop)
virtual
int
getInnerIterations
()
const
{
return
inner_iterations_
;
}
virtual
void
setInnerIterations
(
int
inner_iterations
)
{
inner_iterations_
=
inner_iterations
;
}
//! number of warping iterations (number of pyramid levels)
virtual
int
getOuterIterations
()
const
{
return
outer_iterations_
;
}
virtual
void
setOuterIterations
(
int
outer_iterations
)
{
outer_iterations_
=
outer_iterations
;
}
//! number of linear system solver iterations
virtual
int
getSolverIterations
()
const
{
return
solver_iterations_
;
}
virtual
void
setSolverIterations
(
int
solver_iterations
)
{
solver_iterations_
=
solver_iterations
;
}
private
:
//! flow smoothness
float
alpha_
;
//! gradient constancy importance
float
gamma_
;
//! pyramid scale factor
float
scale_factor_
;
//! number of lagged non-linearity iterations (inner loop)
int
inner_iterations_
;
//! number of warping iterations (number of pyramid levels)
int
outer_iterations_
;
//! number of linear system solver iterations
int
solver_iterations_
;
};
static
size_t
getBufSize
(
const
NCVBroxOpticalFlowDescriptor
&
desc
,
const
NCVMatrix
<
Ncv32f
>&
frame0
,
const
NCVMatrix
<
Ncv32f
>&
frame1
,
NCVMatrix
<
Ncv32f
>&
u
,
NCVMatrix
<
Ncv32f
>&
v
,
size_t
textureAlignment
)
{
NCVMemStackAllocator
gpuCounter
(
static_cast
<
Ncv32u
>
(
textureAlignment
));
ncvSafeCall
(
NCVBroxOpticalFlow
(
desc
,
gpuCounter
,
frame0
,
frame1
,
u
,
v
,
0
)
);
return
gpuCounter
.
maxSize
();
}
}
namespace
{
static
void
outputHandler
(
const
String
&
msg
)
{
CV_Error
(
cv
::
Error
::
GpuApiCallError
,
msg
.
c_str
());
}
}
// Debug-output callback registered through ncvSetDebugOutputHandler() in this
// file. It forwards the received message text to CV_Error using the
// cv::Error::GpuApiCallError code.
static
void
outputHandler
(
const
String
&
msg
)
{
CV_Error
(
cv
::
Error
::
GpuApiCallError
,
msg
.
c_str
());
}
void
cv
::
cuda
::
BroxOpticalFlow
::
operator
()(
const
GpuMat
&
frame0
,
const
GpuMat
&
frame1
,
GpuMat
&
u
,
GpuMat
&
v
,
Stream
&
s
)
{
ncvSetDebugOutputHandler
(
outputHandler
);
void
BroxOpticalFlowImpl
::
calc
(
InputArray
_I0
,
InputArray
_I1
,
InputOutputArray
_flow
,
Stream
&
stream
)
{
const
GpuMat
frame0
=
_I0
.
getGpuMat
();
const
GpuMat
frame1
=
_I1
.
getGpuMat
();
CV_Assert
(
frame0
.
type
()
==
CV_32FC1
);
CV_Assert
(
frame1
.
size
()
==
frame0
.
size
()
&&
frame1
.
type
()
==
frame0
.
type
()
);
CV_Assert
(
frame0
.
type
()
==
CV_32FC1
);
CV_Assert
(
frame1
.
size
()
==
frame0
.
size
()
&&
frame1
.
type
()
==
frame0
.
type
()
);
u
.
create
(
frame0
.
size
(),
CV_32FC1
);
v
.
create
(
frame0
.
size
(),
CV_32FC1
);
ncvSetDebugOutputHandler
(
outputHandler
);
cudaDeviceProp
devProp
;
cudaSafeCall
(
cudaGetDeviceProperties
(
&
devProp
,
getDevice
())
);
BufferPool
pool
(
stream
);
GpuMat
u
=
pool
.
getBuffer
(
frame0
.
size
(),
CV_32FC1
);
GpuMat
v
=
pool
.
getBuffer
(
frame0
.
size
(),
CV_32FC1
);
NCVBroxOpticalFlowDescriptor
desc
;
NCVBroxOpticalFlowDescriptor
desc
;
desc
.
alpha
=
alpha_
;
desc
.
gamma
=
gamma_
;
desc
.
scale_factor
=
scale_factor_
;
desc
.
number_of_inner_iterations
=
inner_iterations_
;
desc
.
number_of_outer_iterations
=
outer_iterations_
;
desc
.
number_of_solver_iterations
=
solver_iterations_
;
desc
.
alpha
=
alpha
;
desc
.
gamma
=
gamma
;
desc
.
scale_factor
=
scale_factor
;
desc
.
number_of_inner_iterations
=
inner_iterations
;
desc
.
number_of_outer_iterations
=
outer_iterations
;
desc
.
number_of_solver_iterations
=
solver_iterations
;
NCVMemSegment
frame0MemSeg
;
frame0MemSeg
.
begin
.
memtype
=
NCVMemoryTypeDevice
;
frame0MemSeg
.
begin
.
ptr
=
const_cast
<
uchar
*>
(
frame0
.
data
);
frame0MemSeg
.
size
=
frame0
.
step
*
frame0
.
rows
;
NCVMemSegment
frame0
MemSeg
;
frame0
MemSeg
.
begin
.
memtype
=
NCVMemoryTypeDevice
;
frame0MemSeg
.
begin
.
ptr
=
const_cast
<
uchar
*>
(
frame0
.
data
);
frame0MemSeg
.
size
=
frame0
.
step
*
frame0
.
rows
;
NCVMemSegment
frame1
MemSeg
;
frame1
MemSeg
.
begin
.
memtype
=
NCVMemoryTypeDevice
;
frame1MemSeg
.
begin
.
ptr
=
const_cast
<
uchar
*>
(
frame1
.
data
);
frame1MemSeg
.
size
=
frame1
.
step
*
frame1
.
rows
;
NCVMemSegment
frame1
MemSeg
;
frame1
MemSeg
.
begin
.
memtype
=
NCVMemoryTypeDevice
;
frame1MemSeg
.
begin
.
ptr
=
const_cast
<
uchar
*>
(
frame1
.
data
);
frame1MemSeg
.
size
=
frame1
.
step
*
frame1
.
rows
;
NCVMemSegment
u
MemSeg
;
u
MemSeg
.
begin
.
memtype
=
NCVMemoryTypeDevice
;
uMemSeg
.
begin
.
ptr
=
u
.
ptr
(
);
uMemSeg
.
size
=
u
.
step
*
u
.
rows
;
NCVMemSegment
u
MemSeg
;
u
MemSeg
.
begin
.
memtype
=
NCVMemoryTypeDevice
;
uMemSeg
.
begin
.
ptr
=
u
.
ptr
();
uMemSeg
.
size
=
u
.
step
*
u
.
rows
;
NCVMemSegment
v
MemSeg
;
v
MemSeg
.
begin
.
memtype
=
NCVMemoryTypeDevice
;
vMemSeg
.
begin
.
ptr
=
v
.
ptr
();
vMemSeg
.
size
=
v
.
step
*
v
.
rows
;
NCVMemSegment
vMemSeg
;
vMemSeg
.
begin
.
memtype
=
NCVMemoryTypeDevice
;
vMemSeg
.
begin
.
ptr
=
v
.
ptr
();
vMemSeg
.
size
=
v
.
step
*
v
.
rows
;
DeviceInfo
devInfo
;
size_t
textureAlignment
=
devInfo
.
textureAlignment
();
NCVMatrixReuse
<
Ncv32f
>
frame0Mat
(
frame0MemSeg
,
static_cast
<
Ncv32u
>
(
devProp
.
textureAlignment
),
frame0
.
cols
,
frame0
.
rows
,
static_cast
<
Ncv32u
>
(
frame0
.
step
));
NCVMatrixReuse
<
Ncv32f
>
frame1Mat
(
frame1MemSeg
,
static_cast
<
Ncv32u
>
(
devProp
.
textureAlignment
),
frame1
.
cols
,
frame1
.
rows
,
static_cast
<
Ncv32u
>
(
frame1
.
step
));
NCVMatrixReuse
<
Ncv32f
>
uMat
(
uMemSeg
,
static_cast
<
Ncv32u
>
(
devProp
.
textureAlignment
),
u
.
cols
,
u
.
rows
,
static_cast
<
Ncv32u
>
(
u
.
step
));
NCVMatrixReuse
<
Ncv32f
>
vMat
(
vMemSeg
,
static_cast
<
Ncv32u
>
(
devProp
.
textureAlignment
),
v
.
cols
,
v
.
rows
,
static_cast
<
Ncv32u
>
(
v
.
step
));
NCVMatrixReuse
<
Ncv32f
>
frame0Mat
(
frame0MemSeg
,
static_cast
<
Ncv32u
>
(
textureAlignment
),
frame0
.
cols
,
frame0
.
rows
,
static_cast
<
Ncv32u
>
(
frame0
.
step
));
NCVMatrixReuse
<
Ncv32f
>
frame1Mat
(
frame1MemSeg
,
static_cast
<
Ncv32u
>
(
textureAlignment
),
frame1
.
cols
,
frame1
.
rows
,
static_cast
<
Ncv32u
>
(
frame1
.
step
));
NCVMatrixReuse
<
Ncv32f
>
uMat
(
uMemSeg
,
static_cast
<
Ncv32u
>
(
textureAlignment
),
u
.
cols
,
u
.
rows
,
static_cast
<
Ncv32u
>
(
u
.
step
));
NCVMatrixReuse
<
Ncv32f
>
vMat
(
vMemSeg
,
static_cast
<
Ncv32u
>
(
textureAlignment
),
v
.
cols
,
v
.
rows
,
static_cast
<
Ncv32u
>
(
v
.
step
));
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
s
);
size_t
bufSize
=
getBufSize
(
desc
,
frame0Mat
,
frame1Mat
,
uMat
,
vMat
,
textureAlignment
);
GpuMat
buf
=
pool
.
getBuffer
(
1
,
static_cast
<
int
>
(
bufSize
),
CV_8UC1
);
size_t
bufSize
=
getBufSize
(
desc
,
frame0Mat
,
frame1Mat
,
uMat
,
vMat
,
devProp
);
NCVMemStackAllocator
gpuAllocator
(
NCVMemoryTypeDevice
,
bufSize
,
static_cast
<
Ncv32u
>
(
textureAlignment
),
buf
.
ptr
()
);
ensureSizeIsEnough
(
1
,
static_cast
<
int
>
(
bufSize
),
CV_8UC1
,
buf
);
ncvSafeCall
(
NCVBroxOpticalFlow
(
desc
,
gpuAllocator
,
frame0Mat
,
frame1Mat
,
uMat
,
vMat
,
StreamAccessor
::
getStream
(
stream
))
);
NCVMemStackAllocator
gpuAllocator
(
NCVMemoryTypeDevice
,
bufSize
,
static_cast
<
Ncv32u
>
(
devProp
.
textureAlignment
),
buf
.
ptr
());
GpuMat
flows
[]
=
{
u
,
v
};
cuda
::
merge
(
flows
,
2
,
_flow
,
stream
);
}
}
ncvSafeCall
(
NCVBroxOpticalFlow
(
desc
,
gpuAllocator
,
frame0Mat
,
frame1Mat
,
uMat
,
vMat
,
stream
)
);
// Factory for the Brox optical flow algorithm in the CUDA-enabled build.
// Every argument is passed straight through to the BroxOpticalFlowImpl
// constructor; the resulting object is returned behind the public
// BroxOpticalFlow interface.
Ptr<BroxOpticalFlow> cv::cuda::BroxOpticalFlow::create(double alpha,
                                                       double gamma,
                                                       double scale_factor,
                                                       int inner_iterations,
                                                       int outer_iterations,
                                                       int solver_iterations)
{
    Ptr<BroxOpticalFlowImpl> impl = makePtr<BroxOpticalFlowImpl>(alpha,
                                                                 gamma,
                                                                 scale_factor,
                                                                 inner_iterations,
                                                                 outer_iterations,
                                                                 solver_iterations);
    return impl;
}
#endif
/* HAVE_CUDA */
modules/cudaoptflow/src/cuda/pyrlk.cu
View file @
381216aa
...
...
@@ -472,16 +472,16 @@ namespace pyrlk
}
}
void loadConstants(int2 winSize, int iters)
void loadConstants(int2 winSize, int iters
, cudaStream_t stream
)
{
cudaSafeCall( cudaMemcpyToSymbol
(c_winSize_x, &winSize.x, sizeof(int)
) );
cudaSafeCall( cudaMemcpyToSymbol
(c_winSize_y, &winSize.y, sizeof(int)
) );
cudaSafeCall( cudaMemcpyToSymbol
Async(c_winSize_x, &winSize.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream
) );
cudaSafeCall( cudaMemcpyToSymbol
Async(c_winSize_y, &winSize.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream
) );
int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
cudaSafeCall( cudaMemcpyToSymbol
(c_halfWin_x, &halfWin.x, sizeof(int)
) );
cudaSafeCall( cudaMemcpyToSymbol
(c_halfWin_y, &halfWin.y, sizeof(int)
) );
cudaSafeCall( cudaMemcpyToSymbol
Async(c_halfWin_x, &halfWin.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream
) );
cudaSafeCall( cudaMemcpyToSymbol
Async(c_halfWin_y, &halfWin.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream
) );
cudaSafeCall( cudaMemcpyToSymbol
(c_iters, &iters, sizeof(int)
) );
cudaSafeCall( cudaMemcpyToSymbol
Async(c_iters, &iters, sizeof(int), 0, cudaMemcpyHostToDevice, stream
) );
}
void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
...
...
modules/cudaoptflow/src/cuda/tvl1flow.cu
View file @
381216aa
...
...
@@ -66,15 +66,16 @@ namespace tvl1flow
dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x));
}
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy)
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy
, cudaStream_t stream
)
{
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
centeredGradientKernel<<<grid, block>>>(src, dx, dy);
centeredGradientKernel<<<grid, block
, 0, stream
>>>(src, dx, dy);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
...
...
@@ -164,7 +165,10 @@ namespace tvl1flow
rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
}
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho)
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx,
PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho,
cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y));
...
...
@@ -173,10 +177,11 @@ namespace tvl1flow
bindTexture(&tex_I1x, I1x);
bindTexture(&tex_I1y, I1y);
warpBackwardKernel<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
warpBackwardKernel<<<grid, block
, 0, stream
>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
...
...
@@ -292,15 +297,17 @@ namespace tvl1flow
PtrStepSzf grad, PtrStepSzf rho_c,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error,
float l_t, float theta, float gamma, bool calcError)
float l_t, float theta, float gamma, bool calcError,
cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError);
estimateUKernel<<<grid, block
, 0, stream
>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
...
...
@@ -346,15 +353,19 @@ namespace tvl1flow
}
}
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, float taut, float gamma)
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
float taut, float gamma,
cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));
estimateDualVariablesKernel<<<grid, block>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);
estimateDualVariablesKernel<<<grid, block
, 0, stream
>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
...
...
modules/cudaoptflow/src/farneback.cpp
View file @
381216aa
...
...
@@ -42,23 +42,21 @@
#include "precomp.hpp"
#define MIN_SIZE 32
#define S(x) StreamAccessor::getStream(x)
// CUDA resize() is fast, but it differs from the CPU analog. Disabling this flag
// leads to an inefficient code. It's for debug purposes only.
#define ENABLE_CUDA_RESIZE 1
using
namespace
cv
;
using
namespace
cv
::
cuda
;
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
void
cv
::
cuda
::
FarnebackOpticalFlow
::
operator
()(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
Stream
&
)
{
throw_no_cuda
();
}
// Stub compiled when OpenCV is built without CUDA support (HAVE_CUDA undefined
// or CUDA_DISABLER defined). All parameters are ignored; the call only reports
// the missing backend through throw_no_cuda().
Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int, double, bool, int, int, int, double, int)
{
    throw_no_cuda();
    // The returned pointer type must match the declared return type. The
    // previous code returned Ptr<BroxOpticalFlow>, an unrelated sibling
    // algorithm type that is not convertible to Ptr<FarnebackOpticalFlow>.
    // NOTE(review): throw_no_cuda() presumably never returns; this empty
    // pointer only satisfies the signature.
    return Ptr<FarnebackOpticalFlow>();
}
#else
#define MIN_SIZE 32
// CUDA resize() is fast, but it differs from the CPU analog. Disabling this flag
// leads to an inefficient code. It's for debug purposes only.
#define ENABLE_CUDA_RESIZE 1
namespace
cv
{
namespace
cuda
{
namespace
device
{
namespace
optflow_farneback
{
void
setPolynomialExpansionConsts
(
...
...
@@ -76,8 +74,6 @@ namespace cv { namespace cuda { namespace device { namespace optflow_farneback
void
updateFlowGpu
(
const
PtrStepSzf
M
,
PtrStepSzf
flowx
,
PtrStepSzf
flowy
,
cudaStream_t
stream
);
/*void boxFilterGpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);*/
void
boxFilter5Gpu
(
const
PtrStepSzf
src
,
int
ksizeHalf
,
PtrStepSzf
dst
,
cudaStream_t
stream
);
void
boxFilter5Gpu_CC11
(
const
PtrStepSzf
src
,
int
ksizeHalf
,
PtrStepSzf
dst
,
cudaStream_t
stream
);
...
...
@@ -93,296 +89,381 @@ namespace cv { namespace cuda { namespace device { namespace optflow_farneback
void
gaussianBlur5Gpu_CC11
(
const
PtrStepSzf
src
,
int
ksizeHalf
,
PtrStepSzf
dst
,
int
borderType
,
cudaStream_t
stream
);
}}}}
// namespace cv { namespace cuda { namespace cudev { namespace optflow_farneback
}}}}
namespace
{
GpuMat
allocMatFromBuf
(
int
rows
,
int
cols
,
int
type
,
GpuMat
&
mat
)
class
FarnebackOpticalFlowImpl
:
public
FarnebackOpticalFlow
{
if
(
!
mat
.
empty
()
&&
mat
.
type
()
==
type
&&
mat
.
rows
>=
rows
&&
mat
.
cols
>=
cols
)
return
mat
(
Rect
(
0
,
0
,
cols
,
rows
));
public
:
// Stores every construction argument in the same-named trailing-underscore
// member; no validation or device work is performed here.
FarnebackOpticalFlowImpl(int numLevels, double pyrScale, bool fastPyramids, int winSize,
                         int numIters, int polyN, double polySigma, int flags)
    : numLevels_(numLevels)
    , pyrScale_(pyrScale)
    , fastPyramids_(fastPyramids)
    , winSize_(winSize)
    , numIters_(numIters)
    , polyN_(polyN)
    , polySigma_(polySigma)
    , flags_(flags)
{
}
return
mat
=
GpuMat
(
rows
,
cols
,
type
);
}
}
virtual
int
getNumLevels
()
const
{
return
numLevels_
;
}
virtual
void
setNumLevels
(
int
numLevels
)
{
numLevels_
=
numLevels
;
}
void
cv
::
cuda
::
FarnebackOpticalFlow
::
prepareGaussian
(
int
n
,
double
sigma
,
float
*
g
,
float
*
xg
,
float
*
xxg
,
double
&
ig11
,
double
&
ig03
,
double
&
ig33
,
double
&
ig55
)
{
double
s
=
0.
;
for
(
int
x
=
-
n
;
x
<=
n
;
x
++
)
{
g
[
x
]
=
(
float
)
std
::
exp
(
-
x
*
x
/
(
2
*
sigma
*
sigma
));
s
+=
g
[
x
];
}
virtual
double
getPyrScale
()
const
{
return
pyrScale_
;
}
virtual
void
setPyrScale
(
double
pyrScale
)
{
pyrScale_
=
pyrScale
;
}
s
=
1.
/
s
;
for
(
int
x
=
-
n
;
x
<=
n
;
x
++
)
{
g
[
x
]
=
(
float
)(
g
[
x
]
*
s
);
xg
[
x
]
=
(
float
)(
x
*
g
[
x
]);
xxg
[
x
]
=
(
float
)(
x
*
x
*
g
[
x
]);
}
virtual
bool
getFastPyramids
()
const
{
return
fastPyramids_
;
}
virtual
void
setFastPyramids
(
bool
fastPyramids
)
{
fastPyramids_
=
fastPyramids
;
}
Mat_
<
double
>
G
(
6
,
6
);
G
.
setTo
(
0
);
virtual
int
getWinSize
()
const
{
return
winSize_
;
}
virtual
void
setWinSize
(
int
winSize
)
{
winSize_
=
winSize
;
}
for
(
int
y
=
-
n
;
y
<=
n
;
y
++
)
{
for
(
int
x
=
-
n
;
x
<=
n
;
x
++
)
{
G
(
0
,
0
)
+=
g
[
y
]
*
g
[
x
];
G
(
1
,
1
)
+=
g
[
y
]
*
g
[
x
]
*
x
*
x
;
G
(
3
,
3
)
+=
g
[
y
]
*
g
[
x
]
*
x
*
x
*
x
*
x
;
G
(
5
,
5
)
+=
g
[
y
]
*
g
[
x
]
*
x
*
x
*
y
*
y
;
}
}
virtual
int
getNumIters
()
const
{
return
numIters_
;
}
virtual
void
setNumIters
(
int
numIters
)
{
numIters_
=
numIters
;
}
//G[0][0] = 1.;
G
(
2
,
2
)
=
G
(
0
,
3
)
=
G
(
0
,
4
)
=
G
(
3
,
0
)
=
G
(
4
,
0
)
=
G
(
1
,
1
);
G
(
4
,
4
)
=
G
(
3
,
3
);
G
(
3
,
4
)
=
G
(
4
,
3
)
=
G
(
5
,
5
);
// invG:
// [ x e e ]
// [ y ]
// [ y ]
// [ e z ]
// [ e z ]
// [ u ]
Mat_
<
double
>
invG
=
G
.
inv
(
DECOMP_CHOLESKY
);
ig11
=
invG
(
1
,
1
);
ig03
=
invG
(
0
,
3
);
ig33
=
invG
(
3
,
3
);
ig55
=
invG
(
5
,
5
);
}
virtual
int
getPolyN
()
const
{
return
polyN_
;
}
virtual
void
setPolyN
(
int
polyN
)
{
polyN_
=
polyN
;
}
virtual
double
getPolySigma
()
const
{
return
polySigma_
;
}
virtual
void
setPolySigma
(
double
polySigma
)
{
polySigma_
=
polySigma
;
}
void
cv
::
cuda
::
FarnebackOpticalFlow
::
setPolynomialExpansionConsts
(
int
n
,
double
sigma
)
{
std
::
vector
<
float
>
buf
(
n
*
6
+
3
);
float
*
g
=
&
buf
[
0
]
+
n
;
float
*
xg
=
g
+
n
*
2
+
1
;
float
*
xxg
=
xg
+
n
*
2
+
1
;
virtual
int
getFlags
()
const
{
return
flags_
;
}
virtual
void
setFlags
(
int
flags
)
{
flags_
=
flags
;
}
if
(
sigma
<
FLT_EPSILON
)
sigma
=
n
*
0.3
;
virtual
void
calc
(
InputArray
I0
,
InputArray
I1
,
InputOutputArray
flow
,
Stream
&
stream
);
double
ig11
,
ig03
,
ig33
,
ig55
;
prepareGaussian
(
n
,
sigma
,
g
,
xg
,
xxg
,
ig11
,
ig03
,
ig33
,
ig55
);
private
:
int
numLevels_
;
double
pyrScale_
;
bool
fastPyramids_
;
int
winSize_
;
int
numIters_
;
int
polyN_
;
double
polySigma_
;
int
flags_
;
device
::
optflow_farneback
::
setPolynomialExpansionConsts
(
n
,
g
,
xg
,
xxg
,
static_cast
<
float
>
(
ig11
),
static_cast
<
float
>
(
ig03
),
static_cast
<
float
>
(
ig33
),
static_cast
<
float
>
(
ig55
));
}
private
:
void
prepareGaussian
(
int
n
,
double
sigma
,
float
*
g
,
float
*
xg
,
float
*
xxg
,
double
&
ig11
,
double
&
ig03
,
double
&
ig33
,
double
&
ig55
);
void
setPolynomialExpansionConsts
(
int
n
,
double
sigma
);
void
cv
::
cuda
::
FarnebackOpticalFlow
::
updateFlow_boxFilter
(
const
GpuMat
&
R0
,
const
GpuMat
&
R1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
GpuMat
&
M
,
GpuMat
&
bufM
,
int
blockSize
,
bool
updateMatrices
,
Stream
streams
[])
{
if
(
deviceSupports
(
FEATURE_SET_COMPUTE_12
))
device
::
optflow_farneback
::
boxFilter5Gpu
(
M
,
blockSize
/
2
,
bufM
,
S
(
streams
[
0
]));
else
device
::
optflow_farneback
::
boxFilter5Gpu_CC11
(
M
,
blockSize
/
2
,
bufM
,
S
(
streams
[
0
]));
swap
(
M
,
bufM
);
for
(
int
i
=
1
;
i
<
5
;
++
i
)
streams
[
i
].
waitForCompletion
();
device
::
optflow_farneback
::
updateFlowGpu
(
M
,
flowx
,
flowy
,
S
(
streams
[
0
]));
if
(
updateMatrices
)
device
::
optflow_farneback
::
updateMatricesGpu
(
flowx
,
flowy
,
R0
,
R1
,
M
,
S
(
streams
[
0
]));
}
void
updateFlow_boxFilter
(
const
GpuMat
&
R0
,
const
GpuMat
&
R1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
GpuMat
&
M
,
GpuMat
&
bufM
,
int
blockSize
,
bool
updateMatrices
,
Stream
streams
[]);
void
updateFlow_gaussianBlur
(
const
GpuMat
&
R0
,
const
GpuMat
&
R1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
GpuMat
&
M
,
GpuMat
&
bufM
,
int
blockSize
,
bool
updateMatrices
,
Stream
streams
[]);
void
cv
::
cuda
::
FarnebackOpticalFlow
::
updateFlow_gaussianBlur
(
const
GpuMat
&
R0
,
const
GpuMat
&
R1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
GpuMat
&
M
,
GpuMat
&
bufM
,
int
blockSize
,
bool
updateMatrices
,
Stream
streams
[])
{
if
(
deviceSupports
(
FEATURE_SET_COMPUTE_12
))
device
::
optflow_farneback
::
gaussianBlur5Gpu
(
M
,
blockSize
/
2
,
bufM
,
BORDER_REPLICATE
,
S
(
streams
[
0
]));
else
device
::
optflow_farneback
::
gaussianBlur5Gpu_CC11
(
M
,
blockSize
/
2
,
bufM
,
BORDER_REPLICATE
,
S
(
streams
[
0
]));
swap
(
M
,
bufM
);
device
::
optflow_farneback
::
updateFlowGpu
(
M
,
flowx
,
flowy
,
S
(
streams
[
0
]));
if
(
updateMatrices
)
device
::
optflow_farneback
::
updateMatricesGpu
(
flowx
,
flowy
,
R0
,
R1
,
M
,
S
(
streams
[
0
]));
}
void
calcImpl
(
const
GpuMat
&
frame0
,
const
GpuMat
&
frame1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
Stream
&
stream
);
GpuMat
frames_
[
2
];
GpuMat
pyrLevel_
[
2
],
M_
,
bufM_
,
R_
[
2
],
blurredFrame_
[
2
];
std
::
vector
<
GpuMat
>
pyramid0_
,
pyramid1_
;
};
void
cv
::
cuda
::
FarnebackOpticalFlow
::
operator
()(
const
GpuMat
&
frame0
,
const
GpuMat
&
frame1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
Stream
&
s
)
{
CV_Assert
(
frame0
.
channels
()
==
1
&&
frame1
.
channels
()
==
1
);
CV_Assert
(
frame0
.
size
()
==
frame1
.
size
());
CV_Assert
(
polyN
==
5
||
polyN
==
7
);
CV_Assert
(
!
fastPyramids
||
std
::
abs
(
pyrScale
-
0.5
)
<
1e-6
);
Stream
streams
[
5
];
if
(
S
(
s
))
streams
[
0
]
=
s
;
Size
size
=
frame0
.
size
();
GpuMat
prevFlowX
,
prevFlowY
,
curFlowX
,
curFlowY
;
flowx
.
create
(
size
,
CV_32F
);
flowy
.
create
(
size
,
CV_32F
);
GpuMat
flowx0
=
flowx
;
GpuMat
flowy0
=
flowy
;
// Crop unnecessary levels
double
scale
=
1
;
int
numLevelsCropped
=
0
;
for
(;
numLevelsCropped
<
numLevels
;
numLevelsCropped
++
)
void
FarnebackOpticalFlowImpl
::
calc
(
InputArray
_frame0
,
InputArray
_frame1
,
InputOutputArray
_flow
,
Stream
&
stream
)
{
scale
*=
pyrScale
;
if
(
size
.
width
*
scale
<
MIN_SIZE
||
size
.
height
*
scale
<
MIN_SIZE
)
break
;
const
GpuMat
frame0
=
_frame0
.
getGpuMat
();
const
GpuMat
frame1
=
_frame1
.
getGpuMat
();
BufferPool
pool
(
stream
);
GpuMat
flowx
=
pool
.
getBuffer
(
frame0
.
size
(),
CV_32FC1
);
GpuMat
flowy
=
pool
.
getBuffer
(
frame0
.
size
(),
CV_32FC1
);
calcImpl
(
frame0
,
frame1
,
flowx
,
flowy
,
stream
);
GpuMat
flows
[]
=
{
flowx
,
flowy
};
cuda
::
merge
(
flows
,
2
,
_flow
,
stream
);
}
frame0
.
convertTo
(
frames_
[
0
],
CV_32F
,
streams
[
0
]);
frame1
.
convertTo
(
frames_
[
1
],
CV_32F
,
streams
[
1
]);
// Returns a rows x cols matrix of the requested type, reusing the storage of
// `mat` when possible.
//
// If `mat` is non-empty, already has the requested type and is at least
// rows x cols, a top-left ROI header over it is returned and `mat` is left
// untouched. Otherwise `mat` is replaced by a freshly allocated matrix, which
// is also the return value.
GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
{
    const bool reusable = !mat.empty()
                       && mat.type() == type
                       && mat.rows >= rows
                       && mat.cols >= cols;

    if (!reusable)
    {
        mat = GpuMat(rows, cols, type);
        return mat;
    }

    return mat(Rect(0, 0, cols, rows));
}
if
(
fastPyramids
)
void
FarnebackOpticalFlowImpl
::
prepareGaussian
(
int
n
,
double
sigma
,
float
*
g
,
float
*
xg
,
float
*
xxg
,
double
&
ig11
,
double
&
ig03
,
double
&
ig33
,
double
&
ig55
)
{
// Build Gaussian pyramids using pyrDown()
pyramid0_
.
resize
(
numLevelsCropped
+
1
);
pyramid1_
.
resize
(
numLevelsCropped
+
1
);
pyramid0_
[
0
]
=
frames_
[
0
];
pyramid1_
[
0
]
=
frames_
[
1
];
for
(
int
i
=
1
;
i
<=
numLevelsCropped
;
++
i
)
double
s
=
0.
;
for
(
int
x
=
-
n
;
x
<=
n
;
x
++
)
{
cuda
::
pyrDown
(
pyramid0_
[
i
-
1
],
pyramid0_
[
i
],
streams
[
0
]);
cuda
::
pyrDown
(
pyramid1_
[
i
-
1
],
pyramid1_
[
i
],
streams
[
1
]);
g
[
x
]
=
(
float
)
std
::
exp
(
-
x
*
x
/
(
2
*
sigma
*
sigma
));
s
+=
g
[
x
];
}
s
=
1.
/
s
;
for
(
int
x
=
-
n
;
x
<=
n
;
x
++
)
{
g
[
x
]
=
(
float
)(
g
[
x
]
*
s
);
xg
[
x
]
=
(
float
)(
x
*
g
[
x
]);
xxg
[
x
]
=
(
float
)(
x
*
x
*
g
[
x
]);
}
Mat_
<
double
>
G
(
6
,
6
);
G
.
setTo
(
0
);
for
(
int
y
=
-
n
;
y
<=
n
;
y
++
)
{
for
(
int
x
=
-
n
;
x
<=
n
;
x
++
)
{
G
(
0
,
0
)
+=
g
[
y
]
*
g
[
x
];
G
(
1
,
1
)
+=
g
[
y
]
*
g
[
x
]
*
x
*
x
;
G
(
3
,
3
)
+=
g
[
y
]
*
g
[
x
]
*
x
*
x
*
x
*
x
;
G
(
5
,
5
)
+=
g
[
y
]
*
g
[
x
]
*
x
*
x
*
y
*
y
;
}
}
//G[0][0] = 1.;
G
(
2
,
2
)
=
G
(
0
,
3
)
=
G
(
0
,
4
)
=
G
(
3
,
0
)
=
G
(
4
,
0
)
=
G
(
1
,
1
);
G
(
4
,
4
)
=
G
(
3
,
3
);
G
(
3
,
4
)
=
G
(
4
,
3
)
=
G
(
5
,
5
);
// invG:
// [ x e e ]
// [ y ]
// [ y ]
// [ e z ]
// [ e z ]
// [ u ]
Mat_
<
double
>
invG
=
G
.
inv
(
DECOMP_CHOLESKY
);
ig11
=
invG
(
1
,
1
);
ig03
=
invG
(
0
,
3
);
ig33
=
invG
(
3
,
3
);
ig55
=
invG
(
5
,
5
);
}
setPolynomialExpansionConsts
(
polyN
,
polySigma
);
device
::
optflow_farneback
::
setUpdateMatricesConsts
();
void
FarnebackOpticalFlowImpl
::
setPolynomialExpansionConsts
(
int
n
,
double
sigma
)
{
std
::
vector
<
float
>
buf
(
n
*
6
+
3
);
float
*
g
=
&
buf
[
0
]
+
n
;
float
*
xg
=
g
+
n
*
2
+
1
;
float
*
xxg
=
xg
+
n
*
2
+
1
;
if
(
sigma
<
FLT_EPSILON
)
sigma
=
n
*
0.3
;
double
ig11
,
ig03
,
ig33
,
ig55
;
prepareGaussian
(
n
,
sigma
,
g
,
xg
,
xxg
,
ig11
,
ig03
,
ig33
,
ig55
);
for
(
int
k
=
numLevelsCropped
;
k
>=
0
;
k
--
)
device
::
optflow_farneback
::
setPolynomialExpansionConsts
(
n
,
g
,
xg
,
xxg
,
static_cast
<
float
>
(
ig11
),
static_cast
<
float
>
(
ig03
),
static_cast
<
float
>
(
ig33
),
static_cast
<
float
>
(
ig55
));
}
void
FarnebackOpticalFlowImpl
::
updateFlow_boxFilter
(
const
GpuMat
&
R0
,
const
GpuMat
&
R1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
GpuMat
&
M
,
GpuMat
&
bufM
,
int
blockSize
,
bool
updateMatrices
,
Stream
streams
[])
{
streams
[
0
].
waitForCompletion
();
if
(
deviceSupports
(
FEATURE_SET_COMPUTE_12
))
device
::
optflow_farneback
::
boxFilter5Gpu
(
M
,
blockSize
/
2
,
bufM
,
StreamAccessor
::
getStream
(
streams
[
0
]));
else
device
::
optflow_farneback
::
boxFilter5Gpu_CC11
(
M
,
blockSize
/
2
,
bufM
,
StreamAccessor
::
getStream
(
streams
[
0
]));
swap
(
M
,
bufM
);
scale
=
1
;
for
(
int
i
=
0
;
i
<
k
;
i
++
)
scale
*=
pyrScale
;
for
(
int
i
=
1
;
i
<
5
;
++
i
)
streams
[
i
].
waitForCompletion
();
device
::
optflow_farneback
::
updateFlowGpu
(
M
,
flowx
,
flowy
,
StreamAccessor
::
getStream
(
streams
[
0
]))
;
double
sigma
=
(
1.
/
scale
-
1
)
*
0.5
;
int
smoothSize
=
cvRound
(
sigma
*
5
)
|
1
;
smoothSize
=
std
::
max
(
smoothSize
,
3
);
if
(
updateMatrices
)
device
::
optflow_farneback
::
updateMatricesGpu
(
flowx
,
flowy
,
R0
,
R1
,
M
,
StreamAccessor
::
getStream
(
streams
[
0
]))
;
}
int
width
=
cvRound
(
size
.
width
*
scale
);
int
height
=
cvRound
(
size
.
height
*
scale
);
void
FarnebackOpticalFlowImpl
::
updateFlow_gaussianBlur
(
const
GpuMat
&
R0
,
const
GpuMat
&
R1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
GpuMat
&
M
,
GpuMat
&
bufM
,
int
blockSize
,
bool
updateMatrices
,
Stream
streams
[])
{
if
(
deviceSupports
(
FEATURE_SET_COMPUTE_12
))
device
::
optflow_farneback
::
gaussianBlur5Gpu
(
M
,
blockSize
/
2
,
bufM
,
BORDER_REPLICATE
,
StreamAccessor
::
getStream
(
streams
[
0
]));
else
device
::
optflow_farneback
::
gaussianBlur5Gpu_CC11
(
M
,
blockSize
/
2
,
bufM
,
BORDER_REPLICATE
,
StreamAccessor
::
getStream
(
streams
[
0
]));
swap
(
M
,
bufM
);
if
(
fastPyramids
)
{
width
=
pyramid0_
[
k
].
cols
;
height
=
pyramid0_
[
k
].
rows
;
}
device
::
optflow_farneback
::
updateFlowGpu
(
M
,
flowx
,
flowy
,
StreamAccessor
::
getStream
(
streams
[
0
]));
if
(
k
>
0
)
if
(
updateMatrices
)
device
::
optflow_farneback
::
updateMatricesGpu
(
flowx
,
flowy
,
R0
,
R1
,
M
,
StreamAccessor
::
getStream
(
streams
[
0
]));
}
void
FarnebackOpticalFlowImpl
::
calcImpl
(
const
GpuMat
&
frame0
,
const
GpuMat
&
frame1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
Stream
&
stream
)
{
CV_Assert
(
frame0
.
channels
()
==
1
&&
frame1
.
channels
()
==
1
);
CV_Assert
(
frame0
.
size
()
==
frame1
.
size
());
CV_Assert
(
polyN_
==
5
||
polyN_
==
7
);
CV_Assert
(
!
fastPyramids_
||
std
::
abs
(
pyrScale_
-
0.5
)
<
1e-6
);
Stream
streams
[
5
];
if
(
stream
)
streams
[
0
]
=
stream
;
Size
size
=
frame0
.
size
();
GpuMat
prevFlowX
,
prevFlowY
,
curFlowX
,
curFlowY
;
flowx
.
create
(
size
,
CV_32F
);
flowy
.
create
(
size
,
CV_32F
);
GpuMat
flowx0
=
flowx
;
GpuMat
flowy0
=
flowy
;
// Crop unnecessary levels
double
scale
=
1
;
int
numLevelsCropped
=
0
;
for
(;
numLevelsCropped
<
numLevels_
;
numLevelsCropped
++
)
{
curFlowX
.
create
(
height
,
width
,
CV_32F
);
curFlowY
.
create
(
height
,
width
,
CV_32F
);
scale
*=
pyrScale_
;
if
(
size
.
width
*
scale
<
MIN_SIZE
||
size
.
height
*
scale
<
MIN_SIZE
)
break
;
}
else
frame0
.
convertTo
(
frames_
[
0
],
CV_32F
,
streams
[
0
]);
frame1
.
convertTo
(
frames_
[
1
],
CV_32F
,
streams
[
1
]);
if
(
fastPyramids_
)
{
curFlowX
=
flowx0
;
curFlowY
=
flowy0
;
// Build Gaussian pyramids using pyrDown()
pyramid0_
.
resize
(
numLevelsCropped
+
1
);
pyramid1_
.
resize
(
numLevelsCropped
+
1
);
pyramid0_
[
0
]
=
frames_
[
0
];
pyramid1_
[
0
]
=
frames_
[
1
];
for
(
int
i
=
1
;
i
<=
numLevelsCropped
;
++
i
)
{
cuda
::
pyrDown
(
pyramid0_
[
i
-
1
],
pyramid0_
[
i
],
streams
[
0
]);
cuda
::
pyrDown
(
pyramid1_
[
i
-
1
],
pyramid1_
[
i
],
streams
[
1
]);
}
}
if
(
!
prevFlowX
.
data
)
setPolynomialExpansionConsts
(
polyN_
,
polySigma_
);
device
::
optflow_farneback
::
setUpdateMatricesConsts
();
for
(
int
k
=
numLevelsCropped
;
k
>=
0
;
k
--
)
{
if
(
flags
&
OPTFLOW_USE_INITIAL_FLOW
)
streams
[
0
].
waitForCompletion
();
scale
=
1
;
for
(
int
i
=
0
;
i
<
k
;
i
++
)
scale
*=
pyrScale_
;
double
sigma
=
(
1.
/
scale
-
1
)
*
0.5
;
int
smoothSize
=
cvRound
(
sigma
*
5
)
|
1
;
smoothSize
=
std
::
max
(
smoothSize
,
3
);
int
width
=
cvRound
(
size
.
width
*
scale
);
int
height
=
cvRound
(
size
.
height
*
scale
);
if
(
fastPyramids_
)
{
cuda
::
resize
(
flowx0
,
curFlowX
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
0
]);
cuda
::
resize
(
flowy0
,
curFlowY
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
1
]);
curFlowX
.
convertTo
(
curFlowX
,
curFlowX
.
depth
(),
scale
,
streams
[
0
]);
curFlowY
.
convertTo
(
curFlowY
,
curFlowY
.
depth
(),
scale
,
streams
[
1
]);
width
=
pyramid0_
[
k
].
cols
;
height
=
pyramid0_
[
k
].
rows
;
}
if
(
k
>
0
)
{
curFlowX
.
create
(
height
,
width
,
CV_32F
);
curFlowY
.
create
(
height
,
width
,
CV_32F
);
}
else
{
curFlowX
.
setTo
(
0
,
streams
[
0
])
;
curFlowY
.
setTo
(
0
,
streams
[
1
])
;
curFlowX
=
flowx0
;
curFlowY
=
flowy0
;
}
}
else
{
cuda
::
resize
(
prevFlowX
,
curFlowX
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
0
]);
cuda
::
resize
(
prevFlowY
,
curFlowY
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
1
]);
curFlowX
.
convertTo
(
curFlowX
,
curFlowX
.
depth
(),
1.
/
pyrScale
,
streams
[
0
]);
curFlowY
.
convertTo
(
curFlowY
,
curFlowY
.
depth
(),
1.
/
pyrScale
,
streams
[
1
]);
}
GpuMat
M
=
allocMatFromBuf
(
5
*
height
,
width
,
CV_32F
,
M_
);
GpuMat
bufM
=
allocMatFromBuf
(
5
*
height
,
width
,
CV_32F
,
bufM_
);
GpuMat
R
[
2
]
=
{
allocMatFromBuf
(
5
*
height
,
width
,
CV_32F
,
R_
[
0
]),
allocMatFromBuf
(
5
*
height
,
width
,
CV_32F
,
R_
[
1
])
};
if
(
!
prevFlowX
.
data
)
{
if
(
flags_
&
OPTFLOW_USE_INITIAL_FLOW
)
{
cuda
::
resize
(
flowx0
,
curFlowX
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
0
]);
cuda
::
resize
(
flowy0
,
curFlowY
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
1
]);
curFlowX
.
convertTo
(
curFlowX
,
curFlowX
.
depth
(),
scale
,
streams
[
0
]);
curFlowY
.
convertTo
(
curFlowY
,
curFlowY
.
depth
(),
scale
,
streams
[
1
]);
}
else
{
curFlowX
.
setTo
(
0
,
streams
[
0
]);
curFlowY
.
setTo
(
0
,
streams
[
1
]);
}
}
else
{
cuda
::
resize
(
prevFlowX
,
curFlowX
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
0
]);
cuda
::
resize
(
prevFlowY
,
curFlowY
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
1
]);
curFlowX
.
convertTo
(
curFlowX
,
curFlowX
.
depth
(),
1.
/
pyrScale_
,
streams
[
0
]);
curFlowY
.
convertTo
(
curFlowY
,
curFlowY
.
depth
(),
1.
/
pyrScale_
,
streams
[
1
]);
}
if
(
fastPyramids
)
{
device
::
optflow_farneback
::
polynomialExpansionGpu
(
pyramid0_
[
k
],
polyN
,
R
[
0
],
S
(
streams
[
0
]));
device
::
optflow_farneback
::
polynomialExpansionGpu
(
pyramid1_
[
k
],
polyN
,
R
[
1
],
S
(
streams
[
1
]));
}
else
{
GpuMat
blurredFrame
[
2
]
=
GpuMat
M
=
allocMatFromBuf
(
5
*
height
,
width
,
CV_32F
,
M_
);
GpuMat
bufM
=
allocMatFromBuf
(
5
*
height
,
width
,
CV_32F
,
bufM_
);
GpuMat
R
[
2
]
=
{
allocMatFromBuf
(
size
.
height
,
size
.
width
,
CV_32F
,
blurredFrame
_
[
0
]),
allocMatFromBuf
(
size
.
height
,
size
.
width
,
CV_32F
,
blurredFrame
_
[
1
])
allocMatFromBuf
(
5
*
height
,
width
,
CV_32F
,
R
_
[
0
]),
allocMatFromBuf
(
5
*
height
,
width
,
CV_32F
,
R
_
[
1
])
};
GpuMat
pyrLevel
[
2
]
=
if
(
fastPyramids_
)
{
allocMatFromBuf
(
height
,
width
,
CV_32F
,
pyrLevel_
[
0
]),
allocMatFromBuf
(
height
,
width
,
CV_32F
,
pyrLevel_
[
1
])
};
device
::
optflow_farneback
::
polynomialExpansionGpu
(
pyramid0_
[
k
],
polyN_
,
R
[
0
],
StreamAccessor
::
getStream
(
streams
[
0
]));
device
::
optflow_farneback
::
polynomialExpansionGpu
(
pyramid1_
[
k
],
polyN_
,
R
[
1
],
StreamAccessor
::
getStream
(
streams
[
1
]));
}
else
{
GpuMat
blurredFrame
[
2
]
=
{
allocMatFromBuf
(
size
.
height
,
size
.
width
,
CV_32F
,
blurredFrame_
[
0
]),
allocMatFromBuf
(
size
.
height
,
size
.
width
,
CV_32F
,
blurredFrame_
[
1
])
};
GpuMat
pyrLevel
[
2
]
=
{
allocMatFromBuf
(
height
,
width
,
CV_32F
,
pyrLevel_
[
0
]),
allocMatFromBuf
(
height
,
width
,
CV_32F
,
pyrLevel_
[
1
])
};
Mat
g
=
getGaussianKernel
(
smoothSize
,
sigma
,
CV_32F
);
device
::
optflow_farneback
::
setGaussianBlurKernel
(
g
.
ptr
<
float
>
(
smoothSize
/
2
),
smoothSize
/
2
);
for
(
int
i
=
0
;
i
<
2
;
i
++
)
{
device
::
optflow_farneback
::
gaussianBlurGpu
(
frames_
[
i
],
smoothSize
/
2
,
blurredFrame
[
i
],
BORDER_REFLECT101
,
StreamAccessor
::
getStream
(
streams
[
i
]));
cuda
::
resize
(
blurredFrame
[
i
],
pyrLevel
[
i
],
Size
(
width
,
height
),
0.0
,
0.0
,
INTER_LINEAR
,
streams
[
i
]);
device
::
optflow_farneback
::
polynomialExpansionGpu
(
pyrLevel
[
i
],
polyN_
,
R
[
i
],
StreamAccessor
::
getStream
(
streams
[
i
]));
}
}
Mat
g
=
getGaussianKernel
(
smoothSize
,
sigma
,
CV_32F
);
device
::
optflow_farneback
::
setGaussianBlurKernel
(
g
.
ptr
<
float
>
(
smoothSize
/
2
),
smoothSize
/
2
);
streams
[
1
].
waitForCompletion
(
);
device
::
optflow_farneback
::
updateMatricesGpu
(
curFlowX
,
curFlowY
,
R
[
0
],
R
[
1
],
M
,
StreamAccessor
::
getStream
(
streams
[
0
])
);
for
(
int
i
=
0
;
i
<
2
;
i
++
)
if
(
flags_
&
OPTFLOW_FARNEBACK_GAUSSIAN
)
{
device
::
optflow_farneback
::
gaussianBlurGpu
(
frames_
[
i
],
smoothSize
/
2
,
blurredFrame
[
i
],
BORDER_REFLECT101
,
S
(
streams
[
i
]));
cuda
::
resize
(
blurredFrame
[
i
],
pyrLevel
[
i
],
Size
(
width
,
height
),
0.0
,
0.0
,
INTER_LINEAR
,
streams
[
i
]);
device
::
optflow_farneback
::
polynomialExpansionGpu
(
pyrLevel
[
i
],
polyN
,
R
[
i
],
S
(
streams
[
i
]));
Mat
g
=
getGaussianKernel
(
winSize_
,
winSize_
/
2
*
0.3
f
,
CV_32F
);
device
::
optflow_farneback
::
setGaussianBlurKernel
(
g
.
ptr
<
float
>
(
winSize_
/
2
),
winSize_
/
2
);
}
for
(
int
i
=
0
;
i
<
numIters_
;
i
++
)
{
if
(
flags_
&
OPTFLOW_FARNEBACK_GAUSSIAN
)
updateFlow_gaussianBlur
(
R
[
0
],
R
[
1
],
curFlowX
,
curFlowY
,
M
,
bufM
,
winSize_
,
i
<
numIters_
-
1
,
streams
);
else
updateFlow_boxFilter
(
R
[
0
],
R
[
1
],
curFlowX
,
curFlowY
,
M
,
bufM
,
winSize_
,
i
<
numIters_
-
1
,
streams
);
}
}
streams
[
1
].
waitForCompletion
();
device
::
optflow_farneback
::
updateMatricesGpu
(
curFlowX
,
curFlowY
,
R
[
0
],
R
[
1
],
M
,
S
(
streams
[
0
]));
if
(
flags
&
OPTFLOW_FARNEBACK_GAUSSIAN
)
{
Mat
g
=
getGaussianKernel
(
winSize
,
winSize
/
2
*
0.3
f
,
CV_32F
);
device
::
optflow_farneback
::
setGaussianBlurKernel
(
g
.
ptr
<
float
>
(
winSize
/
2
),
winSize
/
2
);
}
for
(
int
i
=
0
;
i
<
numIters
;
i
++
)
{
if
(
flags
&
OPTFLOW_FARNEBACK_GAUSSIAN
)
updateFlow_gaussianBlur
(
R
[
0
],
R
[
1
],
curFlowX
,
curFlowY
,
M
,
bufM
,
winSize
,
i
<
numIters
-
1
,
streams
);
else
updateFlow_boxFilter
(
R
[
0
],
R
[
1
],
curFlowX
,
curFlowY
,
M
,
bufM
,
winSize
,
i
<
numIters
-
1
,
streams
);
prevFlowX
=
curFlowX
;
prevFlowY
=
curFlowY
;
}
prevFlowX
=
curFlowX
;
prevFlowY
=
curFlowY
;
}
flowx
=
curFlowX
;
flowy
=
curFlowY
;
flowx
=
curFlowX
;
flowy
=
curFlowY
;
if
(
!
stream
)
streams
[
0
].
waitForCompletion
();
}
}
if
(
!
S
(
s
))
streams
[
0
].
waitForCompletion
();
// Factory for the CUDA build: forwards all arguments unchanged to the
// FarnebackOpticalFlowImpl constructor and returns the new object through the
// public FarnebackOpticalFlow interface.
Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int numLevels, double pyrScale, bool fastPyramids, int winSize,
                                                                  int numIters, int polyN, double polySigma, int flags)
{
    Ptr<FarnebackOpticalFlowImpl> impl =
        makePtr<FarnebackOpticalFlowImpl>(numLevels, pyrScale, fastPyramids, winSize,
                                          numIters, polyN, polySigma, flags);
    return impl;
}
#endif
modules/cudaoptflow/src/pyrlk.cpp
View file @
381216aa
...
...
@@ -47,37 +47,54 @@ using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
cv
::
cuda
::
PyrLKOpticalFlow
::
PyrLKOpticalFlow
()
{
throw_no_cuda
();
}
void
cv
::
cuda
::
PyrLKOpticalFlow
::
sparse
(
const
GpuMat
&
,
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
GpuMat
*
)
{
throw_no_cuda
();
}
void
cv
::
cuda
::
PyrLKOpticalFlow
::
dense
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
GpuMat
*
)
{
throw_no_cuda
();
}
void
cv
::
cuda
::
PyrLKOpticalFlow
::
releaseMemory
()
{}
// Stub compiled when OpenCV is built without CUDA support. All parameters are
// ignored; the call reports the missing backend through throw_no_cuda() and
// otherwise yields a default-constructed (empty) pointer.
Ptr<SparsePyrLKOpticalFlow> cv::cuda::SparsePyrLKOpticalFlow::create(Size, int, int, bool)
{
    throw_no_cuda();

    Ptr<SparsePyrLKOpticalFlow> none;
    return none;
}
// Stub compiled when OpenCV is built without CUDA support. All parameters are
// ignored; the call only reports the missing backend through throw_no_cuda().
Ptr<DensePyrLKOpticalFlow> cv::cuda::DensePyrLKOpticalFlow::create(Size, int, int, bool)
{
    throw_no_cuda();
    // The returned pointer type must match the declared return type. The
    // previous code returned Ptr<SparsePyrLKOpticalFlow>, copied from the
    // sparse stub, which is a different algorithm interface.
    // NOTE(review): throw_no_cuda() presumably never returns; this empty
    // pointer only satisfies the signature.
    return Ptr<DensePyrLKOpticalFlow>();
}
#else
/* !defined (HAVE_CUDA) */
namespace
pyrlk
{
void
loadConstants
(
int2
winSize
,
int
iters
);
void
loadConstants
(
int2
winSize
,
int
iters
,
cudaStream_t
stream
);
void
sparse1
(
PtrStepSzf
I
,
PtrStepSzf
J
,
const
float2
*
prevPts
,
float2
*
nextPts
,
uchar
*
status
,
float
*
err
,
int
ptcount
,
int
level
,
dim3
block
,
dim3
patch
,
cudaStream_t
stream
=
0
);
int
level
,
dim3
block
,
dim3
patch
,
cudaStream_t
stream
);
void
sparse4
(
PtrStepSz
<
float4
>
I
,
PtrStepSz
<
float4
>
J
,
const
float2
*
prevPts
,
float2
*
nextPts
,
uchar
*
status
,
float
*
err
,
int
ptcount
,
int
level
,
dim3
block
,
dim3
patch
,
cudaStream_t
stream
=
0
);
int
level
,
dim3
block
,
dim3
patch
,
cudaStream_t
stream
);
void
dense
(
PtrStepSzb
I
,
PtrStepSzf
J
,
PtrStepSzf
u
,
PtrStepSzf
v
,
PtrStepSzf
prevU
,
PtrStepSzf
prevV
,
PtrStepSzf
err
,
int2
winSize
,
cudaStream_t
stream
=
0
);
}
cv
::
cuda
::
PyrLKOpticalFlow
::
PyrLKOpticalFlow
()
{
winSize
=
Size
(
21
,
21
);
maxLevel
=
3
;
iters
=
30
;
useInitialFlow
=
false
;
PtrStepSzf
err
,
int2
winSize
,
cudaStream_t
stream
);
}
namespace
{
void
calcPatchSize
(
cv
::
Size
winSize
,
dim3
&
block
,
dim3
&
patch
)
// Shared implementation of pyramidal Lucas-Kanade optical flow, holding the
// algorithm parameters and the image pyramids reused between calls.
class PyrLKOpticalFlowBase
{
public:
    // Stores the given parameters in the corresponding members.
    PyrLKOpticalFlowBase(Size winSize, int maxLevel, int iters, bool useInitialFlow);

    // Sparse flow: tracks the points in prevPts from prevImg to nextImg,
    // writing results to nextPts and status. `err` is optional and may be
    // null. Work is issued on `stream`.
    void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts,
                GpuMat& nextPts, GpuMat& status, GpuMat* err, Stream& stream);

    // Dense flow: computes the per-pixel flow components u and v between
    // prevImg and nextImg. Work is issued on `stream`.
    void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, Stream& stream);

protected:
    // Algorithm parameters, set by the constructor.
    Size winSize_;
    int maxLevel_;
    int iters_;
    bool useInitialFlow_;

private:
    // Image pyramids for the previous and next frame.
    std::vector<GpuMat> prevPyr_, nextPyr_;
};
// Copies each argument into the matching trailing-underscore member; the
// pyramids start out empty.
PyrLKOpticalFlowBase::PyrLKOpticalFlowBase(Size winSize, int maxLevel, int iters, bool useInitialFlow)
    : winSize_(winSize)
    , maxLevel_(maxLevel)
    , iters_(iters)
    , useInitialFlow_(useInitialFlow)
{
}
void
calcPatchSize
(
Size
winSize
,
dim3
&
block
,
dim3
&
patch
)
{
if
(
winSize
.
width
>
32
&&
winSize
.
width
>
2
*
winSize
.
height
)
{
...
...
@@ -95,156 +112,239 @@ namespace
block
.
z
=
patch
.
z
=
1
;
}
}
void
cv
::
cuda
::
PyrLKOpticalFlow
::
sparse
(
const
GpuMat
&
prevImg
,
const
GpuMat
&
nextImg
,
const
GpuMat
&
prevPts
,
GpuMat
&
nextPts
,
GpuMat
&
status
,
GpuMat
*
err
)
{
if
(
prevPts
.
empty
())
void
PyrLKOpticalFlowBase
::
sparse
(
const
GpuMat
&
prevImg
,
const
GpuMat
&
nextImg
,
const
GpuMat
&
prevPts
,
GpuMat
&
nextPts
,
GpuMat
&
status
,
GpuMat
*
err
,
Stream
&
stream
)
{
nextPts
.
release
();
status
.
release
();
if
(
err
)
err
->
release
();
return
;
}
if
(
prevPts
.
empty
())
{
nextPts
.
release
();
status
.
release
();
if
(
err
)
err
->
release
();
return
;
}
dim3
block
,
patch
;
calcPatchSize
(
winSize
,
block
,
patch
);
dim3
block
,
patch
;
calcPatchSize
(
winSize_
,
block
,
patch
);
CV_Assert
(
prevImg
.
channels
()
==
1
||
prevImg
.
channels
()
==
3
||
prevImg
.
channels
()
==
4
);
CV_Assert
(
prevImg
.
size
()
==
nextImg
.
size
()
&&
prevImg
.
type
()
==
nextImg
.
type
()
);
CV_Assert
(
maxLevel
>=
0
);
CV_Assert
(
winSize
.
width
>
2
&&
winSize
.
height
>
2
);
CV_Assert
(
patch
.
x
>
0
&&
patch
.
x
<
6
&&
patch
.
y
>
0
&&
patch
.
y
<
6
);
CV_Assert
(
prevPts
.
rows
==
1
&&
prevPts
.
type
()
==
CV_32FC2
);
CV_Assert
(
prevImg
.
channels
()
==
1
||
prevImg
.
channels
()
==
3
||
prevImg
.
channels
()
==
4
);
CV_Assert
(
prevImg
.
size
()
==
nextImg
.
size
()
&&
prevImg
.
type
()
==
nextImg
.
type
()
);
CV_Assert
(
maxLevel_
>=
0
);
CV_Assert
(
winSize_
.
width
>
2
&&
winSize_
.
height
>
2
);
CV_Assert
(
patch
.
x
>
0
&&
patch
.
x
<
6
&&
patch
.
y
>
0
&&
patch
.
y
<
6
);
CV_Assert
(
prevPts
.
rows
==
1
&&
prevPts
.
type
()
==
CV_32FC2
);
if
(
useInitialFlow
)
CV_Assert
(
nextPts
.
size
()
==
prevPts
.
size
()
&&
nextPts
.
type
()
==
CV_32FC2
);
else
ensureSizeIsEnough
(
1
,
prevPts
.
cols
,
prevPts
.
type
(),
nextPts
);
if
(
useInitialFlow_
)
CV_Assert
(
nextPts
.
size
()
==
prevPts
.
size
()
&&
nextPts
.
type
()
==
prevPts
.
type
()
);
else
ensureSizeIsEnough
(
1
,
prevPts
.
cols
,
prevPts
.
type
(),
nextPts
);
GpuMat
temp1
=
(
useInitialFlow
?
nextPts
:
prevPts
).
reshape
(
1
);
GpuMat
temp2
=
nextPts
.
reshape
(
1
);
cuda
::
multiply
(
temp1
,
Scalar
::
all
(
1.0
/
(
1
<<
maxLevel
)
/
2.0
),
temp2
);
GpuMat
temp1
=
(
useInitialFlow_
?
nextPts
:
prevPts
).
reshape
(
1
);
GpuMat
temp2
=
nextPts
.
reshape
(
1
);
cuda
::
multiply
(
temp1
,
Scalar
::
all
(
1.0
/
(
1
<<
maxLevel_
)
/
2.0
),
temp2
,
1
,
-
1
,
stream
);
ensureSizeIsEnough
(
1
,
prevPts
.
cols
,
CV_8UC1
,
status
);
status
.
setTo
(
Scalar
::
all
(
1
)
);
ensureSizeIsEnough
(
1
,
prevPts
.
cols
,
CV_8UC1
,
status
);
status
.
setTo
(
Scalar
::
all
(
1
),
stream
);
if
(
err
)
ensureSizeIsEnough
(
1
,
prevPts
.
cols
,
CV_32FC1
,
*
err
);
if
(
err
)
ensureSizeIsEnough
(
1
,
prevPts
.
cols
,
CV_32FC1
,
*
err
);
// build the image pyramids.
// build the image pyramids.
prevPyr_
.
resize
(
maxLevel
+
1
);
nextPyr_
.
resize
(
maxLevel
+
1
);
BufferPool
pool
(
stream
);
int
cn
=
prevImg
.
channels
();
prevPyr_
.
resize
(
maxLevel_
+
1
);
nextPyr_
.
resize
(
maxLevel_
+
1
);
if
(
cn
==
1
||
cn
==
4
)
{
prevImg
.
convertTo
(
prevPyr_
[
0
],
CV_32F
);
nextImg
.
convertTo
(
nextPyr_
[
0
],
CV_32F
);
}
else
{
cuda
::
cvtColor
(
prevImg
,
buf_
,
COLOR_BGR2BGRA
);
buf_
.
convertTo
(
prevPyr_
[
0
],
CV_32F
);
int
cn
=
prevImg
.
channels
();
if
(
cn
==
1
||
cn
==
4
)
{
prevImg
.
convertTo
(
prevPyr_
[
0
],
CV_32F
,
stream
);
nextImg
.
convertTo
(
nextPyr_
[
0
],
CV_32F
,
stream
);
}
else
{
GpuMat
buf
=
pool
.
getBuffer
(
prevImg
.
size
(),
CV_MAKE_TYPE
(
prevImg
.
depth
(),
4
));
cuda
::
cvtColor
(
nextImg
,
buf_
,
COLOR_BGR2BGRA
);
buf_
.
convertTo
(
nextPyr_
[
0
],
CV_32F
);
cuda
::
cvtColor
(
prevImg
,
buf
,
COLOR_BGR2BGRA
,
0
,
stream
);
buf
.
convertTo
(
prevPyr_
[
0
],
CV_32F
,
stream
);
cuda
::
cvtColor
(
nextImg
,
buf
,
COLOR_BGR2BGRA
,
0
,
stream
);
buf
.
convertTo
(
nextPyr_
[
0
],
CV_32F
,
stream
);
}
for
(
int
level
=
1
;
level
<=
maxLevel_
;
++
level
)
{
cuda
::
pyrDown
(
prevPyr_
[
level
-
1
],
prevPyr_
[
level
],
stream
);
cuda
::
pyrDown
(
nextPyr_
[
level
-
1
],
nextPyr_
[
level
],
stream
);
}
pyrlk
::
loadConstants
(
make_int2
(
winSize_
.
width
,
winSize_
.
height
),
iters_
,
StreamAccessor
::
getStream
(
stream
));
for
(
int
level
=
maxLevel_
;
level
>=
0
;
level
--
)
{
if
(
cn
==
1
)
{
pyrlk
::
sparse1
(
prevPyr_
[
level
],
nextPyr_
[
level
],
prevPts
.
ptr
<
float2
>
(),
nextPts
.
ptr
<
float2
>
(),
status
.
ptr
(),
level
==
0
&&
err
?
err
->
ptr
<
float
>
()
:
0
,
prevPts
.
cols
,
level
,
block
,
patch
,
StreamAccessor
::
getStream
(
stream
));
}
else
{
pyrlk
::
sparse4
(
prevPyr_
[
level
],
nextPyr_
[
level
],
prevPts
.
ptr
<
float2
>
(),
nextPts
.
ptr
<
float2
>
(),
status
.
ptr
(),
level
==
0
&&
err
?
err
->
ptr
<
float
>
()
:
0
,
prevPts
.
cols
,
level
,
block
,
patch
,
StreamAccessor
::
getStream
(
stream
));
}
}
}
for
(
int
level
=
1
;
level
<=
maxLevel
;
++
level
)
void
PyrLKOpticalFlowBase
::
dense
(
const
GpuMat
&
prevImg
,
const
GpuMat
&
nextImg
,
GpuMat
&
u
,
GpuMat
&
v
,
Stream
&
stream
)
{
cuda
::
pyrDown
(
prevPyr_
[
level
-
1
],
prevPyr_
[
level
]);
cuda
::
pyrDown
(
nextPyr_
[
level
-
1
],
nextPyr_
[
level
]);
}
CV_Assert
(
prevImg
.
type
()
==
CV_8UC1
);
CV_Assert
(
prevImg
.
size
()
==
nextImg
.
size
()
&&
prevImg
.
type
()
==
nextImg
.
type
()
);
CV_Assert
(
maxLevel_
>=
0
);
CV_Assert
(
winSize_
.
width
>
2
&&
winSize_
.
height
>
2
);
pyrlk
::
loadConstants
(
make_int2
(
winSize
.
width
,
winSize
.
height
),
iters
);
// build the image pyramids.
for
(
int
level
=
maxLevel
;
level
>=
0
;
level
--
)
{
if
(
cn
==
1
)
prevPyr_
.
resize
(
maxLevel_
+
1
);
nextPyr_
.
resize
(
maxLevel_
+
1
);
prevPyr_
[
0
]
=
prevImg
;
nextImg
.
convertTo
(
nextPyr_
[
0
],
CV_32F
,
stream
);
for
(
int
level
=
1
;
level
<=
maxLevel_
;
++
level
)
{
pyrlk
::
sparse1
(
prevPyr_
[
level
],
nextPyr_
[
level
],
prevPts
.
ptr
<
float2
>
(),
nextPts
.
ptr
<
float2
>
(),
status
.
ptr
(),
level
==
0
&&
err
?
err
->
ptr
<
float
>
()
:
0
,
prevPts
.
cols
,
level
,
block
,
patch
);
cuda
::
pyrDown
(
prevPyr_
[
level
-
1
],
prevPyr_
[
level
],
stream
);
cuda
::
pyrDown
(
nextPyr_
[
level
-
1
],
nextPyr_
[
level
],
stream
);
}
else
BufferPool
pool
(
stream
);
GpuMat
uPyr
[]
=
{
pool
.
getBuffer
(
prevImg
.
size
(),
CV_32FC1
),
pool
.
getBuffer
(
prevImg
.
size
(),
CV_32FC1
),
};
GpuMat
vPyr
[]
=
{
pool
.
getBuffer
(
prevImg
.
size
(),
CV_32FC1
),
pool
.
getBuffer
(
prevImg
.
size
(),
CV_32FC1
),
};
uPyr
[
0
].
setTo
(
Scalar
::
all
(
0
),
stream
);
vPyr
[
0
].
setTo
(
Scalar
::
all
(
0
),
stream
);
uPyr
[
1
].
setTo
(
Scalar
::
all
(
0
),
stream
);
vPyr
[
1
].
setTo
(
Scalar
::
all
(
0
),
stream
);
int2
winSize2i
=
make_int2
(
winSize_
.
width
,
winSize_
.
height
);
pyrlk
::
loadConstants
(
winSize2i
,
iters_
,
StreamAccessor
::
getStream
(
stream
));
int
idx
=
0
;
for
(
int
level
=
maxLevel_
;
level
>=
0
;
level
--
)
{
pyrlk
::
sparse4
(
prevPyr_
[
level
],
nextPyr_
[
level
],
prevPts
.
ptr
<
float2
>
(),
nextPts
.
ptr
<
float2
>
(),
status
.
ptr
(),
level
==
0
&&
err
?
err
->
ptr
<
float
>
()
:
0
,
prevPts
.
cols
,
level
,
block
,
patch
);
int
idx2
=
(
idx
+
1
)
&
1
;
pyrlk
::
dense
(
prevPyr_
[
level
],
nextPyr_
[
level
],
uPyr
[
idx
],
vPyr
[
idx
],
uPyr
[
idx2
],
vPyr
[
idx2
],
PtrStepSzf
(),
winSize2i
,
StreamAccessor
::
getStream
(
stream
));
if
(
level
>
0
)
idx
=
idx2
;
}
uPyr
[
idx
].
copyTo
(
u
,
stream
);
vPyr
[
idx
].
copyTo
(
v
,
stream
);
}
}
void
cv
::
cuda
::
PyrLKOpticalFlow
::
dense
(
const
GpuMat
&
prevImg
,
const
GpuMat
&
nextImg
,
GpuMat
&
u
,
GpuMat
&
v
,
GpuMat
*
err
)
{
CV_Assert
(
prevImg
.
type
()
==
CV_8UC1
);
CV_Assert
(
prevImg
.
size
()
==
nextImg
.
size
()
&&
prevImg
.
type
()
==
nextImg
.
type
());
CV_Assert
(
maxLevel
>=
0
);
CV_Assert
(
winSize
.
width
>
2
&&
winSize
.
height
>
2
);
class
SparsePyrLKOpticalFlowImpl
:
public
SparsePyrLKOpticalFlow
,
private
PyrLKOpticalFlowBase
{
public
:
// Constructs the sparse implementation. All four settings are handed,
// unchanged and in the same order, to the PyrLKOpticalFlowBase constructor;
// the constructor body itself does nothing.
SparsePyrLKOpticalFlowImpl(Size winSize, int maxLevel, int iters, bool useInitialFlow)
    : PyrLKOpticalFlowBase(winSize, maxLevel, iters, useInitialFlow)
{
}
if
(
err
)
err
->
create
(
prevImg
.
size
(),
CV_32FC1
);
// Returns the value currently held in winSize_.
virtual Size getWinSize() const
{
    return winSize_;
}

// Overwrites winSize_ with the given size; no validation is done here.
virtual void setWinSize(Size winSize)
{
    winSize_ = winSize;
}
// build the image pyramids.
// Returns the value currently held in maxLevel_.
virtual int getMaxLevel() const
{
    return maxLevel_;
}

// Overwrites maxLevel_ with the given value; no validation is done here.
virtual void setMaxLevel(int maxLevel)
{
    maxLevel_ = maxLevel;
}
prevPyr_
.
resize
(
maxLevel
+
1
);
nextPyr_
.
resize
(
maxLevel
+
1
);
// Returns the value currently held in iters_.
virtual int getNumIters() const
{
    return iters_;
}

// Overwrites iters_ with the given value; no validation is done here.
virtual void setNumIters(int iters)
{
    iters_ = iters;
}
prevPyr_
[
0
]
=
prevImg
;
nextImg
.
convertTo
(
nextPyr_
[
0
],
CV_32F
);
// Returns the value currently held in useInitialFlow_.
virtual bool getUseInitialFlow() const
{
    return useInitialFlow_;
}

// Overwrites useInitialFlow_ with the given flag.
virtual void setUseInitialFlow(bool useInitialFlow)
{
    useInitialFlow_ = useInitialFlow;
}
for
(
int
level
=
1
;
level
<=
maxLevel
;
++
level
)
// Sparse-flow entry point: converts the array proxies into GpuMat objects
// and forwards everything, including the stream, to sparse().
virtual void calc(InputArray _prevImg, InputArray _nextImg,
                  InputArray _prevPts, InputOutputArray _nextPts,
                  OutputArray _status, OutputArray _err,
                  Stream& stream)
{
    // Inputs are fetched as local GpuMat values.
    const GpuMat prevFrame = _prevImg.getGpuMat();
    const GpuMat nextFrame = _nextImg.getGpuMat();
    const GpuMat srcPoints = _prevPts.getGpuMat();

    // Outputs are bound by reference to the matrices behind the proxies.
    GpuMat& dstPoints   = _nextPts.getGpuMatRef();
    GpuMat& trackStatus = _status.getGpuMatRef();

    // The error matrix is optional: sparse() receives NULL unless the
    // caller's proxy reports that the output is needed.
    GpuMat* trackError = NULL;
    if (_err.needed())
        trackError = &(_err.getGpuMatRef());

    sparse(prevFrame, nextFrame, srcPoints, dstPoints, trackStatus, trackError, stream);
}
};
class
DensePyrLKOpticalFlowImpl
:
public
DensePyrLKOpticalFlow
,
private
PyrLKOpticalFlowBase
{
cuda
::
pyrDown
(
prevPyr_
[
level
-
1
],
prevPyr_
[
level
]);
cuda
::
pyrDown
(
nextPyr_
[
level
-
1
],
nextPyr_
[
level
]);
}
public
:
// Constructs the dense implementation. All four settings are handed,
// unchanged and in the same order, to the PyrLKOpticalFlowBase constructor;
// the constructor body itself does nothing.
DensePyrLKOpticalFlowImpl(Size winSize, int maxLevel, int iters, bool useInitialFlow)
    : PyrLKOpticalFlowBase(winSize, maxLevel, iters, useInitialFlow)
{
}
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
uPyr_
[
0
]);
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
vPyr_
[
0
]);
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
uPyr_
[
1
]);
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
vPyr_
[
1
]);
uPyr_
[
0
].
setTo
(
Scalar
::
all
(
0
));
vPyr_
[
0
].
setTo
(
Scalar
::
all
(
0
));
uPyr_
[
1
].
setTo
(
Scalar
::
all
(
0
));
vPyr_
[
1
].
setTo
(
Scalar
::
all
(
0
));
// Returns the value currently held in winSize_.
virtual Size getWinSize() const
{
    return winSize_;
}

// Overwrites winSize_ with the given size; no validation is done here.
virtual void setWinSize(Size winSize)
{
    winSize_ = winSize;
}
int2
winSize2i
=
make_int2
(
winSize
.
width
,
winSize
.
height
);
pyrlk
::
loadConstants
(
winSize2i
,
iters
);
// Returns the value currently held in maxLevel_.
virtual int getMaxLevel() const
{
    return maxLevel_;
}

// Overwrites maxLevel_ with the given value; no validation is done here.
virtual void setMaxLevel(int maxLevel)
{
    maxLevel_ = maxLevel;
}
PtrStepSzf
derr
=
err
?
*
err
:
PtrStepSzf
();
// Returns the value currently held in iters_.
virtual int getNumIters() const
{
    return iters_;
}

// Overwrites iters_ with the given value; no validation is done here.
virtual void setNumIters(int iters)
{
    iters_ = iters;
}
int
idx
=
0
;
// Returns the value currently held in useInitialFlow_.
virtual bool getUseInitialFlow() const
{
    return useInitialFlow_;
}

// Overwrites useInitialFlow_ with the given flag.
virtual void setUseInitialFlow(bool useInitialFlow)
{
    useInitialFlow_ = useInitialFlow;
}
for
(
int
level
=
maxLevel
;
level
>=
0
;
level
--
)
{
int
idx2
=
(
idx
+
1
)
&
1
;
virtual
void
calc
(
InputArray
_prevImg
,
InputArray
_nextImg
,
InputOutputArray
_flow
,
Stream
&
stream
)
{
const
GpuMat
prevImg
=
_prevImg
.
getGpuMat
();
const
GpuMat
nextImg
=
_nextImg
.
getGpuMat
();
pyrlk
::
dense
(
prevPyr_
[
level
],
nextPyr_
[
level
],
uPyr_
[
idx
],
vPyr_
[
idx
],
uPyr_
[
idx2
],
vPyr_
[
idx2
],
level
==
0
?
derr
:
PtrStepSzf
(),
winSize2i
);
BufferPool
pool
(
stream
);
GpuMat
u
=
pool
.
getBuffer
(
prevImg
.
size
(),
CV_32FC1
);
GpuMat
v
=
pool
.
getBuffer
(
prevImg
.
size
(),
CV_32FC1
);
if
(
level
>
0
)
idx
=
idx2
;
}
dense
(
prevImg
,
nextImg
,
u
,
v
,
stream
);
uPyr_
[
idx
].
copyTo
(
u
);
vPyr_
[
idx
].
copyTo
(
v
);
GpuMat
flows
[]
=
{
u
,
v
};
cuda
::
merge
(
flows
,
2
,
_flow
,
stream
);
}
};
}
void
cv
::
cuda
::
PyrLKOpticalFlow
::
releaseMemory
(
)
Ptr
<
SparsePyrLKOpticalFlow
>
cv
::
cuda
::
SparsePyrLKOpticalFlow
::
create
(
Size
winSize
,
int
maxLevel
,
int
iters
,
bool
useInitialFlow
)
{
prevPyr_
.
clear
();
nextPyr_
.
clear
();
buf_
.
release
();
uPyr_
[
0
].
release
();
vPyr_
[
0
].
release
();
return
makePtr
<
SparsePyrLKOpticalFlowImpl
>
(
winSize
,
maxLevel
,
iters
,
useInitialFlow
);
}
uPyr_
[
1
].
release
();
vPyr_
[
1
].
release
();
// Factory for the dense pyramidal LK flow: builds a DensePyrLKOpticalFlowImpl
// from the four arguments via makePtr and returns it through the public
// DensePyrLKOpticalFlow pointer type.
Ptr<DensePyrLKOpticalFlow> cv::cuda::DensePyrLKOpticalFlow::create(Size winSize, int maxLevel, int iters, bool useInitialFlow)
{
    return makePtr<DensePyrLKOpticalFlowImpl>(winSize, maxLevel, iters, useInitialFlow);
}
#endif
/* !defined (HAVE_CUDA) */
modules/cudaoptflow/src/tvl1flow.cpp
View file @
381216aa
...
...
@@ -44,256 +44,338 @@
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
// Constructor stub for the branch compiled when HAVE_CUDA is not defined or
// CUDA_DISABLER is defined: it only calls throw_no_cuda().
cv::cuda::OpticalFlowDual_TVL1_CUDA::OpticalFlowDual_TVL1_CUDA()
{
    throw_no_cuda();
}
// Call-operator stub for the no-CUDA branch: all four arguments are ignored
// and the body only calls throw_no_cuda().
void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&)
{
    throw_no_cuda();
}
// No-CUDA stub: intentionally empty, so calling it does nothing.
void cv::cuda::OpticalFlowDual_TVL1_CUDA::collectGarbage()
{
}
// Per-scale worker stub for the no-CUDA branch: all five arguments are
// ignored and the body only calls throw_no_cuda().
void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&)
{
    throw_no_cuda();
}
// Factory stub for the branch compiled when HAVE_CUDA is not defined or
// CUDA_DISABLER is defined. Every argument is ignored.
Ptr<OpticalFlowDual_TVL1> cv::cuda::OpticalFlowDual_TVL1::create(double, double, double, int, int, double, int, double, double, bool)
{
    throw_no_cuda();

    // NOTE(review): presumably unreachable because throw_no_cuda() throws;
    // the empty Ptr keeps the function well-formed either way.
    return Ptr<OpticalFlowDual_TVL1>();
}
#else
using
namespace
cv
;
using
namespace
cv
::
cuda
;
cv
::
cuda
::
OpticalFlowDual_TVL1_CUDA
::
OpticalFlowDual_TVL1_CUDA
()
namespace
tvl1flow
{
tau
=
0.25
;
lambda
=
0.15
;
theta
=
0.3
;
nscales
=
5
;
warps
=
5
;
epsilon
=
0.01
;
iterations
=
300
;
scaleStep
=
0.8
;
gamma
=
0.0
;
useInitialFlow
=
false
;
// Prototype only; the definition is not part of this file.
// NOTE(review): judging by the names, this presumably writes the centred
// x/y gradients of src into dx/dy on the given CUDA stream - confirm
// against the kernel source.
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy, cudaStream_t stream);
// Prototype only; the definition is not part of this file.
// NOTE(review): which arguments are inputs and which are outputs cannot be
// told from here; the names suggest I1w/I1wx/I1wy/grad/rho are produced from
// I0, I1, its gradients and the flow (u1, u2) - confirm in the kernel source.
void warpBackward(PtrStepSzf I0, PtrStepSzf I1,
                  PtrStepSzf I1x, PtrStepSzf I1y,
                  PtrStepSzf u1, PtrStepSzf u2,
                  PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy,
                  PtrStepSzf grad, PtrStepSzf rho,
                  cudaStream_t stream);
void
estimateU
(
PtrStepSzf
I1wx
,
PtrStepSzf
I1wy
,
PtrStepSzf
grad
,
PtrStepSzf
rho_c
,
PtrStepSzf
p11
,
PtrStepSzf
p12
,
PtrStepSzf
p21
,
PtrStepSzf
p22
,
PtrStepSzf
p31
,
PtrStepSzf
p32
,
PtrStepSzf
u1
,
PtrStepSzf
u2
,
PtrStepSzf
u3
,
PtrStepSzf
error
,
float
l_t
,
float
theta
,
float
gamma
,
bool
calcError
,
cudaStream_t
stream
);
// Prototype only; the definition is not part of this file.
// NOTE(review): the names suggest the p** images are updated from u1/u2/u3
// using taut and gamma - confirm in the kernel source.
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3,
                           PtrStepSzf p11, PtrStepSzf p12,
                           PtrStepSzf p21, PtrStepSzf p22,
                           PtrStepSzf p31, PtrStepSzf p32,
                           float taut, float gamma,
                           cudaStream_t stream);
}
void
cv
::
cuda
::
OpticalFlowDual_TVL1_CUDA
::
operator
()(
const
GpuMat
&
I0
,
const
GpuMat
&
I1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
)
namespace
{
CV_Assert
(
I0
.
type
()
==
CV_8UC1
||
I0
.
type
()
==
CV_32FC1
);
CV_Assert
(
I0
.
size
()
==
I1
.
size
()
);
CV_Assert
(
I0
.
type
()
==
I1
.
type
()
);
CV_Assert
(
!
useInitialFlow
||
(
flowx
.
size
()
==
I0
.
size
()
&&
flowx
.
type
()
==
CV_32FC1
&&
flowy
.
size
()
==
flowx
.
size
()
&&
flowy
.
type
()
==
flowx
.
type
())
);
CV_Assert
(
nscales
>
0
);
// allocate memory for the pyramid structure
I0s
.
resize
(
nscales
);
I1s
.
resize
(
nscales
);
u1s
.
resize
(
nscales
);
u2s
.
resize
(
nscales
);
u3s
.
resize
(
nscales
);
I0
.
convertTo
(
I0s
[
0
],
CV_32F
,
I0
.
depth
()
==
CV_8U
?
1.0
:
255.0
);
I1
.
convertTo
(
I1s
[
0
],
CV_32F
,
I1
.
depth
()
==
CV_8U
?
1.0
:
255.0
);
if
(
!
useInitialFlow
)
class
OpticalFlowDual_TVL1_Impl
:
public
OpticalFlowDual_TVL1
{
flowx
.
create
(
I0
.
size
(),
CV_32FC1
);
flowy
.
create
(
I0
.
size
(),
CV_32FC1
);
}
public
:
// Stores each constructor argument in the member of the same name (with a
// trailing underscore). Nothing is validated and the body is empty.
// Note that gamma_ is initialised before theta_, although gamma comes later
// in the parameter list.
OpticalFlowDual_TVL1_Impl(double tau, double lambda, double theta,
                          int nscales, int warps, double epsilon,
                          int iterations, double scaleStep, double gamma,
                          bool useInitialFlow)
    : tau_(tau),
      lambda_(lambda),
      gamma_(gamma),
      theta_(theta),
      nscales_(nscales),
      warps_(warps),
      epsilon_(epsilon),
      iterations_(iterations),
      scaleStep_(scaleStep),
      useInitialFlow_(useInitialFlow)
{
}
// Plain accessors for the algorithm settings. Each getter returns the
// matching member as-is and each setter overwrites it without validation.

// tau_
virtual double getTau() const
{
    return tau_;
}
virtual void setTau(double tau)
{
    tau_ = tau;
}

// lambda_
virtual double getLambda() const
{
    return lambda_;
}
virtual void setLambda(double lambda)
{
    lambda_ = lambda;
}

// gamma_
virtual double getGamma() const
{
    return gamma_;
}
virtual void setGamma(double gamma)
{
    gamma_ = gamma;
}

// theta_
virtual double getTheta() const
{
    return theta_;
}
virtual void setTheta(double theta)
{
    theta_ = theta;
}

// nscales_
virtual int getNumScales() const
{
    return nscales_;
}
virtual void setNumScales(int nscales)
{
    nscales_ = nscales;
}

// warps_
virtual int getNumWarps() const
{
    return warps_;
}
virtual void setNumWarps(int warps)
{
    warps_ = warps;
}

// epsilon_
virtual double getEpsilon() const
{
    return epsilon_;
}
virtual void setEpsilon(double epsilon)
{
    epsilon_ = epsilon;
}

// iterations_
virtual int getNumIterations() const
{
    return iterations_;
}
virtual void setNumIterations(int iterations)
{
    iterations_ = iterations;
}

// scaleStep_
virtual double getScaleStep() const
{
    return scaleStep_;
}
virtual void setScaleStep(double scaleStep)
{
    scaleStep_ = scaleStep;
}

// useInitialFlow_
virtual bool getUseInitialFlow() const
{
    return useInitialFlow_;
}
virtual void setUseInitialFlow(bool useInitialFlow)
{
    useInitialFlow_ = useInitialFlow;
}
virtual
void
calc
(
InputArray
I0
,
InputArray
I1
,
InputOutputArray
flow
,
Stream
&
stream
);
private
:
double
tau_
;
double
lambda_
;
double
gamma_
;
double
theta_
;
int
nscales_
;
int
warps_
;
double
epsilon_
;
int
iterations_
;
double
scaleStep_
;
bool
useInitialFlow_
;
private
:
void
calcImpl
(
const
GpuMat
&
I0
,
const
GpuMat
&
I1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
Stream
&
stream
);
void
procOneScale
(
const
GpuMat
&
I0
,
const
GpuMat
&
I1
,
GpuMat
&
u1
,
GpuMat
&
u2
,
GpuMat
&
u3
,
Stream
&
stream
);
u1s
[
0
]
=
flowx
;
u2s
[
0
]
=
flowy
;
if
(
gamma
)
u3s
[
0
].
create
(
I0
.
size
(),
CV_32FC1
);
std
::
vector
<
GpuMat
>
I0s
;
std
::
vector
<
GpuMat
>
I1s
;
std
::
vector
<
GpuMat
>
u1s
;
std
::
vector
<
GpuMat
>
u2s
;
std
::
vector
<
GpuMat
>
u3s
;
I1x_buf
.
create
(
I0
.
size
(),
CV_32FC1
)
;
I1y_buf
.
create
(
I0
.
size
(),
CV_32FC1
)
;
GpuMat
I1x_buf
;
GpuMat
I1y_buf
;
I1w_buf
.
create
(
I0
.
size
(),
CV_32FC1
)
;
I1wx_buf
.
create
(
I0
.
size
(),
CV_32FC1
)
;
I1wy_buf
.
create
(
I0
.
size
(),
CV_32FC1
)
;
GpuMat
I1w_buf
;
GpuMat
I1wx_buf
;
GpuMat
I1wy_buf
;
grad_buf
.
create
(
I0
.
size
(),
CV_32FC1
)
;
rho_c_buf
.
create
(
I0
.
size
(),
CV_32FC1
)
;
GpuMat
grad_buf
;
GpuMat
rho_c_buf
;
p11_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p12_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p21_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p22_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
if
(
gamma
)
GpuMat
p11_buf
;
GpuMat
p12_buf
;
GpuMat
p21_buf
;
GpuMat
p22_buf
;
GpuMat
p31_buf
;
GpuMat
p32_buf
;
GpuMat
diff_buf
;
GpuMat
norm_buf
;
};
void
OpticalFlowDual_TVL1_Impl
::
calc
(
InputArray
_frame0
,
InputArray
_frame1
,
InputOutputArray
_flow
,
Stream
&
stream
)
{
p31_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p32_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
const
GpuMat
frame0
=
_frame0
.
getGpuMat
();
const
GpuMat
frame1
=
_frame1
.
getGpuMat
();
BufferPool
pool
(
stream
);
GpuMat
flowx
=
pool
.
getBuffer
(
frame0
.
size
(),
CV_32FC1
);
GpuMat
flowy
=
pool
.
getBuffer
(
frame0
.
size
(),
CV_32FC1
);
calcImpl
(
frame0
,
frame1
,
flowx
,
flowy
,
stream
);
GpuMat
flows
[]
=
{
flowx
,
flowy
};
cuda
::
merge
(
flows
,
2
,
_flow
,
stream
);
}
diff_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
// create the scales
for
(
int
s
=
1
;
s
<
nscales
;
++
s
)
void
OpticalFlowDual_TVL1_Impl
::
calcImpl
(
const
GpuMat
&
I0
,
const
GpuMat
&
I1
,
GpuMat
&
flowx
,
GpuMat
&
flowy
,
Stream
&
stream
)
{
cuda
::
resize
(
I0s
[
s
-
1
],
I0s
[
s
],
Size
(),
scaleStep
,
scaleStep
);
cuda
::
resize
(
I1s
[
s
-
1
],
I1s
[
s
],
Size
(),
scaleStep
,
scaleStep
);
if
(
I0s
[
s
].
cols
<
16
||
I0s
[
s
].
rows
<
16
)
CV_Assert
(
I0
.
type
()
==
CV_8UC1
||
I0
.
type
()
==
CV_32FC1
);
CV_Assert
(
I0
.
size
()
==
I1
.
size
()
);
CV_Assert
(
I0
.
type
()
==
I1
.
type
()
);
CV_Assert
(
!
useInitialFlow_
||
(
flowx
.
size
()
==
I0
.
size
()
&&
flowx
.
type
()
==
CV_32FC1
&&
flowy
.
size
()
==
flowx
.
size
()
&&
flowy
.
type
()
==
flowx
.
type
())
);
CV_Assert
(
nscales_
>
0
);
// allocate memory for the pyramid structure
I0s
.
resize
(
nscales_
);
I1s
.
resize
(
nscales_
);
u1s
.
resize
(
nscales_
);
u2s
.
resize
(
nscales_
);
u3s
.
resize
(
nscales_
);
I0
.
convertTo
(
I0s
[
0
],
CV_32F
,
I0
.
depth
()
==
CV_8U
?
1.0
:
255.0
,
stream
);
I1
.
convertTo
(
I1s
[
0
],
CV_32F
,
I1
.
depth
()
==
CV_8U
?
1.0
:
255.0
,
stream
);
if
(
!
useInitialFlow_
)
{
nscales
=
s
;
break
;
flowx
.
create
(
I0
.
size
(),
CV_32FC1
)
;
flowy
.
create
(
I0
.
size
(),
CV_32FC1
)
;
}
if
(
useInitialFlow
)
u1s
[
0
]
=
flowx
;
u2s
[
0
]
=
flowy
;
if
(
gamma_
)
{
cuda
::
resize
(
u1s
[
s
-
1
],
u1s
[
s
],
Size
(),
scaleStep
,
scaleStep
);
cuda
::
resize
(
u2s
[
s
-
1
],
u2s
[
s
],
Size
(),
scaleStep
,
scaleStep
);
u3s
[
0
].
create
(
I0
.
size
(),
CV_32FC1
);
}
I1x_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
I1y_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
I1w_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
I1wx_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
I1wy_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
cuda
::
multiply
(
u1s
[
s
],
Scalar
::
all
(
scaleStep
),
u1s
[
s
]);
cuda
::
multiply
(
u2s
[
s
],
Scalar
::
all
(
scaleStep
),
u2s
[
s
]);
grad_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
rho_c_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p11_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p12_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p21_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p22_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
if
(
gamma_
)
{
p31_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
p32_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
}
else
diff_buf
.
create
(
I0
.
size
(),
CV_32FC1
);
// create the scales
for
(
int
s
=
1
;
s
<
nscales_
;
++
s
)
{
u1s
[
s
].
create
(
I0s
[
s
].
size
(),
CV_32FC1
);
u2s
[
s
].
create
(
I0s
[
s
].
size
(),
CV_32FC1
);
cuda
::
resize
(
I0s
[
s
-
1
],
I0s
[
s
],
Size
(),
scaleStep_
,
scaleStep_
,
INTER_LINEAR
,
stream
);
cuda
::
resize
(
I1s
[
s
-
1
],
I1s
[
s
],
Size
(),
scaleStep_
,
scaleStep_
,
INTER_LINEAR
,
stream
);
if
(
I0s
[
s
].
cols
<
16
||
I0s
[
s
].
rows
<
16
)
{
nscales_
=
s
;
break
;
}
if
(
useInitialFlow_
)
{
cuda
::
resize
(
u1s
[
s
-
1
],
u1s
[
s
],
Size
(),
scaleStep_
,
scaleStep_
,
INTER_LINEAR
,
stream
);
cuda
::
resize
(
u2s
[
s
-
1
],
u2s
[
s
],
Size
(),
scaleStep_
,
scaleStep_
,
INTER_LINEAR
,
stream
);
cuda
::
multiply
(
u1s
[
s
],
Scalar
::
all
(
scaleStep_
),
u1s
[
s
],
1
,
-
1
,
stream
);
cuda
::
multiply
(
u2s
[
s
],
Scalar
::
all
(
scaleStep_
),
u2s
[
s
],
1
,
-
1
,
stream
);
}
else
{
u1s
[
s
].
create
(
I0s
[
s
].
size
(),
CV_32FC1
);
u2s
[
s
].
create
(
I0s
[
s
].
size
(),
CV_32FC1
);
}
if
(
gamma_
)
{
u3s
[
s
].
create
(
I0s
[
s
].
size
(),
CV_32FC1
);
}
}
if
(
gamma
)
u3s
[
s
].
create
(
I0s
[
s
].
size
(),
CV_32FC1
);
}
if
(
!
useInitialFlow
)
{
u1s
[
nscales
-
1
].
setTo
(
Scalar
::
all
(
0
));
u2s
[
nscales
-
1
].
setTo
(
Scalar
::
all
(
0
));
}
if
(
gamma
)
u3s
[
nscales
-
1
].
setTo
(
Scalar
::
all
(
0
));
if
(
!
useInitialFlow_
)
{
u1s
[
nscales_
-
1
].
setTo
(
Scalar
::
all
(
0
),
stream
);
u2s
[
nscales_
-
1
].
setTo
(
Scalar
::
all
(
0
),
stream
);
}
if
(
gamma_
)
{
u3s
[
nscales_
-
1
].
setTo
(
Scalar
::
all
(
0
),
stream
);
}
// pyramidal structure for computing the optical flow
for
(
int
s
=
nscales
-
1
;
s
>=
0
;
--
s
)
{
// compute the optical flow at the current scale
procOneScale
(
I0s
[
s
],
I1s
[
s
],
u1s
[
s
],
u2s
[
s
],
u3s
[
s
]
);
// pyramidal structure for computing the optical flow
for
(
int
s
=
nscales_
-
1
;
s
>=
0
;
--
s
)
{
// compute the optical flow at the current scale
procOneScale
(
I0s
[
s
],
I1s
[
s
],
u1s
[
s
],
u2s
[
s
],
u3s
[
s
],
stream
);
// if this was the last scale, finish now
if
(
s
==
0
)
break
;
// if this was the last scale, finish now
if
(
s
==
0
)
break
;
// otherwise, upsample the optical flow
// otherwise, upsample the optical flow
// zoom the optical flow for the next finer scale
cuda
::
resize
(
u1s
[
s
],
u1s
[
s
-
1
],
I0s
[
s
-
1
].
size
());
cuda
::
resize
(
u2s
[
s
],
u2s
[
s
-
1
],
I0s
[
s
-
1
].
size
());
if
(
gamma
)
cuda
::
resize
(
u3s
[
s
],
u3s
[
s
-
1
],
I0s
[
s
-
1
].
size
());
// zoom the optical flow for the next finer scale
cuda
::
resize
(
u1s
[
s
],
u1s
[
s
-
1
],
I0s
[
s
-
1
].
size
(),
0
,
0
,
INTER_LINEAR
,
stream
);
cuda
::
resize
(
u2s
[
s
],
u2s
[
s
-
1
],
I0s
[
s
-
1
].
size
(),
0
,
0
,
INTER_LINEAR
,
stream
);
if
(
gamma_
)
{
cuda
::
resize
(
u3s
[
s
],
u3s
[
s
-
1
],
I0s
[
s
-
1
].
size
(),
0
,
0
,
INTER_LINEAR
,
stream
);
}
// scale the optical flow with the appropriate zoom factor
cuda
::
multiply
(
u1s
[
s
-
1
],
Scalar
::
all
(
1
/
scaleStep
),
u1s
[
s
-
1
]);
cuda
::
multiply
(
u2s
[
s
-
1
],
Scalar
::
all
(
1
/
scaleStep
),
u2s
[
s
-
1
]);
// scale the optical flow with the appropriate zoom factor
cuda
::
multiply
(
u1s
[
s
-
1
],
Scalar
::
all
(
1
/
scaleStep_
),
u1s
[
s
-
1
],
1
,
-
1
,
stream
);
cuda
::
multiply
(
u2s
[
s
-
1
],
Scalar
::
all
(
1
/
scaleStep_
),
u2s
[
s
-
1
],
1
,
-
1
,
stream
);
}
}
}
namespace
tvl1flow
{
void
centeredGradient
(
PtrStepSzf
src
,
PtrStepSzf
dx
,
PtrStepSzf
dy
);
void
warpBackward
(
PtrStepSzf
I0
,
PtrStepSzf
I1
,
PtrStepSzf
I1x
,
PtrStepSzf
I1y
,
PtrStepSzf
u1
,
PtrStepSzf
u2
,
PtrStepSzf
I1w
,
PtrStepSzf
I1wx
,
PtrStepSzf
I1wy
,
PtrStepSzf
grad
,
PtrStepSzf
rho
);
void
estimateU
(
PtrStepSzf
I1wx
,
PtrStepSzf
I1wy
,
PtrStepSzf
grad
,
PtrStepSzf
rho_c
,
PtrStepSzf
p11
,
PtrStepSzf
p12
,
PtrStepSzf
p21
,
PtrStepSzf
p22
,
PtrStepSzf
p31
,
PtrStepSzf
p32
,
PtrStepSzf
u1
,
PtrStepSzf
u2
,
PtrStepSzf
u3
,
PtrStepSzf
error
,
float
l_t
,
float
theta
,
float
gamma
,
bool
calcError
);
void
estimateDualVariables
(
PtrStepSzf
u1
,
PtrStepSzf
u2
,
PtrStepSzf
u3
,
PtrStepSzf
p11
,
PtrStepSzf
p12
,
PtrStepSzf
p21
,
PtrStepSzf
p22
,
PtrStepSzf
p31
,
PtrStepSzf
p32
,
float
taut
,
const
float
gamma
);
}
void
cv
::
cuda
::
OpticalFlowDual_TVL1_CUDA
::
procOneScale
(
const
GpuMat
&
I0
,
const
GpuMat
&
I1
,
GpuMat
&
u1
,
GpuMat
&
u2
,
GpuMat
&
u3
)
{
using
namespace
tvl1flow
;
void
OpticalFlowDual_TVL1_Impl
::
procOneScale
(
const
GpuMat
&
I0
,
const
GpuMat
&
I1
,
GpuMat
&
u1
,
GpuMat
&
u2
,
GpuMat
&
u3
,
Stream
&
_stream
)
{
using
namespace
tvl1flow
;
const
double
scaledEpsilon
=
epsilon
*
epsilon
*
I0
.
size
().
area
(
);
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
CV_DbgAssert
(
I1
.
size
()
==
I0
.
size
()
);
CV_DbgAssert
(
I1
.
type
()
==
I0
.
type
()
);
CV_DbgAssert
(
u1
.
size
()
==
I0
.
size
()
);
CV_DbgAssert
(
u2
.
size
()
==
u1
.
size
()
);
const
double
scaledEpsilon
=
epsilon_
*
epsilon_
*
I0
.
size
().
area
();
GpuMat
I1x
=
I1x_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
I1y
=
I1y_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
centeredGradient
(
I1
,
I1x
,
I1y
);
CV_DbgAssert
(
I1
.
size
()
==
I0
.
size
()
);
CV_DbgAssert
(
I1
.
type
()
==
I0
.
type
()
);
CV_DbgAssert
(
u1
.
size
()
==
I0
.
size
()
);
CV_DbgAssert
(
u2
.
size
()
==
u1
.
size
()
);
GpuMat
I1w
=
I1w
_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
I1wx
=
I1wx
_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
I1wy
=
I1wy_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
)
);
GpuMat
I1x
=
I1x
_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
I1y
=
I1y
_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
centeredGradient
(
I1
,
I1x
,
I1y
,
stream
);
GpuMat
grad
=
grad_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
rho_c
=
rho_c_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
I1w
=
I1w_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
I1wx
=
I1wx_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
I1wy
=
I1wy_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p11
=
p11_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p12
=
p12_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p21
=
p21_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p22
=
p22_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p31
,
p32
;
if
(
gamma
)
{
p31
=
p31_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
p32
=
p32_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
}
p11
.
setTo
(
Scalar
::
all
(
0
));
p12
.
setTo
(
Scalar
::
all
(
0
));
p21
.
setTo
(
Scalar
::
all
(
0
));
p22
.
setTo
(
Scalar
::
all
(
0
));
if
(
gamma
)
{
p31
.
setTo
(
Scalar
::
all
(
0
));
p32
.
setTo
(
Scalar
::
all
(
0
));
}
GpuMat
grad
=
grad_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
rho_c
=
rho_c_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
diff
=
diff_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p11
=
p11_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p12
=
p12_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p21
=
p21_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p22
=
p22_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
GpuMat
p31
,
p32
;
if
(
gamma_
)
{
p31
=
p31_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
p32
=
p32_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
}
p11
.
setTo
(
Scalar
::
all
(
0
),
_stream
);
p12
.
setTo
(
Scalar
::
all
(
0
),
_stream
);
p21
.
setTo
(
Scalar
::
all
(
0
),
_stream
);
p22
.
setTo
(
Scalar
::
all
(
0
),
_stream
);
if
(
gamma_
)
{
p31
.
setTo
(
Scalar
::
all
(
0
),
_stream
);
p32
.
setTo
(
Scalar
::
all
(
0
),
_stream
);
}
const
float
l_t
=
static_cast
<
float
>
(
lambda
*
theta
);
const
float
taut
=
static_cast
<
float
>
(
tau
/
theta
);
GpuMat
diff
=
diff_buf
(
Rect
(
0
,
0
,
I0
.
cols
,
I0
.
rows
));
for
(
int
warpings
=
0
;
warpings
<
warps
;
++
warpings
)
{
warpBackward
(
I0
,
I1
,
I1x
,
I1y
,
u1
,
u2
,
I1w
,
I1wx
,
I1wy
,
grad
,
rho_c
);
const
float
l_t
=
static_cast
<
float
>
(
lambda_
*
theta_
);
const
float
taut
=
static_cast
<
float
>
(
tau_
/
theta_
);
double
error
=
std
::
numeric_limits
<
double
>::
max
();
double
prevError
=
0.0
;
for
(
int
n
=
0
;
error
>
scaledEpsilon
&&
n
<
iterations
;
++
n
)
for
(
int
warpings
=
0
;
warpings
<
warps_
;
++
warpings
)
{
// some tweaks to make sum operation less frequently
bool
calcError
=
(
epsilon
>
0
)
&&
(
n
&
0x1
)
&&
(
prevError
<
scaledEpsilon
);
estimateU
(
I1wx
,
I1wy
,
grad
,
rho_c
,
p11
,
p12
,
p21
,
p22
,
p31
,
p32
,
u1
,
u2
,
u3
,
diff
,
l_t
,
static_cast
<
float
>
(
theta
),
gamma
,
calcError
);
if
(
calcError
)
{
error
=
cuda
::
sum
(
diff
,
norm_buf
)[
0
];
prevError
=
error
;
}
else
warpBackward
(
I0
,
I1
,
I1x
,
I1y
,
u1
,
u2
,
I1w
,
I1wx
,
I1wy
,
grad
,
rho_c
,
stream
);
double
error
=
std
::
numeric_limits
<
double
>::
max
();
double
prevError
=
0.0
;
for
(
int
n
=
0
;
error
>
scaledEpsilon
&&
n
<
iterations_
;
++
n
)
{
error
=
std
::
numeric_limits
<
double
>::
max
();
prevError
-=
scaledEpsilon
;
// some tweaks to make sum operation less frequently
bool
calcError
=
(
epsilon_
>
0
)
&&
(
n
&
0x1
)
&&
(
prevError
<
scaledEpsilon
);
estimateU
(
I1wx
,
I1wy
,
grad
,
rho_c
,
p11
,
p12
,
p21
,
p22
,
p31
,
p32
,
u1
,
u2
,
u3
,
diff
,
l_t
,
static_cast
<
float
>
(
theta_
),
gamma_
,
calcError
,
stream
);
if
(
calcError
)
{
_stream
.
waitForCompletion
();
error
=
cuda
::
sum
(
diff
,
norm_buf
)[
0
];
prevError
=
error
;
}
else
{
error
=
std
::
numeric_limits
<
double
>::
max
();
prevError
-=
scaledEpsilon
;
}
estimateDualVariables
(
u1
,
u2
,
u3
,
p11
,
p12
,
p21
,
p22
,
p31
,
p32
,
taut
,
gamma_
,
stream
);
}
estimateDualVariables
(
u1
,
u2
,
u3
,
p11
,
p12
,
p21
,
p22
,
p31
,
p32
,
taut
,
gamma
);
}
}
}
void
cv
::
cuda
::
OpticalFlowDual_TVL1_CUDA
::
collectGarbage
()
Ptr
<
OpticalFlowDual_TVL1
>
cv
::
cuda
::
OpticalFlowDual_TVL1
::
create
(
double
tau
,
double
lambda
,
double
theta
,
int
nscales
,
int
warps
,
double
epsilon
,
int
iterations
,
double
scaleStep
,
double
gamma
,
bool
useInitialFlow
)
{
I0s
.
clear
();
I1s
.
clear
();
u1s
.
clear
();
u2s
.
clear
();
u3s
.
clear
();
I1x_buf
.
release
();
I1y_buf
.
release
();
I1w_buf
.
release
();
I1wx_buf
.
release
();
I1wy_buf
.
release
();
grad_buf
.
release
();
rho_c_buf
.
release
();
p11_buf
.
release
();
p12_buf
.
release
();
p21_buf
.
release
();
p22_buf
.
release
();
if
(
gamma
)
{
p31_buf
.
release
();
p32_buf
.
release
();
}
diff_buf
.
release
();
norm_buf
.
release
();
return
makePtr
<
OpticalFlowDual_TVL1_Impl
>
(
tau
,
lambda
,
theta
,
nscales
,
warps
,
epsilon
,
iterations
,
scaleStep
,
gamma
,
useInitialFlow
);
}
#endif // !defined HAVE_CUDA || defined(CUDA_DISABLER)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment