Commit 30f718d1 authored by Maksim Shabunin's avatar Maksim Shabunin

Merge pull request #708 from sbokov:variational_refinement

parents 51a4f6e4 b96a5f59
......@@ -44,3 +44,11 @@
journal={arXiv preprint arXiv:1603.03590},
year={2016}
}
@inproceedings{Brox2004,
title={High accuracy optical flow estimation based on a theory for warping},
author={Brox, Thomas and Bruhn, Andr{\'e}s and Papenberg, Nils and Weickert, Joachim},
booktitle={European Conference on Computer Vision (ECCV)},
pages={25--36},
year={2004}
}
......@@ -154,6 +154,64 @@ to the flow in the horizontal direction (u), second - vertical (v).
*/
CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow );
/** @brief Variational optical flow refinement
This class implements variational refinement of the input flow field, i.e.
it uses input flow to initialize the minimization of the following functional:
\f$E(U) = \int_{\Omega} \delta \Psi(E_I) + \gamma \Psi(E_G) + \alpha \Psi(E_S) \f$,
where \f$E_I,E_G,E_S\f$ are color constancy, gradient constancy and smoothness terms
respectively. \f$\Psi(s^2)=\sqrt{s^2+\epsilon^2}\f$ is a robust penalizer to limit the
influence of outliers. A complete formulation and a description of the minimization
procedure can be found in @cite Brox2004
*/
class CV_EXPORTS_W VariationalRefinement : public DenseOpticalFlow
{
public:
/** @brief calc function overload to handle separate horizontal (u) and vertical (v) flow components
(to avoid extra splits/merges). The input values of flow_u and flow_v are used to initialize the
minimization and are updated in place. */
CV_WRAP virtual void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) = 0;
/** @brief Number of outer (fixed-point) iterations in the minimization procedure.
@see setFixedPointIterations */
CV_WRAP virtual int getFixedPointIterations() const = 0;
/** @copybrief getFixedPointIterations @see getFixedPointIterations */
CV_WRAP virtual void setFixedPointIterations(int val) = 0;
/** @brief Number of inner successive over-relaxation (SOR) iterations
in the minimization procedure to solve the respective linear system.
@see setSorIterations */
CV_WRAP virtual int getSorIterations() const = 0;
/** @copybrief getSorIterations @see getSorIterations */
CV_WRAP virtual void setSorIterations(int val) = 0;
/** @brief Relaxation factor in SOR (successive over-relaxation)
@see setOmega */
CV_WRAP virtual float getOmega() const = 0;
/** @copybrief getOmega @see getOmega */
CV_WRAP virtual void setOmega(float val) = 0;
/** @brief Weight of the smoothness term (\f$\alpha\f$ in the functional above)
@see setAlpha */
CV_WRAP virtual float getAlpha() const = 0;
/** @copybrief getAlpha @see getAlpha */
CV_WRAP virtual void setAlpha(float val) = 0;
/** @brief Weight of the color constancy term (\f$\delta\f$ in the functional above)
@see setDelta */
CV_WRAP virtual float getDelta() const = 0;
/** @copybrief getDelta @see getDelta */
CV_WRAP virtual void setDelta(float val) = 0;
/** @brief Weight of the gradient constancy term (\f$\gamma\f$ in the functional above)
@see setGamma */
CV_WRAP virtual float getGamma() const = 0;
/** @copybrief getGamma @see getGamma */
CV_WRAP virtual void setGamma(float val) = 0;
};
/** @brief Creates an instance of VariationalRefinement
*/
CV_EXPORTS_W Ptr<VariationalRefinement> createVariationalFlowRefinement();
/** @brief DeepFlow optical flow algorithm implementation.
......@@ -194,40 +252,80 @@ CV_EXPORTS_W Ptr<DenseOpticalFlow> createOptFlow_SparseToDense();
/** @brief DIS optical flow algorithm.
This class implements the Dense Inverse Search (DIS) optical flow algorithm. More
details about the algorithm can be found at @cite Kroeger2016 .
details about the algorithm can be found at @cite Kroeger2016 . Includes three presets with preselected
parameters to provide reasonable trade-off between speed and quality. However, even the slowest preset is
still relatively fast, use DeepFlow if you need better quality and don't care about speed.
*/
class CV_EXPORTS_W DISOpticalFlow : public DenseOpticalFlow
{
public:
/** @brief Finest level of the gaussian pyramid on which the flow is computed (zero level
corresponds to the original image resolution).The final flow is obtained by bilinear upscaling.
enum
{
PRESET_ULTRAFAST = 0,
PRESET_FAST = 1,
PRESET_MEDIUM = 2
};
/** @brief Finest level of the Gaussian pyramid on which the flow is computed (zero level
corresponds to the original image resolution). The final flow is obtained by bilinear upscaling.
@see setFinestScale */
virtual int getFinestScale() const = 0;
CV_WRAP virtual int getFinestScale() const = 0;
/** @copybrief getFinestScale @see getFinestScale */
virtual void setFinestScale(int val) = 0;
CV_WRAP virtual void setFinestScale(int val) = 0;
/** @brief Size of an image patch for matching (in pixels)
/** @brief Size of an image patch for matching (in pixels). Normally, default 8x8 patches work well
enough in most cases.
@see setPatchSize */
virtual int getPatchSize() const = 0;
CV_WRAP virtual int getPatchSize() const = 0;
/** @copybrief getPatchSize @see getPatchSize */
virtual void setPatchSize(int val) = 0;
CV_WRAP virtual void setPatchSize(int val) = 0;
/** @brief Stride between neighbor patches. Must be less than patch size.
/** @brief Stride between neighbor patches. Must be less than patch size. Lower values correspond
to higher flow quality.
@see setPatchStride */
virtual int getPatchStride() const = 0;
CV_WRAP virtual int getPatchStride() const = 0;
/** @copybrief getPatchStride @see getPatchStride */
virtual void setPatchStride(int val) = 0;
CV_WRAP virtual void setPatchStride(int val) = 0;
/** @brief number of gradient descent iterations in the patch inverse search stage
/** @brief Maximum number of gradient descent iterations in the patch inverse search stage. Higher values
may improve quality in some cases.
@see setGradientDescentIterations */
virtual int getGradientDescentIterations() const = 0;
CV_WRAP virtual int getGradientDescentIterations() const = 0;
/** @copybrief getGradientDescentIterations @see getGradientDescentIterations */
virtual void setGradientDescentIterations(int val) = 0;
CV_WRAP virtual void setGradientDescentIterations(int val) = 0;
/** @brief Number of fixed point iterations of variational refinement per scale. Set to zero to
disable variational refinement completely. Higher values will typically result in more smooth and
high-quality flow.
@see setGradientDescentIterations */
CV_WRAP virtual int getVariationalRefinementIterations() const = 0;
/** @copybrief getGradientDescentIterations @see getGradientDescentIterations */
CV_WRAP virtual void setVariationalRefinementIterations(int val) = 0;
/** @brief Whether to use mean-normalization of patches when computing patch distance. It is turned on
by default as it typically provides a noticeable quality boost because of increased robustness to
illumination variations. Turn it off if you are certain that your sequence doesn't contain any changes
in illumination.
@see setUseMeanNormalization */
CV_WRAP virtual bool getUseMeanNormalization() const = 0;
/** @copybrief getUseMeanNormalization @see getUseMeanNormalization */
CV_WRAP virtual void setUseMeanNormalization(bool val) = 0;
/** @brief Whether to use spatial propagation of good optical flow vectors. This option is turned on by
default, as it tends to work better on average and can sometimes help recover from major errors
introduced by the coarse-to-fine scheme employed by the DIS optical flow algorithm. Turning this
option off can make the output flow field a bit smoother, however.
@see setUseSpatialPropagation */
CV_WRAP virtual bool getUseSpatialPropagation() const = 0;
/** @copybrief getUseSpatialPropagation @see getUseSpatialPropagation */
CV_WRAP virtual void setUseSpatialPropagation(bool val) = 0;
};
/** @brief Creates an instance of DISOpticalFlow
@param preset one of PRESET_ULTRAFAST, PRESET_FAST and PRESET_MEDIUM
*/
CV_EXPORTS_W Ptr<DISOpticalFlow> createOptFlow_DIS();
CV_EXPORTS_W Ptr<DISOpticalFlow> createOptFlow_DIS(int preset = DISOpticalFlow::PRESET_FAST);
//! @}
......
/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3 - clause BSD License)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met :
*
* *Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort(including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#include "perf_precomp.hpp"
using std::tr1::tuple;
using std::tr1::get;
using namespace perf;
using namespace testing;
using namespace cv;
using namespace cv::optflow;
typedef tuple<Size> DFParams;
typedef TestBaseWithParam<DFParams> DenseOpticalFlow_DeepFlow;
PERF_TEST_P(DenseOpticalFlow_DeepFlow, perf, Values(szVGA, sz720p))
{
    // Time a single DeepFlow run on a pair of random-noise frames of the given size.
    const Size frame_size = get<0>(GetParam());

    Mat src_frame(frame_size, CV_8U), dst_frame(frame_size, CV_8U);
    randu(src_frame, 0, 255);
    randu(dst_frame, 0, 255);
    Mat flow;

    cv::setNumThreads(cv::getNumberOfCPUs());
    TEST_CYCLE_N(1)
    {
        Ptr<DenseOpticalFlow> algo = createOptFlow_DeepFlow();
        algo->calc(src_frame, dst_frame, flow);
    }
    SANITY_CHECK_NOTHING();
}
/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3 - clause BSD License)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met :
*
* *Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort(including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#include "perf_precomp.hpp"
using std::tr1::tuple;
using std::tr1::get;
using namespace perf;
using namespace testing;
using namespace cv;
using namespace cv::optflow;
void MakeArtificialExample(Mat &dst_frame1, Mat &dst_frame2);
typedef tuple<String, Size> DISParams;
typedef TestBaseWithParam<DISParams> DenseOpticalFlow_DIS;
PERF_TEST_P(DenseOpticalFlow_DIS, perf,
            Combine(Values("PRESET_ULTRAFAST", "PRESET_FAST", "PRESET_MEDIUM"), Values(szVGA, sz720p, sz1080p)))
{
    const DISParams params = GetParam();

    // Presets are passed as strings so their names show up in the perf test
    // results; map the name back to the corresponding enum value here.
    const String preset_name = get<0>(params);
    int preset = DISOpticalFlow::PRESET_FAST; // default covers "PRESET_FAST"
    if (preset_name == "PRESET_ULTRAFAST")
        preset = DISOpticalFlow::PRESET_ULTRAFAST;
    else if (preset_name == "PRESET_MEDIUM")
        preset = DISOpticalFlow::PRESET_MEDIUM;

    const Size frame_size = get<1>(params);
    Mat frame1(frame_size, CV_8U), frame2(frame_size, CV_8U);
    Mat flow;
    MakeArtificialExample(frame1, frame2);

    cv::setNumThreads(cv::getNumberOfCPUs());
    TEST_CYCLE_N(10)
    {
        Ptr<DenseOpticalFlow> algo = createOptFlow_DIS(preset);
        algo->calc(frame1, frame2, flow);
    }
    SANITY_CHECK_NOTHING();
}
/* Generates a pair of synthetic frames with a smooth random displacement between them.
 *
 * @param dst_frame1 preallocated CV_8U matrix of the target size; filled with upscaled
 *                   low-resolution noise so the frames contain large-scale structure.
 * @param dst_frame2 preallocated CV_8U matrix of the target size; same noise as
 *                   dst_frame1, warped by a smooth random displacement field.
 */
void MakeArtificialExample(Mat &dst_frame1, Mat &dst_frame2)
{
    const int src_scale = 2; // noise is generated at 1/4 of the target resolution
    const int OF_scale = 6;  // displacement field is generated at 1/64 of the target resolution

    // Standard deviation of the random displacement, proportional to the frame width.
    // Use floating-point division: the previous integer division truncated the value
    // (e.g. 640 / 300 == 2 instead of ~2.13).
    const double sigma = dst_frame1.cols / 300.0;

    // Low-resolution random noise upscaled to the target size. (1 << n) is an exact
    // integer power of two, avoiding the (int)pow(2, n) float round-trip.
    Mat tmp(Size(dst_frame1.cols / (1 << src_scale), dst_frame1.rows / (1 << src_scale)), CV_8U);
    randu(tmp, 0, 255);
    resize(tmp, dst_frame1, dst_frame1.size(), 0.0, 0.0, INTER_LINEAR);
    resize(tmp, dst_frame2, dst_frame2.size(), 0.0, 0.0, INTER_LINEAR);

    // Smooth random displacement field: coarse Gaussian noise upscaled with bicubic
    // interpolation, then converted from relative offsets to the absolute map
    // coordinates expected by remap().
    Mat displacement_field(Size(dst_frame1.cols / (1 << OF_scale), dst_frame1.rows / (1 << OF_scale)),
                           CV_32FC2);
    randn(displacement_field, 0.0, sigma);
    resize(displacement_field, displacement_field, dst_frame2.size(), 0.0, 0.0, INTER_CUBIC);
    for (int i = 0; i < displacement_field.rows; i++)
        for (int j = 0; j < displacement_field.cols; j++)
            displacement_field.at<Vec2f>(i, j) += Vec2f((float)j, (float)i);

    remap(dst_frame2, dst_frame2, displacement_field, Mat(), INTER_LINEAR, BORDER_REPLICATE);
}
#include "perf_precomp.hpp"
CV_PERF_TEST_MAIN(optflow)
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#endif
#ifndef __OPENCV_PERF_PRECOMP_HPP__
#define __OPENCV_PERF_PRECOMP_HPP__
#include "opencv2/ts.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/optflow.hpp"
#include "opencv2/highgui.hpp"
#endif
/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3 - clause BSD License)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met :
*
* *Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort(including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#include "perf_precomp.hpp"
using std::tr1::tuple;
using std::tr1::get;
using namespace perf;
using namespace testing;
using namespace cv;
using namespace cv::optflow;
typedef tuple<Size, int, int> VarRefParams;
typedef TestBaseWithParam<VarRefParams> DenseOpticalFlow_VariationalRefinement;
PERF_TEST_P(DenseOpticalFlow_VariationalRefinement, perf, Combine(Values(szQVGA, szVGA), Values(5, 10), Values(5, 10)))
{
    const VarRefParams params = GetParam();
    const Size frame_size = get<0>(params);
    const int sor_iterations = get<1>(params);
    const int fixed_point_iterations = get<2>(params);

    // Random noise frames and a zero-initialized flow field to refine.
    Mat frame1(frame_size, CV_8U), frame2(frame_size, CV_8U);
    randu(frame1, 0, 255);
    randu(frame2, 0, 255);
    Mat flow(frame_size, CV_32FC2);
    flow.setTo(0.0f);

    cv::setNumThreads(cv::getNumberOfCPUs());
    TEST_CYCLE_N(10)
    {
        Ptr<VariationalRefinement> var = createVariationalFlowRefinement();
        var->setAlpha(20.0f);
        var->setGamma(10.0f);
        var->setDelta(5.0f);
        var->setSorIterations(sor_iterations);
        var->setFixedPointIterations(fixed_point_iterations);
        var->calc(frame1, frame2, flow);
    }
    SANITY_CHECK_NOTHING();
}
......@@ -11,7 +11,7 @@ using namespace optflow;
const String keys = "{help h usage ? | | print this message }"
"{@image1 | | image1 }"
"{@image2 | | image2 }"
"{@algorithm | | [farneback, simpleflow, tvl1, deepflow, sparsetodenseflow or DISflow] }"
"{@algorithm | | [farneback, simpleflow, tvl1, deepflow, sparsetodenseflow, DISflow_ultrafast, DISflow_fast, DISflow_medium] }"
"{@groundtruth | | path to the .flo file (optional), Middlebury format }"
"{m measure |endpoint| error measure - [endpoint or angular] }"
"{r region |all | region to compute stats about [all, discontinuities, untextured] }"
......@@ -229,7 +229,7 @@ int main( int argc, char** argv )
if ( i2.depth() != CV_8U )
i2.convertTo(i2, CV_8U);
if ( (method == "farneback" || method == "tvl1" || method == "deepflow" || method == "DISflow") && i1.channels() == 3 )
if ( (method == "farneback" || method == "tvl1" || method == "deepflow" || method == "DISflow_ultrafast" || method == "DISflow_fast" || method == "DISflow_medium") && i1.channels() == 3 )
{ // 1-channel images are expected
cvtColor(i1, i1, COLOR_BGR2GRAY);
cvtColor(i2, i2, COLOR_BGR2GRAY);
......@@ -252,8 +252,12 @@ int main( int argc, char** argv )
algorithm = createOptFlow_DeepFlow();
else if ( method == "sparsetodenseflow" )
algorithm = createOptFlow_SparseToDense();
else if ( method == "DISflow" )
algorithm = createOptFlow_DIS();
else if ( method == "DISflow_ultrafast" )
algorithm = createOptFlow_DIS(DISOpticalFlow::PRESET_ULTRAFAST);
else if (method == "DISflow_fast")
algorithm = createOptFlow_DIS(DISOpticalFlow::PRESET_FAST);
else if (method == "DISflow_medium")
algorithm = createOptFlow_DIS(DISOpticalFlow::PRESET_MEDIUM);
else
{
printf("Wrong method!\n");
......
......@@ -40,10 +40,11 @@
//
//M*/
#include "opencv2/core/hal/intrin.hpp"
#include "precomp.hpp"
#include "opencv2/imgproc.hpp"
using namespace std;
#define EPS 0.001F
#define INF 1E+10F
namespace cv
{
......@@ -58,13 +59,21 @@ class DISOpticalFlowImpl : public DISOpticalFlow
void calc(InputArray I0, InputArray I1, InputOutputArray flow);
void collectGarbage();
protected: // algorithm parameters
protected: //!< algorithm parameters
int finest_scale, coarsest_scale;
int patch_size;
int patch_stride;
int grad_descent_iter;
int variational_refinement_iter;
bool use_mean_normalization;
bool use_spatial_propagation;
public: // getters and setters
protected: //!< some auxiliary variables
int border_size;
int w, h; //!< flow buffer width and height on the current scale
int ws, hs; //!< sparse flow buffer width and height on the current scale
public:
int getFinestScale() const { return finest_scale; }
void setFinestScale(int val) { finest_scale = val; }
int getPatchSize() const { return patch_size; }
......@@ -73,60 +82,111 @@ class DISOpticalFlowImpl : public DISOpticalFlow
void setPatchStride(int val) { patch_stride = val; }
int getGradientDescentIterations() const { return grad_descent_iter; }
void setGradientDescentIterations(int val) { grad_descent_iter = val; }
int getVariationalRefinementIterations() const { return variational_refinement_iter; }
void setVariationalRefinementIterations(int val) { variational_refinement_iter = val; }
bool getUseMeanNormalization() const { return use_mean_normalization; }
void setUseMeanNormalization(bool val) { use_mean_normalization = val; }
bool getUseSpatialPropagation() const { return use_spatial_propagation; }
void setUseSpatialPropagation(bool val) { use_spatial_propagation = val; }
protected: //!< internal buffers
vector<Mat_<uchar> > I0s; //!< Gaussian pyramid for the current frame
vector<Mat_<uchar> > I1s; //!< Gaussian pyramid for the next frame
vector<Mat_<uchar> > I1s_ext; //!< I1s with borders
private: // internal buffers
vector< Mat_<uchar> > I0s; // gaussian pyramid for the current frame
vector< Mat_<uchar> > I1s; // gaussian pyramid for the next frame
vector<Mat_<short> > I0xs; //!< Gaussian pyramid for the x gradient of the current frame
vector<Mat_<short> > I0ys; //!< Gaussian pyramid for the y gradient of the current frame
vector< Mat_<short> > I0xs; // gaussian pyramid for the x gradient of the current frame
vector< Mat_<short> > I0ys; // gaussian pyramid for the y gradient of the current frame
vector<Mat_<float> > Ux; //!< x component of the flow vectors
vector<Mat_<float> > Uy; //!< y component of the flow vectors
vector< Mat_<float> > Ux; // x component of the flow vectors
vector< Mat_<float> > Uy; // y component of the flow vectors
Mat_<Vec2f> U; //!< a buffer for the merged flow
Mat_<Vec2f> U; // buffers for the merged flow
Mat_<float> Sx; //!< intermediate sparse flow representation (x component)
Mat_<float> Sy; //!< intermediate sparse flow representation (y component)
Mat_<float> Sx; // x component of the sparse flow vectors (before densification)
Mat_<float> Sy; // y component of the sparse flow vectors (before densification)
/* Structure tensor components: */
Mat_<float> I0xx_buf; //!< sum of squares of x gradient values
Mat_<float> I0yy_buf; //!< sum of squares of y gradient values
Mat_<float> I0xy_buf; //!< sum of x and y gradient products
// structure tensor components and auxiliary buffers:
Mat_<float> I0xx_buf; // sum of squares of x gradient values
Mat_<float> I0yy_buf; // sum of squares of y gradient values
Mat_<float> I0xy_buf; // sum of x and y gradient products
/* Extra buffers that are useful if patch mean-normalization is used: */
Mat_<float> I0x_buf; //!< sum of x gradient values
Mat_<float> I0y_buf; //!< sum of y gradient values
Mat_<float> I0xx_buf_aux; // for computing sums using the summed area table
/* Auxiliary buffers used in structure tensor computation: */
Mat_<float> I0xx_buf_aux;
Mat_<float> I0yy_buf_aux;
Mat_<float> I0xy_buf_aux;
////////////////////////////////////////////////////////////
Mat_<float> I0x_buf_aux;
Mat_<float> I0y_buf_aux;
private: // private methods
vector<Ptr<VariationalRefinement> > variational_refinement_processors;
private: //!< private methods and parallel sections
void prepareBuffers(Mat &I0, Mat &I1);
void precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &I0x, Mat &I0y);
void patchInverseSearch(Mat &dst_Sx, Mat &dst_Sy, Mat &src_Ux, Mat &src_Uy, Mat &I0, Mat &I0x, Mat &I0y, Mat &I1);
void densification(Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, Mat &src_Sy, Mat &I0, Mat &I1);
void precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &dst_I0x, Mat &dst_I0y, Mat &I0x,
Mat &I0y);
struct PatchInverseSearch_ParBody : public ParallelLoopBody
{
DISOpticalFlowImpl *dis;
int nstripes, stripe_sz;
int hs;
Mat *Sx, *Sy, *Ux, *Uy, *I0, *I1, *I0x, *I0y;
int num_iter;
PatchInverseSearch_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _hs, Mat &dst_Sx, Mat &dst_Sy,
Mat &src_Ux, Mat &src_Uy, Mat &_I0, Mat &_I1, Mat &_I0x, Mat &_I0y, int _num_iter);
void operator()(const Range &range) const;
};
struct Densification_ParBody : public ParallelLoopBody
{
DISOpticalFlowImpl *dis;
int nstripes, stripe_sz;
int h;
Mat *Ux, *Uy, *Sx, *Sy, *I0, *I1;
Densification_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _h, Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx,
Mat &src_Sy, Mat &_I0, Mat &_I1);
void operator()(const Range &range) const;
};
};
DISOpticalFlowImpl::DISOpticalFlowImpl()
{
finest_scale = 3;
patch_size = 9;
finest_scale = 2;
patch_size = 8;
patch_stride = 4;
grad_descent_iter = 12;
grad_descent_iter = 16;
variational_refinement_iter = 5;
border_size = 16;
use_mean_normalization = true;
use_spatial_propagation = true;
/* Use separate variational refinement instances for different scales to avoid repeated memory allocation: */
int max_possible_scales = 10;
for (int i = 0; i < max_possible_scales; i++)
variational_refinement_processors.push_back(createVariationalFlowRefinement());
}
void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1)
{
I0s.resize(coarsest_scale + 1);
I1s.resize(coarsest_scale + 1);
I1s_ext.resize(coarsest_scale + 1);
I0xs.resize(coarsest_scale + 1);
I0ys.resize(coarsest_scale + 1);
Ux.resize(coarsest_scale + 1);
Uy.resize(coarsest_scale + 1);
int fraction = 1;
int cur_rows = 0, cur_cols = 0;
for (int i = 0; i <= coarsest_scale; i++)
{
/* Avoid initializing the pyramid levels above the finest scale, as they won't be used anyway */
if (i == finest_scale)
{
cur_rows = I0.rows / fraction;
......@@ -136,14 +196,21 @@ void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1)
I1s[i].create(cur_rows, cur_cols);
resize(I1, I1s[i], I1s[i].size(), 0.0, 0.0, INTER_AREA);
/* These buffers are reused in each scale so we initialize them once on the finest scale: */
Sx.create(cur_rows / patch_stride, cur_cols / patch_stride);
Sy.create(cur_rows / patch_stride, cur_cols / patch_stride);
I0xx_buf.create(cur_rows / patch_stride, cur_cols / patch_stride);
I0yy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride);
I0xy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride);
I0x_buf.create(cur_rows / patch_stride, cur_cols / patch_stride);
I0y_buf.create(cur_rows / patch_stride, cur_cols / patch_stride);
I0xx_buf_aux.create(cur_rows, cur_cols / patch_stride);
I0yy_buf_aux.create(cur_rows, cur_cols / patch_stride);
I0xy_buf_aux.create(cur_rows, cur_cols / patch_stride);
I0x_buf_aux.create(cur_rows, cur_cols / patch_stride);
I0y_buf_aux.create(cur_rows, cur_cols / patch_stride);
U.create(cur_rows, cur_cols);
}
else if (i > finest_scale)
......@@ -160,89 +227,112 @@ void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1)
if (i >= finest_scale)
{
I1s_ext[i].create(cur_rows + 2 * border_size, cur_cols + 2 * border_size);
copyMakeBorder(I1s[i], I1s_ext[i], border_size, border_size, border_size, border_size, BORDER_REPLICATE);
I0xs[i].create(cur_rows, cur_cols);
I0ys[i].create(cur_rows, cur_cols);
spatialGradient(I0s[i], I0xs[i], I0ys[i]);
Ux[i].create(cur_rows, cur_cols);
Uy[i].create(cur_rows, cur_cols);
variational_refinement_processors[i]->setAlpha(20.0f);
variational_refinement_processors[i]->setDelta(5.0f);
variational_refinement_processors[i]->setGamma(10.0f);
variational_refinement_processors[i]->setSorIterations(5);
variational_refinement_processors[i]->setFixedPointIterations(variational_refinement_iter);
}
}
}
void DISOpticalFlowImpl::precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &I0x, Mat &I0y)
/* This function computes the structure tensor elements (local sums of I0x^2, I0x*I0y and I0y^2).
 * A simple box filter is not used here because these sums need to be computed on a sparse grid
 * and stored densely in the output buffers.
 */
void DISOpticalFlowImpl::precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &dst_I0x,
Mat &dst_I0y, Mat &I0x, Mat &I0y)
{
short *I0x_ptr = I0x.ptr<short>();
short *I0y_ptr = I0y.ptr<short>();
float *I0xx_ptr = dst_I0xx.ptr<float>();
float *I0yy_ptr = dst_I0yy.ptr<float>();
float *I0xy_ptr = dst_I0xy.ptr<float>();
float *I0x_ptr = dst_I0x.ptr<float>();
float *I0y_ptr = dst_I0y.ptr<float>();
float *I0xx_aux_ptr = I0xx_buf_aux.ptr<float>();
float *I0yy_aux_ptr = I0yy_buf_aux.ptr<float>();
float *I0xy_aux_ptr = I0xy_buf_aux.ptr<float>();
float *I0x_aux_ptr = I0x_buf_aux.ptr<float>();
float *I0y_aux_ptr = I0y_buf_aux.ptr<float>();
int w = I0x.cols;
int h = I0x.rows;
int psz2 = patch_size / 2;
int psz = 2 * psz2;
// width of the sparse OF fields:
int ws = 1 + (w - patch_size) / patch_stride;
// separable box filter for computing patch sums on a sparse
// grid (determined by patch_stride)
/* Separable box filter: horizontal pass */
for (int i = 0; i < h; i++)
{
float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f;
short *x_row = I0x_ptr + i * w, *y_row = I0y_ptr + i * w;
float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f, sum_x = 0.0f, sum_y = 0.0f;
short *x_row = I0x.ptr<short>(i);
short *y_row = I0y.ptr<short>(i);
for (int j = 0; j < patch_size; j++)
{
sum_xx += x_row[j] * x_row[j];
sum_yy += y_row[j] * y_row[j];
sum_xy += x_row[j] * y_row[j];
sum_x += x_row[j];
sum_y += y_row[j];
}
I0xx_aux_ptr[i * ws] = sum_xx;
I0yy_aux_ptr[i * ws] = sum_yy;
I0xy_aux_ptr[i * ws] = sum_xy;
I0x_aux_ptr[i * ws] = sum_x;
I0y_aux_ptr[i * ws] = sum_y;
int js = 1;
for (int j = patch_size; j < w; j++)
{
sum_xx += (x_row[j] * x_row[j] - x_row[j - patch_size] * x_row[j - patch_size]);
sum_yy += (y_row[j] * y_row[j] - y_row[j - patch_size] * y_row[j - patch_size]);
sum_xy += (x_row[j] * y_row[j] - x_row[j - patch_size] * y_row[j - patch_size]);
if ((j - psz) % patch_stride == 0)
sum_x += (x_row[j] - x_row[j - patch_size]);
sum_y += (y_row[j] - y_row[j - patch_size]);
if ((j - patch_size + 1) % patch_stride == 0)
{
I0xx_aux_ptr[i * ws + js] = sum_xx;
I0yy_aux_ptr[i * ws + js] = sum_yy;
I0xy_aux_ptr[i * ws + js] = sum_xy;
I0x_aux_ptr[i * ws + js] = sum_x;
I0y_aux_ptr[i * ws + js] = sum_y;
js++;
}
}
}
AutoBuffer<float> sum_xx_buf(ws), sum_yy_buf(ws), sum_xy_buf(ws);
AutoBuffer<float> sum_xx_buf(ws), sum_yy_buf(ws), sum_xy_buf(ws), sum_x_buf(ws), sum_y_buf(ws);
float *sum_xx = (float *)sum_xx_buf;
float *sum_yy = (float *)sum_yy_buf;
float *sum_xy = (float *)sum_xy_buf;
float *sum_x = (float *)sum_x_buf;
float *sum_y = (float *)sum_y_buf;
for (int j = 0; j < ws; j++)
{
sum_xx[j] = 0.0f;
sum_yy[j] = 0.0f;
sum_xy[j] = 0.0f;
sum_x[j] = 0.0f;
sum_y[j] = 0.0f;
}
/* Separable box filter: vertical pass */
for (int i = 0; i < patch_size; i++)
for (int j = 0; j < ws; j++)
{
sum_xx[j] += I0xx_aux_ptr[i * ws + j];
sum_yy[j] += I0yy_aux_ptr[i * ws + j];
sum_xy[j] += I0xy_aux_ptr[i * ws + j];
sum_x[j] += I0x_aux_ptr[i * ws + j];
sum_y[j] += I0y_aux_ptr[i * ws + j];
}
for (int j = 0; j < ws; j++)
{
I0xx_ptr[j] = sum_xx[j];
I0yy_ptr[j] = sum_yy[j];
I0xy_ptr[j] = sum_xy[j];
I0x_ptr[j] = sum_x[j];
I0y_ptr[j] = sum_y[j];
}
int is = 1;
for (int i = patch_size; i < h; i++)
......@@ -252,166 +342,605 @@ void DISOpticalFlowImpl::precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy,
sum_xx[j] += (I0xx_aux_ptr[i * ws + j] - I0xx_aux_ptr[(i - patch_size) * ws + j]);
sum_yy[j] += (I0yy_aux_ptr[i * ws + j] - I0yy_aux_ptr[(i - patch_size) * ws + j]);
sum_xy[j] += (I0xy_aux_ptr[i * ws + j] - I0xy_aux_ptr[(i - patch_size) * ws + j]);
sum_x[j] += (I0x_aux_ptr[i * ws + j] - I0x_aux_ptr[(i - patch_size) * ws + j]);
sum_y[j] += (I0y_aux_ptr[i * ws + j] - I0y_aux_ptr[(i - patch_size) * ws + j]);
}
if ((i - psz) % patch_stride == 0)
if ((i - patch_size + 1) % patch_stride == 0)
{
for (int j = 0; j < ws; j++)
{
I0xx_ptr[is * ws + j] = sum_xx[j];
I0yy_ptr[is * ws + j] = sum_yy[j];
I0xy_ptr[is * ws + j] = sum_xy[j];
I0x_ptr[is * ws + j] = sum_x[j];
I0y_ptr[is * ws + j] = sum_y[j];
}
is++;
}
}
}
void DISOpticalFlowImpl::patchInverseSearch(Mat &dst_Sx, Mat &dst_Sy, Mat &src_Ux, Mat &src_Uy, Mat &I0, Mat &I0x,
Mat &I0y, Mat &I1)
/* Parallel body for the patch inverse search step. The hs rows of the sparse flow grid are split
 * into nstripes horizontal stripes; each operator() call processes one Range of stripes.
 * All Mat arguments are captured by pointer -- no pixel data is copied here. */
DISOpticalFlowImpl::PatchInverseSearch_ParBody::PatchInverseSearch_ParBody(DISOpticalFlowImpl &_dis, int _nstripes,
                                                                           int _hs, Mat &dst_Sx, Mat &dst_Sy,
                                                                           Mat &src_Ux, Mat &src_Uy, Mat &_I0, Mat &_I1,
                                                                           Mat &_I0x, Mat &_I0y, int _num_iter)
    : dis(&_dis), nstripes(_nstripes), hs(_hs), Sx(&dst_Sx), Sy(&dst_Sy), Ux(&src_Ux), Uy(&src_Uy), I0(&_I0), I1(&_I1),
      I0x(&_I0x), I0y(&_I0y), num_iter(_num_iter)
{
    /* Round up so that nstripes stripes always cover all hs rows of the sparse grid */
    stripe_sz = (int)ceil(hs / (double)nstripes);
}
/////////////////////////////////////////////* Patch processing functions */////////////////////////////////////////////
/* Some auxiliary macros */
/* Broadcasts the bilinear weights into SIMD registers, declares all row registers used by the
 * 8x8 patch-extraction macros and preloads/expands the first row of I1. Expects w00..w11,
 * I0_ptr, I1_ptr and I1_stride to be in scope at the expansion site; advances I1_ptr to the
 * second row as a side effect. */
#define HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION                                                                         \
    v_float32x4 w00v = v_setall_f32(w00);                                                                              \
    v_float32x4 w01v = v_setall_f32(w01);                                                                              \
    v_float32x4 w10v = v_setall_f32(w10);                                                                              \
    v_float32x4 w11v = v_setall_f32(w11);                                                                              \
                                                                                                                       \
    v_uint8x16 I0_row_16, I1_row_16, I1_row_shifted_16, I1_row_next_16, I1_row_next_shifted_16;                        \
    v_uint16x8 I0_row_8, I1_row_8, I1_row_shifted_8, I1_row_next_8, I1_row_next_shifted_8, tmp;                        \
    v_uint32x4 I0_row_4_left, I1_row_4_left, I1_row_shifted_4_left, I1_row_next_4_left, I1_row_next_shifted_4_left;    \
    v_uint32x4 I0_row_4_right, I1_row_4_right, I1_row_shifted_4_right, I1_row_next_4_right,                            \
      I1_row_next_shifted_4_right;                                                                                     \
    v_float32x4 I_diff_left, I_diff_right;                                                                             \
                                                                                                                       \
    /* Preload and expand the first row of I1 (and its 1-pixel-shifted copy for the horizontal                         \
     * interpolation neighbors): */                                                                                    \
    I1_row_16 = v_load(I1_ptr);                                                                                        \
    I1_row_shifted_16 = v_extract<1>(I1_row_16, I1_row_16);                                                            \
    v_expand(I1_row_16, I1_row_8, tmp);                                                                                \
    v_expand(I1_row_shifted_16, I1_row_shifted_8, tmp);                                                                \
    v_expand(I1_row_8, I1_row_4_left, I1_row_4_right);                                                                 \
    v_expand(I1_row_shifted_8, I1_row_shifted_4_left, I1_row_shifted_4_right);                                         \
    I1_ptr += I1_stride;
/* Produces I_diff_left / I_diff_right = bilinear(I1) - I0 for the current 8-pixel patch row,
 * combining the current and next I1 rows (and their shifted copies) with the w00..w11 weights.
 * Must follow HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION, which sets up all registers used here. */
#define HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION                                                                      \
    /* Load the next row of I1: */                                                                                     \
    I1_row_next_16 = v_load(I1_ptr);                                                                                   \
    /* Circular shift left by 1 element: */                                                                            \
    I1_row_next_shifted_16 = v_extract<1>(I1_row_next_16, I1_row_next_16);                                             \
    /* Expand to 8 ushorts (we only need the first 8 values): */                                                       \
    v_expand(I1_row_next_16, I1_row_next_8, tmp);                                                                      \
    v_expand(I1_row_next_shifted_16, I1_row_next_shifted_8, tmp);                                                      \
    /* Separate the left and right halves: */                                                                          \
    v_expand(I1_row_next_8, I1_row_next_4_left, I1_row_next_4_right);                                                  \
    v_expand(I1_row_next_shifted_8, I1_row_next_shifted_4_left, I1_row_next_shifted_4_right);                          \
                                                                                                                       \
    /* Load current row of I0: */                                                                                      \
    I0_row_16 = v_load(I0_ptr);                                                                                        \
    v_expand(I0_row_16, I0_row_8, tmp);                                                                                \
    v_expand(I0_row_8, I0_row_4_left, I0_row_4_right);                                                                 \
                                                                                                                       \
    /* Compute diffs between I0 and bilinearly interpolated I1: */                                                     \
    I_diff_left = w00v * v_cvt_f32(v_reinterpret_as_s32(I1_row_4_left)) +                                              \
                  w01v * v_cvt_f32(v_reinterpret_as_s32(I1_row_shifted_4_left)) +                                      \
                  w10v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_4_left)) +                                         \
                  w11v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_shifted_4_left)) -                                 \
                  v_cvt_f32(v_reinterpret_as_s32(I0_row_4_left));                                                      \
    I_diff_right = w00v * v_cvt_f32(v_reinterpret_as_s32(I1_row_4_right)) +                                            \
                   w01v * v_cvt_f32(v_reinterpret_as_s32(I1_row_shifted_4_right)) +                                    \
                   w10v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_4_right)) +                                       \
                   w11v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_shifted_4_right)) -                               \
                   v_cvt_f32(v_reinterpret_as_s32(I0_row_4_right));
/* Advances the I0/I1 row pointers and promotes the already-expanded "next" I1 row to the
 * "current" row, so the following HAL_PROCESS_... expansion only has to load one new row. */
#define HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW                                                                     \
    I0_ptr += I0_stride;                                                                                               \
    I1_ptr += I1_stride;                                                                                               \
                                                                                                                       \
    I1_row_4_left = I1_row_next_4_left;                                                                                \
    I1_row_4_right = I1_row_next_4_right;                                                                              \
    I1_row_shifted_4_left = I1_row_next_shifted_4_left;                                                                \
    I1_row_shifted_4_right = I1_row_next_shifted_4_right;
/* This function essentially performs one iteration of gradient descent when finding the most similar patch in I1 for a
 * given one in I0. It assumes that I0_ptr and I1_ptr already point to the corresponding patches and w00, w01, w10, w11
 * are precomputed bilinear interpolation weights. It returns the SSD (sum of squared differences) between these patches
 * and computes the values (dst_dUx, dst_dUy) that are used in the flow vector update. HAL acceleration is implemented
 * only for the default patch size (8x8). Everything is processed in floats as using fixed-point approximations harms
 * the quality significantly.
 */
inline float processPatch(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar *I1_ptr, short *I0x_ptr, short *I0y_ptr,
                          int I0_stride, int I1_stride, float w00, float w01, float w10, float w11, int patch_sz)
{
    float SSD = 0.0f;
#ifdef CV_SIMD128
    if (patch_sz == 8)
    {
        /* Variables to accumulate the sums */
        v_float32x4 Ux_vec = v_setall_f32(0);
        v_float32x4 Uy_vec = v_setall_f32(0);
        v_float32x4 SSD_vec = v_setall_f32(0);
        v_int16x8 I0x_row, I0y_row;
        v_int32x4 I0x_row_4_left, I0x_row_4_right, I0y_row_4_left, I0y_row_4_right;
        HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION;
        for (int row = 0; row < 8; row++)
        {
            /* After this macro I_diff_left/I_diff_right hold (bilinear(I1) - I0) for this row */
            HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION;
            I0x_row = v_load(I0x_ptr);
            v_expand(I0x_row, I0x_row_4_left, I0x_row_4_right);
            I0y_row = v_load(I0y_ptr);
            v_expand(I0y_row, I0y_row_4_left, I0y_row_4_right);
            /* Update the sums: dUx/dUy accumulate diff * gradient; SSD accumulates diff^2 */
            Ux_vec += I_diff_left * v_cvt_f32(I0x_row_4_left) + I_diff_right * v_cvt_f32(I0x_row_4_right);
            Uy_vec += I_diff_left * v_cvt_f32(I0y_row_4_left) + I_diff_right * v_cvt_f32(I0y_row_4_right);
            SSD_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right;
            I0x_ptr += I0_stride;
            I0y_ptr += I0_stride;
            HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW;
        }
        /* Final reduce operations: */
        dst_dUx = v_reduce_sum(Ux_vec);
        dst_dUy = v_reduce_sum(Uy_vec);
        SSD = v_reduce_sum(SSD_vec);
    }
    else
    {
#endif
        /* Scalar fallback for non-default patch sizes */
        dst_dUx = 0.0f;
        dst_dUy = 0.0f;
        float diff;
        for (int i = 0; i < patch_sz; i++)
            for (int j = 0; j < patch_sz; j++)
            {
                /* Bilinearly interpolate I1 at the sub-pixel location and subtract I0 */
                diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] +
                       w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] -
                       I0_ptr[i * I0_stride + j];
                SSD += diff * diff;
                dst_dUx += diff * I0x_ptr[i * I0_stride + j];
                dst_dUy += diff * I0y_ptr[i * I0_stride + j];
            }
#ifdef CV_SIMD128
    }
#endif
    return SSD;
}
/* Same as processPatch, but with patch mean normalization, which improves robustness under changing
 * lighting conditions. x_grad_sum/y_grad_sum are the precomputed sums of I0x/I0y over the patch
 * (taken from the structure-tensor buffers), needed for the mean-subtraction terms below.
 */
inline float processPatchMeanNorm(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar *I1_ptr, short *I0x_ptr,
                                  short *I0y_ptr, int I0_stride, int I1_stride, float w00, float w01, float w10,
                                  float w11, int patch_sz, float x_grad_sum, float y_grad_sum)
{
    float sum_diff = 0.0, sum_diff_sq = 0.0;
    float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0;
    float n = (float)patch_sz * patch_sz; /* number of pixels in the patch */
#ifdef CV_SIMD128
    if (patch_sz == 8)
    {
        /* Variables to accumulate the sums */
        v_float32x4 sum_I0x_mul_vec = v_setall_f32(0);
        v_float32x4 sum_I0y_mul_vec = v_setall_f32(0);
        v_float32x4 sum_diff_vec = v_setall_f32(0);
        v_float32x4 sum_diff_sq_vec = v_setall_f32(0);
        v_int16x8 I0x_row, I0y_row;
        v_int32x4 I0x_row_4_left, I0x_row_4_right, I0y_row_4_left, I0y_row_4_right;
        HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION;
        for (int row = 0; row < 8; row++)
        {
            /* After this macro I_diff_left/I_diff_right hold (bilinear(I1) - I0) for this row */
            HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION;
            I0x_row = v_load(I0x_ptr);
            v_expand(I0x_row, I0x_row_4_left, I0x_row_4_right);
            I0y_row = v_load(I0y_ptr);
            v_expand(I0y_row, I0y_row_4_left, I0y_row_4_right);
            /* Update the sums: */
            sum_I0x_mul_vec += I_diff_left * v_cvt_f32(I0x_row_4_left) + I_diff_right * v_cvt_f32(I0x_row_4_right);
            sum_I0y_mul_vec += I_diff_left * v_cvt_f32(I0y_row_4_left) + I_diff_right * v_cvt_f32(I0y_row_4_right);
            sum_diff_sq_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right;
            sum_diff_vec += I_diff_left + I_diff_right;
            I0x_ptr += I0_stride;
            I0y_ptr += I0_stride;
            HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW;
        }
        /* Final reduce operations: */
        sum_I0x_mul = v_reduce_sum(sum_I0x_mul_vec);
        sum_I0y_mul = v_reduce_sum(sum_I0y_mul_vec);
        sum_diff = v_reduce_sum(sum_diff_vec);
        sum_diff_sq = v_reduce_sum(sum_diff_sq_vec);
    }
    else
    {
#endif
        /* Scalar fallback for non-default patch sizes */
        float diff;
        for (int i = 0; i < patch_sz; i++)
            for (int j = 0; j < patch_sz; j++)
            {
                diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] +
                       w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] -
                       I0_ptr[i * I0_stride + j];
                sum_diff += diff;
                sum_diff_sq += diff * diff;
                sum_I0x_mul += diff * I0x_ptr[i * I0_stride + j];
                sum_I0y_mul += diff * I0y_ptr[i * I0_stride + j];
            }
#ifdef CV_SIMD128
    }
#endif
    /* Subtracting mean(diff) * sum(gradient) is equivalent to accumulating with mean-subtracted
     * diffs; likewise the return value equals sum((diff - mean(diff))^2). */
    dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n;
    dst_dUy = sum_I0y_mul - sum_diff * y_grad_sum / n;
    return sum_diff_sq - sum_diff * sum_diff / n;
}
/* Similar to processPatch, but compute only the sum of squared differences (SSD) between the patches,
 * without the gradient-based update terms. Used to evaluate propagated flow candidates cheaply. */
inline float computeSSD(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stride, float w00, float w01, float w10,
                        float w11, int patch_sz)
{
    float SSD = 0.0f;
#ifdef CV_SIMD128
    if (patch_sz == 8)
    {
        v_float32x4 SSD_vec = v_setall_f32(0);
        HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION;
        for (int row = 0; row < 8; row++)
        {
            /* I_diff_left/I_diff_right = bilinear(I1) - I0 for this row */
            HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION;
            SSD_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right;
            HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW;
        }
        SSD = v_reduce_sum(SSD_vec);
    }
    else
    {
#endif
        /* Scalar fallback for non-default patch sizes */
        float diff;
        for (int i = 0; i < patch_sz; i++)
            for (int j = 0; j < patch_sz; j++)
            {
                diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] +
                       w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] -
                       I0_ptr[i * I0_stride + j];
                SSD += diff * diff;
            }
#ifdef CV_SIMD128
    }
#endif
    return SSD;
}
/* Same as computeSSD, but with patch mean normalization: returns the SSD computed after subtracting
 * the mean difference, i.e. sum((diff - mean(diff))^2) = sum(diff^2) - sum(diff)^2 / n. */
inline float computeSSDMeanNorm(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stride, float w00, float w01,
                                float w10, float w11, int patch_sz)
{
    float sum_diff = 0.0f, sum_diff_sq = 0.0f;
    float n = (float)patch_sz * patch_sz; /* number of pixels in the patch */
#ifdef CV_SIMD128
    if (patch_sz == 8)
    {
        v_float32x4 sum_diff_vec = v_setall_f32(0);
        v_float32x4 sum_diff_sq_vec = v_setall_f32(0);
        HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION;
        for (int row = 0; row < 8; row++)
        {
            /* I_diff_left/I_diff_right = bilinear(I1) - I0 for this row */
            HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION;
            sum_diff_sq_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right;
            sum_diff_vec += I_diff_left + I_diff_right;
            HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW;
        }
        sum_diff = v_reduce_sum(sum_diff_vec);
        sum_diff_sq = v_reduce_sum(sum_diff_sq_vec);
    }
    else
    {
#endif
        /* Scalar fallback for non-default patch sizes */
        float diff;
        for (int i = 0; i < patch_sz; i++)
            for (int j = 0; j < patch_sz; j++)
            {
                diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] +
                       w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] -
                       I0_ptr[i * I0_stride + j];
                sum_diff += diff;
                sum_diff_sq += diff * diff;
            }
#ifdef CV_SIMD128
    }
#endif
    return sum_diff_sq - sum_diff * sum_diff / n;
}
#undef HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION
#undef HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION
#undef HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void DISOpticalFlowImpl::PatchInverseSearch_ParBody::operator()(const Range &range) const
{
float *Ux_ptr = src_Ux.ptr<float>();
float *Uy_ptr = src_Uy.ptr<float>();
float *Sx_ptr = dst_Sx.ptr<float>();
float *Sy_ptr = dst_Sy.ptr<float>();
uchar *I0_ptr = I0.ptr<uchar>();
uchar *I1_ptr = I1.ptr<uchar>();
short *I0x_ptr = I0x.ptr<short>();
short *I0y_ptr = I0y.ptr<short>();
int w = I0.cols;
int h = I1.rows;
int psz2 = patch_size / 2;
// width and height of the sparse OF fields:
int ws = 1 + (w - patch_size) / patch_stride;
int hs = 1 + (h - patch_size) / patch_stride;
precomputeStructureTensor(I0xx_buf, I0yy_buf, I0xy_buf, I0x, I0y);
float *xx_ptr = I0xx_buf.ptr<float>();
float *yy_ptr = I0yy_buf.ptr<float>();
float *xy_ptr = I0xy_buf.ptr<float>();
// perform a fixed number of gradient descent iterations for each patch:
int i = psz2;
for (int is = 0; is < hs; is++)
{
int j = psz2;
for (int js = 0; js < ws; js++)
{
float cur_Ux = Ux_ptr[i * w + j];
float cur_Uy = Uy_ptr[i * w + j];
float detH = xx_ptr[is * ws + js] * yy_ptr[is * ws + js] - xy_ptr[is * ws + js] * xy_ptr[is * ws + js];
// force separate processing of stripes if we are using spatial propagation:
if(dis->use_spatial_propagation && range.end>range.start+1)
{
for(int n=range.start;n<range.end;n++)
(*this)(Range(n,n+1));
return;
}
int psz = dis->patch_size;
int psz2 = psz / 2;
int w_ext = dis->w + 2 * dis->border_size; //!< width of I1_ext
int bsz = dis->border_size;
/* Input dense flow */
float *Ux_ptr = Ux->ptr<float>();
float *Uy_ptr = Uy->ptr<float>();
/* Output sparse flow */
float *Sx_ptr = Sx->ptr<float>();
float *Sy_ptr = Sy->ptr<float>();
uchar *I0_ptr = I0->ptr<uchar>();
uchar *I1_ptr = I1->ptr<uchar>();
short *I0x_ptr = I0x->ptr<short>();
short *I0y_ptr = I0y->ptr<short>();
/* Precomputed structure tensor */
float *xx_ptr = dis->I0xx_buf.ptr<float>();
float *yy_ptr = dis->I0yy_buf.ptr<float>();
float *xy_ptr = dis->I0xy_buf.ptr<float>();
/* And extra buffers for mean-normalization: */
float *x_ptr = dis->I0x_buf.ptr<float>();
float *y_ptr = dis->I0y_buf.ptr<float>();
int i, j, dir;
int start_is, end_is, start_js, end_js;
int start_i, start_j;
float i_lower_limit = bsz - psz + 1.0f;
float i_upper_limit = bsz + dis->h - 1.0f;
float j_lower_limit = bsz - psz + 1.0f;
float j_upper_limit = bsz + dis->w - 1.0f;
float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy;
#define INIT_BILINEAR_WEIGHTS(Ux, Uy) \
i_I1 = min(max(i + Uy + bsz, i_lower_limit), i_upper_limit); \
j_I1 = min(max(j + Ux + bsz, j_lower_limit), j_upper_limit); \
\
w11 = (i_I1 - floor(i_I1)) * (j_I1 - floor(j_I1)); \
w10 = (i_I1 - floor(i_I1)) * (floor(j_I1) + 1 - j_I1); \
w01 = (floor(i_I1) + 1 - i_I1) * (j_I1 - floor(j_I1)); \
w00 = (floor(i_I1) + 1 - i_I1) * (floor(j_I1) + 1 - j_I1);
#define COMPUTE_SSD(dst, Ux, Uy) \
INIT_BILINEAR_WEIGHTS(Ux, Uy); \
if (dis->use_mean_normalization) \
dst = computeSSDMeanNorm(I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, dis->w, w_ext, w00, \
w01, w10, w11, psz); \
else \
dst = computeSSD(I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, dis->w, w_ext, w00, w01, \
w10, w11, psz);
int num_inner_iter = (int)floor(dis->grad_descent_iter / (float)num_iter);
for (int iter = 0; iter < num_iter; iter++)
{
if (iter % 2 == 0)
{
dir = 1;
start_is = min(range.start * stripe_sz, hs);
end_is = min(range.end * stripe_sz, hs);
start_js = 0;
end_js = dis->ws;
start_i = start_is * dis->patch_stride;
start_j = 0;
}
else
{
dir = -1;
start_is = min(range.end * stripe_sz, hs) - 1;
end_is = min(range.start * stripe_sz, hs) - 1;
start_js = dis->ws - 1;
end_js = -1;
start_i = start_is * dis->patch_stride;
start_j = (dis->ws - 1) * dis->patch_stride;
}
i = start_i;
for (int is = start_is; dir * is < dir * end_is; is += dir)
{
j = start_j;
for (int js = start_js; dir * js < dir * end_js; js += dir)
{
if (iter == 0)
{
/* Using result form the previous pyramid level as the very first approximation: */
Sx_ptr[is * dis->ws + js] = Ux_ptr[(i + psz2) * dis->w + j + psz2];
Sy_ptr[is * dis->ws + js] = Uy_ptr[(i + psz2) * dis->w + j + psz2];
}
if (dis->use_spatial_propagation)
{
/* Updating the current Sx_ptr, Sy_ptr to the best candidate: */
float min_SSD, cur_SSD;
COMPUTE_SSD(min_SSD, Sx_ptr[is * dis->ws + js], Sy_ptr[is * dis->ws + js]);
if (dir * js > dir * start_js)
{
COMPUTE_SSD(cur_SSD, Sx_ptr[is * dis->ws + js - dir], Sy_ptr[is * dis->ws + js - dir]);
if (cur_SSD < min_SSD)
{
min_SSD = cur_SSD;
Sx_ptr[is * dis->ws + js] = Sx_ptr[is * dis->ws + js - dir];
Sy_ptr[is * dis->ws + js] = Sy_ptr[is * dis->ws + js - dir];
}
}
/* Flow vectors won't actually propagate across different stripes, which is the reason for keeping
* the number of stripes constant. It works well enough in practice and doesn't introduce any
* visible seams.
*/
if (dir * is > dir * start_is)
{
COMPUTE_SSD(cur_SSD, Sx_ptr[(is - dir) * dis->ws + js], Sy_ptr[(is - dir) * dis->ws + js]);
if (cur_SSD < min_SSD)
{
min_SSD = cur_SSD;
Sx_ptr[is * dis->ws + js] = Sx_ptr[(is - dir) * dis->ws + js];
Sy_ptr[is * dis->ws + js] = Sy_ptr[(is - dir) * dis->ws + js];
}
}
}
/* Use the best candidate as a starting point for the gradient descent: */
float cur_Ux = Sx_ptr[is * dis->ws + js];
float cur_Uy = Sy_ptr[is * dis->ws + js];
/* Computing the inverse of the structure tensor: */
float detH = xx_ptr[is * dis->ws + js] * yy_ptr[is * dis->ws + js] -
xy_ptr[is * dis->ws + js] * xy_ptr[is * dis->ws + js];
if (abs(detH) < EPS)
detH = EPS;
float invH11 = yy_ptr[is * ws + js] / detH;
float invH12 = -xy_ptr[is * ws + js] / detH;
float invH22 = xx_ptr[is * ws + js] / detH;
float prev_sum_diff = 100000000.0f;
for (int t = 0; t < grad_descent_iter; t++)
{
float dUx = 0, dUy = 0;
float diff = 0;
float sum_diff = 0.0f;
for (int pos_y = i - psz2; pos_y <= i + psz2; pos_y++)
for (int pos_x = j - psz2; pos_x <= j + psz2; pos_x++)
{
float pos_x_shifted = min(max(pos_x + cur_Ux, 0.0f), w - 1.0f - EPS);
float pos_y_shifted = min(max(pos_y + cur_Uy, 0.0f), h - 1.0f - EPS);
int pos_x_lower = (int)pos_x_shifted;
int pos_x_upper = pos_x_lower + 1;
int pos_y_lower = (int)pos_y_shifted;
int pos_y_upper = pos_y_lower + 1;
diff = (pos_x_shifted - pos_x_lower) * (pos_y_shifted - pos_y_lower) *
I1_ptr[pos_y_upper * w + pos_x_upper] +
(pos_x_upper - pos_x_shifted) * (pos_y_shifted - pos_y_lower) *
I1_ptr[pos_y_upper * w + pos_x_lower] +
(pos_x_shifted - pos_x_lower) * (pos_y_upper - pos_y_shifted) *
I1_ptr[pos_y_lower * w + pos_x_upper] +
(pos_x_upper - pos_x_shifted) * (pos_y_upper - pos_y_shifted) *
I1_ptr[pos_y_lower * w + pos_x_lower] -
I0_ptr[pos_y * w + pos_x];
sum_diff += diff * diff;
dUx += I0x_ptr[pos_y * w + pos_x] * diff;
dUy += I0y_ptr[pos_y * w + pos_x] * diff;
}
cur_Ux -= invH11 * dUx + invH12 * dUy;
cur_Uy -= invH12 * dUx + invH22 * dUy;
if (sum_diff > prev_sum_diff)
float invH11 = yy_ptr[is * dis->ws + js] / detH;
float invH12 = -xy_ptr[is * dis->ws + js] / detH;
float invH22 = xx_ptr[is * dis->ws + js] / detH;
float prev_SSD = INF, SSD;
float x_grad_sum = x_ptr[is * dis->ws + js];
float y_grad_sum = y_ptr[is * dis->ws + js];
for (int t = 0; t < num_inner_iter; t++)
{
INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy);
if (dis->use_mean_normalization)
SSD = processPatchMeanNorm(dUx, dUy, I0_ptr + i * dis->w + j,
I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * dis->w + j,
I0y_ptr + i * dis->w + j, dis->w, w_ext, w00, w01, w10, w11, psz,
x_grad_sum, y_grad_sum);
else
SSD = processPatch(dUx, dUy, I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
I0x_ptr + i * dis->w + j, I0y_ptr + i * dis->w + j, dis->w, w_ext, w00, w01,
w10, w11, psz);
dx = invH11 * dUx + invH12 * dUy;
dy = invH12 * dUx + invH22 * dUy;
cur_Ux -= dx;
cur_Uy -= dy;
/* Break when patch distance stops decreasing */
if (SSD >= prev_SSD)
break;
prev_sum_diff = sum_diff;
prev_SSD = SSD;
}
if (norm(Vec2f(cur_Ux - Ux_ptr[i * w + j], cur_Uy - Uy_ptr[i * w + j])) <= patch_size)
/* If gradient descent converged to a flow vector that is very far from the initial approximation
* (more than patch size) then we don't use it. Noticeably improves the robustness.
*/
if (norm(Vec2f(cur_Ux - Sx_ptr[is * dis->ws + js], cur_Uy - Sy_ptr[is * dis->ws + js])) <= psz)
{
Sx_ptr[is * ws + js] = cur_Ux;
Sy_ptr[is * ws + js] = cur_Uy;
Sx_ptr[is * dis->ws + js] = cur_Ux;
Sy_ptr[is * dis->ws + js] = cur_Uy;
}
else
{
Sx_ptr[is * ws + js] = Ux_ptr[i * w + j];
Sy_ptr[is * ws + js] = Uy_ptr[i * w + j];
j += dir * dis->patch_stride;
}
j += patch_stride;
i += dir * dis->patch_stride;
}
i += patch_stride;
}
#undef INIT_BILINEAR_WEIGHTS
#undef COMPUTE_SSD
}
void DISOpticalFlowImpl::densification(Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, Mat &src_Sy, Mat &I0, Mat &I1)
DISOpticalFlowImpl::Densification_ParBody::Densification_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _h,
Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, Mat &src_Sy,
Mat &_I0, Mat &_I1)
: dis(&_dis), nstripes(_nstripes), h(_h), Ux(&dst_Ux), Uy(&dst_Uy), Sx(&src_Sx), Sy(&src_Sy), I0(&_I0), I1(&_I1)
{
float *Ux_ptr = dst_Ux.ptr<float>();
float *Uy_ptr = dst_Uy.ptr<float>();
float *Sx_ptr = src_Sx.ptr<float>();
float *Sy_ptr = src_Sy.ptr<float>();
uchar *I0_ptr = I0.ptr<uchar>();
uchar *I1_ptr = I1.ptr<uchar>();
int w = I0.cols;
int h = I0.rows;
int psz2 = patch_size / 2;
// width of the sparse OF fields:
int ws = 1 + (w - patch_size) / patch_stride;
int start_x;
int start_y;
start_y = psz2;
for (int i = 0; i < h; i++)
stripe_sz = (int)ceil(h / (double)nstripes);
}
/* This function transforms a sparse optical flow field obtained by PatchInverseSearch (which computes flow values
* on a sparse grid defined by patch_stride) into a dense optical flow field by weighted averaging of values from the
* overlapping patches.
*/
void DISOpticalFlowImpl::Densification_ParBody::operator()(const Range &range) const
{
int start_i = min(range.start * stripe_sz, h);
int end_i = min(range.end * stripe_sz, h);
/* Input sparse flow */
float *Sx_ptr = Sx->ptr<float>();
float *Sy_ptr = Sy->ptr<float>();
/* Output dense flow */
float *Ux_ptr = Ux->ptr<float>();
float *Uy_ptr = Uy->ptr<float>();
uchar *I0_ptr = I0->ptr<uchar>();
uchar *I1_ptr = I1->ptr<uchar>();
int psz = dis->patch_size;
int pstr = dis->patch_stride;
int i_l, i_u;
int j_l, j_u;
float i_m, j_m, diff;
/* These values define the set of sparse grid locations that contain patches overlapping with the current dense flow
* location */
int start_is, end_is;
int start_js, end_js;
/* Some helper macros for updating this set of sparse grid locations */
#define UPDATE_SPARSE_I_COORDINATES \
if (i % pstr == 0 && i + psz <= h) \
end_is++; \
if (i - psz >= 0 && (i - psz) % pstr == 0 && start_is < end_is) \
start_is++;
#define UPDATE_SPARSE_J_COORDINATES \
if (j % pstr == 0 && j + psz <= dis->w) \
end_js++; \
if (j - psz >= 0 && (j - psz) % pstr == 0 && start_js < end_js) \
start_js++;
start_is = 0;
end_is = -1;
for (int i = 0; i < start_i; i++)
{
if (i - psz2 > start_y && start_y + patch_stride < h - psz2)
start_y += patch_stride;
start_x = psz2;
for (int j = 0; j < w; j++)
UPDATE_SPARSE_I_COORDINATES;
}
for (int i = start_i; i < end_i; i++)
{
UPDATE_SPARSE_I_COORDINATES;
start_js = 0;
end_js = -1;
for (int j = 0; j < dis->w; j++)
{
UPDATE_SPARSE_J_COORDINATES;
float coef, sum_coef = 0.0f;
float sum_Ux = 0.0f;
float sum_Uy = 0.0f;
if (j - psz2 > start_x && start_x + patch_stride < w - psz2)
start_x += patch_stride;
for (int pos_y = start_y; pos_y <= min(i + psz2, h - psz2 - 1); pos_y += patch_stride)
for (int pos_x = start_x; pos_x <= min(j + psz2, w - psz2 - 1); pos_x += patch_stride)
/* Iterate through all the patches that overlap the current location (i,j) */
for (int is = start_is; is <= end_is; is++)
for (int js = start_js; js <= end_js; js++)
{
float diff;
int is = (pos_y - psz2) / patch_stride;
int js = (pos_x - psz2) / patch_stride;
float j_shifted = min(max(j + Sx_ptr[is * ws + js], 0.0f), w - 1.0f - EPS);
float i_shifted = min(max(i + Sy_ptr[is * ws + js], 0.0f), h - 1.0f - EPS);
int j_lower = (int)j_shifted;
int j_upper = j_lower + 1;
int i_lower = (int)i_shifted;
int i_upper = i_lower + 1;
diff = (j_shifted - j_lower) * (i_shifted - i_lower) * I1_ptr[i_upper * w + j_upper] +
(j_upper - j_shifted) * (i_shifted - i_lower) * I1_ptr[i_upper * w + j_lower] +
(j_shifted - j_lower) * (i_upper - i_shifted) * I1_ptr[i_lower * w + j_upper] +
(j_upper - j_shifted) * (i_upper - i_shifted) * I1_ptr[i_lower * w + j_lower] -
I0_ptr[i * w + j];
coef = 1 / max(1.0f, diff * diff);
sum_Ux += coef * Sx_ptr[is * ws + js];
sum_Uy += coef * Sy_ptr[is * ws + js];
j_m = min(max(j + Sx_ptr[is * dis->ws + js], 0.0f), dis->w - 1.0f - EPS);
i_m = min(max(i + Sy_ptr[is * dis->ws + js], 0.0f), dis->h - 1.0f - EPS);
j_l = (int)j_m;
j_u = j_l + 1;
i_l = (int)i_m;
i_u = i_l + 1;
diff = (j_m - j_l) * (i_m - i_l) * I1_ptr[i_u * dis->w + j_u] +
(j_u - j_m) * (i_m - i_l) * I1_ptr[i_u * dis->w + j_l] +
(j_m - j_l) * (i_u - i_m) * I1_ptr[i_l * dis->w + j_u] +
(j_u - j_m) * (i_u - i_m) * I1_ptr[i_l * dis->w + j_l] - I0_ptr[i * dis->w + j];
coef = 1 / max(1.0f, abs(diff));
sum_Ux += coef * Sx_ptr[is * dis->ws + js];
sum_Uy += coef * Sy_ptr[is * dis->ws + js];
sum_coef += coef;
}
Ux_ptr[i * w + j] = sum_Ux / sum_coef;
Uy_ptr[i * w + j] = sum_Uy / sum_coef;
Ux_ptr[i * dis->w + j] = sum_Ux / sum_coef;
Uy_ptr[i * dis->w + j] = sum_Uy / sum_coef;
}
}
#undef UPDATE_SPARSE_I_COORDINATES
#undef UPDATE_SPARSE_J_COORDINATES
}
void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow)
......@@ -419,12 +948,15 @@ void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flo
CV_Assert(!I0.empty() && I0.depth() == CV_8U && I0.channels() == 1);
CV_Assert(!I1.empty() && I1.depth() == CV_8U && I1.channels() == 1);
CV_Assert(I0.sameSize(I1));
CV_Assert(I0.isContinuous());
CV_Assert(I1.isContinuous());
Mat I0Mat = I0.getMat();
Mat I1Mat = I1.getMat();
flow.create(I1Mat.size(), CV_32FC2);
Mat &flowMat = flow.getMatRef();
coarsest_scale = (int)(log((2 * I0Mat.cols) / (4.0 * patch_size))/log(2.0) + 0.5) - 1;
coarsest_scale = (int)(log((2 * I0Mat.cols) / (4.0 * patch_size)) / log(2.0) + 0.5) - 1;
int num_stripes = getNumThreads();
prepareBuffers(I0Mat, I1Mat);
Ux[coarsest_scale].setTo(0.0f);
......@@ -432,9 +964,31 @@ void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flo
for (int i = coarsest_scale; i >= finest_scale; i--)
{
patchInverseSearch(Sx, Sy, Ux[i], Uy[i], I0s[i], I0xs[i], I0ys[i], I1s[i]);
densification(Ux[i], Uy[i], Sx, Sy, I0s[i], I1s[i]);
// TODO: variational refinement step
w = I0s[i].cols;
h = I0s[i].rows;
ws = 1 + (w - patch_size) / patch_stride;
hs = 1 + (h - patch_size) / patch_stride;
precomputeStructureTensor(I0xx_buf, I0yy_buf, I0xy_buf, I0x_buf, I0y_buf, I0xs[i], I0ys[i]);
if (use_spatial_propagation)
{
/* Use a fixed number of stripes regardless the number of threads to make inverse search
* with spatial propagation reproducible
*/
parallel_for_(Range(0, 8), PatchInverseSearch_ParBody(*this, 8, hs, Sx, Sy, Ux[i], Uy[i], I0s[i],
I1s_ext[i], I0xs[i], I0ys[i], 2));
}
else
{
parallel_for_(Range(0, num_stripes),
PatchInverseSearch_ParBody(*this, num_stripes, hs, Sx, Sy, Ux[i], Uy[i], I0s[i], I1s_ext[i],
I0xs[i], I0ys[i], 1));
}
parallel_for_(Range(0, num_stripes),
Densification_ParBody(*this, num_stripes, I0s[i].rows, Ux[i], Uy[i], Sx, Sy, I0s[i], I1s[i]));
if (variational_refinement_iter > 0)
variational_refinement_processors[i]->calcUV(I0s[i], I1s[i], Ux[i], Uy[i]);
if (i > finest_scale)
{
......@@ -454,6 +1008,7 @@ void DISOpticalFlowImpl::collectGarbage()
{
I0s.clear();
I1s.clear();
I1s_ext.clear();
I0xs.clear();
I0ys.clear();
Ux.clear();
......@@ -467,8 +1022,39 @@ void DISOpticalFlowImpl::collectGarbage()
I0xx_buf_aux.release();
I0yy_buf_aux.release();
I0xy_buf_aux.release();
for (int i = finest_scale; i <= coarsest_scale; i++)
variational_refinement_processors[i]->collectGarbage();
variational_refinement_processors.clear();
}
Ptr<DISOpticalFlow> createOptFlow_DIS() { return makePtr<DISOpticalFlowImpl>(); }
/* Factory for a DIS optical flow instance configured by one of the predefined speed/quality
 * presets. All presets share an 8x8 patch size and differ in the finest pyramid scale, the patch
 * stride and the gradient-descent / variational-refinement iteration counts. An unrecognized
 * preset value leaves every parameter except the patch size at the implementation defaults. */
Ptr<DISOpticalFlow> createOptFlow_DIS(int preset)
{
    Ptr<DISOpticalFlow> dis = makePtr<DISOpticalFlowImpl>();
    dis->setPatchSize(8);

    switch (preset)
    {
    case DISOpticalFlow::PRESET_ULTRAFAST:
        /* Fastest preset: coarse finest scale, large stride, no variational refinement */
        dis->setFinestScale(2);
        dis->setPatchStride(4);
        dis->setGradientDescentIterations(12);
        dis->setVariationalRefinementIterations(0);
        break;
    case DISOpticalFlow::PRESET_FAST:
        dis->setFinestScale(2);
        dis->setPatchStride(4);
        dis->setGradientDescentIterations(16);
        dis->setVariationalRefinementIterations(5);
        break;
    case DISOpticalFlow::PRESET_MEDIUM:
        /* Highest-quality preset of the three: finer scale, denser stride, more iterations */
        dis->setFinestScale(1);
        dis->setPatchStride(3);
        dis->setGradientDescentIterations(25);
        dis->setVariationalRefinementIterations(5);
        break;
    default:
        break;
    }
    return dis;
}
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/hal/intrin.hpp"
#include "precomp.hpp"
using namespace std;
namespace cv
{
namespace optflow
{
class VariationalRefinementImpl : public VariationalRefinement
{
public:
VariationalRefinementImpl();
void calc(InputArray I0, InputArray I1, InputOutputArray flow);
void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v);
void collectGarbage();
protected: //!< algorithm parameters
    /* Number of outer (fixed-point) iterations and of inner SOR iterations per fixed-point step
     * (see the accessor docs in the public header) */
    int fixedPointIterations, sorIterations;
    float omega; //!< relaxation factor used by the SOR solver (presumably the classic SOR omega -- confirm)
    /* Weights of the terms of the energy functional E(U) described in the header docs:
     * delta - color constancy, gamma - gradient constancy, alpha - smoothness */
    float alpha, delta, gamma;
    /* NOTE(review): epsilon presumably corresponds to the eps in the robust penalizer
     * Psi(s^2) = sqrt(s^2 + eps^2); the role of zeta is not visible here -- confirm in the solver code */
    float zeta, epsilon;

public:
    int getFixedPointIterations() const { return fixedPointIterations; }
    void setFixedPointIterations(int val) { fixedPointIterations = val; }
    int getSorIterations() const { return sorIterations; }
    void setSorIterations(int val) { sorIterations = val; }
    float getOmega() const { return omega; }
    void setOmega(float val) { omega = val; }
    float getAlpha() const { return alpha; }
    void setAlpha(float val) { alpha = val; }
    float getDelta() const { return delta; }
    void setDelta(float val) { delta = val; }
    float getGamma() const { return gamma; }
    void setGamma(float val) { gamma = val; }
protected: //!< internal buffers
/* This struct defines a special data layout for Mat_<float>. Original buffer is split into two: one for "red"
* elements (sum of indices is even) and one for "black" (sum of indices is odd) in a checkerboard pattern. It
* allows for more efficient processing in SOR iterations, more natural SIMD vectorization and parallelization
* (Red-Black SOR). Additionally, it simplifies border handling by adding repeated borders to both red and
* black buffers.
*/
struct RedBlackBuffer
{
    Mat_<float> red;   //!< holds the elements with (i+j)%2==0
    Mat_<float> black; //!< holds the elements with (i+j)%2==1
    /* Width of even and odd rows may be different */
    int red_even_len, red_odd_len;
    int black_even_len, black_odd_len;
    void create(Size s); //!< allocates red/black planes for a source of size s (defined elsewhere in this file)
    void release();      //!< frees both planes (defined elsewhere in this file)
};
Mat_<float> Ix, Iy, Iz, Ixx, Ixy, Iyy, Ixz, Iyz; //!< image derivative buffers
RedBlackBuffer Ix_rb, Iy_rb, Iz_rb, Ixx_rb, Ixy_rb, Iyy_rb, Ixz_rb, Iyz_rb; //!< corresponding red-black buffers
RedBlackBuffer A11, A12, A22, b1, b2; //!< main linear system coefficients
RedBlackBuffer weights; //!< smoothness term weights in the current fixed point iteration
Mat_<float> mapX, mapY; //!< auxiliary buffers for remapping
RedBlackBuffer tempW_u, tempW_v; //!< flow buffers that are modified in each fixed point iteration
RedBlackBuffer dW_u, dW_v; //!< optical flow increment
RedBlackBuffer W_u_rb, W_v_rb; //!< red-black-buffer version of the input flow
private: //!< private methods and parallel sections
    /* Conversion helpers between the ordinary Mat layout and the red-black layout: */
    void splitCheckerboard(RedBlackBuffer &dst, Mat &src);
    void mergeCheckerboard(Mat &dst, RedBlackBuffer &src);
    void updateRepeatedBorders(RedBlackBuffer &dst);
    void warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v);
    void prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat &W_v);
    /* Parallelizing arbitrary operations with 3 input/output arguments */
    typedef void (VariationalRefinementImpl::*Op)(void *op1, void *op2, void *op3);
    /* Runs an arbitrary list of member-function "ops" in parallel, one op per Range element */
    struct ParallelOp_ParBody : public ParallelLoopBody
    {
        VariationalRefinementImpl *var;
        vector<Op> ops;      //!< operations to execute
        vector<void *> op1s; //!< first argument of each op
        vector<void *> op2s; //!< second argument of each op
        vector<void *> op3s; //!< third argument of each op
        ParallelOp_ParBody(VariationalRefinementImpl &_var, vector<Op> _ops, vector<void *> &_op1s,
                           vector<void *> &_op2s, vector<void *> &_op3s);
        void operator()(const Range &range) const;
    };
    /* Computes the horizontal (x) derivative of src into dst and additionally stores it in
     * red-black layout in dst_split. void* arguments are required by the Op mechanism above.
     */
    void gradHorizAndSplitOp(void *src, void *dst, void *dst_split)
    {
        Sobel(*(Mat *)src, *(Mat *)dst, -1, 1, 0, 1, 1, 0.00, BORDER_REPLICATE);
        splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst);
    }
    /* Same as gradHorizAndSplitOp, but for the vertical (y) derivative. */
    void gradVertAndSplitOp(void *src, void *dst, void *dst_split)
    {
        Sobel(*(Mat *)src, *(Mat *)dst, -1, 0, 1, 1, 1, 0.00, BORDER_REPLICATE);
        splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst);
    }
    /* dst = (src1 + src2) / 2, output in CV_32F. Used through the Op mechanism. */
    void averageOp(void *src1, void *src2, void *dst)
    {
        addWeighted(*(Mat *)src1, 0.5, *(Mat *)src2, 0.5, 0.0, *(Mat *)dst, CV_32F);
    }
    /* dst = src1 - src2, output in CV_32F. Used through the Op mechanism. */
    void subtractOp(void *src1, void *src2, void *dst)
    {
        subtract(*(Mat *)src1, *(Mat *)src2, *(Mat *)dst, noArray(), CV_32F);
    }
    /* Parallel body that adds the data-term (color and gradient constancy) contributions
     * to the linear system coefficients. Each pass processes either the red or the black
     * cells only (selected by red_pass), in horizontal stripes of rows.
     */
    struct ComputeDataTerm_ParBody : public ParallelLoopBody
    {
        VariationalRefinementImpl *var;
        int nstripes, stripe_sz; //!< number of parallel stripes and rows per stripe
        int h;                   //!< number of image rows
        RedBlackBuffer *dW_u, *dW_v; //!< current flow increments
        bool red_pass;           //!< true - process red cells, false - black cells
        ComputeDataTerm_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_dW_u,
                                RedBlackBuffer &_dW_v, bool _red_pass);
        void operator()(const Range &range) const;
    };
    /* Parallel body for the horizontal pass of the smoothness term: computes the
     * smoothness weights for the current fixed-point iteration and adds the
     * horizontal-neighbor contributions to the linear system coefficients.
     */
    struct ComputeSmoothnessTermHorPass_ParBody : public ParallelLoopBody
    {
        VariationalRefinementImpl *var;
        int nstripes, stripe_sz; //!< number of parallel stripes and rows per stripe
        int h;                   //!< number of image rows
        RedBlackBuffer *W_u, *W_v;       //!< initial flow
        RedBlackBuffer *curW_u, *curW_v; //!< flow on the current fixed-point iteration
        bool red_pass;           //!< true - process red cells, false - black cells
        ComputeSmoothnessTermHorPass_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h,
                                             RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, RedBlackBuffer &_tempW_u,
                                             RedBlackBuffer &_tempW_v, bool _red_pass);
        void operator()(const Range &range) const;
    };
struct ComputeSmoothnessTermVertPass_ParBody : public ParallelLoopBody
{
VariationalRefinementImpl *var;
int nstripes, stripe_sz;
int h;
RedBlackBuffer *W_u, *W_v;
bool red_pass;
ComputeSmoothnessTermVertPass_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h,
RedBlackBuffer &W_u, RedBlackBuffer &_W_v, bool _red_pass);
void operator()(const Range &range) const;
};
    /* Parallel body for one red or black half-step of the Red-Black SOR solver that
     * updates the flow increments dW_u, dW_v in place.
     */
    struct RedBlackSOR_ParBody : public ParallelLoopBody
    {
        VariationalRefinementImpl *var;
        int nstripes, stripe_sz; //!< number of parallel stripes and rows per stripe
        int h;                   //!< number of image rows
        RedBlackBuffer *dW_u, *dW_v; //!< flow increments being solved for
        bool red_pass;           //!< true - update red cells, false - black cells
        RedBlackSOR_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_dW_u,
                            RedBlackBuffer &_dW_v, bool _red_pass);
        void operator()(const Range &range) const;
    };
};
/* Default parameter values (see the VariationalRefinement interface documentation
 * and @cite Brox2004 for the meaning of each term):
 */
VariationalRefinementImpl::VariationalRefinementImpl()
{
    fixedPointIterations = 5; /* outer (fixed-point) iterations */
    sorIterations = 5;        /* inner SOR iterations per fixed-point iteration */
    alpha = 20.0f;            /* smoothness term weight */
    delta = 5.0f;             /* color constancy term weight */
    gamma = 10.0f;            /* gradient constancy term weight */
    omega = 1.6f;             /* SOR relaxation factor */
    zeta = 0.1f;              /* additive constant in derivative normalization */
    epsilon = 0.001f;         /* robust penalizer constant: Psi(s^2)=sqrt(s^2+epsilon^2) */
}
/* This function converts an input Mat into the RedBlackBuffer format, which involves
 * splitting the input buffer into two and adding repeated borders. Assumes that enough
 * memory in dst is already allocated.
 */
void VariationalRefinementImpl::splitCheckerboard(RedBlackBuffer &dst, Mat &src)
{
    int buf_j, j;
    int buf_w = (int)ceil(src.cols / 2.0) + 2; //!< max width of red/black buffers with borders
    /* Rows of red and black buffers can have different actual width, some extra repeated values are
     * added for padding in such cases.
     */
    for (int i = 0; i < src.rows; i++)
    {
        float *src_buf = src.ptr<float>(i);
        /* +1 skips the top border row of the destination buffers: */
        float *r_buf = dst.red.ptr<float>(i + 1);
        float *b_buf = dst.black.ptr<float>(i + 1);
        r_buf[0] = b_buf[0] = src_buf[0]; /* repeated left border */
        buf_j = 1;
        if (i % 2 == 0)
        {
            /* Even rows: even source columns are red, odd columns are black */
            for (j = 0; j < src.cols - 1; j += 2)
            {
                r_buf[buf_j] = src_buf[j];
                b_buf[buf_j] = src_buf[j + 1];
                buf_j++;
            }
            if (j < src.cols)
                r_buf[buf_j] = b_buf[buf_j] = src_buf[j]; /* odd width: duplicate the last element as padding */
            else
                j--; /* point j back at the last valid source element for the right border below */
        }
        else
        {
            /* Odd rows: even source columns are black, odd columns are red */
            for (j = 0; j < src.cols - 1; j += 2)
            {
                b_buf[buf_j] = src_buf[j];
                r_buf[buf_j] = src_buf[j + 1];
                buf_j++;
            }
            if (j < src.cols)
                r_buf[buf_j] = b_buf[buf_j] = src_buf[j];
            else
                j--;
        }
        r_buf[buf_w - 1] = b_buf[buf_w - 1] = src_buf[j]; /* repeated right border */
    }
    /* Fill top and bottom borders: */
    /* Note the red/black swap: the repeated border row has flipped (i+j) parity,
     * so the red border is copied from the black plane and vice versa. */
    {
        float *r_buf_border = dst.red.ptr<float>(dst.red.rows - 1);
        float *b_buf_border = dst.black.ptr<float>(dst.black.rows - 1);
        float *r_buf = dst.red.ptr<float>(dst.red.rows - 2);
        float *b_buf = dst.black.ptr<float>(dst.black.rows - 2);
        memcpy(r_buf_border, b_buf, buf_w * sizeof(float));
        memcpy(b_buf_border, r_buf, buf_w * sizeof(float));
    }
    {
        float *r_buf_border = dst.red.ptr<float>(0);
        float *b_buf_border = dst.black.ptr<float>(0);
        float *r_buf = dst.red.ptr<float>(1);
        float *b_buf = dst.black.ptr<float>(1);
        memcpy(r_buf_border, b_buf, buf_w * sizeof(float));
        memcpy(b_buf_border, r_buf, buf_w * sizeof(float));
    }
}
/* The inverse of splitCheckerboard, i.e. converting the RedBlackBuffer back into Mat.
 * Assumes that enough memory in dst is already allocated. Border elements of src are
 * simply skipped.
 */
void VariationalRefinementImpl::mergeCheckerboard(Mat &dst, RedBlackBuffer &src)
{
    int buf_j, j;
    for (int i = 0; i < dst.rows; i++)
    {
        /* +1 skips the border row/column of the red-black buffers: */
        float *src_r_buf = src.red.ptr<float>(i + 1);
        float *src_b_buf = src.black.ptr<float>(i + 1);
        float *dst_buf = dst.ptr<float>(i);
        buf_j = 1;
        if (i % 2 == 0)
        {
            /* Even rows: red elements go to even destination columns */
            for (j = 0; j < dst.cols - 1; j += 2)
            {
                dst_buf[j] = src_r_buf[buf_j];
                dst_buf[j + 1] = src_b_buf[buf_j];
                buf_j++;
            }
            if (j < dst.cols) /* odd width: one trailing element left */
                dst_buf[j] = src_r_buf[buf_j];
        }
        else
        {
            /* Odd rows: black elements go to even destination columns */
            for (j = 0; j < dst.cols - 1; j += 2)
            {
                dst_buf[j] = src_b_buf[buf_j];
                dst_buf[j + 1] = src_r_buf[buf_j];
                buf_j++;
            }
            if (j < dst.cols)
                dst_buf[j] = src_b_buf[buf_j];
        }
    }
}
/* An auxiliary function that updates the borders. Used to enforce that border values repeat
 * the ones adjacent to the border, after the interior of a buffer has been modified.
 */
void VariationalRefinementImpl::updateRepeatedBorders(RedBlackBuffer &dst)
{
    int buf_w = dst.red.cols;
    /* Left and right borders; which plane holds the first/last interior element
     * depends on the row parity and on whether row lengths differ (odd widths): */
    for (int i = 0; i < dst.red.rows - 2; i++)
    {
        float *r_buf = dst.red.ptr<float>(i + 1);
        float *b_buf = dst.black.ptr<float>(i + 1);
        if (i % 2 == 0)
        {
            /* Even rows start with a red element, so the black left border repeats it: */
            b_buf[0] = r_buf[1];
            if (dst.red_even_len > dst.black_even_len)
                b_buf[dst.black_even_len + 1] = r_buf[dst.red_even_len];
            else
                r_buf[dst.red_even_len + 1] = b_buf[dst.black_even_len];
        }
        else
        {
            /* Odd rows start with a black element: */
            r_buf[0] = b_buf[1];
            if (dst.red_odd_len < dst.black_odd_len)
                r_buf[dst.red_odd_len + 1] = b_buf[dst.black_odd_len];
            else
                b_buf[dst.black_odd_len + 1] = r_buf[dst.red_odd_len];
        }
    }
    /* Top and bottom borders; red and black are swapped because the repeated
     * border row has flipped (i+j) parity: */
    {
        float *r_buf_border = dst.red.ptr<float>(dst.red.rows - 1);
        float *b_buf_border = dst.black.ptr<float>(dst.black.rows - 1);
        float *r_buf = dst.red.ptr<float>(dst.red.rows - 2);
        float *b_buf = dst.black.ptr<float>(dst.black.rows - 2);
        memcpy(r_buf_border, b_buf, buf_w * sizeof(float));
        memcpy(b_buf_border, r_buf, buf_w * sizeof(float));
    }
    {
        float *r_buf_border = dst.red.ptr<float>(0);
        float *b_buf_border = dst.black.ptr<float>(0);
        float *r_buf = dst.red.ptr<float>(1);
        float *b_buf = dst.black.ptr<float>(1);
        memcpy(r_buf_border, b_buf, buf_w * sizeof(float));
        memcpy(b_buf_border, r_buf, buf_w * sizeof(float));
    }
}
/* Allocates the red and black planes (with a one-element border on every side) for a
 * source image of size s and records the valid row lengths of both planes.
 */
void VariationalRefinementImpl::RedBlackBuffer::create(Size s)
{
    /* Each plane stores roughly half of the columns plus two border columns: */
    const int stride = (int)ceil(s.width / 2.0) + 2;
    red.create(s.height + 2, stride);
    black.create(s.height + 2, stride);
    /* Red elements of even rows and black elements of odd rows always occupy
     * ceil(width/2) cells: */
    red_even_len = black_odd_len = stride - 2;
    /* Their counterparts hold one element less when the source width is odd: */
    if (s.width % 2 == 0)
        red_odd_len = black_even_len = stride - 2;
    else
        red_odd_len = black_even_len = stride - 3;
}
/* Releases the memory of both checkerboard planes. */
void VariationalRefinementImpl::RedBlackBuffer::release()
{
    red.release();
    black.release();
}
/* Stores the operation list and the three per-operation argument lists; the vectors
 * are expected to have equal lengths (one entry per Range element).
 */
VariationalRefinementImpl::ParallelOp_ParBody::ParallelOp_ParBody(VariationalRefinementImpl &_var, vector<Op> _ops,
                                                                  vector<void *> &_op1s, vector<void *> &_op2s,
                                                                  vector<void *> &_op3s)
    : var(&_var), ops(_ops), op1s(_op1s), op2s(_op2s), op3s(_op3s)
{
}
/* Executes the requested slice of the stored operation list, one member-function
 * pointer call per index.
 */
void VariationalRefinementImpl::ParallelOp_ParBody::operator()(const Range &range) const
{
    for (int idx = range.start; idx < range.end; idx++)
    {
        VariationalRefinementImpl::Op op = ops[idx];
        (var->*op)(op1s[idx], op2s[idx], op3s[idx]);
    }
}
/* Warps src with the given flow field: builds absolute sampling maps (mapX, mapY)
 * from the relative flow components and remaps src into dst with bilinear
 * interpolation and replicated borders.
 */
void VariationalRefinementImpl::warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v)
{
    const int rows = flow_u.rows;
    const int cols = flow_u.cols;
    for (int y = 0; y < rows; y++)
    {
        const float *u_row = flow_u.ptr<float>(y);
        const float *v_row = flow_v.ptr<float>(y);
        float *map_x_row = mapX.ptr<float>(y);
        float *map_y_row = mapY.ptr<float>(y);
        for (int x = 0; x < cols; x++)
        {
            /* Absolute destination coordinate = pixel coordinate + flow: */
            map_x_row[x] = x + u_row[x];
            map_y_row[x] = y + v_row[x];
        }
    }
    remap(src, dst, mapX, mapY, INTER_LINEAR, BORDER_REPLICATE);
}
/* Allocates all internal buffers for a frame pair of the given size, warps I1 towards I0
 * with the initial flow (W_u, W_v) and computes all required image derivatives (first- and
 * second-order), both in ordinary Mat layout and in red-black layout.
 */
void VariationalRefinementImpl::prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat &W_v)
{
    Size s = I0.size();
    A11.create(s);
    A12.create(s);
    A22.create(s);
    b1.create(s);
    b2.create(s);
    weights.create(s);
    /* Weights must start from zero - they are accumulated by the smoothness passes: */
    weights.red.setTo(0.0f);
    weights.black.setTo(0.0f);
    tempW_u.create(s);
    tempW_v.create(s);
    dW_u.create(s);
    dW_v.create(s);
    W_u_rb.create(s);
    W_v_rb.create(s);
    Ix.create(s);
    Iy.create(s);
    Iz.create(s);
    Ixx.create(s);
    Ixy.create(s);
    Iyy.create(s);
    Ixz.create(s);
    Iyz.create(s);
    Ix_rb.create(s);
    Iy_rb.create(s);
    Iz_rb.create(s);
    Ixx_rb.create(s);
    Ixy_rb.create(s);
    Iyy_rb.create(s);
    Ixz_rb.create(s);
    Iyz_rb.create(s);
    mapX.create(s);
    mapY.create(s);
    /* Floating point warps work significantly better than fixed-point */
    Mat I1flt, warpedI;
    I1.convertTo(I1flt, CV_32F);
    warpImage(warpedI, I1flt, W_u, W_v);
    /* Computing an average of the current and warped next frames (to compute the derivatives on) and
     * temporal derivative Iz
     */
    Mat averagedI;
    {
        /* The two independent operations are executed in parallel via ParallelOp_ParBody: */
        vector<void *> op1s;
        op1s.push_back((void *)&I0);
        op1s.push_back((void *)&warpedI);
        vector<void *> op2s;
        op2s.push_back((void *)&warpedI);
        op2s.push_back((void *)&I0);
        vector<void *> op3s;
        op3s.push_back((void *)&averagedI);
        op3s.push_back((void *)&Iz);
        vector<Op> ops;
        ops.push_back(&VariationalRefinementImpl::averageOp);
        ops.push_back(&VariationalRefinementImpl::subtractOp);
        parallel_for_(Range(0, 2), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s));
    }
    splitCheckerboard(Iz_rb, Iz);
    /* Computing first-order derivatives */
    {
        /* Ix, Iy from the averaged frame; Ixz, Iyz as spatial derivatives of Iz: */
        vector<void *> op1s;
        op1s.push_back((void *)&averagedI);
        op1s.push_back((void *)&averagedI);
        op1s.push_back((void *)&Iz);
        op1s.push_back((void *)&Iz);
        vector<void *> op2s;
        op2s.push_back((void *)&Ix);
        op2s.push_back((void *)&Iy);
        op2s.push_back((void *)&Ixz);
        op2s.push_back((void *)&Iyz);
        vector<void *> op3s;
        op3s.push_back((void *)&Ix_rb);
        op3s.push_back((void *)&Iy_rb);
        op3s.push_back((void *)&Ixz_rb);
        op3s.push_back((void *)&Iyz_rb);
        vector<Op> ops;
        ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp);
        ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp);
        ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp);
        ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp);
        parallel_for_(Range(0, 4), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s));
    }
    /* Computing second-order derivatives */
    {
        /* Ixx = d(Ix)/dx, Ixy = d(Ix)/dy, Iyy = d(Iy)/dy: */
        vector<void *> op1s;
        op1s.push_back((void *)&Ix);
        op1s.push_back((void *)&Ix);
        op1s.push_back((void *)&Iy);
        vector<void *> op2s;
        op2s.push_back((void *)&Ixx);
        op2s.push_back((void *)&Ixy);
        op2s.push_back((void *)&Iyy);
        vector<void *> op3s;
        op3s.push_back((void *)&Ixx_rb);
        op3s.push_back((void *)&Ixy_rb);
        op3s.push_back((void *)&Iyy_rb);
        vector<Op> ops;
        ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp);
        ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp);
        ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp);
        parallel_for_(Range(0, 3), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s));
    }
}
/* Stores the references to the current flow increments and precomputes the number
 * of rows processed by each parallel stripe.
 */
VariationalRefinementImpl::ComputeDataTerm_ParBody::ComputeDataTerm_ParBody(VariationalRefinementImpl &_var,
                                                                            int _nstripes, int _h,
                                                                            RedBlackBuffer &_dW_u,
                                                                            RedBlackBuffer &_dW_v, bool _red_pass)
    : var(&_var), nstripes(_nstripes), h(_h), dW_u(&_dW_u), dW_v(&_dW_v), red_pass(_red_pass)
{
    stripe_sz = (int)ceil(h / (double)nstripes);
}
/* This function computes parts of the main linear system coefficients A11,A12,A22,b1,b2
 * that correspond to the data term, which includes color and gradient constancy assumptions.
 * Only the red or the black cells (depending on red_pass) of the assigned row stripe are written.
 */
void VariationalRefinementImpl::ComputeDataTerm_ParBody::operator()(const Range &range) const
{
    int start_i = min(range.start * stripe_sz, h);
    int end_i = min(range.end * stripe_sz, h);
    float zeta_squared = var->zeta * var->zeta;
    float epsilon_squared = var->epsilon * var->epsilon;
    float gamma2 = var->gamma / 2;
    float delta2 = var->delta / 2;
    float *pIx, *pIy, *pIz;
    float *pIxx, *pIxy, *pIyy, *pIxz, *pIyz;
    float *pdU, *pdV;
    float *pa11, *pa12, *pa22, *pb1, *pb2;
    float derivNorm, derivNorm2;
    float Ik1z, Ik1zx, Ik1zy;
    float weight;
    int len;
    for (int i = start_i; i < end_i; i++)
    {
/* Sets up row pointers into all involved buffers for the selected (red or black) plane;
 * +1 offsets skip the border row/column of the red-black layout. */
#define INIT_ROW_POINTERS(color)                                \
    pIx = var->Ix_rb.color.ptr<float>(i + 1) + 1;               \
    pIy = var->Iy_rb.color.ptr<float>(i + 1) + 1;               \
    pIz = var->Iz_rb.color.ptr<float>(i + 1) + 1;               \
    pIxx = var->Ixx_rb.color.ptr<float>(i + 1) + 1;             \
    pIxy = var->Ixy_rb.color.ptr<float>(i + 1) + 1;             \
    pIyy = var->Iyy_rb.color.ptr<float>(i + 1) + 1;             \
    pIxz = var->Ixz_rb.color.ptr<float>(i + 1) + 1;             \
    pIyz = var->Iyz_rb.color.ptr<float>(i + 1) + 1;             \
    pa11 = var->A11.color.ptr<float>(i + 1) + 1;                \
    pa12 = var->A12.color.ptr<float>(i + 1) + 1;                \
    pa22 = var->A22.color.ptr<float>(i + 1) + 1;                \
    pb1 = var->b1.color.ptr<float>(i + 1) + 1;                  \
    pb2 = var->b2.color.ptr<float>(i + 1) + 1;                  \
    pdU = dW_u->color.ptr<float>(i + 1) + 1;                    \
    pdV = dW_v->color.ptr<float>(i + 1) + 1;                    \
    if (i % 2 == 0)                                             \
        len = var->Ix_rb.color##_even_len;                      \
    else                                                        \
        len = var->Ix_rb.color##_odd_len;
        if (red_pass)
        {
            INIT_ROW_POINTERS(red);
        }
        else
        {
            INIT_ROW_POINTERS(black);
        }
#undef INIT_ROW_POINTERS
        int j = 0;
#ifdef CV_SIMD128
        /* Vectorized version (4 floats at a time) of the scalar loop below: */
        v_float32x4 zeta_vec = v_setall_f32(zeta_squared);
        v_float32x4 eps_vec = v_setall_f32(epsilon_squared);
        v_float32x4 delta_vec = v_setall_f32(delta2);
        v_float32x4 gamma_vec = v_setall_f32(gamma2);
        v_float32x4 zero_vec = v_setall_f32(0.0f);
        v_float32x4 pIx_vec, pIy_vec, pIz_vec, pdU_vec, pdV_vec;
        v_float32x4 pIxx_vec, pIxy_vec, pIyy_vec, pIxz_vec, pIyz_vec;
        v_float32x4 derivNorm_vec, derivNorm2_vec, weight_vec;
        v_float32x4 Ik1z_vec, Ik1zx_vec, Ik1zy_vec;
        v_float32x4 pa11_vec, pa12_vec, pa22_vec, pb1_vec, pb2_vec;
        for (; j < len - 3; j += 4)
        {
            pIx_vec = v_load(pIx + j);
            pIy_vec = v_load(pIy + j);
            pIz_vec = v_load(pIz + j);
            pdU_vec = v_load(pdU + j);
            pdV_vec = v_load(pdV + j);
            derivNorm_vec = pIx_vec * pIx_vec + pIy_vec * pIy_vec + zeta_vec;
            Ik1z_vec = pIz_vec + pIx_vec * pdU_vec + pIy_vec * pdV_vec;
            weight_vec = (delta_vec / v_sqrt(Ik1z_vec * Ik1z_vec / derivNorm_vec + eps_vec)) / derivNorm_vec;
            pa11_vec = weight_vec * (pIx_vec * pIx_vec) + zeta_vec;
            pa12_vec = weight_vec * (pIx_vec * pIy_vec);
            pa22_vec = weight_vec * (pIy_vec * pIy_vec) + zeta_vec;
            pb1_vec = zero_vec - weight_vec * (pIz_vec * pIx_vec);
            pb2_vec = zero_vec - weight_vec * (pIz_vec * pIy_vec);
            pIxx_vec = v_load(pIxx + j);
            pIxy_vec = v_load(pIxy + j);
            pIyy_vec = v_load(pIyy + j);
            pIxz_vec = v_load(pIxz + j);
            pIyz_vec = v_load(pIyz + j);
            derivNorm_vec = pIxx_vec * pIxx_vec + pIxy_vec * pIxy_vec + zeta_vec;
            derivNorm2_vec = pIyy_vec * pIyy_vec + pIxy_vec * pIxy_vec + zeta_vec;
            Ik1zx_vec = pIxz_vec + pIxx_vec * pdU_vec + pIxy_vec * pdV_vec;
            Ik1zy_vec = pIyz_vec + pIxy_vec * pdU_vec + pIyy_vec * pdV_vec;
            weight_vec = gamma_vec / v_sqrt(Ik1zx_vec * Ik1zx_vec / derivNorm_vec +
                                            Ik1zy_vec * Ik1zy_vec / derivNorm2_vec + eps_vec);
            pa11_vec += weight_vec * (pIxx_vec * pIxx_vec / derivNorm_vec + pIxy_vec * pIxy_vec / derivNorm2_vec);
            pa12_vec += weight_vec * (pIxx_vec * pIxy_vec / derivNorm_vec + pIxy_vec * pIyy_vec / derivNorm2_vec);
            pa22_vec += weight_vec * (pIxy_vec * pIxy_vec / derivNorm_vec + pIyy_vec * pIyy_vec / derivNorm2_vec);
            pb1_vec -= weight_vec * (pIxx_vec * pIxz_vec / derivNorm_vec + pIxy_vec * pIyz_vec / derivNorm2_vec);
            pb2_vec -= weight_vec * (pIxy_vec * pIxz_vec / derivNorm_vec + pIyy_vec * pIyz_vec / derivNorm2_vec);
            v_store(pa11 + j, pa11_vec);
            v_store(pa12 + j, pa12_vec);
            v_store(pa22 + j, pa22_vec);
            v_store(pb1 + j, pb1_vec);
            v_store(pb2 + j, pb2_vec);
        }
#endif
        /* Scalar tail (and the full loop when SIMD is unavailable): */
        for (; j < len; j++)
        {
            /* Step 1: Compute color constancy terms */
            /* Normalization factor:*/
            derivNorm = pIx[j] * pIx[j] + pIy[j] * pIy[j] + zeta_squared;
            /* Color constancy penalty (computed by Taylor expansion):*/
            Ik1z = pIz[j] + pIx[j] * pdU[j] + pIy[j] * pdV[j];
            /* Weight of the color constancy term in the current fixed-point iteration divided by derivNorm: */
            weight = (delta2 / sqrt(Ik1z * Ik1z / derivNorm + epsilon_squared)) / derivNorm;
            /* Add respective color constancy terms to the linear system coefficients: */
            pa11[j] = weight * (pIx[j] * pIx[j]) + zeta_squared;
            pa12[j] = weight * (pIx[j] * pIy[j]);
            pa22[j] = weight * (pIy[j] * pIy[j]) + zeta_squared;
            pb1[j] = -weight * (pIz[j] * pIx[j]);
            pb2[j] = -weight * (pIz[j] * pIy[j]);
            /* Step 2: Compute gradient constancy terms */
            /* Normalization factor for x gradient: */
            derivNorm = pIxx[j] * pIxx[j] + pIxy[j] * pIxy[j] + zeta_squared;
            /* Normalization factor for y gradient: */
            derivNorm2 = pIyy[j] * pIyy[j] + pIxy[j] * pIxy[j] + zeta_squared;
            /* Gradient constancy penalties (computed by Taylor expansion): */
            Ik1zx = pIxz[j] + pIxx[j] * pdU[j] + pIxy[j] * pdV[j];
            Ik1zy = pIyz[j] + pIxy[j] * pdU[j] + pIyy[j] * pdV[j];
            /* Weight of the gradient constancy term in the current fixed-point iteration: */
            weight = gamma2 / sqrt(Ik1zx * Ik1zx / derivNorm + Ik1zy * Ik1zy / derivNorm2 + epsilon_squared);
            /* Add respective gradient constancy components to the linear system coefficients: */
            pa11[j] += weight * (pIxx[j] * pIxx[j] / derivNorm + pIxy[j] * pIxy[j] / derivNorm2);
            pa12[j] += weight * (pIxx[j] * pIxy[j] / derivNorm + pIxy[j] * pIyy[j] / derivNorm2);
            pa22[j] += weight * (pIxy[j] * pIxy[j] / derivNorm + pIyy[j] * pIyy[j] / derivNorm2);
            pb1[j] += -weight * (pIxx[j] * pIxz[j] / derivNorm + pIxy[j] * pIyz[j] / derivNorm2);
            pb2[j] += -weight * (pIxy[j] * pIxz[j] / derivNorm + pIyy[j] * pIyz[j] / derivNorm2);
        }
    }
}
/* Stores references to the initial flow and the current fixed-point-iteration flow,
 * and precomputes the number of rows processed by each parallel stripe.
 */
VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::ComputeSmoothnessTermHorPass_ParBody(
  VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_W_u, RedBlackBuffer &_W_v,
  RedBlackBuffer &_tempW_u, RedBlackBuffer &_tempW_v, bool _red_pass)
    : var(&_var), nstripes(_nstripes), h(_h), W_u(&_W_u), W_v(&_W_v), curW_u(&_tempW_u), curW_v(&_tempW_v),
      red_pass(_red_pass)
{
    stripe_sz = (int)ceil(h / (double)nstripes);
}
/* This function updates the linear system coefficients A11,A22,b1,b2 according to the
 * flow smoothness term and computes corresponding weights for the current fixed point iteration.
 * A11,A22,b1,b2 are updated only partially (horizontal pass). Doing both horizontal and vertical
 * passes in one loop complicates parallelization (different threads write to the same elements).
 */
void VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::operator()(const Range &range) const
{
    int start_i = min(range.start * stripe_sz, h);
    int end_i = min(range.end * stripe_sz, h);
    float epsilon_squared = var->epsilon * var->epsilon;
    float alpha2 = var->alpha / 2;
    float *pWeight;
    float *pA_u, *pA_u_next, *pA_v, *pA_v_next;
    float *pB_u, *pB_u_next, *pB_v, *pB_v_next;
    float *cW_u, *cW_u_next, *cW_u_next_row;
    float *cW_v, *cW_v_next, *cW_v_next_row;
    float *pW_u, *pW_u_next;
    float *pW_v, *pW_v_next;
    float ux, uy, vx, vy;
    int len;
    bool touches_right_border = true;
/* Sets up row pointers for the current plane and its horizontal/vertical neighbors.
 * The horizontal neighbor of a red element lives in the black plane (and vice versa),
 * at an offset that depends on the row parity - hence next_offs_even/next_offs_odd.
 * touches_right_border tells whether the last element of this row lies on the image
 * border (in which case its horizontal update must be skipped). */
#define INIT_ROW_POINTERS(cur_color, next_color, next_offs_even, next_offs_odd, bool_default)   \
    pWeight = var->weights.cur_color.ptr<float>(i + 1) + 1;                                     \
    pA_u = var->A11.cur_color.ptr<float>(i + 1) + 1;                                            \
    pB_u = var->b1.cur_color.ptr<float>(i + 1) + 1;                                             \
    cW_u = curW_u->cur_color.ptr<float>(i + 1) + 1;                                             \
    pW_u = W_u->cur_color.ptr<float>(i + 1) + 1;                                                \
    pA_v = var->A22.cur_color.ptr<float>(i + 1) + 1;                                            \
    pB_v = var->b2.cur_color.ptr<float>(i + 1) + 1;                                             \
    cW_v = curW_v->cur_color.ptr<float>(i + 1) + 1;                                             \
    pW_v = W_v->cur_color.ptr<float>(i + 1) + 1;                                                \
                                                                                                \
    cW_u_next_row = curW_u->next_color.ptr<float>(i + 2) + 1;                                   \
    cW_v_next_row = curW_v->next_color.ptr<float>(i + 2) + 1;                                   \
                                                                                                \
    if (i % 2 == 0)                                                                             \
    {                                                                                           \
        pA_u_next = var->A11.next_color.ptr<float>(i + 1) + next_offs_even;                     \
        pB_u_next = var->b1.next_color.ptr<float>(i + 1) + next_offs_even;                      \
        cW_u_next = curW_u->next_color.ptr<float>(i + 1) + next_offs_even;                      \
        pW_u_next = W_u->next_color.ptr<float>(i + 1) + next_offs_even;                         \
        pA_v_next = var->A22.next_color.ptr<float>(i + 1) + next_offs_even;                     \
        pB_v_next = var->b2.next_color.ptr<float>(i + 1) + next_offs_even;                      \
        cW_v_next = curW_v->next_color.ptr<float>(i + 1) + next_offs_even;                      \
        pW_v_next = W_v->next_color.ptr<float>(i + 1) + next_offs_even;                         \
        len = var->A11.cur_color##_even_len;                                                    \
        if (var->A11.cur_color##_even_len != var->A11.cur_color##_odd_len)                      \
            touches_right_border = bool_default;                                               \
        else                                                                                    \
            touches_right_border = !bool_default;                                              \
    }                                                                                           \
    else                                                                                        \
    {                                                                                           \
        pA_u_next = var->A11.next_color.ptr<float>(i + 1) + next_offs_odd;                      \
        pB_u_next = var->b1.next_color.ptr<float>(i + 1) + next_offs_odd;                       \
        cW_u_next = curW_u->next_color.ptr<float>(i + 1) + next_offs_odd;                       \
        pW_u_next = W_u->next_color.ptr<float>(i + 1) + next_offs_odd;                          \
        pA_v_next = var->A22.next_color.ptr<float>(i + 1) + next_offs_odd;                      \
        pB_v_next = var->b2.next_color.ptr<float>(i + 1) + next_offs_odd;                       \
        cW_v_next = curW_v->next_color.ptr<float>(i + 1) + next_offs_odd;                       \
        pW_v_next = W_v->next_color.ptr<float>(i + 1) + next_offs_odd;                          \
        len = var->A11.cur_color##_odd_len;                                                     \
        if (var->A11.cur_color##_even_len != var->A11.cur_color##_odd_len)                      \
            touches_right_border = !bool_default;                                              \
        else                                                                                    \
            touches_right_border = bool_default;                                               \
    }
    for (int i = start_i; i < end_i; i++)
    {
        if (red_pass)
        {
            INIT_ROW_POINTERS(red, black, 1, 2, true);
        }
        else
        {
            INIT_ROW_POINTERS(black, red, 2, 1, false);
        }
#undef INIT_ROW_POINTERS
/* Computes the smoothness weight from the current-iteration flow gradients and the
 * weighted horizontal gradients of the initial flow: */
#define COMPUTE                                                                             \
    /* Gradients for the flow on the current fixed-point iteration: */                     \
    ux = cW_u_next[j] - cW_u[j];                                                            \
    vx = cW_v_next[j] - cW_v[j];                                                            \
    uy = cW_u_next_row[j] - cW_u[j];                                                        \
    vy = cW_v_next_row[j] - cW_v[j];                                                        \
    /* Weight of the smoothness term in the current fixed-point iteration: */              \
    pWeight[j] = alpha2 / sqrt(ux * ux + vx * vx + uy * uy + vy * vy + epsilon_squared);    \
    /* Gradients for initial raw flow multiplied by weight:*/                              \
    ux = pWeight[j] * (pW_u_next[j] - pW_u[j]);                                             \
    vx = pWeight[j] * (pW_v_next[j] - pW_v[j]);
/* Adds the horizontal-neighbor contribution to both the current element and its neighbor: */
#define UPDATE                                                                              \
    pB_u[j] += ux;                                                                          \
    pA_u[j] += pWeight[j];                                                                  \
    pB_v[j] += vx;                                                                          \
    pA_v[j] += pWeight[j];                                                                  \
    pB_u_next[j] -= ux;                                                                     \
    pA_u_next[j] += pWeight[j];                                                             \
    pB_v_next[j] -= vx;                                                                     \
    pA_v_next[j] += pWeight[j];
        int j = 0;
#ifdef CV_SIMD128
        /* Vectorized version of COMPUTE + UPDATE; stops one element earlier than the
         * data-term loop (len - 4) so the last element is always handled by the
         * border-aware scalar code below: */
        v_float32x4 alpha2_vec = v_setall_f32(alpha2);
        v_float32x4 eps_vec = v_setall_f32(epsilon_squared);
        v_float32x4 cW_u_vec, cW_v_vec;
        v_float32x4 pWeight_vec, ux_vec, vx_vec, uy_vec, vy_vec;
        for (; j < len - 4; j += 4)
        {
            cW_u_vec = v_load(cW_u + j);
            cW_v_vec = v_load(cW_v + j);
            ux_vec = v_load(cW_u_next + j) - cW_u_vec;
            vx_vec = v_load(cW_v_next + j) - cW_v_vec;
            uy_vec = v_load(cW_u_next_row + j) - cW_u_vec;
            vy_vec = v_load(cW_v_next_row + j) - cW_v_vec;
            pWeight_vec =
              alpha2_vec / v_sqrt(ux_vec * ux_vec + vx_vec * vx_vec + uy_vec * uy_vec + vy_vec * vy_vec + eps_vec);
            v_store(pWeight + j, pWeight_vec);
            ux_vec = pWeight_vec * (v_load(pW_u_next + j) - v_load(pW_u + j));
            vx_vec = pWeight_vec * (v_load(pW_v_next + j) - v_load(pW_v + j));
            v_store(pA_u + j, v_load(pA_u + j) + pWeight_vec);
            v_store(pA_v + j, v_load(pA_v + j) + pWeight_vec);
            v_store(pB_u + j, v_load(pB_u + j) + ux_vec);
            v_store(pB_v + j, v_load(pB_v + j) + vx_vec);
            v_store(pA_u_next + j, v_load(pA_u_next + j) + pWeight_vec);
            v_store(pA_v_next + j, v_load(pA_v_next + j) + pWeight_vec);
            v_store(pB_u_next + j, v_load(pB_u_next + j) - ux_vec);
            v_store(pB_v_next + j, v_load(pB_v_next + j) - vx_vec);
        }
#endif
        for (; j < len - 1; j++)
        {
            COMPUTE;
            UPDATE;
        }
        /* Omit the update on the rightmost elements (the weight is still needed
         * by the vertical pass and the SOR iterations): */
        if (touches_right_border)
        {
            COMPUTE;
        }
        else
        {
            COMPUTE;
            UPDATE;
        }
    }
#undef COMPUTE
#undef UPDATE
}
/* Stores the references to the initial flow and precomputes the number of rows
 * processed by each parallel stripe.
 */
VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::ComputeSmoothnessTermVertPass_ParBody(
  VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, bool _red_pass)
    : var(&_var), nstripes(_nstripes), W_u(&_W_u), W_v(&_W_v), red_pass(_red_pass)
{
    /* Omit the last row in the vertical pass */
    h = _h - 1;
    stripe_sz = (int)ceil(h / (double)nstripes);
}
/* This function adds the last remaining terms to the linear system coefficients A11,A22,b1,b2
 * (vertical pass of the smoothness term, reusing the weights computed by the horizontal pass). */
void VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::operator()(const Range &range) const
{
    int start_i = min(range.start * stripe_sz, h);
    int end_i = min(range.end * stripe_sz, h);
    float *pWeight;
    float *pA_u, *pA_u_next_row, *pA_v, *pA_v_next_row;
    float *pB_u, *pB_u_next_row, *pB_v, *pB_v_next_row;
    float *pW_u, *pW_u_next_row, *pW_v, *pW_v_next_row;
    float vy, uy;
    int len;
    for (int i = start_i; i < end_i; i++)
    {
/* Sets up row pointers for the current plane and the row below it; the vertical
 * neighbor of a red element is in the black plane at the same column (and vice versa). */
#define INIT_ROW_POINTERS(cur_color, next_color)                    \
    pWeight = var->weights.cur_color.ptr<float>(i + 1) + 1;         \
    pA_u = var->A11.cur_color.ptr<float>(i + 1) + 1;                \
    pB_u = var->b1.cur_color.ptr<float>(i + 1) + 1;                 \
    pW_u = W_u->cur_color.ptr<float>(i + 1) + 1;                    \
    pA_v = var->A22.cur_color.ptr<float>(i + 1) + 1;                \
    pB_v = var->b2.cur_color.ptr<float>(i + 1) + 1;                 \
    pW_v = W_v->cur_color.ptr<float>(i + 1) + 1;                    \
                                                                    \
    pA_u_next_row = var->A11.next_color.ptr<float>(i + 2) + 1;      \
    pB_u_next_row = var->b1.next_color.ptr<float>(i + 2) + 1;       \
    pW_u_next_row = W_u->next_color.ptr<float>(i + 2) + 1;          \
    pA_v_next_row = var->A22.next_color.ptr<float>(i + 2) + 1;      \
    pB_v_next_row = var->b2.next_color.ptr<float>(i + 2) + 1;       \
    pW_v_next_row = W_v->next_color.ptr<float>(i + 2) + 1;          \
                                                                    \
    if (i % 2 == 0)                                                 \
        len = var->A11.cur_color##_even_len;                        \
    else                                                            \
        len = var->A11.cur_color##_odd_len;
        if (red_pass)
        {
            INIT_ROW_POINTERS(red, black);
        }
        else
        {
            INIT_ROW_POINTERS(black, red);
        }
#undef INIT_ROW_POINTERS
        int j = 0;
#ifdef CV_SIMD128
        /* Vectorized version (4 floats at a time) of the scalar loop below: */
        v_float32x4 pWeight_vec, uy_vec, vy_vec;
        for (; j < len - 3; j += 4)
        {
            pWeight_vec = v_load(pWeight + j);
            uy_vec = pWeight_vec * (v_load(pW_u_next_row + j) - v_load(pW_u + j));
            vy_vec = pWeight_vec * (v_load(pW_v_next_row + j) - v_load(pW_v + j));
            v_store(pA_u + j, v_load(pA_u + j) + pWeight_vec);
            v_store(pA_v + j, v_load(pA_v + j) + pWeight_vec);
            v_store(pB_u + j, v_load(pB_u + j) + uy_vec);
            v_store(pB_v + j, v_load(pB_v + j) + vy_vec);
            v_store(pA_u_next_row + j, v_load(pA_u_next_row + j) + pWeight_vec);
            v_store(pA_v_next_row + j, v_load(pA_v_next_row + j) + pWeight_vec);
            v_store(pB_u_next_row + j, v_load(pB_u_next_row + j) - uy_vec);
            v_store(pB_v_next_row + j, v_load(pB_v_next_row + j) - vy_vec);
        }
#endif
        for (; j < len; j++)
        {
            /* Weighted vertical gradients of the initial flow: */
            uy = pWeight[j] * (pW_u_next_row[j] - pW_u[j]);
            vy = pWeight[j] * (pW_v_next_row[j] - pW_v[j]);
            /* Add the vertical-neighbor contribution to both rows: */
            pB_u[j] += uy;
            pA_u[j] += pWeight[j];
            pB_v[j] += vy;
            pA_v[j] += pWeight[j];
            pB_u_next_row[j] -= uy;
            pA_u_next_row[j] += pWeight[j];
            pB_v_next_row[j] -= vy;
            pA_v_next_row[j] += pWeight[j];
        }
    }
}
/* Stores the references to the flow increments being solved for and precomputes the
 * number of rows processed by each parallel stripe.
 */
VariationalRefinementImpl::RedBlackSOR_ParBody::RedBlackSOR_ParBody(VariationalRefinementImpl &_var, int _nstripes,
                                                                    int _h, RedBlackBuffer &_dW_u,
                                                                    RedBlackBuffer &_dW_v, bool _red_pass)
    : var(&_var), nstripes(_nstripes), h(_h), dW_u(&_dW_u), dW_v(&_dW_v), red_pass(_red_pass)
{
    stripe_sz = (int)ceil(h / (double)nstripes);
}
/* This function implements the Red-Black SOR (successive-over relaxation) method for solving the main
 * linear system in the current fixed-point iteration. One call performs a half-step, updating only
 * the red or the black cells (all neighbors of a cell belong to the opposite plane).
 */
void VariationalRefinementImpl::RedBlackSOR_ParBody::operator()(const Range &range) const
{
    int start = min(range.start * stripe_sz, h);
    int end = min(range.end * stripe_sz, h);
    float *pa11, *pa12, *pa22, *pb1, *pb2, *pW, *pdu, *pdv;
    float *pW_next, *pdu_next, *pdv_next;
    float *pW_prev_row, *pdu_prev_row, *pdv_prev_row;
    float *pdu_next_row, *pdv_next_row;
    float sigmaU, sigmaV;
    int j, len;
    for (int i = start; i < end; i++)
    {
/* Sets up row pointers for the current plane and its 4-neighborhood (all in the
 * opposite plane). The left neighbor is accessed as *_next[j - 1] thanks to the
 * parity-dependent offsets next_offs_even/next_offs_odd. */
#define INIT_ROW_POINTERS(cur_color, next_color, next_offs_even, next_offs_odd)         \
    pW = var->weights.cur_color.ptr<float>(i + 1) + 1;                                  \
    pa11 = var->A11.cur_color.ptr<float>(i + 1) + 1;                                    \
    pa12 = var->A12.cur_color.ptr<float>(i + 1) + 1;                                    \
    pa22 = var->A22.cur_color.ptr<float>(i + 1) + 1;                                    \
    pb1 = var->b1.cur_color.ptr<float>(i + 1) + 1;                                      \
    pb2 = var->b2.cur_color.ptr<float>(i + 1) + 1;                                      \
    pdu = dW_u->cur_color.ptr<float>(i + 1) + 1;                                        \
    pdv = dW_v->cur_color.ptr<float>(i + 1) + 1;                                        \
                                                                                        \
    pdu_next_row = dW_u->next_color.ptr<float>(i + 2) + 1;                              \
    pdv_next_row = dW_v->next_color.ptr<float>(i + 2) + 1;                              \
                                                                                        \
    pW_prev_row = var->weights.next_color.ptr<float>(i) + 1;                            \
    pdu_prev_row = dW_u->next_color.ptr<float>(i) + 1;                                  \
    pdv_prev_row = dW_v->next_color.ptr<float>(i) + 1;                                  \
                                                                                        \
    if (i % 2 == 0)                                                                     \
    {                                                                                   \
        pW_next = var->weights.next_color.ptr<float>(i + 1) + next_offs_even;           \
        pdu_next = dW_u->next_color.ptr<float>(i + 1) + next_offs_even;                 \
        pdv_next = dW_v->next_color.ptr<float>(i + 1) + next_offs_even;                 \
        len = var->A11.cur_color##_even_len;                                            \
    }                                                                                   \
    else                                                                                \
    {                                                                                   \
        pW_next = var->weights.next_color.ptr<float>(i + 1) + next_offs_odd;            \
        pdu_next = dW_u->next_color.ptr<float>(i + 1) + next_offs_odd;                  \
        pdv_next = dW_v->next_color.ptr<float>(i + 1) + next_offs_odd;                  \
        len = var->A11.cur_color##_odd_len;                                             \
    }
        if (red_pass)
        {
            INIT_ROW_POINTERS(red, black, 1, 2);
        }
        else
        {
            INIT_ROW_POINTERS(black, red, 2, 1);
        }
#undef INIT_ROW_POINTERS
        j = 0;
#ifdef CV_SIMD128
        /* Vectorized version of the scalar loop below. The [j - 1] accesses are
         * emulated with v_extract<3>: it concatenates the previous 4-element vector
         * with the current one, yielding the elements shifted left by one position
         * without an extra unaligned load. */
        v_float32x4 pW_prev_vec = v_setall_f32(pW_next[-1]);
        v_float32x4 pdu_prev_vec = v_setall_f32(pdu_next[-1]);
        v_float32x4 pdv_prev_vec = v_setall_f32(pdv_next[-1]);
        v_float32x4 omega_vec = v_setall_f32(var->omega);
        v_float32x4 pW_vec, pW_next_vec, pW_prev_row_vec;
        v_float32x4 pdu_next_vec, pdu_prev_row_vec, pdu_next_row_vec;
        v_float32x4 pdv_next_vec, pdv_prev_row_vec, pdv_next_row_vec;
        v_float32x4 pW_shifted_vec, pdu_shifted_vec, pdv_shifted_vec;
        v_float32x4 pa12_vec, sigmaU_vec, sigmaV_vec, pdu_vec, pdv_vec;
        for (; j < len - 3; j += 4)
        {
            pW_vec = v_load(pW + j);
            pW_next_vec = v_load(pW_next + j);
            pW_prev_row_vec = v_load(pW_prev_row + j);
            pdu_next_vec = v_load(pdu_next + j);
            pdu_prev_row_vec = v_load(pdu_prev_row + j);
            pdu_next_row_vec = v_load(pdu_next_row + j);
            pdv_next_vec = v_load(pdv_next + j);
            pdv_prev_row_vec = v_load(pdv_prev_row + j);
            pdv_next_row_vec = v_load(pdv_next_row + j);
            pa12_vec = v_load(pa12 + j);
            pW_shifted_vec = v_reinterpret_as_f32(
              v_extract<3>(v_reinterpret_as_s32(pW_prev_vec), v_reinterpret_as_s32(pW_next_vec)));
            pdu_shifted_vec = v_reinterpret_as_f32(
              v_extract<3>(v_reinterpret_as_s32(pdu_prev_vec), v_reinterpret_as_s32(pdu_next_vec)));
            pdv_shifted_vec = v_reinterpret_as_f32(
              v_extract<3>(v_reinterpret_as_s32(pdv_prev_vec), v_reinterpret_as_s32(pdv_next_vec)));
            sigmaU_vec = pW_shifted_vec * pdu_shifted_vec + pW_vec * pdu_next_vec + pW_prev_row_vec * pdu_prev_row_vec +
                         pW_vec * pdu_next_row_vec;
            sigmaV_vec = pW_shifted_vec * pdv_shifted_vec + pW_vec * pdv_next_vec + pW_prev_row_vec * pdv_prev_row_vec +
                         pW_vec * pdv_next_row_vec;
            pdu_vec = v_load(pdu + j);
            pdv_vec = v_load(pdv + j);
            /* Gauss-Seidel style: the v update deliberately uses the freshly updated u: */
            pdu_vec += omega_vec * ((sigmaU_vec + v_load(pb1 + j) - pdv_vec * pa12_vec) / v_load(pa11 + j) - pdu_vec);
            pdv_vec += omega_vec * ((sigmaV_vec + v_load(pb2 + j) - pdu_vec * pa12_vec) / v_load(pa22 + j) - pdv_vec);
            v_store(pdu + j, pdu_vec);
            v_store(pdv + j, pdv_vec);
            pW_prev_vec = pW_next_vec;
            pdu_prev_vec = pdu_next_vec;
            pdv_prev_vec = pdv_next_vec;
        }
#endif
        for (; j < len; j++)
        {
            /* Weighted sum over the 4-neighborhood (left, right, up, down): */
            sigmaU = pW_next[j - 1] * pdu_next[j - 1] + pW[j] * pdu_next[j] + pW_prev_row[j] * pdu_prev_row[j] +
                     pW[j] * pdu_next_row[j];
            sigmaV = pW_next[j - 1] * pdv_next[j - 1] + pW[j] * pdv_next[j] + pW_prev_row[j] * pdv_prev_row[j] +
                     pW[j] * pdv_next_row[j];
            /* Over-relaxed update; pdv uses the already-updated pdu (Gauss-Seidel): */
            pdu[j] += var->omega * ((sigmaU + pb1[j] - pdv[j] * pa12[j]) / pa11[j] - pdu[j]);
            pdv[j] += var->omega * ((sigmaV + pb2[j] - pdu[j] * pa12[j]) / pa22[j] - pdv[j]);
        }
    }
}
/* DenseOpticalFlow::calc overload: splits the two-channel flow into separate u/v
 * components, refines them with calcUV and merges the result back in place.
 */
void VariationalRefinementImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow)
{
    CV_Assert(!I0.empty() && I0.depth() == CV_8U && I0.channels() == 1);
    CV_Assert(!I1.empty() && I1.depth() == CV_8U && I1.channels() == 1);
    CV_Assert(I0.sameSize(I1));
    CV_Assert(!flow.empty() && flow.depth() == CV_32F && flow.channels() == 2);
    CV_Assert(I0.sameSize(flow));
    Mat &flow_mat = flow.getMatRef();
    Mat components[2];
    split(flow_mat, components);
    calcUV(I0, I1, components[0], components[1]);
    merge(components, 2, flow_mat);
}
/** @brief Core refinement routine operating on separate u/v flow planes.
 *
 *  Performs fixedPointIterations outer (fixed-point) iterations; each one
 *  rebuilds the linearized data and smoothness terms on a red-black
 *  checkerboard decomposition and then runs sorIterations red-black SOR
 *  sweeps to solve for the flow increment (dW_u, dW_v).
 *
 *  NOTE(review): the red-then-black ordering of each parallel pass below is
 *  part of the algorithm — do not reorder these calls.
 *
 *  @param I0     first 8-bit single-channel image
 *  @param I1     second 8-bit single-channel image
 *  @param flow_u horizontal flow plane (CV_32FC1), refined in place
 *  @param flow_v vertical flow plane (CV_32FC1), refined in place
 */
void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v)
{
    // Inputs: grayscale 8-bit frames and single-channel float flow planes, all equal size.
    CV_Assert(!I0.empty() && I0.depth() == CV_8U && I0.channels() == 1);
    CV_Assert(!I1.empty() && I1.depth() == CV_8U && I1.channels() == 1);
    CV_Assert(I0.sameSize(I1));
    CV_Assert(!flow_u.empty() && flow_u.depth() == CV_32F && flow_u.channels() == 1);
    CV_Assert(!flow_v.empty() && flow_v.depth() == CV_32F && flow_v.channels() == 1);
    CV_Assert(I0.sameSize(flow_u));
    CV_Assert(flow_u.sameSize(flow_v));
    int num_stripes = getNumThreads(); // one horizontal stripe per worker in parallel_for_
    Mat I0Mat = I0.getMat();
    Mat I1Mat = I1.getMat();
    Mat &W_u = flow_u.getMatRef();
    Mat &W_v = flow_v.getMatRef();
    prepareBuffers(I0Mat, I1Mat, W_u, W_v);
    // Convert the input flow into red-black checkerboard layout; tempW holds the
    // current flow estimate (W + dW), and the increment dW starts from zero.
    splitCheckerboard(W_u_rb, W_u);
    splitCheckerboard(W_v_rb, W_v);
    W_u_rb.red.copyTo(tempW_u.red);
    W_u_rb.black.copyTo(tempW_u.black);
    W_v_rb.red.copyTo(tempW_v.red);
    W_v_rb.black.copyTo(tempW_v.black);
    dW_u.red.setTo(0.0f);
    dW_u.black.setTo(0.0f);
    dW_v.red.setTo(0.0f);
    dW_v.black.setTo(0.0f);
    for (int i = 0; i < fixedPointIterations; i++)
    {
        // Recompute the linearized data term on red cells, then black cells:
        parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true));
        parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false));
        // Smoothness term: horizontal pass (red, black) ...
        parallel_for_(Range(0, num_stripes), ComputeSmoothnessTermHorPass_ParBody(
                                                 *this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, tempW_u, tempW_v, true));
        parallel_for_(Range(0, num_stripes), ComputeSmoothnessTermHorPass_ParBody(
                                                 *this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, tempW_u, tempW_v, false));
        // ... followed by the vertical pass (red, black):
        parallel_for_(Range(0, num_stripes),
                      ComputeSmoothnessTermVertPass_ParBody(*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, true));
        parallel_for_(Range(0, num_stripes),
                      ComputeSmoothnessTermVertPass_ParBody(*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, false));
        // Inner red-black SOR sweeps solve the linear system for the increment dW:
        for (int j = 0; j < sorIterations; j++)
        {
            parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true));
            parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false));
        }
        // Update the current flow estimate and refresh the replicated borders:
        tempW_u.red = W_u_rb.red + dW_u.red;
        tempW_u.black = W_u_rb.black + dW_u.black;
        updateRepeatedBorders(tempW_u);
        tempW_v.red = W_v_rb.red + dW_v.red;
        tempW_v.black = W_v_rb.black + dW_v.black;
        updateRepeatedBorders(tempW_v);
    }
    // Convert the refined estimate back from checkerboard layout into the output planes:
    mergeCheckerboard(W_u, tempW_u);
    mergeCheckerboard(W_v, tempW_v);
}
/** @brief Releases all intermediate buffers kept between calc() invocations. */
void VariationalRefinementImpl::collectGarbage()
{
    // Image derivative buffers:
    Ix.release();
    Iy.release();
    Iz.release();
    Ixx.release();
    Ixy.release();
    Iyy.release();
    Ixz.release();
    Iyz.release();
    // Derivative buffers in red-black checkerboard layout:
    Ix_rb.release();
    Iy_rb.release();
    Iz_rb.release();
    Ixx_rb.release();
    Ixy_rb.release();
    Iyy_rb.release();
    Ixz_rb.release();
    Iyz_rb.release();
    // Linear-system coefficients (A * dW = b) and smoothness weights:
    A11.release();
    A12.release();
    A22.release();
    b1.release();
    b2.release();
    weights.release();
    // Remap coordinates used for warping:
    mapX.release();
    mapY.release();
    // Flow estimate, increment, and input-flow checkerboard buffers:
    tempW_u.release();
    tempW_v.release();
    dW_u.release();
    dW_v.release();
    W_u_rb.release();
    W_v_rb.release();
}
Ptr<VariationalRefinement> createVariationalFlowRefinement() { return makePtr<VariationalRefinementImpl>(); }
}
}
......@@ -40,74 +40,39 @@
//M*/
#include "test_precomp.hpp"
#include <string>
#include <fstream>
using namespace std;
using namespace cv;
using namespace cvtest;
using namespace optflow;
/* ///////////////////// simpleflow_test ///////////////////////// */
class CV_SimpleFlowTest : public cvtest::BaseTest
{
public:
CV_SimpleFlowTest();
protected:
void run(int);
};
static string getDataDir() { return TS::ptr()->get_data_path(); }
CV_SimpleFlowTest::CV_SimpleFlowTest() {}
static bool readOpticalFlowFromFile(FILE* file, cv::Mat& flow) {
char header[5];
if (fread(header, 1, 4, file) < 4 && (string)header != "PIEH") {
return false;
}
static bool isFlowCorrect(float u) { return !cvIsNaN(u) && (fabs(u) < 1e9); }
int cols, rows;
if (fread(&cols, sizeof(int), 1, file) != 1||
fread(&rows, sizeof(int), 1, file) != 1) {
return false;
}
flow = cv::Mat::zeros(rows, cols, CV_32FC2);
for (int i = 0; i < rows; ++i) {
for (int j = 0; j < cols; ++j) {
cv::Vec2f flow_at_point;
if (fread(&(flow_at_point[0]), sizeof(float), 1, file) != 1 ||
fread(&(flow_at_point[1]), sizeof(float), 1, file) != 1) {
return false;
}
flow.at<cv::Vec2f>(i, j) = flow_at_point;
}
}
return true;
}
static bool isFlowCorrect(float u) {
return !cvIsNaN(u) && (fabs(u) < 1e9);
}
static float calc_rmse(cv::Mat flow1, cv::Mat flow2) {
static float calcRMSE(Mat flow1, Mat flow2)
{
float sum = 0;
int counter = 0;
const int rows = flow1.rows;
const int cols = flow1.cols;
for (int y = 0; y < rows; ++y) {
for (int x = 0; x < cols; ++x) {
cv::Vec2f flow1_at_point = flow1.at<cv::Vec2f>(y, x);
cv::Vec2f flow2_at_point = flow2.at<cv::Vec2f>(y, x);
for (int y = 0; y < rows; ++y)
{
for (int x = 0; x < cols; ++x)
{
Vec2f flow1_at_point = flow1.at<Vec2f>(y, x);
Vec2f flow2_at_point = flow2.at<Vec2f>(y, x);
float u1 = flow1_at_point[0];
float v1 = flow1_at_point[1];
float u2 = flow2_at_point[0];
float v2 = flow2_at_point[1];
if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2)) {
sum += (u1-u2)*(u1-u2) + (v1-v2)*(v1-v2);
if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2))
{
sum += (u1 - u2) * (u1 - u2) + (v1 - v2) * (v1 - v2);
counter++;
}
}
......@@ -115,76 +80,111 @@ static float calc_rmse(cv::Mat flow1, cv::Mat flow2) {
return (float)sqrt(sum / (1e-9 + counter));
}
void CV_SimpleFlowTest::run(int) {
const float MAX_RMSE = 0.6f;
const string frame1_path = ts->get_data_path() + "optflow/RubberWhale1.png";
const string frame2_path = ts->get_data_path() + "optflow/RubberWhale2.png";
const string gt_flow_path = ts->get_data_path() + "optflow/RubberWhale.flo";
cv::Mat frame1 = cv::imread(frame1_path);
cv::Mat frame2 = cv::imread(frame2_path);
if (frame1.empty()) {
ts->printf(cvtest::TS::LOG, "could not read image %s\n", frame2_path.c_str());
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
return;
}
if (frame2.empty()) {
ts->printf(cvtest::TS::LOG, "could not read image %s\n", frame2_path.c_str());
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
return;
}
if (frame1.rows != frame2.rows && frame1.cols != frame2.cols) {
ts->printf(cvtest::TS::LOG, "images should be of equal sizes (%s and %s)",
frame1_path.c_str(), frame2_path.c_str());
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
return;
}
if (frame1.type() != 16 || frame2.type() != 16) {
ts->printf(cvtest::TS::LOG, "images should be of equal type CV_8UC3 (%s and %s)",
frame1_path.c_str(), frame2_path.c_str());
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
return;
}
cv::Mat flow_gt;
bool readRubberWhale(Mat &dst_frame_1, Mat &dst_frame_2, Mat &dst_GT)
{
const string frame1_path = getDataDir() + "optflow/RubberWhale1.png";
const string frame2_path = getDataDir() + "optflow/RubberWhale2.png";
const string gt_flow_path = getDataDir() + "optflow/RubberWhale.flo";
FILE* gt_flow_file = fopen(gt_flow_path.c_str(), "rb");
if (gt_flow_file == NULL) {
ts->printf(cvtest::TS::LOG, "could not read ground-thuth flow from file %s",
gt_flow_path.c_str());
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
return;
}
dst_frame_1 = imread(frame1_path);
dst_frame_2 = imread(frame2_path);
dst_GT = readOpticalFlow(gt_flow_path);
if (!readOpticalFlowFromFile(gt_flow_file, flow_gt)) {
ts->printf(cvtest::TS::LOG, "error while reading flow data from file %s",
gt_flow_path.c_str());
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
return;
}
fclose(gt_flow_file);
if (dst_frame_1.empty() || dst_frame_2.empty() || dst_GT.empty())
return false;
else
return true;
}
cv::Mat flow;
cv::optflow::calcOpticalFlowSF(frame1, frame2, flow, 3, 2, 4);
// SimpleFlow against the RubberWhale ground truth: the estimated flow must
// match the GT dimensions and stay within the reference RMSE threshold.
TEST(DenseOpticalFlow_SimpleFlow, ReferenceAccuracy)
{
    Mat frame1, frame2, GT;
    ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));

    const float target_RMSE = 0.37f;
    Ptr<DenseOpticalFlow> algo = createOptFlow_SimpleFlow();
    Mat flow;
    algo->calc(frame1, frame2, flow);

    ASSERT_EQ(GT.rows, flow.rows);
    ASSERT_EQ(GT.cols, flow.cols);
    EXPECT_LE(calcRMSE(GT, flow), target_RMSE);
}
float rmse = calc_rmse(flow_gt, flow);
// DeepFlow against the RubberWhale ground truth (grayscale input).
TEST(DenseOpticalFlow_DeepFlow, ReferenceAccuracy)
{
    Mat frame1, frame2, GT;
    ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));

    const float target_RMSE = 0.35f;
    // DeepFlow consumes single-channel frames:
    cvtColor(frame1, frame1, COLOR_BGR2GRAY);
    cvtColor(frame2, frame2, COLOR_BGR2GRAY);

    Ptr<DenseOpticalFlow> algo = createOptFlow_DeepFlow();
    Mat flow;
    algo->calc(frame1, frame2, flow);

    ASSERT_EQ(GT.rows, flow.rows);
    ASSERT_EQ(GT.cols, flow.cols);
    EXPECT_LE(calcRMSE(GT, flow), target_RMSE);
}
ts->printf(cvtest::TS::LOG, "Optical flow estimation RMSE for SimpleFlow algorithm : %lf\n",
rmse);
// SparseToDense against the RubberWhale ground truth.
TEST(DenseOpticalFlow_SparseToDenseFlow, ReferenceAccuracy)
{
    Mat frame1, frame2, GT;
    ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));

    const float target_RMSE = 0.52f;
    Ptr<DenseOpticalFlow> algo = createOptFlow_SparseToDense();
    Mat flow;
    algo->calc(frame1, frame2, flow);

    ASSERT_EQ(GT.rows, flow.rows);
    ASSERT_EQ(GT.cols, flow.cols);
    EXPECT_LE(calcRMSE(GT, flow), target_RMSE);
}
if (rmse > MAX_RMSE) {
ts->printf( cvtest::TS::LOG,
"Too big rmse error : %lf ( >= %lf )\n", rmse, MAX_RMSE);
ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY);
return;
// DIS at each predefined preset; thresholds are per-preset since accuracy
// improves from ULTRAFAST to MEDIUM.
TEST(DenseOpticalFlow_DIS, ReferenceAccuracy)
{
    Mat frame1, frame2, GT;
    ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));

    // DIS consumes single-channel frames:
    cvtColor(frame1, frame1, COLOR_BGR2GRAY);
    cvtColor(frame2, frame2, COLOR_BGR2GRAY);

    const int presets[] = {DISOpticalFlow::PRESET_ULTRAFAST, DISOpticalFlow::PRESET_FAST,
                           DISOpticalFlow::PRESET_MEDIUM};
    const float target_RMSE[] = {0.86f, 0.74f, 0.49f};

    for (int preset_idx = 0; preset_idx < 3; preset_idx++)
    {
        Ptr<DenseOpticalFlow> algo = createOptFlow_DIS(presets[preset_idx]);
        Mat flow;
        algo->calc(frame1, frame2, flow);
        ASSERT_EQ(GT.rows, flow.rows);
        ASSERT_EQ(GT.cols, flow.cols);
        EXPECT_LE(calcRMSE(GT, flow), target_RMSE[preset_idx]);
    }
}
TEST(Video_OpticalFlowSimpleFlow, accuracy) { CV_SimpleFlowTest test; test.safe_run(); }
/* End of file. */
// Variational refinement started from a zero flow field must still reach the
// reference RMSE on RubberWhale with the parameters fixed below.
TEST(DenseOpticalFlow_VariationalRefinement, ReferenceAccuracy)
{
    Mat frame1, frame2, GT;
    ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));

    const float target_RMSE = 0.86f;
    // The refiner consumes single-channel frames:
    cvtColor(frame1, frame1, COLOR_BGR2GRAY);
    cvtColor(frame2, frame2, COLOR_BGR2GRAY);

    Ptr<VariationalRefinement> refiner = createVariationalFlowRefinement();
    refiner->setAlpha(20.0f);
    refiner->setDelta(5.0f);
    refiner->setGamma(10.0f);
    refiner->setSorIterations(25);
    refiner->setFixedPointIterations(25);

    // Initialize with zero flow:
    Mat flow(frame1.size(), CV_32FC2);
    flow.setTo(0.0f);
    refiner->calc(frame1, frame2, flow);

    ASSERT_EQ(GT.rows, flow.rows);
    ASSERT_EQ(GT.cols, flow.cols);
    EXPECT_LE(calcRMSE(GT, flow), target_RMSE);
}
......@@ -40,107 +40,120 @@
//M*/
#include "test_precomp.hpp"
#include <string>
#include <fstream>
using namespace std;
using namespace std::tr1;
using namespace cv;
using namespace cvtest;
using namespace perf;
using namespace testing;
using namespace optflow;
/* ///////////////////// sparsetodenseflow_test ///////////////////////// */
typedef tuple<Size> OFParams;
typedef TestWithParam<OFParams> DenseOpticalFlow_DIS;
typedef TestWithParam<OFParams> DenseOpticalFlow_VariationalRefinement;
class CV_SparseToDenseFlowTest : public cvtest::BaseTest
TEST_P(DenseOpticalFlow_DIS, MultithreadReproducibility)
{
protected:
void run(int);
};
static bool isFlowCorrect(float u) {
return !cvIsNaN(u) && (fabs(u) < 1e9);
}
static float calc_rmse(Mat flow1, Mat flow2) {
float sum = 0;
int counter = 0;
const int rows = flow1.rows;
const int cols = flow1.cols;
for (int y = 0; y < rows; ++y) {
for (int x = 0; x < cols; ++x) {
Vec2f flow1_at_point = flow1.at<Vec2f>(y, x);
Vec2f flow2_at_point = flow2.at<Vec2f>(y, x);
float u1 = flow1_at_point[0];
float v1 = flow1_at_point[1];
float u2 = flow2_at_point[0];
float v2 = flow2_at_point[1];
if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2)) {
sum += (u1-u2)*(u1-u2) + (v1-v2)*(v1-v2);
counter++;
}
}
double MAX_DIF = 0.01;
double MAX_MEAN_DIF = 0.001;
int loopsCount = 2;
RNG rng(0);
OFParams params = GetParam();
Size size = get<0>(params);
for (int iter = 0; iter <= loopsCount; iter++)
{
Mat frame1(size, CV_8U);
randu(frame1, 0, 255);
Mat frame2(size, CV_8U);
randu(frame2, 0, 255);
Ptr<DISOpticalFlow> algo = createOptFlow_DIS();
int psz = rng.uniform(4, 16);
int pstr = rng.uniform(1, psz - 1);
int grad_iter = rng.uniform(1, 64);
int var_iter = rng.uniform(0, 10);
bool use_mean_normalization = !!rng.uniform(0, 2);
bool use_spatial_propagation = !!rng.uniform(0, 2);
algo->setFinestScale(0);
algo->setPatchSize(psz);
algo->setPatchStride(pstr);
algo->setGradientDescentIterations(grad_iter);
algo->setVariationalRefinementIterations(var_iter);
algo->setUseMeanNormalization(use_mean_normalization);
algo->setUseSpatialPropagation(use_spatial_propagation);
cv::setNumThreads(cv::getNumberOfCPUs());
Mat resMultiThread;
algo->calc(frame1, frame2, resMultiThread);
cv::setNumThreads(1);
Mat resSingleThread;
algo->calc(frame1, frame2, resSingleThread);
EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF);
EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF * frame1.total());
// resulting flow should be within the frame bounds:
double min_val, max_val;
minMaxLoc(resMultiThread, &min_val, &max_val);
EXPECT_LE(abs(min_val), sqrt(size.height * size.height + size.width * size.width));
EXPECT_LE(abs(max_val), sqrt(size.height * size.height + size.width * size.width));
}
return (float)sqrt(sum / (1e-9 + counter));
}
void CV_SparseToDenseFlowTest::run(int) {
const float MAX_RMSE = 0.6f;
const string frame1_path = ts->get_data_path() + "optflow/RubberWhale1.png";
const string frame2_path = ts->get_data_path() + "optflow/RubberWhale2.png";
const string gt_flow_path = ts->get_data_path() + "optflow/RubberWhale.flo";
Mat frame1 = imread(frame1_path);
Mat frame2 = imread(frame2_path);
if (frame1.empty()) {
ts->printf(cvtest::TS::LOG, "could not read image %s\n", frame2_path.c_str());
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
return;
}
if (frame2.empty()) {
ts->printf(cvtest::TS::LOG, "could not read image %s\n", frame2_path.c_str());
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
return;
}
if (frame1.rows != frame2.rows && frame1.cols != frame2.cols) {
ts->printf(cvtest::TS::LOG, "images should be of equal sizes (%s and %s)",
frame1_path.c_str(), frame2_path.c_str());
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
return;
}
if (frame1.type() != 16 || frame2.type() != 16) {
ts->printf(cvtest::TS::LOG, "images should be of equal type CV_8UC3 (%s and %s)",
frame1_path.c_str(), frame2_path.c_str());
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
return;
}
Mat flow_gt = optflow::readOpticalFlow(gt_flow_path);
if(flow_gt.empty()) {
ts->printf(cvtest::TS::LOG, "error while reading flow data from file %s",
gt_flow_path.c_str());
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
return;
}
INSTANTIATE_TEST_CASE_P(FullSet, DenseOpticalFlow_DIS, Values(szODD, szQVGA));
Mat flow;
optflow::calcOpticalFlowSparseToDense(frame1, frame2, flow);
float rmse = calc_rmse(flow_gt, flow);
ts->printf(cvtest::TS::LOG, "Optical flow estimation RMSE for SparseToDenseFlow algorithm : %lf\n",
rmse);
if (rmse > MAX_RMSE) {
ts->printf( cvtest::TS::LOG,
"Too big rmse error : %lf ( >= %lf )\n", rmse, MAX_RMSE);
ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY);
return;
// Multi-threaded and single-threaded runs of the refiner must agree almost
// exactly on random inputs with random (but identical) parameters.
// NOTE(review): the rng.uniform() calls below consume the RNG stream in a
// fixed order — reordering them changes the sampled parameters.
TEST_P(DenseOpticalFlow_VariationalRefinement, MultithreadReproducibility)
{
    double MAX_DIF = 0.01;       // max allowed per-element difference
    double MAX_MEAN_DIF = 0.001; // max allowed mean difference
    float input_flow_rad = 5.0;  // initial flow sampled in [-rad, rad]
    int loopsCount = 2;
    RNG rng(0); // fixed seed for reproducibility
    OFParams params = GetParam();
    Size size = get<0>(params);
    for (int iter = 0; iter <= loopsCount; iter++)
    {
        // Random grayscale frames and a random initial flow field:
        Mat frame1(size, CV_8U);
        randu(frame1, 0, 255);
        Mat frame2(size, CV_8U);
        randu(frame2, 0, 255);
        Mat flow(size, CV_32FC2);
        randu(flow, -input_flow_rad, input_flow_rad);
        // Randomized refiner parameters (same for both runs):
        Ptr<VariationalRefinement> var = createVariationalFlowRefinement();
        var->setAlpha(rng.uniform(1.0f, 100.0f));
        var->setGamma(rng.uniform(0.1f, 10.0f));
        var->setDelta(rng.uniform(0.1f, 10.0f));
        var->setSorIterations(rng.uniform(1, 20));
        var->setFixedPointIterations(rng.uniform(1, 20));
        var->setOmega(rng.uniform(1.01f, 1.99f));
        // Run with all available threads, then with a single thread,
        // starting both from a copy of the same initial flow:
        cv::setNumThreads(cv::getNumberOfCPUs());
        Mat resMultiThread;
        flow.copyTo(resMultiThread);
        var->calc(frame1, frame2, resMultiThread);
        cv::setNumThreads(1);
        Mat resSingleThread;
        flow.copyTo(resSingleThread);
        var->calc(frame1, frame2, resSingleThread);
        EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF);
        EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF * frame1.total());
        // resulting flow should be within the frame bounds:
        double min_val, max_val;
        minMaxLoc(resMultiThread, &min_val, &max_val);
        EXPECT_LE(abs(min_val), sqrt(size.height * size.height + size.width * size.width));
        EXPECT_LE(abs(max_val), sqrt(size.height * size.height + size.width * size.width));
    }
}
TEST(Video_OpticalFlowSparseToDenseFlow, accuracy) { CV_SparseToDenseFlowTest test; test.safe_run(); }
INSTANTIATE_TEST_CASE_P(FullSet, DenseOpticalFlow_VariationalRefinement, Values(szODD, szQVGA));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment