Commit 96bf2661 authored by Vadim Pisarevsky's avatar Vadim Pisarevsky Committed by Alexander Alekhin

Merge pull request #13084 from vpisarev:shuffle_optflow_algos

* moved DIS optical flow from opencv_contrib to opencv, moved TVL1 from opencv to opencv_contrib

* fixed compile warning

* TVL1 optical flow example moved to opencv_contrib
parent 841741aa
......@@ -397,6 +397,29 @@ public:
};
/** @brief Read a .flo file
@param path Path to the file to be loaded
@return The loaded flow field as a 2-channel floating-point matrix (CV_32FC2)
The function readOpticalFlow loads a flow field from a file and returns it as a single matrix.
Resulting Mat has a type CV_32FC2 - floating-point, 2-channel. First channel corresponds to the
flow in the horizontal direction (u), second - vertical (v).
*/
CV_EXPORTS_W Mat readOpticalFlow( const String& path );
/** @brief Write a .flo to disk
@param path Path to the file to be written
@param flow Flow field to be stored
@return true on success, false otherwise
The function stores a flow field in a file, returns true on success, false otherwise.
The flow field must be a 2-channel, floating-point matrix (CV_32FC2). First channel corresponds
to the flow in the horizontal direction (u), second - vertical (v).
*/
CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow );
/**
Base class for dense optical flow algorithms
*/
class CV_EXPORTS_W DenseOpticalFlow : public Algorithm
{
public:
......@@ -433,131 +456,6 @@ public:
OutputArray err = cv::noArray()) = 0;
};
/** @brief "Dual TV L1" Optical Flow Algorithm.
The class implements the "Dual TV L1" optical flow algorithm described in @cite Zach2007 and
@cite Javier2012 .
Here are important members of the class that control the algorithm, which you can set after
constructing the class instance:
- member double tau
Time step of the numerical scheme.
- member double lambda
Weight parameter for the data term, attachment parameter. This is the most relevant
parameter, which determines the smoothness of the output. The smaller this parameter is,
the smoother the solutions we obtain. It depends on the range of motions of the images, so
its value should be adapted to each image sequence.
- member double theta
Weight parameter for (u - v)\^2, tightness parameter. It serves as a link between the
attachment and the regularization terms. In theory, it should have a small value in order
to maintain both parts in correspondence. The method is stable for a large range of values
of this parameter.
- member int nscales
Number of scales used to create the pyramid of images.
- member int warps
Number of warpings per scale. Represents the number of times that I1(x+u0) and grad(
I1(x+u0) ) are computed per scale. This is a parameter that assures the stability of the
method. It also affects the running time, so it is a compromise between speed and
accuracy.
- member double epsilon
Stopping criterion threshold used in the numerical scheme, which is a trade-off between
precision and running time. A small value will yield more accurate solutions at the
expense of a slower convergence.
- member int iterations
Stopping criterion iterations number used in the numerical scheme.
C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
*/
class CV_EXPORTS_W DualTVL1OpticalFlow : public DenseOpticalFlow
{
public:
//! @brief Time step of the numerical scheme
/** @see setTau */
CV_WRAP virtual double getTau() const = 0;
/** @copybrief getTau @see getTau */
CV_WRAP virtual void setTau(double val) = 0;
//! @brief Weight parameter for the data term, attachment parameter
/** @see setLambda */
CV_WRAP virtual double getLambda() const = 0;
/** @copybrief getLambda @see getLambda */
CV_WRAP virtual void setLambda(double val) = 0;
//! @brief Weight parameter for (u - v)^2, tightness parameter
/** @see setTheta */
CV_WRAP virtual double getTheta() const = 0;
/** @copybrief getTheta @see getTheta */
CV_WRAP virtual void setTheta(double val) = 0;
//! @brief coefficient for additional illumination variation term
/** @see setGamma */
CV_WRAP virtual double getGamma() const = 0;
/** @copybrief getGamma @see getGamma */
CV_WRAP virtual void setGamma(double val) = 0;
//! @brief Number of scales used to create the pyramid of images
/** @see setScalesNumber */
CV_WRAP virtual int getScalesNumber() const = 0;
/** @copybrief getScalesNumber @see getScalesNumber */
CV_WRAP virtual void setScalesNumber(int val) = 0;
//! @brief Number of warpings per scale
/** @see setWarpingsNumber */
CV_WRAP virtual int getWarpingsNumber() const = 0;
/** @copybrief getWarpingsNumber @see getWarpingsNumber */
CV_WRAP virtual void setWarpingsNumber(int val) = 0;
//! @brief Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time
/** @see setEpsilon */
CV_WRAP virtual double getEpsilon() const = 0;
/** @copybrief getEpsilon @see getEpsilon */
CV_WRAP virtual void setEpsilon(double val) = 0;
//! @brief Inner iterations (between outlier filtering) used in the numerical scheme
/** @see setInnerIterations */
CV_WRAP virtual int getInnerIterations() const = 0;
/** @copybrief getInnerIterations @see getInnerIterations */
CV_WRAP virtual void setInnerIterations(int val) = 0;
//! @brief Outer iterations (number of inner loops) used in the numerical scheme
/** @see setOuterIterations */
CV_WRAP virtual int getOuterIterations() const = 0;
/** @copybrief getOuterIterations @see getOuterIterations */
CV_WRAP virtual void setOuterIterations(int val) = 0;
//! @brief Use initial flow
/** @see setUseInitialFlow */
CV_WRAP virtual bool getUseInitialFlow() const = 0;
/** @copybrief getUseInitialFlow @see getUseInitialFlow */
CV_WRAP virtual void setUseInitialFlow(bool val) = 0;
//! @brief Step between scales (<1)
/** @see setScaleStep */
CV_WRAP virtual double getScaleStep() const = 0;
/** @copybrief getScaleStep @see getScaleStep */
CV_WRAP virtual void setScaleStep(double val) = 0;
//! @brief Median filter kernel size (1 = no filter) (3 or 5)
/** @see setMedianFiltering */
CV_WRAP virtual int getMedianFiltering() const = 0;
/** @copybrief getMedianFiltering @see getMedianFiltering */
CV_WRAP virtual void setMedianFiltering(int val) = 0;
/** @brief Creates instance of cv::DualTVL1OpticalFlow
@param tau Time step of the numerical scheme
@param lambda Weight parameter for the data term, attachment parameter
@param theta Weight parameter for (u - v)^2, tightness parameter
@param nscales Number of scales used to create the pyramid of images
@param warps Number of warpings per scale
@param epsilon Stopping criterion threshold used in the numerical scheme
@param innnerIterations Inner iterations (between outlier filtering) used in the numerical scheme
@param outerIterations Outer iterations (number of inner loops) used in the numerical scheme
@param scaleStep Step between scales (<1)
@param gamma Coefficient for additional illumination variation term
@param medianFiltering Median filter kernel size (1 = no filter) (3 or 5)
@param useInitialFlow Use initial flow
@note "innnerIterations" contains a long-standing typo ("innner" instead of "inner");
the name is kept as-is because renaming it would break keyword-argument callers through
the generated (CV_WRAP) bindings.
*/
CV_WRAP static Ptr<DualTVL1OpticalFlow> create(
double tau = 0.25,
double lambda = 0.15,
double theta = 0.3,
int nscales = 5,
int warps = 5,
double epsilon = 0.01,
int innnerIterations = 30,
int outerIterations = 10,
double scaleStep = 0.8,
double gamma = 0.0,
int medianFiltering = 5,
bool useInitialFlow = false);
};
/** @brief Creates instance of cv::DualTVL1OpticalFlow (a cv::DenseOpticalFlow implementation)
*/
CV_EXPORTS_W Ptr<DualTVL1OpticalFlow> createOptFlow_DualTVL1();
/** @brief Class computing a dense optical flow using the Gunnar Farneback's algorithm.
*/
......@@ -599,6 +497,166 @@ public:
int flags = 0);
};
/** @brief Variational optical flow refinement
This class implements variational refinement of the input flow field, i.e.
it uses input flow to initialize the minimization of the following functional:
\f$E(U) = \int_{\Omega} \delta \Psi(E_I) + \gamma \Psi(E_G) + \alpha \Psi(E_S) \f$,
where \f$E_I,E_G,E_S\f$ are color constancy, gradient constancy and smoothness terms
respectively. \f$\Psi(s^2)=\sqrt{s^2+\epsilon^2}\f$ is a robust penalizer to limit the
influence of outliers. A complete formulation and a description of the minimization
procedure can be found in @cite Brox2004
*/
class CV_EXPORTS_W VariationalRefinement : public DenseOpticalFlow
{
public:
/** @brief @ref calc function overload to handle separate horizontal (u) and vertical (v) flow components
(to avoid extra splits/merges)
@param I0 first input image
@param I1 second input image (presumably same size/type as I0 — confirm against calc's contract)
@param flow_u input/output horizontal (u) component of the flow field
@param flow_v input/output vertical (v) component of the flow field */
CV_WRAP virtual void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) = 0;
/** @brief Number of outer (fixed-point) iterations in the minimization procedure.
@see setFixedPointIterations */
CV_WRAP virtual int getFixedPointIterations() const = 0;
/** @copybrief getFixedPointIterations @see getFixedPointIterations */
CV_WRAP virtual void setFixedPointIterations(int val) = 0;
/** @brief Number of inner successive over-relaxation (SOR) iterations
in the minimization procedure to solve the respective linear system.
@see setSorIterations */
CV_WRAP virtual int getSorIterations() const = 0;
/** @copybrief getSorIterations @see getSorIterations */
CV_WRAP virtual void setSorIterations(int val) = 0;
/** @brief Relaxation factor in SOR
@see setOmega */
CV_WRAP virtual float getOmega() const = 0;
/** @copybrief getOmega @see getOmega */
CV_WRAP virtual void setOmega(float val) = 0;
/** @brief Weight of the smoothness term
@see setAlpha */
CV_WRAP virtual float getAlpha() const = 0;
/** @copybrief getAlpha @see getAlpha */
CV_WRAP virtual void setAlpha(float val) = 0;
/** @brief Weight of the color constancy term
@see setDelta */
CV_WRAP virtual float getDelta() const = 0;
/** @copybrief getDelta @see getDelta */
CV_WRAP virtual void setDelta(float val) = 0;
/** @brief Weight of the gradient constancy term
@see setGamma */
CV_WRAP virtual float getGamma() const = 0;
/** @copybrief getGamma @see getGamma */
CV_WRAP virtual void setGamma(float val) = 0;
/** @brief Creates an instance of VariationalRefinement
*/
CV_WRAP static Ptr<VariationalRefinement> create();
};
/** @brief DIS optical flow algorithm.
This class implements the Dense Inverse Search (DIS) optical flow algorithm. More
details about the algorithm can be found at @cite Kroeger2016 . Includes three presets with preselected
parameters to provide reasonable trade-off between speed and quality. However, even the slowest preset is
still relatively fast, use DeepFlow if you need better quality and don't care about speed.
This implementation includes several additional features compared to the algorithm described in the paper,
including spatial propagation of flow vectors (@ref getUseSpatialPropagation), as well as an option to
utilize an initial flow approximation passed to @ref calc (which is, essentially, temporal propagation,
if the previous frame's flow field is passed).
*/
class CV_EXPORTS_W DISOpticalFlow : public DenseOpticalFlow
{
public:
//! Presets with preselected parameters providing different speed/quality trade-offs (see @ref create)
enum
{
PRESET_ULTRAFAST = 0,
PRESET_FAST = 1,
PRESET_MEDIUM = 2
};
/** @brief Finest level of the Gaussian pyramid on which the flow is computed (zero level
corresponds to the original image resolution). The final flow is obtained by bilinear upscaling.
@see setFinestScale */
CV_WRAP virtual int getFinestScale() const = 0;
/** @copybrief getFinestScale @see getFinestScale */
CV_WRAP virtual void setFinestScale(int val) = 0;
/** @brief Size of an image patch for matching (in pixels). Normally, default 8x8 patches work well
enough in most cases.
@see setPatchSize */
CV_WRAP virtual int getPatchSize() const = 0;
/** @copybrief getPatchSize @see getPatchSize */
CV_WRAP virtual void setPatchSize(int val) = 0;
/** @brief Stride between neighbor patches. Must be less than patch size. Lower values correspond
to higher flow quality.
@see setPatchStride */
CV_WRAP virtual int getPatchStride() const = 0;
/** @copybrief getPatchStride @see getPatchStride */
CV_WRAP virtual void setPatchStride(int val) = 0;
/** @brief Maximum number of gradient descent iterations in the patch inverse search stage. Higher values
may improve quality in some cases.
@see setGradientDescentIterations */
CV_WRAP virtual int getGradientDescentIterations() const = 0;
/** @copybrief getGradientDescentIterations @see getGradientDescentIterations */
CV_WRAP virtual void setGradientDescentIterations(int val) = 0;
/** @brief Number of fixed point iterations of variational refinement per scale. Set to zero to
disable variational refinement completely. Higher values will typically result in more smooth and
high-quality flow.
@see setVariationalRefinementIterations */
CV_WRAP virtual int getVariationalRefinementIterations() const = 0;
/** @copybrief getVariationalRefinementIterations @see getVariationalRefinementIterations */
CV_WRAP virtual void setVariationalRefinementIterations(int val) = 0;
/** @brief Weight of the smoothness term
@see setVariationalRefinementAlpha */
CV_WRAP virtual float getVariationalRefinementAlpha() const = 0;
/** @copybrief getVariationalRefinementAlpha @see getVariationalRefinementAlpha */
CV_WRAP virtual void setVariationalRefinementAlpha(float val) = 0;
/** @brief Weight of the color constancy term
@see setVariationalRefinementDelta */
CV_WRAP virtual float getVariationalRefinementDelta() const = 0;
/** @copybrief getVariationalRefinementDelta @see getVariationalRefinementDelta */
CV_WRAP virtual void setVariationalRefinementDelta(float val) = 0;
/** @brief Weight of the gradient constancy term
@see setVariationalRefinementGamma */
CV_WRAP virtual float getVariationalRefinementGamma() const = 0;
/** @copybrief getVariationalRefinementGamma @see getVariationalRefinementGamma */
CV_WRAP virtual void setVariationalRefinementGamma(float val) = 0;
/** @brief Whether to use mean-normalization of patches when computing patch distance. It is turned on
by default as it typically provides a noticeable quality boost because of increased robustness to
illumination variations. Turn it off if you are certain that your sequence doesn't contain any changes
in illumination.
@see setUseMeanNormalization */
CV_WRAP virtual bool getUseMeanNormalization() const = 0;
/** @copybrief getUseMeanNormalization @see getUseMeanNormalization */
CV_WRAP virtual void setUseMeanNormalization(bool val) = 0;
/** @brief Whether to use spatial propagation of good optical flow vectors. This option is turned on by
default, as it tends to work better on average and can sometimes help recover from major errors
introduced by the coarse-to-fine scheme employed by the DIS optical flow algorithm. Turning this
option off can make the output flow field a bit smoother, however.
@see setUseSpatialPropagation */
CV_WRAP virtual bool getUseSpatialPropagation() const = 0;
/** @copybrief getUseSpatialPropagation @see getUseSpatialPropagation */
CV_WRAP virtual void setUseSpatialPropagation(bool val) = 0;
/** @brief Creates an instance of DISOpticalFlow
@param preset one of PRESET_ULTRAFAST, PRESET_FAST and PRESET_MEDIUM
*/
CV_WRAP static Ptr<DISOpticalFlow> create(int preset = DISOpticalFlow::PRESET_FAST);
};
/** @brief Class used for calculating a sparse optical flow.
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "../perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
namespace opencv_test { namespace {
#ifdef HAVE_OPENCL
void MakeArtificialExample(UMat &dst_frame1, UMat &dst_frame2);
typedef tuple<String, Size> DISParams;
typedef TestBaseWithParam<DISParams> DenseOpticalFlow_DIS;
OCL_PERF_TEST_P(DenseOpticalFlow_DIS, perf,
                Combine(Values("PRESET_ULTRAFAST", "PRESET_FAST", "PRESET_MEDIUM"), Values(szVGA, sz720p, sz1080p)))
{
    // Presets are passed as strings so that their names show up in the perf report.
    const String preset_name = get<0>(GetParam());
    const Size sz = get<1>(GetParam());

    // Map the preset name to the corresponding enum value (default: PRESET_FAST).
    int preset = DISOpticalFlow::PRESET_FAST;
    if (preset_name == "PRESET_ULTRAFAST")
        preset = DISOpticalFlow::PRESET_ULTRAFAST;
    else if (preset_name == "PRESET_MEDIUM")
        preset = DISOpticalFlow::PRESET_MEDIUM;

    UMat frame1(sz, CV_8U), frame2(sz, CV_8U);
    UMat flow;
    MakeArtificialExample(frame1, frame2);

    // The algorithm instance is created once, outside the measured cycle.
    Ptr<DenseOpticalFlow> algo = DISOpticalFlow::create(preset);
    OCL_TEST_CYCLE_N(10)
    {
        algo->calc(frame1, frame2, flow);
    }
    SANITY_CHECK_NOTHING();
}
void MakeArtificialExample(UMat &dst_frame1, UMat &dst_frame2)
{
    /* Fill two pre-allocated frames with a synthetic optical-flow test pair:
       a random low-resolution texture upscaled to full size, with the second
       frame additionally warped by a smooth random displacement field. */
    const int src_scale = 2; // base texture generated at 1/(1<<src_scale) resolution per dimension
    const int OF_scale = 6;  // displacement field generated at 1/(1<<OF_scale) resolution per dimension

    // Scale the displacement magnitude with the frame width. Floating-point
    // division is essential here: the previous integer division truncated
    // sigma (to 0 for frames narrower than 300 px), degenerating the warp.
    const double sigma = dst_frame1.cols / 300.0;

    UMat tmp(Size(dst_frame1.cols / (1 << src_scale), dst_frame1.rows / (1 << src_scale)), CV_8U);
    randu(tmp, 0, 255);
    resize(tmp, dst_frame1, dst_frame1.size(), 0.0, 0.0, INTER_LINEAR_EXACT);
    resize(tmp, dst_frame2, dst_frame2.size(), 0.0, 0.0, INTER_LINEAR_EXACT);

    // Smooth random displacement field, generated coarse and upscaled to full size:
    Mat displacement_field(Size(dst_frame1.cols / (1 << OF_scale), dst_frame1.rows / (1 << OF_scale)),
                           CV_32FC2);
    randn(displacement_field, 0.0, sigma);
    resize(displacement_field, displacement_field, dst_frame2.size(), 0.0, 0.0, INTER_CUBIC);

    // Convert relative displacements into an absolute remap grid (dst(y,x) = src(map(y,x))):
    for (int i = 0; i < displacement_field.rows; i++)
        for (int j = 0; j < displacement_field.cols; j++)
            displacement_field.at<Vec2f>(i, j) += Vec2f((float)j, (float)i);

    remap(dst_frame2, dst_frame2, displacement_field, Mat(), INTER_LINEAR, BORDER_REPLICATE);
}
#endif // HAVE_OPENCL
}} // namespace
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "perf_precomp.hpp"
namespace opencv_test { namespace {
void MakeArtificialExample(Mat &dst_frame1, Mat &dst_frame2);
typedef tuple<String, Size> DISParams;
typedef TestBaseWithParam<DISParams> DenseOpticalFlow_DIS;
PERF_TEST_P(DenseOpticalFlow_DIS, perf,
            Combine(Values("PRESET_ULTRAFAST", "PRESET_FAST", "PRESET_MEDIUM"), Values(szVGA, sz720p, sz1080p)))
{
    // Presets are passed as strings so that their names show up in the perf report.
    const String preset_name = get<0>(GetParam());
    const Size sz = get<1>(GetParam());

    // Map the preset name to the corresponding enum value (default: PRESET_FAST).
    int preset = DISOpticalFlow::PRESET_FAST;
    if (preset_name == "PRESET_ULTRAFAST")
        preset = DISOpticalFlow::PRESET_ULTRAFAST;
    else if (preset_name == "PRESET_MEDIUM")
        preset = DISOpticalFlow::PRESET_MEDIUM;

    Mat frame1(sz, CV_8U), frame2(sz, CV_8U);
    Mat flow;
    MakeArtificialExample(frame1, frame2);

    TEST_CYCLE_N(10)
    {
        // Algorithm construction happens inside the measured cycle, as in the original benchmark.
        Ptr<DenseOpticalFlow> algo = DISOpticalFlow::create(preset);
        algo->calc(frame1, frame2, flow);
    }
    SANITY_CHECK_NOTHING();
}
void MakeArtificialExample(Mat &dst_frame1, Mat &dst_frame2)
{
    /* Fill two pre-allocated frames with a synthetic optical-flow test pair:
       a random low-resolution texture upscaled to full size, with the second
       frame additionally warped by a smooth random displacement field. */
    const int src_scale = 2; // base texture generated at 1/(1<<src_scale) resolution per dimension
    const int OF_scale = 6;  // displacement field generated at 1/(1<<OF_scale) resolution per dimension

    // Scale the displacement magnitude with the frame width. Floating-point
    // division is essential here: the previous integer division truncated
    // sigma (to 0 for frames narrower than 300 px), degenerating the warp.
    const double sigma = dst_frame1.cols / 300.0;

    Mat tmp(Size(dst_frame1.cols / (1 << src_scale), dst_frame1.rows / (1 << src_scale)), CV_8U);
    randu(tmp, 0, 255);
    resize(tmp, dst_frame1, dst_frame1.size(), 0.0, 0.0, INTER_LINEAR_EXACT);
    resize(tmp, dst_frame2, dst_frame2.size(), 0.0, 0.0, INTER_LINEAR_EXACT);

    // Smooth random displacement field, generated coarse and upscaled to full size:
    Mat displacement_field(Size(dst_frame1.cols / (1 << OF_scale), dst_frame1.rows / (1 << OF_scale)),
                           CV_32FC2);
    randn(displacement_field, 0.0, sigma);
    resize(displacement_field, displacement_field, dst_frame2.size(), 0.0, 0.0, INTER_CUBIC);

    // Convert relative displacements into an absolute remap grid (dst(y,x) = src(map(y,x))):
    for (int i = 0; i < displacement_field.rows; i++)
        for (int j = 0; j < displacement_field.cols; j++)
            displacement_field.at<Vec2f>(i, j) += Vec2f((float)j, (float)i);

    remap(dst_frame2, dst_frame2, displacement_field, Mat(), INTER_LINEAR, BORDER_REPLICATE);
}
}} // namespace
......@@ -6,5 +6,11 @@
#include "opencv2/ts.hpp"
#include <opencv2/video.hpp>
#include "opencv2/ts/ts_perf.hpp"
namespace cvtest
{
using namespace perf;
}
#endif
#include "perf_precomp.hpp"
namespace opencv_test { namespace {
using namespace perf;
typedef TestBaseWithParam< std::pair<string, string> > ImagePair;
// Pack two image paths into a pair, for use as a perf-test parameter.
std::pair<std::string, std::string> impair(const char* im1, const char* im2)
{
    return std::make_pair(std::string(im1), std::string(im2));
}
PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1, testing::Values(impair("cv/optflow/RubberWhale1.png", "cv/optflow/RubberWhale2.png")))
{
    declare.time(260);

    // Load the input pair as grayscale and abort early if the test data is missing.
    const Mat img0 = imread(getDataPath(GetParam().first), IMREAD_GRAYSCALE);
    const Mat img1 = imread(getDataPath(GetParam().second), IMREAD_GRAYSCALE);
    ASSERT_FALSE(img0.empty());
    ASSERT_FALSE(img1.empty());

    Ptr<DenseOpticalFlow> algo = createOptFlow_DualTVL1();
    Mat flow;
    TEST_CYCLE() algo->calc(img0, img1, flow);
    SANITY_CHECK_NOTHING();
}
}} // namespace
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "perf_precomp.hpp"
namespace opencv_test { namespace {
typedef tuple<Size, int, int> VarRefParams;
typedef TestBaseWithParam<VarRefParams> DenseOpticalFlow_VariationalRefinement;
PERF_TEST_P(DenseOpticalFlow_VariationalRefinement, perf, Combine(Values(szQVGA, szVGA), Values(5, 10), Values(5, 10)))
{
    const VarRefParams params = GetParam();
    const Size sz = get<0>(params);
    const int num_sor_iter = get<1>(params);
    const int num_fixed_point_iter = get<2>(params);

    // Random input frames and a zero-initialized flow field.
    Mat frame1(sz, CV_8U), frame2(sz, CV_8U);
    randu(frame1, 0, 255);
    randu(frame2, 0, 255);
    Mat flow(sz, CV_32FC2);
    flow.setTo(0.0f);

    TEST_CYCLE_N(10)
    {
        // Instance creation and parameter setup happen inside the measured cycle,
        // as in the original benchmark.
        Ptr<VariationalRefinement> var = VariationalRefinement::create();
        var->setAlpha(20.0f);
        var->setDelta(5.0f);
        var->setGamma(10.0f);
        var->setSorIterations(num_sor_iter);
        var->setFixedPointIterations(num_fixed_point_iter);
        var->calc(frame1, frame2, flow);
    }
    SANITY_CHECK_NOTHING();
}
}} // namespace
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include "opencl_kernels_video.hpp"
using namespace std;
#define EPS 0.001F
#define INF 1E+10F
namespace cv
{
//! Implementation of the Dense Inverse Search (DIS) optical flow algorithm (see DISOpticalFlow).
class DISOpticalFlowImpl CV_FINAL : public DISOpticalFlow
{
public:
DISOpticalFlowImpl();
void calc(InputArray I0, InputArray I1, InputOutputArray flow) CV_OVERRIDE;
void collectGarbage() CV_OVERRIDE;
protected: //!< algorithm parameters
int finest_scale, coarsest_scale; //!< finest and coarsest pyramid levels on which the flow is computed
int patch_size; //!< size of an image patch for matching (in pixels)
int patch_stride; //!< stride between neighbor patches
int grad_descent_iter; //!< maximum number of gradient descent iterations in the patch inverse search stage
int variational_refinement_iter; //!< number of fixed point iterations of variational refinement per scale
float variational_refinement_alpha; //!< weight of the smoothness term
float variational_refinement_gamma; //!< weight of the gradient constancy term
float variational_refinement_delta; //!< weight of the color constancy term
bool use_mean_normalization; //!< whether to use mean-normalization of patches
bool use_spatial_propagation; //!< whether to use spatial propagation of good optical flow vectors
protected: //!< some auxiliary variables
int border_size; //!< border padding added to I1 (see I1s_ext)
int w, h; //!< flow buffer width and height on the current scale
int ws, hs; //!< sparse flow buffer width and height on the current scale
public:
int getFinestScale() const CV_OVERRIDE { return finest_scale; }
void setFinestScale(int val) CV_OVERRIDE { finest_scale = val; }
int getPatchSize() const CV_OVERRIDE { return patch_size; }
void setPatchSize(int val) CV_OVERRIDE { patch_size = val; }
int getPatchStride() const CV_OVERRIDE { return patch_stride; }
void setPatchStride(int val) CV_OVERRIDE { patch_stride = val; }
int getGradientDescentIterations() const CV_OVERRIDE { return grad_descent_iter; }
void setGradientDescentIterations(int val) CV_OVERRIDE { grad_descent_iter = val; }
int getVariationalRefinementIterations() const CV_OVERRIDE { return variational_refinement_iter; }
void setVariationalRefinementIterations(int val) CV_OVERRIDE { variational_refinement_iter = val; }
float getVariationalRefinementAlpha() const CV_OVERRIDE { return variational_refinement_alpha; }
void setVariationalRefinementAlpha(float val) CV_OVERRIDE { variational_refinement_alpha = val; }
float getVariationalRefinementDelta() const CV_OVERRIDE { return variational_refinement_delta; }
void setVariationalRefinementDelta(float val) CV_OVERRIDE { variational_refinement_delta = val; }
float getVariationalRefinementGamma() const CV_OVERRIDE { return variational_refinement_gamma; }
void setVariationalRefinementGamma(float val) CV_OVERRIDE { variational_refinement_gamma = val; }
bool getUseMeanNormalization() const CV_OVERRIDE { return use_mean_normalization; }
void setUseMeanNormalization(bool val) CV_OVERRIDE { use_mean_normalization = val; }
bool getUseSpatialPropagation() const CV_OVERRIDE { return use_spatial_propagation; }
void setUseSpatialPropagation(bool val) CV_OVERRIDE { use_spatial_propagation = val; }
protected: //!< internal buffers
vector<Mat_<uchar> > I0s; //!< Gaussian pyramid for the current frame
vector<Mat_<uchar> > I1s; //!< Gaussian pyramid for the next frame
vector<Mat_<uchar> > I1s_ext; //!< I1s with borders
vector<Mat_<short> > I0xs; //!< Gaussian pyramid for the x gradient of the current frame
vector<Mat_<short> > I0ys; //!< Gaussian pyramid for the y gradient of the current frame
vector<Mat_<float> > Ux; //!< x component of the flow vectors
vector<Mat_<float> > Uy; //!< y component of the flow vectors
vector<Mat_<float> > initial_Ux; //!< x component of the initial flow field, if one was passed as an input
vector<Mat_<float> > initial_Uy; //!< y component of the initial flow field, if one was passed as an input
Mat_<Vec2f> U; //!< a buffer for the merged flow
Mat_<float> Sx; //!< intermediate sparse flow representation (x component)
Mat_<float> Sy; //!< intermediate sparse flow representation (y component)
/* Structure tensor components: */
Mat_<float> I0xx_buf; //!< sum of squares of x gradient values
Mat_<float> I0yy_buf; //!< sum of squares of y gradient values
Mat_<float> I0xy_buf; //!< sum of x and y gradient products
/* Extra buffers that are useful if patch mean-normalization is used: */
Mat_<float> I0x_buf; //!< sum of x gradient values
Mat_<float> I0y_buf; //!< sum of y gradient values
/* Auxiliary buffers used in structure tensor computation: */
Mat_<float> I0xx_buf_aux;
Mat_<float> I0yy_buf_aux;
Mat_<float> I0xy_buf_aux;
Mat_<float> I0x_buf_aux;
Mat_<float> I0y_buf_aux;
//! One variational refinement instance per pyramid level (avoids reallocation across calls)
vector<Ptr<VariationalRefinement> > variational_refinement_processors;
private: //!< private methods and parallel sections
void prepareBuffers(Mat &I0, Mat &I1, Mat &flow, bool use_flow);
void precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &dst_I0x, Mat &dst_I0y, Mat &I0x,
Mat &I0y);
//! Parallel body for the patch inverse search stage (computes the sparse flow Sx/Sy in horizontal stripes)
struct PatchInverseSearch_ParBody : public ParallelLoopBody
{
DISOpticalFlowImpl *dis;
int nstripes, stripe_sz;
int hs;
Mat *Sx, *Sy, *Ux, *Uy, *I0, *I1, *I0x, *I0y;
int num_iter, pyr_level;
PatchInverseSearch_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _hs, Mat &dst_Sx, Mat &dst_Sy,
Mat &src_Ux, Mat &src_Uy, Mat &_I0, Mat &_I1, Mat &_I0x, Mat &_I0y, int _num_iter,
int _pyr_level);
void operator()(const Range &range) const CV_OVERRIDE;
};
//! Parallel body for the densification stage (turns sparse flow Sx/Sy into dense flow Ux/Uy in stripes)
struct Densification_ParBody : public ParallelLoopBody
{
DISOpticalFlowImpl *dis;
int nstripes, stripe_sz;
int h;
Mat *Ux, *Uy, *Sx, *Sy, *I0, *I1;
Densification_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _h, Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx,
Mat &src_Sy, Mat &_I0, Mat &_I1);
void operator()(const Range &range) const CV_OVERRIDE;
};
#ifdef HAVE_OPENCL
/* OpenCL (UMat) counterparts of the buffers above, used by the ocl_* code path: */
vector<UMat> u_I0s; //!< Gaussian pyramid for the current frame
vector<UMat> u_I1s; //!< Gaussian pyramid for the next frame
vector<UMat> u_I1s_ext; //!< I1s with borders
vector<UMat> u_I0xs; //!< Gaussian pyramid for the x gradient of the current frame
vector<UMat> u_I0ys; //!< Gaussian pyramid for the y gradient of the current frame
vector<UMat> u_Ux; //!< x component of the flow vectors
vector<UMat> u_Uy; //!< y component of the flow vectors
vector<UMat> u_initial_Ux; //!< x component of the initial flow field, if one was passed as an input
vector<UMat> u_initial_Uy; //!< y component of the initial flow field, if one was passed as an input
UMat u_U; //!< a buffer for the merged flow
UMat u_Sx; //!< intermediate sparse flow representation (x component)
UMat u_Sy; //!< intermediate sparse flow representation (y component)
/* Structure tensor components: */
UMat u_I0xx_buf; //!< sum of squares of x gradient values
UMat u_I0yy_buf; //!< sum of squares of y gradient values
UMat u_I0xy_buf; //!< sum of x and y gradient products
/* Extra buffers that are useful if patch mean-normalization is used: */
UMat u_I0x_buf; //!< sum of x gradient values
UMat u_I0y_buf; //!< sum of y gradient values
/* Auxiliary buffers used in structure tensor computation: */
UMat u_I0xx_buf_aux;
UMat u_I0yy_buf_aux;
UMat u_I0xy_buf_aux;
UMat u_I0x_buf_aux;
UMat u_I0y_buf_aux;
bool ocl_precomputeStructureTensor(UMat &dst_I0xx, UMat &dst_I0yy, UMat &dst_I0xy,
UMat &dst_I0x, UMat &dst_I0y, UMat &I0x, UMat &I0y);
void ocl_prepareBuffers(UMat &I0, UMat &I1, UMat &flow, bool use_flow);
bool ocl_calc(InputArray I0, InputArray I1, InputOutputArray flow);
bool ocl_Densification(UMat &dst_Ux, UMat &dst_Uy, UMat &src_Sx, UMat &src_Sy, UMat &_I0, UMat &_I1);
bool ocl_PatchInverseSearch(UMat &src_Ux, UMat &src_Uy,
UMat &I0, UMat &I1, UMat &I0x, UMat &I0y, int num_iter, int pyr_level);
#endif
};
DISOpticalFlowImpl::DISOpticalFlowImpl()
{
    // Patch inverse search defaults:
    patch_size = 8;
    patch_stride = 4;
    grad_descent_iter = 16;
    finest_scale = 2;
    border_size = 16;
    use_mean_normalization = true;
    use_spatial_propagation = true;

    // Variational refinement defaults:
    variational_refinement_iter = 5;
    variational_refinement_alpha = 20.f;
    variational_refinement_gamma = 10.f;
    variational_refinement_delta = 5.f;

    /* Use separate variational refinement instances for different scales to avoid repeated memory allocation: */
    const int max_possible_scales = 10;
    for (int scale = 0; scale < max_possible_scales; scale++)
        variational_refinement_processors.push_back(VariationalRefinement::create());
}
/* Allocates and initializes the image pyramids and all per-level buffers.
 * I0, I1   - input frames
 * flow     - optional initial flow field (2-channel; split into u/v planes)
 * use_flow - when true, the initial flow is downscaled to every used pyramid level
 * Levels are indexed from 0 (full resolution) to coarsest_scale; levels finer than
 * finest_scale are left uninitialized since the algorithm never touches them.
 */
void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1, Mat &flow, bool use_flow)
{
    I0s.resize(coarsest_scale + 1);
    I1s.resize(coarsest_scale + 1);
    I1s_ext.resize(coarsest_scale + 1);
    I0xs.resize(coarsest_scale + 1);
    I0ys.resize(coarsest_scale + 1);
    Ux.resize(coarsest_scale + 1);
    Uy.resize(coarsest_scale + 1);
    Mat flow_uv[2];
    if (use_flow)
    {
        split(flow, flow_uv);
        initial_Ux.resize(coarsest_scale + 1);
        initial_Uy.resize(coarsest_scale + 1);
    }
    int fraction = 1; /* downscaling factor of the current level w.r.t. the original image */
    int cur_rows = 0, cur_cols = 0;
    for (int i = 0; i <= coarsest_scale; i++)
    {
        /* Avoid initializing the pyramid levels above the finest scale, as they won't be used anyway */
        if (i == finest_scale)
        {
            cur_rows = I0.rows / fraction;
            cur_cols = I0.cols / fraction;
            I0s[i].create(cur_rows, cur_cols);
            resize(I0, I0s[i], I0s[i].size(), 0.0, 0.0, INTER_AREA);
            I1s[i].create(cur_rows, cur_cols);
            resize(I1, I1s[i], I1s[i].size(), 0.0, 0.0, INTER_AREA);
            /* These buffers are reused in each scale so we initialize them once on the finest scale: */
            /* Sparse-grid buffers: one value per patch location (stride patch_stride): */
            Sx.create(cur_rows / patch_stride, cur_cols / patch_stride);
            Sy.create(cur_rows / patch_stride, cur_cols / patch_stride);
            I0xx_buf.create(cur_rows / patch_stride, cur_cols / patch_stride);
            I0yy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride);
            I0xy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride);
            I0x_buf.create(cur_rows / patch_stride, cur_cols / patch_stride);
            I0y_buf.create(cur_rows / patch_stride, cur_cols / patch_stride);
            /* Aux buffers keep full row resolution - they hold the intermediate results
             * of the separable box filter in precomputeStructureTensor: */
            I0xx_buf_aux.create(cur_rows, cur_cols / patch_stride);
            I0yy_buf_aux.create(cur_rows, cur_cols / patch_stride);
            I0xy_buf_aux.create(cur_rows, cur_cols / patch_stride);
            I0x_buf_aux.create(cur_rows, cur_cols / patch_stride);
            I0y_buf_aux.create(cur_rows, cur_cols / patch_stride);
            U.create(cur_rows, cur_cols);
        }
        else if (i > finest_scale)
        {
            /* Each coarser level halves the previous one: */
            cur_rows = I0s[i - 1].rows / 2;
            cur_cols = I0s[i - 1].cols / 2;
            I0s[i].create(cur_rows, cur_cols);
            resize(I0s[i - 1], I0s[i], I0s[i].size(), 0.0, 0.0, INTER_AREA);
            I1s[i].create(cur_rows, cur_cols);
            resize(I1s[i - 1], I1s[i], I1s[i].size(), 0.0, 0.0, INTER_AREA);
        }
        if (i >= finest_scale)
        {
            /* I1 gets a replicated border so that patch lookups slightly outside the
             * image remain valid during the inverse search: */
            I1s_ext[i].create(cur_rows + 2 * border_size, cur_cols + 2 * border_size);
            copyMakeBorder(I1s[i], I1s_ext[i], border_size, border_size, border_size, border_size, BORDER_REPLICATE);
            I0xs[i].create(cur_rows, cur_cols);
            I0ys[i].create(cur_rows, cur_cols);
            spatialGradient(I0s[i], I0xs[i], I0ys[i]);
            Ux[i].create(cur_rows, cur_cols);
            Uy[i].create(cur_rows, cur_cols);
            variational_refinement_processors[i]->setAlpha(variational_refinement_alpha);
            variational_refinement_processors[i]->setDelta(variational_refinement_delta);
            variational_refinement_processors[i]->setGamma(variational_refinement_gamma);
            variational_refinement_processors[i]->setSorIterations(5);
            variational_refinement_processors[i]->setFixedPointIterations(variational_refinement_iter);
            if (use_flow)
            {
                /* Downscale the external initial flow to this level; flow magnitudes
                 * shrink by the same factor as the image dimensions: */
                resize(flow_uv[0], initial_Ux[i], Size(cur_cols, cur_rows));
                initial_Ux[i] /= fraction;
                resize(flow_uv[1], initial_Uy[i], Size(cur_cols, cur_rows));
                initial_Uy[i] /= fraction;
            }
        }
        fraction *= 2;
    }
}
/* This function computes the structure tensor elements (local sums of I0x^2, I0x*I0y and I0y^2).
 * A plain box filter is not used here because these sums need to be computed on a sparse grid
 * while being stored densely in the output buffers.
 */
/* dst_* buffers receive one value per sparse-grid location; I0x/I0y are the 16-bit
 * spatial gradients of I0. NOTE(review): h/w appear to be the image dimensions and
 * ws the sparse-grid width (members declared outside this view) - confirm against
 * the class declaration. */
void DISOpticalFlowImpl::precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &dst_I0x,
                                                   Mat &dst_I0y, Mat &I0x, Mat &I0y)
{
    float *I0xx_ptr = dst_I0xx.ptr<float>();
    float *I0yy_ptr = dst_I0yy.ptr<float>();
    float *I0xy_ptr = dst_I0xy.ptr<float>();
    float *I0x_ptr = dst_I0x.ptr<float>();
    float *I0y_ptr = dst_I0y.ptr<float>();
    float *I0xx_aux_ptr = I0xx_buf_aux.ptr<float>();
    float *I0yy_aux_ptr = I0yy_buf_aux.ptr<float>();
    float *I0xy_aux_ptr = I0xy_buf_aux.ptr<float>();
    float *I0x_aux_ptr = I0x_buf_aux.ptr<float>();
    float *I0y_aux_ptr = I0y_buf_aux.ptr<float>();
    /* Separable box filter: horizontal pass */
    /* For each image row, maintain running sums over a patch_size-wide window and emit
     * them into the aux buffers at every patch_stride-th column position: */
    for (int i = 0; i < h; i++)
    {
        float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f, sum_x = 0.0f, sum_y = 0.0f;
        short *x_row = I0x.ptr<short>(i);
        short *y_row = I0y.ptr<short>(i);
        for (int j = 0; j < patch_size; j++)
        {
            sum_xx += x_row[j] * x_row[j];
            sum_yy += y_row[j] * y_row[j];
            sum_xy += x_row[j] * y_row[j];
            sum_x += x_row[j];
            sum_y += y_row[j];
        }
        I0xx_aux_ptr[i * ws] = sum_xx;
        I0yy_aux_ptr[i * ws] = sum_yy;
        I0xy_aux_ptr[i * ws] = sum_xy;
        I0x_aux_ptr[i * ws] = sum_x;
        I0y_aux_ptr[i * ws] = sum_y;
        int js = 1; /* next sparse column index to fill */
        for (int j = patch_size; j < w; j++)
        {
            /* Slide the window one pixel right: add the entering pixel, drop the leaving one */
            sum_xx += (x_row[j] * x_row[j] - x_row[j - patch_size] * x_row[j - patch_size]);
            sum_yy += (y_row[j] * y_row[j] - y_row[j - patch_size] * y_row[j - patch_size]);
            sum_xy += (x_row[j] * y_row[j] - x_row[j - patch_size] * y_row[j - patch_size]);
            sum_x += (x_row[j] - x_row[j - patch_size]);
            sum_y += (y_row[j] - y_row[j - patch_size]);
            if ((j - patch_size + 1) % patch_stride == 0)
            {
                /* Window start landed on a sparse-grid column - store the sums: */
                I0xx_aux_ptr[i * ws + js] = sum_xx;
                I0yy_aux_ptr[i * ws + js] = sum_yy;
                I0xy_aux_ptr[i * ws + js] = sum_xy;
                I0x_aux_ptr[i * ws + js] = sum_x;
                I0y_aux_ptr[i * ws + js] = sum_y;
                js++;
            }
        }
    }
    /* Per-column accumulators for the vertical pass: */
    AutoBuffer<float> sum_xx(ws), sum_yy(ws), sum_xy(ws), sum_x(ws), sum_y(ws);
    for (int j = 0; j < ws; j++)
    {
        sum_xx[j] = 0.0f;
        sum_yy[j] = 0.0f;
        sum_xy[j] = 0.0f;
        sum_x[j] = 0.0f;
        sum_y[j] = 0.0f;
    }
    /* Separable box filter: vertical pass */
    /* Prime the accumulators with the first patch_size rows of the aux buffers: */
    for (int i = 0; i < patch_size; i++)
        for (int j = 0; j < ws; j++)
        {
            sum_xx[j] += I0xx_aux_ptr[i * ws + j];
            sum_yy[j] += I0yy_aux_ptr[i * ws + j];
            sum_xy[j] += I0xy_aux_ptr[i * ws + j];
            sum_x[j] += I0x_aux_ptr[i * ws + j];
            sum_y[j] += I0y_aux_ptr[i * ws + j];
        }
    for (int j = 0; j < ws; j++)
    {
        I0xx_ptr[j] = sum_xx[j];
        I0yy_ptr[j] = sum_yy[j];
        I0xy_ptr[j] = sum_xy[j];
        I0x_ptr[j] = sum_x[j];
        I0y_ptr[j] = sum_y[j];
    }
    int is = 1; /* next sparse row index to fill */
    for (int i = patch_size; i < h; i++)
    {
        /* Slide the vertical window one row down for every column: */
        for (int j = 0; j < ws; j++)
        {
            sum_xx[j] += (I0xx_aux_ptr[i * ws + j] - I0xx_aux_ptr[(i - patch_size) * ws + j]);
            sum_yy[j] += (I0yy_aux_ptr[i * ws + j] - I0yy_aux_ptr[(i - patch_size) * ws + j]);
            sum_xy[j] += (I0xy_aux_ptr[i * ws + j] - I0xy_aux_ptr[(i - patch_size) * ws + j]);
            sum_x[j] += (I0x_aux_ptr[i * ws + j] - I0x_aux_ptr[(i - patch_size) * ws + j]);
            sum_y[j] += (I0y_aux_ptr[i * ws + j] - I0y_aux_ptr[(i - patch_size) * ws + j]);
        }
        if ((i - patch_size + 1) % patch_stride == 0)
        {
            /* Window start landed on a sparse-grid row - store a full output row: */
            for (int j = 0; j < ws; j++)
            {
                I0xx_ptr[is * ws + j] = sum_xx[j];
                I0yy_ptr[is * ws + j] = sum_yy[j];
                I0xy_ptr[is * ws + j] = sum_xy[j];
                I0x_ptr[is * ws + j] = sum_x[j];
                I0y_ptr[is * ws + j] = sum_y[j];
            }
            is++;
        }
    }
}
/* Parallel body for the patch inverse search stage. The sparse-grid rows (hs of them)
 * are split into nstripes horizontal stripes that are processed independently. */
DISOpticalFlowImpl::PatchInverseSearch_ParBody::PatchInverseSearch_ParBody(DISOpticalFlowImpl &_dis, int _nstripes,
                                                                           int _hs, Mat &dst_Sx, Mat &dst_Sy,
                                                                           Mat &src_Ux, Mat &src_Uy, Mat &_I0, Mat &_I1,
                                                                           Mat &_I0x, Mat &_I0y, int _num_iter,
                                                                           int _pyr_level)
    : dis(&_dis), nstripes(_nstripes), hs(_hs), Sx(&dst_Sx), Sy(&dst_Sy), Ux(&src_Ux), Uy(&src_Uy), I0(&_I0), I1(&_I1),
      I0x(&_I0x), I0y(&_I0y), num_iter(_num_iter), pyr_level(_pyr_level)
{
    /* Ceiling division so that nstripes stripes always cover all hs rows: */
    stripe_sz = (int)ceil(hs / (double)nstripes);
}
/////////////////////////////////////////////* Patch processing functions */////////////////////////////////////////////
/* Some auxiliary macros */
/* Sets up the bilinear-interpolation weight vectors (w00..w11 broadcast to 4 lanes) and
 * preloads the first row of I1, expanded from 8-bit to 32-bit lanes, along with a copy
 * shifted left by one pixel (the horizontal interpolation neighbor). Leaves I1_ptr
 * pointing at the next row. */
#define HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION                                                                         \
    v_float32x4 w00v = v_setall_f32(w00);                                                                              \
    v_float32x4 w01v = v_setall_f32(w01);                                                                              \
    v_float32x4 w10v = v_setall_f32(w10);                                                                              \
    v_float32x4 w11v = v_setall_f32(w11);                                                                              \
                                                                                                                       \
    v_uint8x16 I0_row_16, I1_row_16, I1_row_shifted_16, I1_row_next_16, I1_row_next_shifted_16;                        \
    v_uint16x8 I0_row_8, I1_row_8, I1_row_shifted_8, I1_row_next_8, I1_row_next_shifted_8, tmp;                        \
    v_uint32x4 I0_row_4_left, I1_row_4_left, I1_row_shifted_4_left, I1_row_next_4_left, I1_row_next_shifted_4_left;    \
    v_uint32x4 I0_row_4_right, I1_row_4_right, I1_row_shifted_4_right, I1_row_next_4_right,                            \
        I1_row_next_shifted_4_right;                                                                                   \
    v_float32x4 I_diff_left, I_diff_right;                                                                             \
                                                                                                                       \
    /* Preload and expand the first row of I1: */                                                                      \
    I1_row_16 = v_load(I1_ptr);                                                                                        \
    I1_row_shifted_16 = v_extract<1>(I1_row_16, I1_row_16);                                                            \
    v_expand(I1_row_16, I1_row_8, tmp);                                                                                \
    v_expand(I1_row_shifted_16, I1_row_shifted_8, tmp);                                                                \
    v_expand(I1_row_8, I1_row_4_left, I1_row_4_right);                                                                 \
    v_expand(I1_row_shifted_8, I1_row_shifted_4_left, I1_row_shifted_4_right);                                         \
    I1_ptr += I1_stride;
/* Computes I_diff_left/I_diff_right - the difference between the current I0 row and the
 * bilinearly interpolated I1 row (8 pixels, processed as two 4-lane halves). Expects the
 * state prepared by HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION. */
#define HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION                                                                      \
    /* Load the next row of I1: */                                                                                     \
    I1_row_next_16 = v_load(I1_ptr);                                                                                   \
    /* Circular shift left by 1 element: */                                                                            \
    I1_row_next_shifted_16 = v_extract<1>(I1_row_next_16, I1_row_next_16);                                             \
    /* Expand to 8 ushorts (we only need the first 8 values): */                                                       \
    v_expand(I1_row_next_16, I1_row_next_8, tmp);                                                                      \
    v_expand(I1_row_next_shifted_16, I1_row_next_shifted_8, tmp);                                                      \
    /* Separate the left and right halves: */                                                                          \
    v_expand(I1_row_next_8, I1_row_next_4_left, I1_row_next_4_right);                                                  \
    v_expand(I1_row_next_shifted_8, I1_row_next_shifted_4_left, I1_row_next_shifted_4_right);                          \
                                                                                                                       \
    /* Load current row of I0: */                                                                                      \
    I0_row_16 = v_load(I0_ptr);                                                                                        \
    v_expand(I0_row_16, I0_row_8, tmp);                                                                                \
    v_expand(I0_row_8, I0_row_4_left, I0_row_4_right);                                                                 \
                                                                                                                       \
    /* Compute diffs between I0 and bilinearly interpolated I1: */                                                     \
    I_diff_left = w00v * v_cvt_f32(v_reinterpret_as_s32(I1_row_4_left)) +                                              \
                  w01v * v_cvt_f32(v_reinterpret_as_s32(I1_row_shifted_4_left)) +                                      \
                  w10v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_4_left)) +                                         \
                  w11v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_shifted_4_left)) -                                 \
                  v_cvt_f32(v_reinterpret_as_s32(I0_row_4_left));                                                      \
    I_diff_right = w00v * v_cvt_f32(v_reinterpret_as_s32(I1_row_4_right)) +                                            \
                   w01v * v_cvt_f32(v_reinterpret_as_s32(I1_row_shifted_4_right)) +                                    \
                   w10v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_4_right)) +                                       \
                   w11v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_shifted_4_right)) -                               \
                   v_cvt_f32(v_reinterpret_as_s32(I0_row_4_right));
/* Advances the row pointers and reuses the already-expanded "next" I1 row as the current
 * row for the following loop iteration (avoids reloading it). */
#define HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW                                                                     \
    I0_ptr += I0_stride;                                                                                               \
    I1_ptr += I1_stride;                                                                                               \
                                                                                                                       \
    I1_row_4_left = I1_row_next_4_left;                                                                                \
    I1_row_4_right = I1_row_next_4_right;                                                                              \
    I1_row_shifted_4_left = I1_row_next_shifted_4_left;                                                                \
    I1_row_shifted_4_right = I1_row_next_shifted_4_right;
/* This function essentially performs one iteration of gradient descent when finding the most similar patch in I1 for a
* given one in I0. It assumes that I0_ptr and I1_ptr already point to the corresponding patches and w00, w01, w10, w11
* are precomputed bilinear interpolation weights. It returns the SSD (sum of squared differences) between these patches
* and computes the values (dst_dUx, dst_dUy) that are used in the flow vector update. HAL acceleration is implemented
* only for the default patch size (8x8). Everything is processed in floats as using fixed-point approximations harms
* the quality significantly.
*/
inline float processPatch(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar *I1_ptr, short *I0x_ptr, short *I0y_ptr,
                          int I0_stride, int I1_stride, float w00, float w01, float w10, float w11, int patch_sz)
{
    /* dst_dUx/dst_dUy accumulate sum(diff * I0x) and sum(diff * I0y), i.e. values
     * proportional to the gradient of the SSD with respect to the flow vector. */
    float SSD = 0.0f;
#ifdef CV_SIMD128
    if (patch_sz == 8)
    {
        /* Variables to accumulate the sums */
        v_float32x4 Ux_vec = v_setall_f32(0);
        v_float32x4 Uy_vec = v_setall_f32(0);
        v_float32x4 SSD_vec = v_setall_f32(0);
        v_int16x8 I0x_row, I0y_row;
        v_int32x4 I0x_row_4_left, I0x_row_4_right, I0y_row_4_left, I0y_row_4_right;
        HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION;
        for (int row = 0; row < 8; row++)
        {
            HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION;
            I0x_row = v_load(I0x_ptr);
            v_expand(I0x_row, I0x_row_4_left, I0x_row_4_right);
            I0y_row = v_load(I0y_ptr);
            v_expand(I0y_row, I0y_row_4_left, I0y_row_4_right);
            /* Update the sums: */
            Ux_vec += I_diff_left * v_cvt_f32(I0x_row_4_left) + I_diff_right * v_cvt_f32(I0x_row_4_right);
            Uy_vec += I_diff_left * v_cvt_f32(I0y_row_4_left) + I_diff_right * v_cvt_f32(I0y_row_4_right);
            SSD_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right;
            I0x_ptr += I0_stride;
            I0y_ptr += I0_stride;
            HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW;
        }
        /* Final reduce operations: */
        dst_dUx = v_reduce_sum(Ux_vec);
        dst_dUy = v_reduce_sum(Uy_vec);
        SSD = v_reduce_sum(SSD_vec);
    }
    else
    {
#endif
        /* Generic scalar implementation (used for non-8x8 patch sizes): */
        dst_dUx = 0.0f;
        dst_dUy = 0.0f;
        float diff;
        for (int i = 0; i < patch_sz; i++)
            for (int j = 0; j < patch_sz; j++)
            {
                /* Bilinearly interpolated I1 sample minus the I0 sample: */
                diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] +
                       w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] -
                       I0_ptr[i * I0_stride + j];
                SSD += diff * diff;
                dst_dUx += diff * I0x_ptr[i * I0_stride + j];
                dst_dUy += diff * I0y_ptr[i * I0_stride + j];
            }
#ifdef CV_SIMD128
    }
#endif
    return SSD;
}
/* Same as processPatch, but with patch mean normalization, which improves robustness under changing
 * lighting conditions. Additionally needs the precomputed sums of the I0 patch gradients
 * (x_grad_sum, y_grad_sum) to remove the mean-difference contribution at the end.
 */
inline float processPatchMeanNorm(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar *I1_ptr, short *I0x_ptr,
                                  short *I0y_ptr, int I0_stride, int I1_stride, float w00, float w01, float w10,
                                  float w11, int patch_sz, float x_grad_sum, float y_grad_sum)
{
    float sum_diff = 0.0, sum_diff_sq = 0.0;
    float n = (float)patch_sz * patch_sz;
#ifdef CV_SIMD128
    if (patch_sz == 8)
    {
        /* Variables to accumulate the sums */
        v_float32x4 sum_I0x_mul_vec = v_setall_f32(0);
        v_float32x4 sum_I0y_mul_vec = v_setall_f32(0);
        v_float32x4 sum_diff_vec = v_setall_f32(0);
        v_float32x4 sum_diff_sq_vec = v_setall_f32(0);
        v_int16x8 I0x_row, I0y_row;
        v_int32x4 I0x_row_4_left, I0x_row_4_right, I0y_row_4_left, I0y_row_4_right;
        HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION;
        for (int row = 0; row < 8; row++)
        {
            HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION;
            I0x_row = v_load(I0x_ptr);
            v_expand(I0x_row, I0x_row_4_left, I0x_row_4_right);
            I0y_row = v_load(I0y_ptr);
            v_expand(I0y_row, I0y_row_4_left, I0y_row_4_right);
            /* Update the sums: */
            sum_I0x_mul_vec += I_diff_left * v_cvt_f32(I0x_row_4_left) + I_diff_right * v_cvt_f32(I0x_row_4_right);
            sum_I0y_mul_vec += I_diff_left * v_cvt_f32(I0y_row_4_left) + I_diff_right * v_cvt_f32(I0y_row_4_right);
            sum_diff_sq_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right;
            sum_diff_vec += I_diff_left + I_diff_right;
            I0x_ptr += I0_stride;
            I0y_ptr += I0_stride;
            HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW;
        }
        /* Final reduce operations: */
        sum_I0x_mul = v_reduce_sum(sum_I0x_mul_vec);
        sum_I0y_mul = v_reduce_sum(sum_I0y_mul_vec);
        sum_diff = v_reduce_sum(sum_diff_vec);
        sum_diff_sq = v_reduce_sum(sum_diff_sq_vec);
    }
    else
    {
#endif
        /* Generic scalar implementation (used for non-8x8 patch sizes): */
        float diff;
        for (int i = 0; i < patch_sz; i++)
            for (int j = 0; j < patch_sz; j++)
            {
                diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] +
                       w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] -
                       I0_ptr[i * I0_stride + j];
                sum_diff += diff;
                sum_diff_sq += diff * diff;
                sum_I0x_mul += diff * I0x_ptr[i * I0_stride + j];
                sum_I0y_mul += diff * I0y_ptr[i * I0_stride + j];
            }
#ifdef CV_SIMD128
    }
#endif
    /* Subtract the mean-difference contribution; the return value equals the SSD of the
     * mean-subtracted patches: sum((diff - mean_diff)^2) = sum_diff_sq - sum_diff^2/n. */
    dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n;
    dst_dUy = sum_I0y_mul - sum_diff * y_grad_sum / n;
    return sum_diff_sq - sum_diff * sum_diff / n;
}
/* Similar to processPatch, but compute only the sum of squared differences (SSD) between the patches
 * (no gradient-related sums). Used when evaluating candidate flow vectors.
 */
inline float computeSSD(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stride, float w00, float w01, float w10,
                        float w11, int patch_sz)
{
    float SSD = 0.0f;
#ifdef CV_SIMD128
    if (patch_sz == 8)
    {
        v_float32x4 SSD_vec = v_setall_f32(0);
        HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION;
        for (int row = 0; row < 8; row++)
        {
            HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION;
            SSD_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right;
            HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW;
        }
        SSD = v_reduce_sum(SSD_vec);
    }
    else
    {
#endif
        /* Generic scalar implementation (used for non-8x8 patch sizes): */
        float diff;
        for (int i = 0; i < patch_sz; i++)
            for (int j = 0; j < patch_sz; j++)
            {
                diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] +
                       w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] -
                       I0_ptr[i * I0_stride + j];
                SSD += diff * diff;
            }
#ifdef CV_SIMD128
    }
#endif
    return SSD;
}
/* Same as computeSSD, but with patch mean normalization: the mean intensity difference
 * between the patches is removed from the result.
 */
inline float computeSSDMeanNorm(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stride, float w00, float w01,
                                float w10, float w11, int patch_sz)
{
    float sum_diff = 0.0f, sum_diff_sq = 0.0f;
    float n = (float)patch_sz * patch_sz;
#ifdef CV_SIMD128
    if (patch_sz == 8)
    {
        v_float32x4 sum_diff_vec = v_setall_f32(0);
        v_float32x4 sum_diff_sq_vec = v_setall_f32(0);
        HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION;
        for (int row = 0; row < 8; row++)
        {
            HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION;
            sum_diff_sq_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right;
            sum_diff_vec += I_diff_left + I_diff_right;
            HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW;
        }
        sum_diff = v_reduce_sum(sum_diff_vec);
        sum_diff_sq = v_reduce_sum(sum_diff_sq_vec);
    }
    else
    {
#endif
        /* Generic scalar implementation (used for non-8x8 patch sizes): */
        float diff;
        for (int i = 0; i < patch_sz; i++)
            for (int j = 0; j < patch_sz; j++)
            {
                diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] +
                       w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] -
                       I0_ptr[i * I0_stride + j];
                sum_diff += diff;
                sum_diff_sq += diff * diff;
            }
#ifdef CV_SIMD128
    }
#endif
    /* SSD of the mean-subtracted patches: sum((diff - mean)^2) = sum_diff_sq - sum_diff^2/n */
    return sum_diff_sq - sum_diff * sum_diff / n;
}
#undef HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION
#undef HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION
#undef HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/* Runs the patch inverse search for one or more stripes of the sparse flow grid.
 * For each sparse-grid location it picks the best starting candidate (previous value,
 * temporal candidate, spatial neighbors) by SSD and then refines it with a few
 * gradient-descent iterations using the precomputed (inverse) structure tensor.
 * Even iterations traverse the grid top-left to bottom-right, odd iterations in the
 * reverse direction, so spatial propagation works in both directions. */
void DISOpticalFlowImpl::PatchInverseSearch_ParBody::operator()(const Range &range) const
{
    // force separate processing of stripes if we are using spatial propagation:
    if (dis->use_spatial_propagation && range.end > range.start + 1)
    {
        for (int n = range.start; n < range.end; n++)
            (*this)(Range(n, n + 1));
        return;
    }
    int psz = dis->patch_size;
    int psz2 = psz / 2;
    int w_ext = dis->w + 2 * dis->border_size; //!< width of I1_ext
    int bsz = dis->border_size;
    /* Input dense flow */
    float *Ux_ptr = Ux->ptr<float>();
    float *Uy_ptr = Uy->ptr<float>();
    /* Output sparse flow */
    float *Sx_ptr = Sx->ptr<float>();
    float *Sy_ptr = Sy->ptr<float>();
    uchar *I0_ptr = I0->ptr<uchar>();
    uchar *I1_ptr = I1->ptr<uchar>();
    short *I0x_ptr = I0x->ptr<short>();
    short *I0y_ptr = I0y->ptr<short>();
    /* Precomputed structure tensor */
    float *xx_ptr = dis->I0xx_buf.ptr<float>();
    float *yy_ptr = dis->I0yy_buf.ptr<float>();
    float *xy_ptr = dis->I0xy_buf.ptr<float>();
    /* And extra buffers for mean-normalization: */
    float *x_ptr = dis->I0x_buf.ptr<float>();
    float *y_ptr = dis->I0y_buf.ptr<float>();
    bool use_temporal_candidates = false;
    float *initial_Ux_ptr = NULL, *initial_Uy_ptr = NULL;
    if (!dis->initial_Ux.empty())
    {
        initial_Ux_ptr = dis->initial_Ux[pyr_level].ptr<float>();
        initial_Uy_ptr = dis->initial_Uy[pyr_level].ptr<float>();
        use_temporal_candidates = true;
    }
    int i, j, dir;
    int start_is, end_is, start_js, end_js;
    int start_i, start_j;
    /* Valid patch top-left corner range inside the border-extended I1 (a patch may
     * stick out of the original image but must stay inside I1_ext): */
    float i_lower_limit = bsz - psz + 1.0f;
    float i_upper_limit = bsz + dis->h - 1.0f;
    float j_lower_limit = bsz - psz + 1.0f;
    float j_upper_limit = bsz + dis->w - 1.0f;
    float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy;
/* Clamps the patch position (i,j) displaced by (Ux,Uy) into the valid range of I1_ext
 * and computes the four bilinear interpolation weights for the fractional part: */
#define INIT_BILINEAR_WEIGHTS(Ux, Uy)                                                                                  \
    i_I1 = min(max(i + Uy + bsz, i_lower_limit), i_upper_limit);                                                       \
    j_I1 = min(max(j + Ux + bsz, j_lower_limit), j_upper_limit);                                                       \
                                                                                                                       \
    w11 = (i_I1 - floor(i_I1)) * (j_I1 - floor(j_I1));                                                                 \
    w10 = (i_I1 - floor(i_I1)) * (floor(j_I1) + 1 - j_I1);                                                             \
    w01 = (floor(i_I1) + 1 - i_I1) * (j_I1 - floor(j_I1));                                                             \
    w00 = (floor(i_I1) + 1 - i_I1) * (floor(j_I1) + 1 - j_I1);
/* Evaluates the patch distance for a candidate flow vector (Ux,Uy) into dst: */
#define COMPUTE_SSD(dst, Ux, Uy)                                                                                       \
    INIT_BILINEAR_WEIGHTS(Ux, Uy);                                                                                     \
    if (dis->use_mean_normalization)                                                                                   \
        dst = computeSSDMeanNorm(I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, dis->w, w_ext, w00,  \
                                 w01, w10, w11, psz);                                                                  \
    else                                                                                                               \
        dst = computeSSD(I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, dis->w, w_ext, w00, w01,     \
                         w10, w11, psz);
    /* The total gradient descent budget is split evenly between the passes: */
    int num_inner_iter = (int)floor(dis->grad_descent_iter / (float)num_iter);
    for (int iter = 0; iter < num_iter; iter++)
    {
        if (iter % 2 == 0)
        {
            /* Forward pass: top-left to bottom-right */
            dir = 1;
            start_is = min(range.start * stripe_sz, hs);
            end_is = min(range.end * stripe_sz, hs);
            start_js = 0;
            end_js = dis->ws;
            start_i = start_is * dis->patch_stride;
            start_j = 0;
        }
        else
        {
            /* Backward pass: bottom-right to top-left */
            dir = -1;
            start_is = min(range.end * stripe_sz, hs) - 1;
            end_is = min(range.start * stripe_sz, hs) - 1;
            start_js = dis->ws - 1;
            end_js = -1;
            start_i = start_is * dis->patch_stride;
            start_j = (dis->ws - 1) * dis->patch_stride;
        }
        i = start_i;
        for (int is = start_is; dir * is < dir * end_is; is += dir)
        {
            j = start_j;
            for (int js = start_js; dir * js < dir * end_js; js += dir)
            {
                if (iter == 0)
                {
                    /* Using result form the previous pyramid level as the very first approximation: */
                    Sx_ptr[is * dis->ws + js] = Ux_ptr[(i + psz2) * dis->w + j + psz2];
                    Sy_ptr[is * dis->ws + js] = Uy_ptr[(i + psz2) * dis->w + j + psz2];
                }
                float min_SSD = INF, cur_SSD;
                if (use_temporal_candidates || dis->use_spatial_propagation)
                {
                    /* Score the current best-so-far vector first: */
                    COMPUTE_SSD(min_SSD, Sx_ptr[is * dis->ws + js], Sy_ptr[is * dis->ws + js]);
                }
                if (use_temporal_candidates)
                {
                    /* Try temporal candidates (vectors from the initial flow field that was passed to the function) */
                    COMPUTE_SSD(cur_SSD, initial_Ux_ptr[(i + psz2) * dis->w + j + psz2],
                                initial_Uy_ptr[(i + psz2) * dis->w + j + psz2]);
                    if (cur_SSD < min_SSD)
                    {
                        min_SSD = cur_SSD;
                        Sx_ptr[is * dis->ws + js] = initial_Ux_ptr[(i + psz2) * dis->w + j + psz2];
                        Sy_ptr[is * dis->ws + js] = initial_Uy_ptr[(i + psz2) * dis->w + j + psz2];
                    }
                }
                if (dis->use_spatial_propagation)
                {
                    /* Try spatial candidates: */
                    /* Horizontal neighbor already processed in this pass: */
                    if (dir * js > dir * start_js)
                    {
                        COMPUTE_SSD(cur_SSD, Sx_ptr[is * dis->ws + js - dir], Sy_ptr[is * dis->ws + js - dir]);
                        if (cur_SSD < min_SSD)
                        {
                            min_SSD = cur_SSD;
                            Sx_ptr[is * dis->ws + js] = Sx_ptr[is * dis->ws + js - dir];
                            Sy_ptr[is * dis->ws + js] = Sy_ptr[is * dis->ws + js - dir];
                        }
                    }
                    /* Flow vectors won't actually propagate across different stripes, which is the reason for keeping
                     * the number of stripes constant. It works well enough in practice and doesn't introduce any
                     * visible seams.
                     */
                    /* Vertical neighbor already processed in this pass: */
                    if (dir * is > dir * start_is)
                    {
                        COMPUTE_SSD(cur_SSD, Sx_ptr[(is - dir) * dis->ws + js], Sy_ptr[(is - dir) * dis->ws + js]);
                        if (cur_SSD < min_SSD)
                        {
                            min_SSD = cur_SSD;
                            Sx_ptr[is * dis->ws + js] = Sx_ptr[(is - dir) * dis->ws + js];
                            Sy_ptr[is * dis->ws + js] = Sy_ptr[(is - dir) * dis->ws + js];
                        }
                    }
                }
                /* Use the best candidate as a starting point for the gradient descent: */
                float cur_Ux = Sx_ptr[is * dis->ws + js];
                float cur_Uy = Sy_ptr[is * dis->ws + js];
                /* Computing the inverse of the structure tensor: */
                float detH = xx_ptr[is * dis->ws + js] * yy_ptr[is * dis->ws + js] -
                             xy_ptr[is * dis->ws + js] * xy_ptr[is * dis->ws + js];
                /* Guard against a (nearly) singular tensor: */
                if (abs(detH) < EPS)
                    detH = EPS;
                float invH11 = yy_ptr[is * dis->ws + js] / detH;
                float invH12 = -xy_ptr[is * dis->ws + js] / detH;
                float invH22 = xx_ptr[is * dis->ws + js] / detH;
                float prev_SSD = INF, SSD;
                float x_grad_sum = x_ptr[is * dis->ws + js];
                float y_grad_sum = y_ptr[is * dis->ws + js];
                for (int t = 0; t < num_inner_iter; t++)
                {
                    INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy);
                    if (dis->use_mean_normalization)
                        SSD = processPatchMeanNorm(dUx, dUy, I0_ptr + i * dis->w + j,
                                                   I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * dis->w + j,
                                                   I0y_ptr + i * dis->w + j, dis->w, w_ext, w00, w01, w10, w11, psz,
                                                   x_grad_sum, y_grad_sum);
                    else
                        SSD = processPatch(dUx, dUy, I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
                                           I0x_ptr + i * dis->w + j, I0y_ptr + i * dis->w + j, dis->w, w_ext, w00, w01,
                                           w10, w11, psz);
                    /* Newton-like update step: (dUx,dUy) premultiplied by the inverse tensor */
                    dx = invH11 * dUx + invH12 * dUy;
                    dy = invH12 * dUx + invH22 * dUy;
                    cur_Ux -= dx;
                    cur_Uy -= dy;
                    /* Break when patch distance stops decreasing */
                    if (SSD >= prev_SSD)
                        break;
                    prev_SSD = SSD;
                }
                /* If gradient descent converged to a flow vector that is very far from the initial approximation
                 * (more than patch size) then we don't use it. Noticeably improves the robustness.
                 */
                if (norm(Vec2f(cur_Ux - Sx_ptr[is * dis->ws + js], cur_Uy - Sy_ptr[is * dis->ws + js])) <= psz)
                {
                    Sx_ptr[is * dis->ws + js] = cur_Ux;
                    Sy_ptr[is * dis->ws + js] = cur_Uy;
                }
                j += dir * dis->patch_stride;
            }
            i += dir * dis->patch_stride;
        }
    }
#undef INIT_BILINEAR_WEIGHTS
#undef COMPUTE_SSD
}
/* Parallel body for the densification stage: the h rows of the dense output flow are
 * split into nstripes independent horizontal stripes. */
DISOpticalFlowImpl::Densification_ParBody::Densification_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _h,
                                                                 Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, Mat &src_Sy,
                                                                 Mat &_I0, Mat &_I1)
    : dis(&_dis), nstripes(_nstripes), h(_h), Ux(&dst_Ux), Uy(&dst_Uy), Sx(&src_Sx), Sy(&src_Sy), I0(&_I0), I1(&_I1)
{
    /* Ceiling division so that nstripes stripes always cover all h rows: */
    stripe_sz = (int)ceil(h / (double)nstripes);
}
/* This function transforms a sparse optical flow field obtained by PatchInverseSearch (which computes flow values
* on a sparse grid defined by patch_stride) into a dense optical flow field by weighted averaging of values from the
* overlapping patches.
*/
/* Densifies the sparse flow for one stripe of rows [start_i, end_i). For every dense
 * pixel, all overlapping patches vote with their sparse flow vector; votes are weighted
 * by photometric consistency (small warped-image difference => large weight). */
void DISOpticalFlowImpl::Densification_ParBody::operator()(const Range &range) const
{
    int start_i = min(range.start * stripe_sz, h);
    int end_i = min(range.end * stripe_sz, h);
    /* Input sparse flow */
    float *Sx_ptr = Sx->ptr<float>();
    float *Sy_ptr = Sy->ptr<float>();
    /* Output dense flow */
    float *Ux_ptr = Ux->ptr<float>();
    float *Uy_ptr = Uy->ptr<float>();
    uchar *I0_ptr = I0->ptr<uchar>();
    uchar *I1_ptr = I1->ptr<uchar>();
    int psz = dis->patch_size;
    int pstr = dis->patch_stride;
    int i_l, i_u;
    int j_l, j_u;
    float i_m, j_m, diff;
    /* These values define the set of sparse grid locations that contain patches overlapping with the current dense flow
     * location */
    int start_is, end_is;
    int start_js, end_js;
/* Some helper macros for updating this set of sparse grid locations */
#define UPDATE_SPARSE_I_COORDINATES                                                                                    \
    if (i % pstr == 0 && i + psz <= h)                                                                                 \
        end_is++;                                                                                                      \
    if (i - psz >= 0 && (i - psz) % pstr == 0 && start_is < end_is)                                                    \
        start_is++;
#define UPDATE_SPARSE_J_COORDINATES                                                                                    \
    if (j % pstr == 0 && j + psz <= dis->w)                                                                            \
        end_js++;                                                                                                      \
    if (j - psz >= 0 && (j - psz) % pstr == 0 && start_js < end_js)                                                    \
        start_js++;
    /* Fast-forward the sparse row window [start_is, end_is] to the first row of this stripe: */
    start_is = 0;
    end_is = -1;
    for (int i = 0; i < start_i; i++)
    {
        UPDATE_SPARSE_I_COORDINATES;
    }
    for (int i = start_i; i < end_i; i++)
    {
        UPDATE_SPARSE_I_COORDINATES;
        start_js = 0;
        end_js = -1;
        for (int j = 0; j < dis->w; j++)
        {
            UPDATE_SPARSE_J_COORDINATES;
            float coef, sum_coef = 0.0f;
            float sum_Ux = 0.0f;
            float sum_Uy = 0.0f;
            /* Iterate through all the patches that overlap the current location (i,j) */
            for (int is = start_is; is <= end_is; is++)
                for (int js = start_js; js <= end_js; js++)
                {
                    /* Clamp the warped position inside the image; the EPS margin keeps
                     * the upper bilinear neighbor (i_u, j_u) in bounds: */
                    j_m = min(max(j + Sx_ptr[is * dis->ws + js], 0.0f), dis->w - 1.0f - EPS);
                    i_m = min(max(i + Sy_ptr[is * dis->ws + js], 0.0f), dis->h - 1.0f - EPS);
                    j_l = (int)j_m;
                    j_u = j_l + 1;
                    i_l = (int)i_m;
                    i_u = i_l + 1;
                    /* Bilinearly sample I1 at the warped position and compare to I0: */
                    diff = (j_m - j_l) * (i_m - i_l) * I1_ptr[i_u * dis->w + j_u] +
                           (j_u - j_m) * (i_m - i_l) * I1_ptr[i_u * dis->w + j_l] +
                           (j_m - j_l) * (i_u - i_m) * I1_ptr[i_l * dis->w + j_u] +
                           (j_u - j_m) * (i_u - i_m) * I1_ptr[i_l * dis->w + j_l] - I0_ptr[i * dis->w + j];
                    /* Weight inversely proportional to the photometric error: */
                    coef = 1 / max(1.0f, abs(diff));
                    sum_Ux += coef * Sx_ptr[is * dis->ws + js];
                    sum_Uy += coef * Sy_ptr[is * dis->ws + js];
                    sum_coef += coef;
                }
            /* Normalized weighted average of all patch votes: */
            Ux_ptr[i * dis->w + j] = sum_Ux / sum_coef;
            Uy_ptr[i * dis->w + j] = sum_Uy / sum_coef;
        }
    }
#undef UPDATE_SPARSE_I_COORDINATES
#undef UPDATE_SPARSE_J_COORDINATES
}
#ifdef HAVE_OPENCL
/* OpenCL variant of the patch inverse search. The first iteration runs the "fwd"
 * kernels (propagation + gradient-descent refinement), subsequent iterations run the
 * "bwd" kernels. Each pass is a 1D propagation kernel over sparse rows followed by a
 * 2D per-patch refinement kernel on the ws x hs sparse grid.
 * Returns false if any kernel fails to build or run.
 *
 * Refactored to use ocl::Kernel::args() instead of manual idx = k.set(idx, ...)
 * chains, for consistency with ocl_Densification; the argument order is unchanged
 * and must match the kernel signatures in dis_flow.cl.
 */
bool DISOpticalFlowImpl::ocl_PatchInverseSearch(UMat &src_Ux, UMat &src_Uy,
                                                UMat &I0, UMat &I1, UMat &I0x, UMat &I0y, int num_iter, int pyr_level)
{
    size_t globalSize[] = {(size_t)ws, (size_t)hs};
    size_t localSize[] = {16, 16};
    /* Split the total gradient-descent iteration budget evenly between the passes: */
    int num_inner_iter = (int)floor(grad_descent_iter / (float)num_iter);
    for (int iter = 0; iter < num_iter; iter++)
    {
        if (iter == 0)
        {
            ocl::Kernel k1("dis_patch_inverse_search_fwd_1", ocl::video::dis_flow_oclsrc);
            if (k1.empty())
                return false;
            size_t global_sz[] = {(size_t)hs * 8};
            size_t local_sz[] = {8};
            k1.args(ocl::KernelArg::PtrReadOnly(src_Ux), ocl::KernelArg::PtrReadOnly(src_Uy),
                    ocl::KernelArg::PtrReadOnly(I0), ocl::KernelArg::PtrReadOnly(I1),
                    (int)border_size, (int)patch_size, (int)patch_stride,
                    (int)w, (int)h, (int)ws, (int)hs, (int)pyr_level,
                    ocl::KernelArg::PtrWriteOnly(u_Sx), ocl::KernelArg::PtrWriteOnly(u_Sy));
            if (!k1.run(1, global_sz, local_sz, false))
                return false;

            ocl::Kernel k2("dis_patch_inverse_search_fwd_2", ocl::video::dis_flow_oclsrc);
            if (k2.empty())
                return false;
            k2.args(ocl::KernelArg::PtrReadOnly(src_Ux), ocl::KernelArg::PtrReadOnly(src_Uy),
                    ocl::KernelArg::PtrReadOnly(I0), ocl::KernelArg::PtrReadOnly(I1),
                    ocl::KernelArg::PtrReadOnly(I0x), ocl::KernelArg::PtrReadOnly(I0y),
                    ocl::KernelArg::PtrReadOnly(u_I0xx_buf), ocl::KernelArg::PtrReadOnly(u_I0yy_buf),
                    ocl::KernelArg::PtrReadOnly(u_I0xy_buf),
                    ocl::KernelArg::PtrReadOnly(u_I0x_buf), ocl::KernelArg::PtrReadOnly(u_I0y_buf),
                    (int)border_size, (int)patch_size, (int)patch_stride,
                    (int)w, (int)h, (int)ws, (int)hs,
                    (int)num_inner_iter, (int)pyr_level,
                    ocl::KernelArg::PtrReadWrite(u_Sx), ocl::KernelArg::PtrReadWrite(u_Sy));
            if (!k2.run(2, globalSize, localSize, false))
                return false;
        }
        else
        {
            ocl::Kernel k3("dis_patch_inverse_search_bwd_1", ocl::video::dis_flow_oclsrc);
            if (k3.empty())
                return false;
            size_t global_sz[] = {(size_t)hs * 8};
            size_t local_sz[] = {8};
            k3.args(ocl::KernelArg::PtrReadOnly(I0), ocl::KernelArg::PtrReadOnly(I1),
                    (int)border_size, (int)patch_size, (int)patch_stride,
                    (int)w, (int)h, (int)ws, (int)hs, (int)pyr_level,
                    ocl::KernelArg::PtrReadWrite(u_Sx), ocl::KernelArg::PtrReadWrite(u_Sy));
            if (!k3.run(1, global_sz, local_sz, false))
                return false;

            ocl::Kernel k4("dis_patch_inverse_search_bwd_2", ocl::video::dis_flow_oclsrc);
            if (k4.empty())
                return false;
            k4.args(ocl::KernelArg::PtrReadOnly(I0), ocl::KernelArg::PtrReadOnly(I1),
                    ocl::KernelArg::PtrReadOnly(I0x), ocl::KernelArg::PtrReadOnly(I0y),
                    ocl::KernelArg::PtrReadOnly(u_I0xx_buf), ocl::KernelArg::PtrReadOnly(u_I0yy_buf),
                    ocl::KernelArg::PtrReadOnly(u_I0xy_buf),
                    ocl::KernelArg::PtrReadOnly(u_I0x_buf), ocl::KernelArg::PtrReadOnly(u_I0y_buf),
                    (int)border_size, (int)patch_size, (int)patch_stride,
                    (int)w, (int)h, (int)ws, (int)hs,
                    (int)num_inner_iter,
                    ocl::KernelArg::PtrReadWrite(u_Sx), ocl::KernelArg::PtrReadWrite(u_Sy));
            if (!k4.run(2, globalSize, localSize, false))
                return false;
        }
    }
    return true;
}
/* OpenCL variant of the densification stage: launches one work-item per dense flow
 * pixel (w x h grid) to turn the sparse flow (src_Sx, src_Sy) into the dense flow
 * (dst_Ux, dst_Uy). Returns false if the kernel fails to build or run. */
bool DISOpticalFlowImpl::ocl_Densification(UMat &dst_Ux, UMat &dst_Uy, UMat &src_Sx, UMat &src_Sy, UMat &_I0, UMat &_I1)
{
    size_t global_sz[2] = {(size_t)w, (size_t)h};
    size_t local_sz[2] = {16, 16};

    ocl::Kernel k("dis_densification", ocl::video::dis_flow_oclsrc);
    k.args(ocl::KernelArg::PtrReadOnly(src_Sx), ocl::KernelArg::PtrReadOnly(src_Sy),
           ocl::KernelArg::PtrReadOnly(_I0), ocl::KernelArg::PtrReadOnly(_I1),
           (int)patch_size, (int)patch_stride,
           (int)w, (int)h, (int)ws,
           ocl::KernelArg::PtrWriteOnly(dst_Ux), ocl::KernelArg::PtrWriteOnly(dst_Uy));
    return k.run(2, global_sz, local_sz, false);
}
/* Allocate and fill the OpenCL-side image pyramids and scratch buffers.
 * I0, I1  - the two 8-bit single-channel input frames;
 * flow    - CV_32FC2 initial flow, read only when use_flow is true;
 * Pyramid levels [finest_scale .. coarsest_scale] are populated; levels below
 * finest_scale are left empty since they are never used. */
void DISOpticalFlowImpl::ocl_prepareBuffers(UMat &I0, UMat &I1, UMat &flow, bool use_flow)
{
    u_I0s.resize(coarsest_scale + 1);
    u_I1s.resize(coarsest_scale + 1);
    u_I1s_ext.resize(coarsest_scale + 1);
    u_I0xs.resize(coarsest_scale + 1);
    u_I0ys.resize(coarsest_scale + 1);
    u_Ux.resize(coarsest_scale + 1);
    u_Uy.resize(coarsest_scale + 1);
    vector<UMat> flow_uv(2);
    if (use_flow)
    {
        /* Split the caller-provided initial flow into separate U and V planes */
        split(flow, flow_uv);
        u_initial_Ux.resize(coarsest_scale + 1);
        u_initial_Uy.resize(coarsest_scale + 1);
    }
    int fraction = 1;              /* downscale factor of the current pyramid level (2^i) */
    int cur_rows = 0, cur_cols = 0;
    for (int i = 0; i <= coarsest_scale; i++)
    {
        /* Avoid initializing the pyramid levels above the finest scale, as they won't be used anyway */
        if (i == finest_scale)
        {
            /* Finest used level: downscale directly from the full-resolution inputs */
            cur_rows = I0.rows / fraction;
            cur_cols = I0.cols / fraction;
            u_I0s[i].create(cur_rows, cur_cols, CV_8UC1);
            resize(I0, u_I0s[i], u_I0s[i].size(), 0.0, 0.0, INTER_AREA);
            u_I1s[i].create(cur_rows, cur_cols, CV_8UC1);
            resize(I1, u_I1s[i], u_I1s[i].size(), 0.0, 0.0, INTER_AREA);
            /* These buffers are reused in each scale so we initialize them once on the finest scale: */
            u_Sx.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
            u_Sy.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
            u_I0xx_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
            u_I0yy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
            u_I0xy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
            u_I0x_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
            u_I0y_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1);
            /* _aux buffers hold horizontally-aggregated sums (one value per image row) */
            u_I0xx_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1);
            u_I0yy_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1);
            u_I0xy_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1);
            u_I0x_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1);
            u_I0y_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1);
            u_U.create(cur_rows, cur_cols, CV_32FC2);
        }
        else if (i > finest_scale)
        {
            /* Coarser levels: halve the previous level */
            cur_rows = u_I0s[i - 1].rows / 2;
            cur_cols = u_I0s[i - 1].cols / 2;
            u_I0s[i].create(cur_rows, cur_cols, CV_8UC1);
            resize(u_I0s[i - 1], u_I0s[i], u_I0s[i].size(), 0.0, 0.0, INTER_AREA);
            u_I1s[i].create(cur_rows, cur_cols, CV_8UC1);
            resize(u_I1s[i - 1], u_I1s[i], u_I1s[i].size(), 0.0, 0.0, INTER_AREA);
        }
        if (i >= finest_scale)
        {
            /* Border-extended copy of I1 for out-of-image patch lookups during inverse search */
            u_I1s_ext[i].create(cur_rows + 2 * border_size, cur_cols + 2 * border_size, CV_8UC1);
            copyMakeBorder(u_I1s[i], u_I1s_ext[i], border_size, border_size, border_size, border_size, BORDER_REPLICATE);
            u_I0xs[i].create(cur_rows, cur_cols, CV_16SC1);
            u_I0ys[i].create(cur_rows, cur_cols, CV_16SC1);
            spatialGradient(u_I0s[i], u_I0xs[i], u_I0ys[i]);
            u_Ux[i].create(cur_rows, cur_cols, CV_32FC1);
            u_Uy[i].create(cur_rows, cur_cols, CV_32FC1);
            variational_refinement_processors[i]->setAlpha(variational_refinement_alpha);
            variational_refinement_processors[i]->setDelta(variational_refinement_delta);
            variational_refinement_processors[i]->setGamma(variational_refinement_gamma);
            variational_refinement_processors[i]->setSorIterations(5);
            variational_refinement_processors[i]->setFixedPointIterations(variational_refinement_iter);
            if (use_flow)
            {
                /* Rescale the initial flow to this level; flow magnitudes shrink with the image */
                resize(flow_uv[0], u_initial_Ux[i], Size(cur_cols, cur_rows));
                divide(u_initial_Ux[i], static_cast<float>(fraction), u_initial_Ux[i]);
                resize(flow_uv[1], u_initial_Uy[i], Size(cur_cols, cur_rows));
                divide(u_initial_Uy[i], static_cast<float>(fraction), u_initial_Uy[i]);
            }
        }
        fraction *= 2;
    }
}
/* Two-pass aggregation of gradient products into per-patch structure tensor
 * entries. I0x/I0y are the CV_16SC1 spatial gradients of the first frame at
 * the current scale; the dst_* buffers receive one value per patch.
 * Returns true iff both kernels were enqueued successfully. */
bool DISOpticalFlowImpl::ocl_precomputeStructureTensor(UMat &dst_I0xx, UMat &dst_I0yy, UMat &dst_I0xy,
                                                       UMat &dst_I0x, UMat &dst_I0y, UMat &I0x, UMat &I0y)
{
    /* Pass 1: sliding-window sums along every image row -> *_aux buffers */
    ocl::Kernel k_hor("dis_precomputeStructureTensor_hor", ocl::video::dis_flow_oclsrc);
    k_hor.args(ocl::KernelArg::PtrReadOnly(I0x),
               ocl::KernelArg::PtrReadOnly(I0y),
               (int)patch_size, (int)patch_stride,
               (int)w, (int)h, (int)ws,
               ocl::KernelArg::PtrWriteOnly(u_I0xx_buf_aux),
               ocl::KernelArg::PtrWriteOnly(u_I0yy_buf_aux),
               ocl::KernelArg::PtrWriteOnly(u_I0xy_buf_aux),
               ocl::KernelArg::PtrWriteOnly(u_I0x_buf_aux),
               ocl::KernelArg::PtrWriteOnly(u_I0y_buf_aux));
    size_t global_hor[1] = {(size_t)h}; // one work-item per image row
    size_t local_hor[1] = {16};
    if (!k_hor.run(1, global_hor, local_hor, false))
        return false;

    /* Pass 2: sliding-window sums down each aux-buffer column -> dst buffers */
    ocl::Kernel k_ver("dis_precomputeStructureTensor_ver", ocl::video::dis_flow_oclsrc);
    k_ver.args(ocl::KernelArg::PtrReadOnly(u_I0xx_buf_aux),
               ocl::KernelArg::PtrReadOnly(u_I0yy_buf_aux),
               ocl::KernelArg::PtrReadOnly(u_I0xy_buf_aux),
               ocl::KernelArg::PtrReadOnly(u_I0x_buf_aux),
               ocl::KernelArg::PtrReadOnly(u_I0y_buf_aux),
               (int)patch_size, (int)patch_stride,
               (int)w, (int)h, (int)ws,
               ocl::KernelArg::PtrWriteOnly(dst_I0xx),
               ocl::KernelArg::PtrWriteOnly(dst_I0yy),
               ocl::KernelArg::PtrWriteOnly(dst_I0xy),
               ocl::KernelArg::PtrWriteOnly(dst_I0x),
               ocl::KernelArg::PtrWriteOnly(dst_I0y));
    size_t global_ver[1] = {(size_t)ws}; // one work-item per patch column
    size_t local_ver[1] = {16};
    return k_ver.run(1, global_ver, local_ver, false);
}
/* OpenCL path of the DIS coarse-to-fine pipeline.
 * I0, I1 - 8-bit single-channel frames; flow - output CV_32FC2 flow, also
 * used as initialization when it already has the right size/type.
 * Returns false if any kernel launch fails (the caller then falls back to
 * the CPU implementation via CV_OCL_RUN). */
bool DISOpticalFlowImpl::ocl_calc(InputArray I0, InputArray I1, InputOutputArray flow)
{
    UMat I0Mat = I0.getUMat();
    UMat I1Mat = I1.getUMat();
    bool use_input_flow = false;
    if (flow.sameSize(I0) && flow.depth() == CV_32F && flow.channels() == 2)
        use_input_flow = true;
    else
        flow.create(I1Mat.size(), CV_32FC2);
    UMat &u_flowMat = flow.getUMatRef();
    coarsest_scale = min((int)(log(max(I0Mat.cols, I0Mat.rows) / (4.0 * patch_size)) / log(2.0) + 0.5), /* Original code searches for maximal movement of width/4 */
                         (int)(log(min(I0Mat.cols, I0Mat.rows) / patch_size) / log(2.0))); /* Deepest pyramid level greater or equal than patch */
    ocl_prepareBuffers(I0Mat, I1Mat, u_flowMat, use_input_flow);
    /* Start from zero flow on the coarsest level */
    u_Ux[coarsest_scale].setTo(0.0f);
    u_Uy[coarsest_scale].setTo(0.0f);
    for (int i = coarsest_scale; i >= finest_scale; i--)
    {
        /* Per-level dimensions: image size and the patch grid size */
        w = u_I0s[i].cols;
        h = u_I0s[i].rows;
        ws = 1 + (w - patch_size) / patch_stride;
        hs = 1 + (h - patch_size) / patch_stride;
        if (!ocl_precomputeStructureTensor(u_I0xx_buf, u_I0yy_buf, u_I0xy_buf,
                                           u_I0x_buf, u_I0y_buf, u_I0xs[i], u_I0ys[i]))
            return false;
        if (!ocl_PatchInverseSearch(u_Ux[i], u_Uy[i], u_I0s[i], u_I1s_ext[i], u_I0xs[i], u_I0ys[i], 2, i))
            return false;
        if (!ocl_Densification(u_Ux[i], u_Uy[i], u_Sx, u_Sy, u_I0s[i], u_I1s[i]))
            return false;
        if (variational_refinement_iter > 0)
            variational_refinement_processors[i]->calcUV(u_I0s[i], u_I1s[i],
                                                         u_Ux[i].getMat(ACCESS_WRITE), u_Uy[i].getMat(ACCESS_WRITE));
        if (i > finest_scale)
        {
            /* Upscale the flow to the next (finer) level; vectors double in length */
            resize(u_Ux[i], u_Ux[i - 1], u_Ux[i - 1].size());
            resize(u_Uy[i], u_Uy[i - 1], u_Uy[i - 1].size());
            multiply(u_Ux[i - 1], 2, u_Ux[i - 1]);
            multiply(u_Uy[i - 1], 2, u_Uy[i - 1]);
        }
    }
    /* Merge U/V planes, upscale to full resolution and rescale magnitudes */
    vector<UMat> uxy(2);
    uxy[0] = u_Ux[finest_scale];
    uxy[1] = u_Uy[finest_scale];
    merge(uxy, u_U);
    resize(u_U, u_flowMat, u_flowMat.size());
    multiply(u_flowMat, 1 << finest_scale, u_flowMat);
    return true;
}
#endif
/* Main entry point: computes dense optical flow from I0 to I1.
 * I0, I1 - continuous 8-bit single-channel frames of equal size;
 * flow   - output CV_32FC2 flow; if it already matches I0 in size/type it is
 *          also used as the initial flow estimate.
 * Dispatches to the OpenCL path when available (Intel device, patch_size 8,
 * spatial propagation on), otherwise runs the multithreaded CPU pipeline. */
void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow)
{
    CV_Assert(!I0.empty() && I0.depth() == CV_8U && I0.channels() == 1);
    CV_Assert(!I1.empty() && I1.depth() == CV_8U && I1.channels() == 1);
    CV_Assert(I0.sameSize(I1));
    CV_Assert(I0.isContinuous());
    CV_Assert(I1.isContinuous());
    CV_OCL_RUN(ocl::Device::getDefault().isIntel() && flow.isUMat() &&
               (patch_size == 8) && (use_spatial_propagation == true),
               ocl_calc(I0, I1, flow));
    Mat I0Mat = I0.getMat();
    Mat I1Mat = I1.getMat();
    bool use_input_flow = false;
    if (flow.sameSize(I0) && flow.depth() == CV_32F && flow.channels() == 2)
        use_input_flow = true;
    else
        flow.create(I1Mat.size(), CV_32FC2);
    Mat flowMat = flow.getMat();
    coarsest_scale = min((int)(log(max(I0Mat.cols, I0Mat.rows) / (4.0 * patch_size)) / log(2.0) + 0.5), /* Original code searches for maximal movement of width/4 */
                         (int)(log(min(I0Mat.cols, I0Mat.rows) / patch_size) / log(2.0))); /* Deepest pyramid level greater or equal than patch */
    int num_stripes = getNumThreads();
    prepareBuffers(I0Mat, I1Mat, flowMat, use_input_flow);
    /* Start from zero flow on the coarsest level */
    Ux[coarsest_scale].setTo(0.0f);
    Uy[coarsest_scale].setTo(0.0f);
    for (int i = coarsest_scale; i >= finest_scale; i--)
    {
        /* Per-level dimensions: image size and the patch grid size */
        w = I0s[i].cols;
        h = I0s[i].rows;
        ws = 1 + (w - patch_size) / patch_stride;
        hs = 1 + (h - patch_size) / patch_stride;
        precomputeStructureTensor(I0xx_buf, I0yy_buf, I0xy_buf, I0x_buf, I0y_buf, I0xs[i], I0ys[i]);
        if (use_spatial_propagation)
        {
            /* Use a fixed number of stripes regardless the number of threads to make inverse search
             * with spatial propagation reproducible
             */
            parallel_for_(Range(0, 8), PatchInverseSearch_ParBody(*this, 8, hs, Sx, Sy, Ux[i], Uy[i], I0s[i],
                                                                  I1s_ext[i], I0xs[i], I0ys[i], 2, i));
        }
        else
        {
            parallel_for_(Range(0, num_stripes),
                          PatchInverseSearch_ParBody(*this, num_stripes, hs, Sx, Sy, Ux[i], Uy[i], I0s[i], I1s_ext[i],
                                                     I0xs[i], I0ys[i], 1, i));
        }
        parallel_for_(Range(0, num_stripes),
                      Densification_ParBody(*this, num_stripes, I0s[i].rows, Ux[i], Uy[i], Sx, Sy, I0s[i], I1s[i]));
        if (variational_refinement_iter > 0)
            variational_refinement_processors[i]->calcUV(I0s[i], I1s[i], Ux[i], Uy[i]);
        if (i > finest_scale)
        {
            /* Upscale the flow to the next (finer) level; vectors double in length */
            resize(Ux[i], Ux[i - 1], Ux[i - 1].size());
            resize(Uy[i], Uy[i - 1], Uy[i - 1].size());
            Ux[i - 1] *= 2;
            Uy[i - 1] *= 2;
        }
    }
    /* Merge U/V planes, upscale to full resolution and rescale magnitudes */
    Mat uxy[] = {Ux[finest_scale], Uy[finest_scale]};
    merge(uxy, 2, U);
    resize(U, flowMat, flowMat.size());
    flowMat *= 1 << finest_scale;
}
void DISOpticalFlowImpl::collectGarbage()
{
I0s.clear();
I1s.clear();
I1s_ext.clear();
I0xs.clear();
I0ys.clear();
Ux.clear();
Uy.clear();
U.release();
Sx.release();
Sy.release();
I0xx_buf.release();
I0yy_buf.release();
I0xy_buf.release();
I0xx_buf_aux.release();
I0yy_buf_aux.release();
I0xy_buf_aux.release();
#ifdef HAVE_OPENCL
u_I0s.clear();
u_I1s.clear();
u_I1s_ext.clear();
u_I0xs.clear();
u_I0ys.clear();
u_Ux.clear();
u_Uy.clear();
u_U.release();
u_Sx.release();
u_Sy.release();
u_I0xx_buf.release();
u_I0yy_buf.release();
u_I0xy_buf.release();
u_I0xx_buf_aux.release();
u_I0yy_buf_aux.release();
u_I0xy_buf_aux.release();
#endif
for (int i = finest_scale; i <= coarsest_scale; i++)
variational_refinement_processors[i]->collectGarbage();
variational_refinement_processors.clear();
}
/* Factory: creates a DIS optical flow instance configured according to one
 * of the speed/quality presets (PRESET_ULTRAFAST, PRESET_FAST,
 * PRESET_MEDIUM). An unknown preset value leaves the implementation's
 * default parameters untouched (except the patch size, which is always 8). */
Ptr<DISOpticalFlow> DISOpticalFlow::create(int preset)
{
    Ptr<DISOpticalFlow> dis = makePtr<DISOpticalFlowImpl>();
    dis->setPatchSize(8);
    switch (preset)
    {
    case DISOpticalFlow::PRESET_ULTRAFAST:
        dis->setFinestScale(2);
        dis->setPatchStride(4);
        dis->setGradientDescentIterations(12);
        dis->setVariationalRefinementIterations(0);
        break;
    case DISOpticalFlow::PRESET_FAST:
        dis->setFinestScale(2);
        dis->setPatchStride(4);
        dis->setGradientDescentIterations(16);
        dis->setVariationalRefinementIterations(5);
        break;
    case DISOpticalFlow::PRESET_MEDIUM:
        dis->setFinestScale(1);
        dis->setPatchStride(3);
        dis->setGradientDescentIterations(25);
        dis->setVariationalRefinementIterations(5);
        break;
    default:
        break;
    }
    return dis;
}
}
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#define EPS 0.001f
#define INF 1E+10F
/* Horizontal pass of the structure tensor aggregation.
 * One work-item per image row i: computes sliding-window sums (window width
 * patch_size, advanced by patch_stride) of the gradient products
 * Ix*Ix, Iy*Iy, Ix*Iy and of plain Ix, Iy, and writes one value per patch
 * column into the *_aux buffers (row-major, ws values per row).
 * The initial vload8 covers exactly one window: the host only dispatches the
 * OpenCL path when patch_size == 8. */
__kernel void dis_precomputeStructureTensor_hor(__global const short *I0x,
                                                __global const short *I0y,
                                                int patch_size, int patch_stride,
                                                int w, int h, int ws,
                                                __global float *I0xx_aux_ptr,
                                                __global float *I0yy_aux_ptr,
                                                __global float *I0xy_aux_ptr,
                                                __global float *I0x_aux_ptr,
                                                __global float *I0y_aux_ptr)
{
    int i = get_global_id(0);
    if (i >= h) return;

    const __global short *x_row = I0x + i * w;
    const __global short *y_row = I0y + i * w;

    /* Sums over the first (leftmost) window of this row */
    float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f, sum_x = 0.0f, sum_y = 0.0f;
    float8 x_vec = convert_float8(vload8(0, x_row));
    float8 y_vec = convert_float8(vload8(0, y_row));
    sum_xx = dot(x_vec.lo, x_vec.lo) + dot(x_vec.hi, x_vec.hi);
    sum_yy = dot(y_vec.lo, y_vec.lo) + dot(y_vec.hi, y_vec.hi);
    sum_xy = dot(x_vec.lo, y_vec.lo) + dot(x_vec.hi, y_vec.hi);
    sum_x = dot(x_vec.lo, 1.0f) + dot(x_vec.hi, 1.0f);
    sum_y = dot(y_vec.lo, 1.0f) + dot(y_vec.hi, 1.0f);

    I0xx_aux_ptr[i * ws] = sum_xx;
    I0yy_aux_ptr[i * ws] = sum_yy;
    I0xy_aux_ptr[i * ws] = sum_xy;
    I0x_aux_ptr[i * ws] = sum_x;
    I0y_aux_ptr[i * ws] = sum_y;

    /* Slide the window right one pixel at a time: add the entering column,
       subtract the leaving one; emit a value every patch_stride pixels */
    int js = 1;
    for (int j = patch_size; j < w; j++)
    {
        short x_val1 = x_row[j];
        short x_val2 = x_row[j - patch_size];
        short y_val1 = y_row[j];
        short y_val2 = y_row[j - patch_size];
        sum_xx += (x_val1 * x_val1 - x_val2 * x_val2);
        sum_yy += (y_val1 * y_val1 - y_val2 * y_val2);
        sum_xy += (x_val1 * y_val1 - x_val2 * y_val2);
        sum_x += (x_val1 - x_val2);
        sum_y += (y_val1 - y_val2);
        if ((j - patch_size + 1) % patch_stride == 0)
        {
            int index = i * ws + js;
            I0xx_aux_ptr[index] = sum_xx;
            I0yy_aux_ptr[index] = sum_yy;
            I0xy_aux_ptr[index] = sum_xy;
            I0x_aux_ptr[index] = sum_x;
            I0y_aux_ptr[index] = sum_y;
            js++;
        }
    }
}
/* Vertical pass of the structure tensor aggregation.
 * One work-item per patch column j: runs a sliding window (height patch_size,
 * advanced by patch_stride) down column j of the per-row aggregates produced
 * by the horizontal pass, writing one value per patch into the final
 * structure tensor buffers. */
__kernel void dis_precomputeStructureTensor_ver(__global const float *I0xx_aux_ptr,
                                                __global const float *I0yy_aux_ptr,
                                                __global const float *I0xy_aux_ptr,
                                                __global const float *I0x_aux_ptr,
                                                __global const float *I0y_aux_ptr,
                                                int patch_size, int patch_stride,
                                                int w, int h, int ws,
                                                __global float *I0xx_ptr,
                                                __global float *I0yy_ptr,
                                                __global float *I0xy_ptr,
                                                __global float *I0x_ptr,
                                                __global float *I0y_ptr)
{
    int j = get_global_id(0);
    if (j >= ws) return;

    /* Accumulate the topmost window of this column */
    float acc_xx = 0.0f, acc_yy = 0.0f, acc_xy = 0.0f, acc_x = 0.0f, acc_y = 0.0f;
    for (int row = 0; row < patch_size; row++)
    {
        int idx = row * ws + j;
        acc_xx += I0xx_aux_ptr[idx];
        acc_yy += I0yy_aux_ptr[idx];
        acc_xy += I0xy_aux_ptr[idx];
        acc_x += I0x_aux_ptr[idx];
        acc_y += I0y_aux_ptr[idx];
    }
    I0xx_ptr[j] = acc_xx;
    I0yy_ptr[j] = acc_yy;
    I0xy_ptr[j] = acc_xy;
    I0x_ptr[j] = acc_x;
    I0y_ptr[j] = acc_y;

    /* Slide the window down one row at a time: add the entering row,
       subtract the leaving one; emit a value every patch_stride rows */
    int is = 1;
    for (int row = patch_size; row < h; row++)
    {
        int add_idx = row * ws + j;
        int sub_idx = (row - patch_size) * ws + j;
        acc_xx += (I0xx_aux_ptr[add_idx] - I0xx_aux_ptr[sub_idx]);
        acc_yy += (I0yy_aux_ptr[add_idx] - I0yy_aux_ptr[sub_idx]);
        acc_xy += (I0xy_aux_ptr[add_idx] - I0xy_aux_ptr[sub_idx]);
        acc_x += (I0x_aux_ptr[add_idx] - I0x_aux_ptr[sub_idx]);
        acc_y += (I0y_aux_ptr[add_idx] - I0y_aux_ptr[sub_idx]);
        if ((row - patch_size + 1) % patch_stride == 0)
        {
            int out_idx = is * ws + j;
            I0xx_ptr[out_idx] = acc_xx;
            I0yy_ptr[out_idx] = acc_yy;
            I0xy_ptr[out_idx] = acc_xy;
            I0x_ptr[out_idx] = acc_x;
            I0y_ptr[out_idx] = acc_y;
            is++;
        }
    }
}
/* Densification kernel: one work-item per output pixel (x, y).
 * Converts the sparse per-patch flow (sx, sy) into dense per-pixel flow
 * (ux, uy): every patch overlapping the pixel votes with its displacement,
 * weighted by how well it matches — the inverse of the absolute intensity
 * difference between I1 sampled at the displaced position (bilinear) and
 * I0 at the pixel. */
__kernel void dis_densification(__global const float *sx, __global const float *sy,
                                __global const uchar *i0, __global const uchar *i1,
                                int psz, int pstr,
                                int w, int h, int ws,
                                __global float *ux, __global float *uy)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    int i, j;

    if (x >= w || y >= h) return;

    /* Range of patch-grid indices whose patches cover pixel (x, y) */
    int start_is, end_is;
    int start_js, end_js;

    end_is = min(y / pstr, (h - psz) / pstr);
    start_is = max(0, y - psz + pstr) / pstr;
    start_is = min(start_is, end_is);

    end_js = min(x / pstr, (w - psz) / pstr);
    start_js = max(0, x - psz + pstr) / pstr;
    start_js = min(start_js, end_js);

    float coef, sum_coef = 0.0f;
    float sum_Ux = 0.0f;
    float sum_Uy = 0.0f;

    int i_l, i_u;
    int j_l, j_u;
    float i_m, j_m, diff;

    i = y;
    j = x;

    /* Iterate through all the patches that overlap the current location (i,j) */
    for (int is = start_is; is <= end_is; is++)
        for (int js = start_js; js <= end_js; js++)
        {
            float sx_val = sx[is * ws + js];
            float sy_val = sy[is * ws + js];
            uchar2 i1_vec1, i1_vec2;

            /* Displaced position in I1, clamped inside the image */
            j_m = min(max(j + sx_val, 0.0f), w - 1.0f - EPS);
            i_m = min(max(i + sy_val, 0.0f), h - 1.0f - EPS);
            j_l = (int)j_m;
            j_u = j_l + 1;
            i_l = (int)i_m;
            i_u = i_l + 1;
            /* Bilinear sample of I1 at (i_m, j_m) minus I0 at (i, j) */
            i1_vec1 = vload2(0, i1 + i_u * w + j_l);
            i1_vec2 = vload2(0, i1 + i_l * w + j_l);
            diff = (j_m - j_l) * (i_m - i_l) * i1_vec1.y +
                   (j_u - j_m) * (i_m - i_l) * i1_vec1.x +
                   (j_m - j_l) * (i_u - i_m) * i1_vec2.y +
                   (j_u - j_m) * (i_u - i_m) * i1_vec2.x - i0[i * w + j];
            /* Better-matching patches get a larger weight */
            coef = 1 / max(1.0f, fabs(diff));
            sum_Ux += coef * sx_val;
            sum_Uy += coef * sy_val;
            sum_coef += coef;
        }
    /* Normalized weighted average over all covering patches */
    ux[i * w + j] = sum_Ux / sum_coef;
    uy[i * w + j] = sum_Uy / sum_coef;
}
/* Clamp the displaced patch origin (i + Uy, j + Ux) into the border-extended
 * I1 image (offset by bsz, limited to [*_lower_limit, *_upper_limit]) and
 * compute the four bilinear interpolation weights w00..w11 from its
 * fractional part. Expects i, j, bsz, the *_limit variables and
 * i_I1, j_I1, w00..w11 to already be in scope at the expansion site. */
#define INIT_BILINEAR_WEIGHTS(Ux, Uy) \
    i_I1 = min(max(i + Uy + bsz, i_lower_limit), i_upper_limit); \
    j_I1 = min(max(j + Ux + bsz, j_lower_limit), j_upper_limit); \
    \
    w11 = (i_I1 - floor(i_I1)) * (j_I1 - floor(j_I1)); \
    w10 = (i_I1 - floor(i_I1)) * (floor(j_I1) + 1 - j_I1); \
    w01 = (floor(i_I1) + 1 - i_I1) * (j_I1 - floor(j_I1)); \
    w00 = (floor(i_I1) + 1 - i_I1) * (floor(j_I1) + 1 - j_I1);
/* Mean-normalized SSD between an 8x8 patch of I0 and a bilinearly-shifted
 * patch of I1 (weights w00..w11 from INIT_BILINEAR_WEIGHTS).
 * Each invocation processes one patch row selected by i; callers pass
 * i = get_sub_group_local_id() so the sub-group lanes cover the rows, and
 * sub_group_reduce_add combines the per-row partial sums. The row width of
 * 8 matches patch_size == 8, the only configuration the host dispatches.
 * Returns sum((I1w - I0)^2) - (sum(I1w - I0))^2 / n, i.e. SSD with the mean
 * difference removed. */
float computeSSDMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr,
                         int I0_stride, int I1_stride,
                         float w00, float w01, float w10, float w11, int patch_sz, int i)
{
    float sum_diff = 0.0f, sum_diff_sq = 0.0f;
    int n = patch_sz * patch_sz;

    uchar8 I1_vec1, I1_vec2, I0_vec;
    uchar I1_val1, I1_val2;

    I0_vec = vload8(0, I0_ptr + i * I0_stride);
    I1_vec1 = vload8(0, I1_ptr + i * I1_stride);
    I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride);
    /* 9th element of each I1 row, needed for the 1-pixel-shifted terms */
    I1_val1 = I1_ptr[i * I1_stride + 8];
    I1_val2 = I1_ptr[(i + 1) * I1_stride + 8];

    /* Bilinear combination of the 2x2 neighborhood rows minus the I0 row */
    float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) +
                 w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) -
                 convert_float8(I0_vec);

    sum_diff = (dot(vec.lo, 1.0) + dot(vec.hi, 1.0));
    sum_diff_sq = (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi));

    sum_diff = sub_group_reduce_add(sum_diff);
    sum_diff_sq = sub_group_reduce_add(sum_diff_sq);

    return sum_diff_sq - sum_diff * sum_diff / n;
}
/* Forward spatial-propagation pass of patch inverse search.
 * 8 work-items cooperate on each patch row (one sub-group lane per patch
 * image row, see computeSSDMeanNorm); rows are processed left-to-right so
 * each patch can inherit its left neighbor's displacement when that gives a
 * lower mean-normalized SSD. Results go to Sx_ptr/Sy_ptr. */
__kernel void dis_patch_inverse_search_fwd_1(__global const float *Ux_ptr, __global const float *Uy_ptr,
                                             __global const uchar *I0_ptr, __global const uchar *I1_ptr,
                                             int border_size, int patch_size, int patch_stride,
                                             int w, int h, int ws, int hs, int pyr_level,
                                             __global float *Sx_ptr, __global float *Sy_ptr)
{
    int id = get_global_id(0);
    int is = id / 8;           /* patch-grid row handled by this group of 8 lanes */
    if (id >= (hs * 8)) return;

    int i = is * patch_stride;
    int j = 0;
    int psz = patch_size;
    int psz2 = psz / 2;
    int w_ext = w + 2 * border_size;  /* width of the border-extended I1 */
    int bsz = border_size;

    /* Clamping limits for displaced patch origins (see INIT_BILINEAR_WEIGHTS) */
    float i_lower_limit = bsz - psz + 1.0f;
    float i_upper_limit = bsz + h - 1.0f;
    float j_lower_limit = bsz - psz + 1.0f;
    float j_upper_limit = bsz + w - 1.0f;
    float i_I1, j_I1, w00, w01, w10, w11;

    /* Leftmost patch: seed with the dense flow at the patch center */
    float prev_Ux = Ux_ptr[(i + psz2) * w + j + psz2];
    float prev_Uy = Uy_ptr[(i + psz2) * w + j + psz2];
    Sx_ptr[is * ws] = prev_Ux;
    Sy_ptr[is * ws] = prev_Uy;
    j += patch_stride;

    int sid = get_sub_group_local_id();
    for (int js = 1; js < ws; js++, j += patch_stride)
    {
        float min_SSD, cur_SSD;

        /* Candidate 1: dense flow at this patch's center */
        float Ux = Ux_ptr[(i + psz2) * w + j + psz2];
        float Uy = Uy_ptr[(i + psz2) * w + j + psz2];

        INIT_BILINEAR_WEIGHTS(Ux, Uy);
        min_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
                                     w, w_ext, w00, w01, w10, w11, psz, sid);

        /* Candidate 2: displacement propagated from the left neighbor */
        INIT_BILINEAR_WEIGHTS(prev_Ux, prev_Uy);
        cur_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
                                     w, w_ext, w00, w01, w10, w11, psz, sid);
        if (cur_SSD < min_SSD)
        {
            Ux = prev_Ux;
            Uy = prev_Uy;
        }

        prev_Ux = Ux;
        prev_Uy = Uy;
        Sx_ptr[is * ws + js] = Ux;
        Sy_ptr[is * ws + js] = Uy;
    }
}
/* Single Gauss-Newton evaluation over an 8x8 patch (loop bound 8 matches
 * patch_size == 8, the only configuration the host dispatches).
 * Computes the mean-normalized SSD between the I0 patch and the bilinearly
 * shifted I1 patch, plus the correlations of the difference with the I0
 * gradients needed for the update step.
 * Returns (SSD, dSSD/dUx-numerator, dSSD/dUy-numerator) as a float3. */
float3 processPatchMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr,
                            const __global short *I0x_ptr, const __global short *I0y_ptr,
                            int I0_stride, int I1_stride, float w00, float w01, float w10,
                            float w11, int patch_sz, float x_grad_sum, float y_grad_sum)
{
    float sum_diff = 0.0, sum_diff_sq = 0.0;
    float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0;
    int n = patch_sz * patch_sz;
    uchar8 I1_vec1, I1_vec2;
    uchar I1_val1, I1_val2;

    for (int i = 0; i < 8; i++)
    {
        uchar8 I0_vec = vload8(0, I0_ptr + i * I0_stride);

        /* Each I1 row is reused as the "upper" row of the next iteration */
        I1_vec1 = (i == 0) ? vload8(0, I1_ptr + i * I1_stride) : I1_vec2;
        I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride);
        I1_val1 = (i == 0) ? I1_ptr[i * I1_stride + patch_sz] : I1_val2;
        I1_val2 = I1_ptr[(i + 1) * I1_stride + patch_sz];

        /* Bilinear combination of the 2x2 neighborhood minus the I0 row */
        float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) +
                     w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) -
                     convert_float8(I0_vec);
        sum_diff += (dot(vec.lo, 1.0) + dot(vec.hi, 1.0));
        sum_diff_sq += (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi));

        /* Correlate the difference with the I0 spatial gradients */
        short8 I0x_vec = vload8(0, I0x_ptr + i * I0_stride);
        short8 I0y_vec = vload8(0, I0y_ptr + i * I0_stride);

        sum_I0x_mul += dot(vec.lo, convert_float4(I0x_vec.lo));
        sum_I0x_mul += dot(vec.hi, convert_float4(I0x_vec.hi));
        sum_I0y_mul += dot(vec.lo, convert_float4(I0y_vec.lo));
        sum_I0y_mul += dot(vec.hi, convert_float4(I0y_vec.hi));
    }

    /* Mean-normalize: subtract the contribution of the average difference */
    float dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n;
    float dst_dUy = sum_I0y_mul - sum_diff * y_grad_sum / n;
    float SSD = sum_diff_sq - sum_diff * sum_diff / n;

    return (float3)(SSD, dst_dUx, dst_dUy);
}
/* Gauss-Newton refinement pass after forward propagation: one work-item per
 * patch (js, is). Starting from the propagated displacement in Sx/Sy, runs
 * up to num_inner_iter inverse-search iterations using the precomputed
 * structure tensor (xx/yy/xy) and gradient sums (x/y), and writes the result
 * back to Sx/Sy unless the total update exceeds one patch size. */
__kernel void dis_patch_inverse_search_fwd_2(__global const float *Ux_ptr, __global const float *Uy_ptr,
                                             __global const uchar *I0_ptr, __global const uchar *I1_ptr,
                                             __global const short *I0x_ptr, __global const short *I0y_ptr,
                                             __global const float *xx_ptr, __global const float *yy_ptr,
                                             __global const float *xy_ptr,
                                             __global const float *x_ptr, __global const float *y_ptr,
                                             int border_size, int patch_size, int patch_stride,
                                             int w, int h, int ws, int hs, int num_inner_iter, int pyr_level,
                                             __global float *Sx_ptr, __global float *Sy_ptr)
{
    int js = get_global_id(0);
    int is = get_global_id(1);
    int i = is * patch_stride;
    int j = js * patch_stride;
    int psz = patch_size;
    int psz2 = psz / 2;
    int w_ext = w + 2 * border_size;  /* width of the border-extended I1 */
    int bsz = border_size;
    int index = is * ws + js;

    if (js >= ws || is >= hs) return;

    float Ux = Sx_ptr[index];
    float Uy = Sy_ptr[index];
    float cur_Ux = Ux;
    float cur_Uy = Uy;

    /* Invert the 2x2 structure tensor (Hessian approximation) once per patch */
    float cur_xx = xx_ptr[index];
    float cur_yy = yy_ptr[index];
    float cur_xy = xy_ptr[index];
    float detH = cur_xx * cur_yy - cur_xy * cur_xy;

    if (fabs(detH) < EPS) detH = EPS;
    float invH11 = cur_yy / detH;
    float invH12 = -cur_xy / detH;
    float invH22 = cur_xx / detH;
    float prev_SSD = INF, SSD;
    float x_grad_sum = x_ptr[index];
    float y_grad_sum = y_ptr[index];

    /* Clamping limits for displaced patch origins (see INIT_BILINEAR_WEIGHTS) */
    float i_lower_limit = bsz - psz + 1.0f;
    float i_upper_limit = bsz + h - 1.0f;
    float j_lower_limit = bsz - psz + 1.0f;
    float j_upper_limit = bsz + w - 1.0f;
    float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy;
    float3 res;

    /* Gauss-Newton iterations; stop early when the SSD no longer decreases */
    for (int t = 0; t < num_inner_iter; t++)
    {
        INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy);
        res = processPatchMeanNorm(I0_ptr + i * w + j,
                                   I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * w + j,
                                   I0y_ptr + i * w + j, w, w_ext, w00, w01, w10, w11, psz,
                                   x_grad_sum, y_grad_sum);

        SSD = res.x;
        dUx = res.y;
        dUy = res.z;
        dx = invH11 * dUx + invH12 * dUy;
        dy = invH12 * dUx + invH22 * dUy;

        cur_Ux -= dx;
        cur_Uy -= dy;

        if (SSD >= prev_SSD)
            break;
        prev_SSD = SSD;
    }

    /* Reject diverged updates: keep the result only if the total displacement
       change stays within one patch size */
    float2 vec = (float2)(cur_Ux - Ux, cur_Uy - Uy);
    if (dot(vec, vec) <= (float)(psz * psz))
    {
        Sx_ptr[index] = cur_Ux;
        Sy_ptr[index] = cur_Uy;
    }
}
/* Backward spatial-propagation pass: mirror of the fwd_1 kernel.
 * 8 work-items cooperate on each patch row (one sub-group lane per patch
 * image row); rows are processed bottom-up and patches right-to-left, so
 * each patch can inherit its right neighbor's displacement from Sx/Sy when
 * that gives a lower mean-normalized SSD. Updates Sx/Sy in place. */
__kernel void dis_patch_inverse_search_bwd_1(__global const uchar *I0_ptr, __global const uchar *I1_ptr,
                                             int border_size, int patch_size, int patch_stride,
                                             int w, int h, int ws, int hs, int pyr_level,
                                             __global float *Sx_ptr, __global float *Sy_ptr)
{
    int id = get_global_id(0);
    int is = id / 8;           /* patch-grid row handled by this group of 8 lanes */
    if (id >= (hs * 8)) return;

    is = (hs - 1 - is);        /* traverse rows bottom-up */
    int i = is * patch_stride;
    int j = (ws - 2) * patch_stride;  /* start at the second-to-last column */
    int psz = patch_size;
    int psz2 = psz / 2;
    int w_ext = w + 2 * border_size;  /* width of the border-extended I1 */
    int bsz = border_size;

    /* Clamping limits for displaced patch origins (see INIT_BILINEAR_WEIGHTS) */
    float i_lower_limit = bsz - psz + 1.0f;
    float i_upper_limit = bsz + h - 1.0f;
    float j_lower_limit = bsz - psz + 1.0f;
    float j_upper_limit = bsz + w - 1.0f;
    float i_I1, j_I1, w00, w01, w10, w11;

    int sid = get_sub_group_local_id();
    for (int js = (ws - 2); js > -1; js--, j -= patch_stride)
    {
        float min_SSD, cur_SSD;
        /* .x = this patch's current displacement, .y = right neighbor's */
        float2 Ux = vload2(0, Sx_ptr + is * ws + js);
        float2 Uy = vload2(0, Sy_ptr + is * ws + js);

        INIT_BILINEAR_WEIGHTS(Ux.x, Uy.x);
        min_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
                                     w, w_ext, w00, w01, w10, w11, psz, sid);

        INIT_BILINEAR_WEIGHTS(Ux.y, Uy.y);
        cur_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
                                     w, w_ext, w00, w01, w10, w11, psz, sid);
        if (cur_SSD < min_SSD)
        {
            /* Right neighbor matches better: propagate its displacement */
            Sx_ptr[is * ws + js] = Ux.y;
            Sy_ptr[is * ws + js] = Uy.y;
        }
    }
}
/* Gauss-Newton refinement pass after backward propagation: one work-item per
 * patch, with grid indices mirrored so patches are visited bottom-right to
 * top-left. Identical update rule to fwd_2: refine the displacement from
 * Sx/Sy with up to num_inner_iter iterations and write it back unless the
 * total update exceeds one patch size. */
__kernel void dis_patch_inverse_search_bwd_2(__global const uchar *I0_ptr, __global const uchar *I1_ptr,
                                             __global const short *I0x_ptr, __global const short *I0y_ptr,
                                             __global const float *xx_ptr, __global const float *yy_ptr,
                                             __global const float *xy_ptr,
                                             __global const float *x_ptr, __global const float *y_ptr,
                                             int border_size, int patch_size, int patch_stride,
                                             int w, int h, int ws, int hs, int num_inner_iter,
                                             __global float *Sx_ptr, __global float *Sy_ptr)
{
    int js = get_global_id(0);
    int is = get_global_id(1);
    if (js >= ws || is >= hs) return;

    /* Mirror the grid indices: traverse from the bottom-right corner */
    js = (ws - 1 - js);
    is = (hs - 1 - is);

    int j = js * patch_stride;
    int i = is * patch_stride;
    int psz = patch_size;
    int psz2 = psz / 2;
    int w_ext = w + 2 * border_size;  /* width of the border-extended I1 */
    int bsz = border_size;
    int index = is * ws + js;

    float Ux = Sx_ptr[index];
    float Uy = Sy_ptr[index];
    float cur_Ux = Ux;
    float cur_Uy = Uy;

    /* Invert the 2x2 structure tensor (Hessian approximation) once per patch */
    float cur_xx = xx_ptr[index];
    float cur_yy = yy_ptr[index];
    float cur_xy = xy_ptr[index];
    float detH = cur_xx * cur_yy - cur_xy * cur_xy;

    if (fabs(detH) < EPS) detH = EPS;
    float invH11 = cur_yy / detH;
    float invH12 = -cur_xy / detH;
    float invH22 = cur_xx / detH;
    float prev_SSD = INF, SSD;
    float x_grad_sum = x_ptr[index];
    float y_grad_sum = y_ptr[index];

    /* Clamping limits for displaced patch origins (see INIT_BILINEAR_WEIGHTS) */
    float i_lower_limit = bsz - psz + 1.0f;
    float i_upper_limit = bsz + h - 1.0f;
    float j_lower_limit = bsz - psz + 1.0f;
    float j_upper_limit = bsz + w - 1.0f;
    float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy;
    float3 res;

    /* Gauss-Newton iterations; stop early when the SSD no longer decreases */
    for (int t = 0; t < num_inner_iter; t++)
    {
        INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy);
        res = processPatchMeanNorm(I0_ptr + i * w + j,
                                   I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * w + j,
                                   I0y_ptr + i * w + j, w, w_ext, w00, w01, w10, w11, psz,
                                   x_grad_sum, y_grad_sum);

        SSD = res.x;
        dUx = res.y;
        dUy = res.z;
        dx = invH11 * dUx + invH12 * dUy;
        dy = invH12 * dUx + invH22 * dUy;

        cur_Ux -= dx;
        cur_Uy -= dy;

        if (SSD >= prev_SSD)
            break;
        prev_SSD = SSD;
    }

    /* Reject diverged updates: keep the result only if the total displacement
       change stays within one patch size */
    float2 vec = (float2)(cur_Ux - Ux, cur_Uy - Uy);
    if ((dot(vec, vec)) <= (float)(psz * psz))
    {
        Sx_ptr[index] = cur_Ux;
        Sy_ptr[index] = cur_Uy;
    }
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jin Ma jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/* Central-difference image gradient with replicated borders.
 * One work-item per pixel (x, y); writes dI/dx into dx and dI/dy into dy,
 * each scaled by 0.5 as usual for centered differences. Neighbor indices are
 * clamped to the image so border pixels use one-sided differences. */
__kernel void centeredGradientKernel(__global const float* src_ptr, int src_col, int src_row, int src_step,
                                     __global float* dx, __global float* dy, int d_step)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    if (x >= src_col || y >= src_row)
        return;

    int x_right = min(x + 1, src_col - 1);
    int x_left = max(x - 1, 0);
    int y_down = min(y + 1, src_row - 1);
    int y_up = max(y - 1, 0);

    dx[y * d_step + x] = 0.5f * (src_ptr[y * src_step + x_right] - src_ptr[y * src_step + x_left]);
    dy[y * d_step + x] = 0.5f * (src_ptr[y_down * src_step + x] - src_ptr[y_up * src_step + x]);
}
/* Bicubic interpolation weight for a sample at (signed) offset x_ from the
 * interpolation point; nonzero only for |x_| < 2. The two cubic segments use
 * the standard a = -0.5 coefficients. */
inline float bicubicCoeff(float x_)
{
    float x = fabs(x_);
    if (x >= 2.0f)
        return 0.0f;                                      /* outside the support */
    if (x > 1.0f)
        return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; /* outer segment, 1 < |x| < 2 */
    return x * x * (1.5f * x - 2.5f) + 1.0f;               /* inner segment, |x| <= 1 */
}
/* Warp I1 (and its gradients) backward by the current flow (u1, u2) using
 * bicubic interpolation over a 4x4 neighborhood read from image objects.
 * For each pixel writes the warped intensity I1w, warped gradients
 * I1wx/I1wy, squared gradient magnitude grad, and the constant part of the
 * TV-L1 residual rho. */
__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row,
                                 image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y,
                                 __global const float* u1, int u1_step,
                                 __global const float* u2,
                                 __global float* I1w,
                                 __global float* I1wx, /*int I1wx_step,*/
                                 __global float* I1wy, /*int I1wy_step,*/
                                 __global float* grad, /*int grad_step,*/
                                 __global float* rho,
                                 int I1w_step,
                                 int u2_step,
                                 int u1_offset_x,
                                 int u1_offset_y,
                                 int u2_offset_x,
                                 int u2_offset_y)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if(x < I0_col&&y < I0_row)
    {
        //float u1Val = u1(y, x);
        float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
        //float u2Val = u2(y, x);
        float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];

        /* Source position in I1 and the 4x4 bicubic footprint around it */
        float wx = x + u1Val;
        float wy = y + u2Val;

        int xmin = ceil(wx - 2.0f);
        int xmax = floor(wx + 2.0f);

        int ymin = ceil(wy - 2.0f);
        int ymax = floor(wy + 2.0f);

        float sum  = 0.0f;
        float sumx = 0.0f;
        float sumy = 0.0f;
        float wsum = 0.0f;
        sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

        /* Accumulate bicubic-weighted samples of I1 and its gradients */
        for (int cy = ymin; cy <= ymax; ++cy)
        {
            for (int cx = xmin; cx <= xmax; ++cx)
            {
                float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);

                //sum += w * tex2D(tex_I1 , cx, cy);
                int2 cood = (int2)(cx, cy);
                sum += w * read_imagef(tex_I1, sampleri, cood).x;
                //sumx += w * tex2D(tex_I1x, cx, cy);
                sumx += w * read_imagef(tex_I1x, sampleri, cood).x;
                //sumy += w * tex2D(tex_I1y, cx, cy);
                sumy += w * read_imagef(tex_I1y, sampleri, cood).x;
                wsum += w;
            }
        }

        float coeff = 1.0f / wsum;

        float I1wVal  = sum  * coeff;
        float I1wxVal = sumx * coeff;
        float I1wyVal = sumy * coeff;

        I1w[y * I1w_step + x]  = I1wVal;
        I1wx[y * I1w_step + x] = I1wxVal;
        I1wy[y * I1w_step + x] = I1wyVal;

        float Ix2 = I1wxVal * I1wxVal;
        float Iy2 = I1wyVal * I1wyVal;

        // store the |Grad(I1)|^2
        grad[y * I1w_step + x] = Ix2 + Iy2;

        // compute the constant part of the rho function
        float I0Val = I0[y * I0_step + x];
        rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
    }
}
/* Clamped read from a row-major float buffer: x is the column index (clamped
 * to [0, cols-1]), y the row index (clamped to [0, rows-1]), elemCntPerRow
 * the row stride in elements. Emulates CLK_ADDRESS_CLAMP_TO_EDGE sampling
 * for the no-image2d fallback path. */
inline float readImage(__global const float *image, int x, int y, int rows, int cols, int elemCntPerRow)
{
    int i0 = clamp(x, 0, cols - 1);
    int j0 = clamp(y, 0, rows - 1);

    return image[j0 * elemCntPerRow + i0];
}
/* Fallback version of warpBackwardKernel for devices without image2d
 * support: warps I1 and its gradients backward by the flow (u1, u2) using
 * bicubic interpolation, with edge clamping done by readImage.
 * Fixed: the readImage calls previously passed (I0_col, I0_row) into the
 * (rows, cols) parameters, so for non-square frames the column index was
 * clamped against the row count and vice versa — clamping against the wrong
 * dimension and risking out-of-bounds reads at the borders. Arguments are
 * now passed in the declared (rows, cols) order. */
__kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row,
                                          __global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y,
                                          __global const float* u1, int u1_step,
                                          __global const float* u2,
                                          __global float* I1w,
                                          __global float* I1wx, /*int I1wx_step,*/
                                          __global float* I1wy, /*int I1wy_step,*/
                                          __global float* grad, /*int grad_step,*/
                                          __global float* rho,
                                          int I1w_step,
                                          int u2_step,
                                          int I1_step,
                                          int I1x_step)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if(x < I0_col&&y < I0_row)
    {
        //float u1Val = u1(y, x);
        float u1Val = u1[y * u1_step + x];
        //float u2Val = u2(y, x);
        float u2Val = u2[y * u2_step + x];

        /* Source position in I1 and the 4x4 bicubic footprint around it */
        float wx = x + u1Val;
        float wy = y + u2Val;

        int xmin = ceil(wx - 2.0f);
        int xmax = floor(wx + 2.0f);

        int ymin = ceil(wy - 2.0f);
        int ymax = floor(wy + 2.0f);

        float sum  = 0.0f;
        float sumx = 0.0f;
        float sumy = 0.0f;
        float wsum = 0.0f;

        /* Accumulate bicubic-weighted, edge-clamped samples of I1 and its gradients */
        for (int cy = ymin; cy <= ymax; ++cy)
        {
            for (int cx = xmin; cx <= xmax; ++cx)
            {
                float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);

                int2 cood = (int2)(cx, cy);
                sum += w * readImage(tex_I1, cood.x, cood.y, I0_row, I0_col, I1_step);
                sumx += w * readImage(tex_I1x, cood.x, cood.y, I0_row, I0_col, I1x_step);
                sumy += w * readImage(tex_I1y, cood.x, cood.y, I0_row, I0_col, I1x_step);
                wsum += w;
            }
        }

        float coeff = 1.0f / wsum;

        float I1wVal  = sum  * coeff;
        float I1wxVal = sumx * coeff;
        float I1wyVal = sumy * coeff;

        I1w[y * I1w_step + x]  = I1wVal;
        I1wx[y * I1w_step + x] = I1wxVal;
        I1wy[y * I1w_step + x] = I1wyVal;

        float Ix2 = I1wxVal * I1wxVal;
        float Iy2 = I1wyVal * I1wyVal;

        // store the |Grad(I1)|^2
        grad[y * I1w_step + x] = Ix2 + Iy2;

        // compute the constant part of the rho function
        float I0Val = I0[y * I0_step + x];
        rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
    }
}
// Updates the dual variables (p11, p12) and (p21, p22) from the forward
// differences of u1 and u2 (one work-item per pixel). u1/u2 may be ROIs, hence
// the explicit (offset_x, offset_y) pairs; the p* buffers are addressed with a
// single stride p11_step — assumes all four p buffers share one stride
// (they are allocated identically by the host — TODO confirm).
__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step,
    __global const float* u2,
    __global float* p11, int p11_step,
    __global float* p12,
    __global float* p21,
    __global float* p22,
    float taut,
    int u2_step,
    int u1_offset_x,
    int u1_offset_y,
    int u2_offset_x,
    int u2_offset_y)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    if(x < u1_col && y < u1_row)
    {
        // forward differences with replicated last column/row
        int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
        float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
        int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
        float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
        int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
        float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
        int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
        float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
        // gradient magnitudes of u1 and u2
        float g1 = hypot(u1x, u1y);
        float g2 = hypot(u2x, u2y);
        // reprojection denominators (taut = tau / theta)
        float ng1 = 1.0f + taut * g1;
        float ng2 = 1.0f + taut * g2;
        // gradient-ascent step followed by reprojection onto the unit ball
        p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1;
        p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1;
        p21[y * p11_step + x] = (p21[y * p11_step + x] + taut * u2x) / ng2;
        p22[y * p11_step + x] = (p22[y * p11_step + x] + taut * u2y) / ng2;
    }
}
// Backward-difference divergence of the dual field (v1, v2) at (y, x).
// At the x == 0 / y == 0 borders the missing neighbor term is dropped
// (homogeneous boundary), matching the discretization of the primal update.
inline float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
{
    if (x > 0 && y > 0)
    {
        const float dv1 = v1[y * v1_step + x] - v1[y * v1_step + x - 1];
        const float dv2 = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x];
        return dv1 + dv2;
    }
    if (y > 0) // x == 0: no left neighbor for v1
        return v1[y * v1_step] + v2[y * v2_step] - v2[(y - 1) * v2_step];
    if (x > 0) // y == 0: no upper neighbor for v2
        return v1[x] - v1[x - 1] + v2[x];
    return v1[0] + v2[0]; // top-left corner
}
// One primal update step: applies the thresholding operator TH to obtain the
// auxiliary variable (v1, v2), then updates the flow (u1, u2) from the
// divergence of the dual variables. Optionally writes the squared per-pixel
// change into `error` for the convergence test. All read-only inputs share
// the stride I1wx_step; u1/u2 may be ROIs (hence the offsets).
__kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step,
    __global const float* I1wy, /*int I1wy_step,*/
    __global const float* grad, /*int grad_step,*/
    __global const float* rho_c, /*int rho_c_step,*/
    __global const float* p11, /*int p11_step,*/
    __global const float* p12, /*int p12_step,*/
    __global const float* p21, /*int p21_step,*/
    __global const float* p22, /*int p22_step,*/
    __global float* u1, int u1_step,
    __global float* u2,
    __global float* error, float l_t, float theta, int u2_step,
    int u1_offset_x,
    int u1_offset_y,
    int u2_offset_x,
    int u2_offset_y,
    char calc_error)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    if(x < I1wx_col && y < I1wx_row)
    {
        float I1wxVal = I1wx[y * I1wx_step + x];
        float I1wyVal = I1wy[y * I1wx_step + x];
        float gradVal = grad[y * I1wx_step + x];
        float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
        float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
        // full residual of the linearized data term at the current flow
        float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
        // estimate the values of the variable (v1, v2) (thresholding operator TH)
        float d1 = 0.0f;
        float d2 = 0.0f;
        if (rho < -l_t * gradVal)
        {
            // residual strongly negative: step along the gradient
            d1 = l_t * I1wxVal;
            d2 = l_t * I1wyVal;
        }
        else if (rho > l_t * gradVal)
        {
            // residual strongly positive: step against the gradient
            d1 = -l_t * I1wxVal;
            d2 = -l_t * I1wyVal;
        }
        else if (gradVal > 1.192092896e-07f) // > FLT_EPSILON: avoid div-by-zero
        {
            // small residual: project it to zero exactly
            float fi = -rho / gradVal;
            d1 = fi * I1wxVal;
            d2 = fi * I1wyVal;
        }
        float v1 = u1OldVal + d1;
        float v2 = u2OldVal + d2;
        // compute the divergence of the dual variable (p1, p2)
        float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step);
        float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step);
        // estimate the values of the optical flow (u1, u2)
        float u1NewVal = v1 + theta * div_p1;
        float u2NewVal = v2 + theta * div_p2;
        u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal;
        u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal;
        if(calc_error)
        {
            // squared update magnitude, summed on the host for convergence
            float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
            float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
            error[y * I1wx_step + x] = n1 + n2;
        }
    }
}
......@@ -10,14 +10,10 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
......@@ -31,7 +27,7 @@
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
......@@ -43,69 +39,91 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include<iostream>
#include<fstream>
#include "../perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
namespace cv {
#ifdef HAVE_OPENCL
const float FLOW_TAG_FLOAT = 202021.25f;
const char *FLOW_TAG_STRING = "PIEH";
CV_EXPORTS_W Mat readOpticalFlow( const String& path )
{
Mat_<Point2f> flow;
std::ifstream file(path.c_str(), std::ios_base::binary);
if ( !file.good() )
return flow; // no file - return empty matrix
namespace opencv_test {
namespace ocl {
float tag;
file.read((char*) &tag, sizeof(float));
if ( tag != FLOW_TAG_FLOAT )
return flow;
///////////// OpticalFlow Dual TVL1 ////////////////////////
typedef tuple< tuple<int, double>, bool> OpticalFlowDualTVL1Params;
typedef TestBaseWithParam<OpticalFlowDualTVL1Params> OpticalFlowDualTVL1Fixture;
int width, height;
OCL_PERF_TEST_P(OpticalFlowDualTVL1Fixture, OpticalFlowDualTVL1,
::testing::Combine(
::testing::Values(make_tuple<int, double>(-1, 0.3),
make_tuple<int, double>(3, 0.5)),
::testing::Bool()
)
)
{
Mat frame0 = imread(getDataPath("cv/optflow/RubberWhale1.png"), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty()) << "can't load RubberWhale1.png";
file.read((char*) &width, 4);
file.read((char*) &height, 4);
Mat frame1 = imread(getDataPath("cv/optflow/RubberWhale2.png"), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty()) << "can't load RubberWhale2.png";
flow.create(height, width);
const Size srcSize = frame0.size();
for ( int i = 0; i < flow.rows; ++i )
{
for ( int j = 0; j < flow.cols; ++j )
{
Point2f u;
file.read((char*) &u.x, sizeof(float));
file.read((char*) &u.y, sizeof(float));
if ( !file.good() )
{
flow.release();
return flow;
}
flow(i, j) = u;
}
}
file.close();
return flow;
}
const OpticalFlowDualTVL1Params params = GetParam();
const tuple<int, double> filteringScale = get<0>(params);
const int medianFiltering = get<0>(filteringScale);
const double scaleStep = get<1>(filteringScale);
const bool useInitFlow = get<1>(params);
double eps = 0.9;
CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow )
{
const int nChannels = 2;
UMat uFrame0; frame0.copyTo(uFrame0);
UMat uFrame1; frame1.copyTo(uFrame1);
UMat uFlow(srcSize, CV_32FC2);
declare.in(uFrame0, uFrame1, WARMUP_READ).out(uFlow, WARMUP_READ);
Mat input = flow.getMat();
if ( input.channels() != nChannels || input.depth() != CV_32F || path.length() == 0 )
return false;
//create algorithm
cv::Ptr<cv::DualTVL1OpticalFlow> alg = cv::createOptFlow_DualTVL1();
std::ofstream file(path.c_str(), std::ofstream::binary);
if ( !file.good() )
return false;
//set parameters
alg->setScaleStep(scaleStep);
alg->setMedianFiltering(medianFiltering);
int nRows, nCols;
if (useInitFlow)
{
//calculate initial flow as result of optical flow
alg->calc(uFrame0, uFrame1, uFlow);
}
nRows = (int) input.size().height;
nCols = (int) input.size().width;
//set flag to use initial flow
alg->setUseInitialFlow(useInitFlow);
OCL_TEST_CYCLE()
alg->calc(uFrame0, uFrame1, uFlow);
const int headerSize = 12;
char header[headerSize];
memcpy(header, FLOW_TAG_STRING, 4);
// size of ints is known - has been asserted in the current function
memcpy(header + 4, reinterpret_cast<const char*>(&nCols), sizeof(nCols));
memcpy(header + 8, reinterpret_cast<const char*>(&nRows), sizeof(nRows));
file.write(header, headerSize);
if ( !file.good() )
return false;
SANITY_CHECK(uFlow, eps, ERROR_RELATIVE);
int row;
char* p;
for ( row = 0; row < nRows; row++ )
{
p = input.ptr<char>(row);
file.write(p, nCols * nChannels * sizeof(float));
if ( !file.good() )
return false;
}
file.close();
return true;
}
} // namespace opencv_test::ocl
#endif // HAVE_OPENCL
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*
//
// This implementation is based on Javier Sánchez Pérez <jsanchez@dis.ulpgc.es> implementation.
// Original BSD license:
//
// Copyright (c) 2011, Javier Sánchez Pérez, Enric Meinhardt Llopis
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
*/
#include "precomp.hpp"
#include "opencl_kernels_video.hpp"
#include <limits>
#include <iomanip>
#include <iostream>
#include "opencv2/core/opencl/ocl_defs.hpp"
using namespace cv;
namespace {
// Concrete implementation of the "Dual TV-L1" optical flow algorithm
// (Zach et al. 2007 / Sanchez et al. 2012 parameterization). Exposes every
// algorithm parameter through the trivial getters/setters declared by the
// DualTVL1OpticalFlow interface.
class OpticalFlowDual_TVL1 : public DualTVL1OpticalFlow
{
public:
    // Fully parameterized constructor: stores the supplied settings verbatim.
    OpticalFlowDual_TVL1(double tau_, double lambda_, double theta_, int nscales_, int warps_,
        double epsilon_, int innerIterations_, int outerIterations_,
        double scaleStep_, double gamma_, int medianFiltering_,
        bool useInitialFlow_) :
        tau(tau_), lambda(lambda_), theta(theta_), gamma(gamma_), nscales(nscales_),
        warps(warps_), epsilon(epsilon_), innerIterations(innerIterations_),
        outerIterations(outerIterations_), useInitialFlow(useInitialFlow_),
        scaleStep(scaleStep_), medianFiltering(medianFiltering_)
    {
    }
    // Default constructor: defined out of line; installs the default parameter set.
    OpticalFlowDual_TVL1();
    // Computes flow from I0 to I1; may dispatch to the OpenCL path.
    void calc(InputArray I0, InputArray I1, InputOutputArray flow) CV_OVERRIDE;
    // Releases the internal pyramid/buffer storage.
    void collectGarbage() CV_OVERRIDE;
    // Trivial accessors for the algorithm parameters (see member docs below).
    inline double getTau() const CV_OVERRIDE { return tau; }
    inline void setTau(double val) CV_OVERRIDE { tau = val; }
    inline double getLambda() const CV_OVERRIDE { return lambda; }
    inline void setLambda(double val) CV_OVERRIDE { lambda = val; }
    inline double getTheta() const CV_OVERRIDE { return theta; }
    inline void setTheta(double val) CV_OVERRIDE { theta = val; }
    inline double getGamma() const CV_OVERRIDE { return gamma; }
    inline void setGamma(double val) CV_OVERRIDE { gamma = val; }
    inline int getScalesNumber() const CV_OVERRIDE { return nscales; }
    inline void setScalesNumber(int val) CV_OVERRIDE { nscales = val; }
    inline int getWarpingsNumber() const CV_OVERRIDE { return warps; }
    inline void setWarpingsNumber(int val) CV_OVERRIDE { warps = val; }
    inline double getEpsilon() const CV_OVERRIDE { return epsilon; }
    inline void setEpsilon(double val) CV_OVERRIDE { epsilon = val; }
    inline int getInnerIterations() const CV_OVERRIDE { return innerIterations; }
    inline void setInnerIterations(int val) CV_OVERRIDE { innerIterations = val; }
    inline int getOuterIterations() const CV_OVERRIDE { return outerIterations; }
    inline void setOuterIterations(int val) CV_OVERRIDE { outerIterations = val; }
    inline bool getUseInitialFlow() const CV_OVERRIDE { return useInitialFlow; }
    inline void setUseInitialFlow(bool val) CV_OVERRIDE { useInitialFlow = val; }
    inline double getScaleStep() const CV_OVERRIDE { return scaleStep; }
    inline void setScaleStep(double val) CV_OVERRIDE { scaleStep = val; }
    inline int getMedianFiltering() const CV_OVERRIDE { return medianFiltering; }
    inline void setMedianFiltering(int val) CV_OVERRIDE { medianFiltering = val; }
protected:
    double tau;            // time step of the numerical scheme
    double lambda;         // data-term weight (smoothness trade-off)
    double theta;          // coupling between data and smoothness terms
    double gamma;          // illumination-term weight; 0 disables the u3 channel
    int nscales;           // number of pyramid scales
    int warps;             // warpings per scale
    double epsilon;        // convergence threshold
    int innerIterations;   // inner (primal-dual) iterations per warp
    int outerIterations;   // outer iterations per warp
    bool useInitialFlow;   // seed from the caller-supplied flow
    double scaleStep;      // pyramid downscale factor
    int medianFiltering;   // median filter kernel size (applied to the flow)
private:
    // CPU solver for one pyramid level (u3 is the illumination channel).
    void procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3);
#ifdef HAVE_OPENCL
    // OpenCL counterparts; return false on kernel failure so the caller
    // can fall back to the CPU path.
    bool procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2);
    bool calc_ocl(InputArray I0, InputArray I1, InputOutputArray flow);
#endif
    // Scratch buffers for the CPU path, reused across calls to avoid
    // repeated allocation.
    struct dataMat
    {
        std::vector<Mat_<float> > I0s;
        std::vector<Mat_<float> > I1s;
        std::vector<Mat_<float> > u1s;
        std::vector<Mat_<float> > u2s;
        std::vector<Mat_<float> > u3s;
        Mat_<float> I1x_buf;
        Mat_<float> I1y_buf;
        Mat_<float> flowMap1_buf;
        Mat_<float> flowMap2_buf;
        Mat_<float> I1w_buf;
        Mat_<float> I1wx_buf;
        Mat_<float> I1wy_buf;
        Mat_<float> grad_buf;
        Mat_<float> rho_c_buf;
        Mat_<float> v1_buf;
        Mat_<float> v2_buf;
        Mat_<float> v3_buf;
        Mat_<float> p11_buf;
        Mat_<float> p12_buf;
        Mat_<float> p21_buf;
        Mat_<float> p22_buf;
        Mat_<float> p31_buf;
        Mat_<float> p32_buf;
        Mat_<float> div_p1_buf;
        Mat_<float> div_p2_buf;
        Mat_<float> div_p3_buf;
        Mat_<float> u1x_buf;
        Mat_<float> u1y_buf;
        Mat_<float> u2x_buf;
        Mat_<float> u2y_buf;
        Mat_<float> u3x_buf;
        Mat_<float> u3y_buf;
    } dm;
#ifdef HAVE_OPENCL
    // Scratch buffers for the OpenCL path (no u3/illumination channel).
    struct dataUMat
    {
        std::vector<UMat> I0s;
        std::vector<UMat> I1s;
        std::vector<UMat> u1s;
        std::vector<UMat> u2s;
        UMat I1x_buf;
        UMat I1y_buf;
        UMat I1w_buf;
        UMat I1wx_buf;
        UMat I1wy_buf;
        UMat grad_buf;
        UMat rho_c_buf;
        UMat p11_buf;
        UMat p12_buf;
        UMat p21_buf;
        UMat p22_buf;
        UMat diff_buf;
        UMat norm_buf;
    } dum;
#endif
};
#ifdef HAVE_OPENCL
// Host-side wrappers around the TV-L1 OpenCL kernels. Each wrapper creates
// the kernel, binds its arguments and enqueues it; all return false if the
// kernel could not be created or launched.
namespace cv_ocl_tvl1flow
{
    bool centeredGradient(const UMat &src, UMat &dx, UMat &dy);
    bool warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y,
                      UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy,
                      UMat &grad, UMat &rho);
    bool estimateU(UMat &I1wx, UMat &I1wy, UMat &grad,
                   UMat &rho_c, UMat &p11, UMat &p12,
                   UMat &p21, UMat &p22, UMat &u1,
                   UMat &u2, UMat &error, float l_t, float theta, char calc_error);
    bool estimateDualVariables(UMat &u1, UMat &u2,
                               UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut);
}
bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy)
{
size_t globalsize[2] = { (size_t)src.cols, (size_t)src.rows };
ocl::Kernel kernel;
if (!kernel.create("centeredGradientKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
return false;
int idxArg = 0;
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));//src mat
idxArg = kernel.set(idxArg, (int)(src.cols));//src mat col
idxArg = kernel.set(idxArg, (int)(src.rows));//src mat rows
idxArg = kernel.set(idxArg, (int)(src.step / src.elemSize()));//src mat step
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dx));//res mat dx
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dy));//res mat dy
idxArg = kernel.set(idxArg, (int)(dx.step/dx.elemSize()));//res mat step
return kernel.run(2, globalsize, NULL, false);
}
// Launches warpBackwardKernel (image2d_t variant): warps I1 and its gradients
// by the current flow (u1, u2) and fills I1w/I1wx/I1wy/grad/rho.
// Returns false if the kernel cannot be created or enqueued.
bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y,
    UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy,
    UMat &grad, UMat &rho)
{
    size_t globalsize[2] = { (size_t)I0.cols, (size_t)I0.rows };
    ocl::Kernel kernel;
    if (!kernel.create("warpBackwardKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
        return false;
    int idxArg = 0;
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I0));//I0 mat
    int I0_step = (int)(I0.step / I0.elemSize());
    idxArg = kernel.set(idxArg, I0_step);//I0_step
    idxArg = kernel.set(idxArg, (int)(I0.cols));//I0_col
    idxArg = kernel.set(idxArg, (int)(I0.rows));//I0_row
    // wrap I1 and its gradients as images so the kernel can sample them
    ocl::Image2D imageI1(I1);
    ocl::Image2D imageI1x(I1x);
    ocl::Image2D imageI1y(I1y);
    idxArg = kernel.set(idxArg, imageI1);//image2d_t tex_I1
    idxArg = kernel.set(idxArg, imageI1x);//image2d_t tex_I1x
    idxArg = kernel.set(idxArg, imageI1y);//image2d_t tex_I1y
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));//const float* u1
    idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize()));//int u1_step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2));//const float* u2
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1w));///float* I1w
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wx));//float* I1wx
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wy));//float* I1wy
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(grad));//float* grad
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(rho));//float* rho
    idxArg = kernel.set(idxArg, (int)(I1w.step / I1w.elemSize()));//I1w_step
    idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//u2_step
    // u1/u2 may be ROIs: convert byte offset into (x, y) element offsets
    int u1_offset_x = (int)((u1.offset) % (u1.step));
    u1_offset_x = (int)(u1_offset_x / u1.elemSize());
    idxArg = kernel.set(idxArg, (int)u1_offset_x );//u1_offset_x
    idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step));//u1_offset_y
    int u2_offset_x = (int)((u2.offset) % (u2.step));
    u2_offset_x = (int) (u2_offset_x / u2.elemSize());
    idxArg = kernel.set(idxArg, (int)u2_offset_x);//u2_offset_x
    idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step));//u2_offset_y
    return kernel.run(2, globalsize, NULL, false);
}
// Launches estimateUKernel: one primal update of the flow (u1, u2) from the
// warped data terms and dual variables; optionally fills `error` with the
// squared per-pixel change when calc_error is non-zero.
// Returns false if the kernel cannot be created or enqueued.
bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad,
    UMat &rho_c, UMat &p11, UMat &p12,
    UMat &p21, UMat &p22, UMat &u1,
    UMat &u2, UMat &error, float l_t, float theta, char calc_error)
{
    size_t globalsize[2] = { (size_t)I1wx.cols, (size_t)I1wx.rows };
    ocl::Kernel kernel;
    if (!kernel.create("estimateUKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
        return false;
    int idxArg = 0;
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wx)); //const float* I1wx
    idxArg = kernel.set(idxArg, (int)(I1wx.cols)); //int I1wx_col
    idxArg = kernel.set(idxArg, (int)(I1wx.rows)); //int I1wx_row
    idxArg = kernel.set(idxArg, (int)(I1wx.step/I1wx.elemSize())); //int I1wx_step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wy)); //const float* I1wy
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(grad)); //const float* grad
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(rho_c)); //const float* rho_c
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p11)); //const float* p11
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p12)); //const float* p12
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p21)); //const float* p21
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p22)); //const float* p22
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u1)); //float* u1
    idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize())); //int u1_step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u2)); //float* u2
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(error)); //float* error
    idxArg = kernel.set(idxArg, (float)l_t); //float l_t
    idxArg = kernel.set(idxArg, (float)theta); //float theta
    idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//int u2_step
    // u1/u2 may be ROIs: convert byte offset into (x, y) element offsets
    int u1_offset_x = (int)(u1.offset % u1.step);
    u1_offset_x = (int) (u1_offset_x / u1.elemSize());
    idxArg = kernel.set(idxArg, (int)u1_offset_x); //int u1_offset_x
    idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step)); //int u1_offset_y
    int u2_offset_x = (int)(u2.offset % u2.step);
    u2_offset_x = (int)(u2_offset_x / u2.elemSize());
    idxArg = kernel.set(idxArg, (int)u2_offset_x ); //int u2_offset_x
    idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y
    idxArg = kernel.set(idxArg, (char)calc_error); //char calc_error
    return kernel.run(2, globalsize, NULL, false);
}
// Launches estimateDualVariablesKernel: one ascent + reprojection step on the
// dual variables (p11, p12, p21, p22) from the gradients of u1 and u2.
// Returns false if the kernel cannot be created or enqueued.
bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2,
    UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut)
{
    ocl::Kernel k;
    if (!k.create("estimateDualVariablesKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
        return false;

    // row strides in elements, plus the (x, y) element offsets of the
    // u1/u2 ROIs inside their parent buffers
    const int u1Step  = (int)(u1.step / u1.elemSize());
    const int u2Step  = (int)(u2.step / u2.elemSize());
    const int p11Step = (int)(p11.step / p11.elemSize());
    const int u1OffX  = (int)((u1.offset % u1.step) / u1.elemSize());
    const int u1OffY  = (int)(u1.offset / u1.step);
    const int u2OffX  = (int)((u2.offset % u2.step) / u2.elemSize());
    const int u2OffY  = (int)(u2.offset / u2.step);

    // argument order must match the kernel signature exactly
    int arg = 0;
    arg = k.set(arg, ocl::KernelArg::PtrReadOnly(u1));   // const float* u1
    arg = k.set(arg, (int)u1.cols);                      // int u1_col
    arg = k.set(arg, (int)u1.rows);                      // int u1_row
    arg = k.set(arg, u1Step);                            // int u1_step
    arg = k.set(arg, ocl::KernelArg::PtrReadOnly(u2));   // const float* u2
    arg = k.set(arg, ocl::KernelArg::PtrReadWrite(p11)); // float* p11
    arg = k.set(arg, p11Step);                           // int p11_step
    arg = k.set(arg, ocl::KernelArg::PtrReadWrite(p12)); // float* p12
    arg = k.set(arg, ocl::KernelArg::PtrReadWrite(p21)); // float* p21
    arg = k.set(arg, ocl::KernelArg::PtrReadWrite(p22)); // float* p22
    arg = k.set(arg, taut);                              // float taut
    arg = k.set(arg, u2Step);                            // int u2_step
    arg = k.set(arg, u1OffX);                            // int u1_offset_x
    arg = k.set(arg, u1OffY);                            // int u1_offset_y
    arg = k.set(arg, u2OffX);                            // int u2_offset_x
    arg = k.set(arg, u2OffY);                            // int u2_offset_y

    size_t globalsize[2] = { (size_t)u1.cols, (size_t)u1.rows };
    return k.run(2, globalsize, NULL, false);
}
#endif
// Default constructor: installs the default TV-L1 parameter set.
OpticalFlowDual_TVL1::OpticalFlowDual_TVL1()
{
    // primal-dual scheme parameters
    tau             = 0.25;
    lambda          = 0.15;
    theta           = 0.3;
    gamma           = 0.;    // illumination term disabled by default
    epsilon         = 0.01;  // convergence threshold
    // pyramid / iteration structure
    nscales         = 5;
    warps           = 5;
    innerIterations = 30;
    outerIterations = 10;
    scaleStep       = 0.8;
    // post-processing and initialization
    medianFiltering = 5;
    useInitialFlow  = false;
}
// Computes dense optical flow from _I0 to _I1 into _flow (CV_32FC2).
// Dispatches to the OpenCL path when possible (except on Apple platforms),
// otherwise runs the CPU coarse-to-fine pyramid solver.
void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow)
{
    CV_INSTRUMENT_REGION();
#ifndef __APPLE__
    // OpenCL path requires UMat output and single-channel float image support
    CV_OCL_RUN(_flow.isUMat() &&
               ocl::Image2D::isFormatSupported(CV_32F, 1, false),
               calc_ocl(_I0, _I1, _flow))
#endif
    Mat I0 = _I0.getMat();
    Mat I1 = _I1.getMat();
    CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
    CV_Assert( I0.size() == I1.size() );
    CV_Assert( I0.type() == I1.type() );
    CV_Assert( !useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2) );
    CV_Assert( nscales > 0 );
    // gamma != 0 enables the third (illumination) channel u3
    bool use_gamma = gamma != 0;
    // allocate memory for the pyramid structure
    dm.I0s.resize(nscales);
    dm.I1s.resize(nscales);
    dm.u1s.resize(nscales);
    dm.u2s.resize(nscales);
    dm.u3s.resize(nscales);
    // 8-bit input is kept as-is; float input is scaled by 255
    // (presumably to normalize a [0,1] float image to [0,255] — the same
    // intensity range as 8-bit input)
    I0.convertTo(dm.I0s[0], dm.I0s[0].depth(), I0.depth() == CV_8U ? 1.0 : 255.0);
    I1.convertTo(dm.I1s[0], dm.I1s[0].depth(), I1.depth() == CV_8U ? 1.0 : 255.0);
    dm.u1s[0].create(I0.size());
    dm.u2s[0].create(I0.size());
    if (use_gamma) dm.u3s[0].create(I0.size());
    if (useInitialFlow)
    {
        // seed finest level from the caller-provided flow
        Mat_<float> mv[] = { dm.u1s[0], dm.u2s[0] };
        split(_flow.getMat(), mv);
    }
    // scratch buffers at the finest resolution (reused by every level)
    dm.I1x_buf.create(I0.size());
    dm.I1y_buf.create(I0.size());
    dm.flowMap1_buf.create(I0.size());
    dm.flowMap2_buf.create(I0.size());
    dm.I1w_buf.create(I0.size());
    dm.I1wx_buf.create(I0.size());
    dm.I1wy_buf.create(I0.size());
    dm.grad_buf.create(I0.size());
    dm.rho_c_buf.create(I0.size());
    dm.v1_buf.create(I0.size());
    dm.v2_buf.create(I0.size());
    dm.v3_buf.create(I0.size());
    dm.p11_buf.create(I0.size());
    dm.p12_buf.create(I0.size());
    dm.p21_buf.create(I0.size());
    dm.p22_buf.create(I0.size());
    dm.p31_buf.create(I0.size());
    dm.p32_buf.create(I0.size());
    dm.div_p1_buf.create(I0.size());
    dm.div_p2_buf.create(I0.size());
    dm.div_p3_buf.create(I0.size());
    dm.u1x_buf.create(I0.size());
    dm.u1y_buf.create(I0.size());
    dm.u2x_buf.create(I0.size());
    dm.u2y_buf.create(I0.size());
    dm.u3x_buf.create(I0.size());
    dm.u3y_buf.create(I0.size());
    // create the scales
    for (int s = 1; s < nscales; ++s)
    {
        resize(dm.I0s[s - 1], dm.I0s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);
        resize(dm.I1s[s - 1], dm.I1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);
        // stop building the pyramid once a level gets too small to solve
        if (dm.I0s[s].cols < 16 || dm.I0s[s].rows < 16)
        {
            nscales = s;
            break;
        }
        if (useInitialFlow)
        {
            // downscale the seed flow and shrink its magnitude accordingly
            resize(dm.u1s[s - 1], dm.u1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);
            resize(dm.u2s[s - 1], dm.u2s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);
            multiply(dm.u1s[s], Scalar::all(scaleStep), dm.u1s[s]);
            multiply(dm.u2s[s], Scalar::all(scaleStep), dm.u2s[s]);
        }
        else
        {
            dm.u1s[s].create(dm.I0s[s].size());
            dm.u2s[s].create(dm.I0s[s].size());
        }
        if (use_gamma) dm.u3s[s].create(dm.I0s[s].size());
    }
    if (!useInitialFlow)
    {
        // start from zero flow at the coarsest level
        dm.u1s[nscales - 1].setTo(Scalar::all(0));
        dm.u2s[nscales - 1].setTo(Scalar::all(0));
    }
    if (use_gamma) dm.u3s[nscales - 1].setTo(Scalar::all(0));
    // pyramidal structure for computing the optical flow
    for (int s = nscales - 1; s >= 0; --s)
    {
        // compute the optical flow at the current scale
        procOneScale(dm.I0s[s], dm.I1s[s], dm.u1s[s], dm.u2s[s], dm.u3s[s]);
        // if this was the last scale, finish now
        if (s == 0)
            break;
        // otherwise, upsample the optical flow
        // zoom the optical flow for the next finer scale
        resize(dm.u1s[s], dm.u1s[s - 1], dm.I0s[s - 1].size(), 0, 0, INTER_LINEAR);
        resize(dm.u2s[s], dm.u2s[s - 1], dm.I0s[s - 1].size(), 0, 0, INTER_LINEAR);
        if (use_gamma) resize(dm.u3s[s], dm.u3s[s - 1], dm.I0s[s - 1].size(), 0, 0, INTER_LINEAR);
        // scale the optical flow with the appropriate zoom factor (don't scale u3!)
        multiply(dm.u1s[s - 1], Scalar::all(1 / scaleStep), dm.u1s[s - 1]);
        multiply(dm.u2s[s - 1], Scalar::all(1 / scaleStep), dm.u2s[s - 1]);
    }
    // pack (u, v) into the 2-channel output
    Mat uxy[] = { dm.u1s[0], dm.u2s[0] };
    merge(uxy, 2, _flow);
}
#ifdef HAVE_OPENCL
// OpenCL counterpart of calc(): coarse-to-fine pyramid solve on UMats.
// Returns false if any kernel launch fails, letting the caller fall back
// to the CPU path. Note: the OpenCL path has no illumination (u3) channel.
bool OpticalFlowDual_TVL1::calc_ocl(InputArray _I0, InputArray _I1, InputOutputArray _flow)
{
    UMat I0 = _I0.getUMat();
    UMat I1 = _I1.getUMat();
    CV_Assert(I0.type() == CV_8UC1 || I0.type() == CV_32FC1);
    CV_Assert(I0.size() == I1.size());
    CV_Assert(I0.type() == I1.type());
    CV_Assert(!useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2));
    CV_Assert(nscales > 0);
    // allocate memory for the pyramid structure
    dum.I0s.resize(nscales);
    dum.I1s.resize(nscales);
    dum.u1s.resize(nscales);
    dum.u2s.resize(nscales);
    //I0s_step == I1s_step
    // 8-bit input is kept as-is; float input is scaled by 255 (same
    // normalization as the CPU path)
    double alpha = I0.depth() == CV_8U ? 1.0 : 255.0;
    I0.convertTo(dum.I0s[0], CV_32F, alpha);
    I1.convertTo(dum.I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);
    dum.u1s[0].create(I0.size(), CV_32FC1);
    dum.u2s[0].create(I0.size(), CV_32FC1);
    if (useInitialFlow)
    {
        // seed finest level from the caller-provided flow
        // NOTE(review): split() may reallocate the vector's UMats rather than
        // write into dum.u1s[0]/u2s[0] in place — verify the seed actually
        // lands in the pyramid buffers
        std::vector<UMat> umv;
        umv.push_back(dum.u1s[0]);
        umv.push_back(dum.u2s[0]);
        cv::split(_flow,umv);
    }
    // scratch buffers at the finest resolution (reused by every level)
    dum.I1x_buf.create(I0.size(), CV_32FC1);
    dum.I1y_buf.create(I0.size(), CV_32FC1);
    dum.I1w_buf.create(I0.size(), CV_32FC1);
    dum.I1wx_buf.create(I0.size(), CV_32FC1);
    dum.I1wy_buf.create(I0.size(), CV_32FC1);
    dum.grad_buf.create(I0.size(), CV_32FC1);
    dum.rho_c_buf.create(I0.size(), CV_32FC1);
    dum.p11_buf.create(I0.size(), CV_32FC1);
    dum.p12_buf.create(I0.size(), CV_32FC1);
    dum.p21_buf.create(I0.size(), CV_32FC1);
    dum.p22_buf.create(I0.size(), CV_32FC1);
    dum.diff_buf.create(I0.size(), CV_32FC1);
    // create the scales
    for (int s = 1; s < nscales; ++s)
    {
        resize(dum.I0s[s - 1], dum.I0s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);
        resize(dum.I1s[s - 1], dum.I1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);
        // stop building the pyramid once a level gets too small to solve
        if (dum.I0s[s].cols < 16 || dum.I0s[s].rows < 16)
        {
            nscales = s;
            break;
        }
        if (useInitialFlow)
        {
            resize(dum.u1s[s - 1], dum.u1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);
            resize(dum.u2s[s - 1], dum.u2s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);
            //scale by scale factor
            multiply(dum.u1s[s], Scalar::all(scaleStep), dum.u1s[s]);
            multiply(dum.u2s[s], Scalar::all(scaleStep), dum.u2s[s]);
        }
    }
    // pyramidal structure for computing the optical flow
    for (int s = nscales - 1; s >= 0; --s)
    {
        // compute the optical flow at the current scale
        if (!OpticalFlowDual_TVL1::procOneScale_ocl(dum.I0s[s], dum.I1s[s], dum.u1s[s], dum.u2s[s]))
            return false;
        // if this was the last scale, finish now
        if (s == 0)
            break;
        // zoom the optical flow for the next finer scale
        resize(dum.u1s[s], dum.u1s[s - 1], dum.I0s[s - 1].size(), 0, 0, INTER_LINEAR);
        resize(dum.u2s[s], dum.u2s[s - 1], dum.I0s[s - 1].size(), 0, 0, INTER_LINEAR);
        // scale the optical flow with the appropriate zoom factor
        multiply(dum.u1s[s - 1], Scalar::all(1 / scaleStep), dum.u1s[s - 1]);
        multiply(dum.u2s[s - 1], Scalar::all(1 / scaleStep), dum.u2s[s - 1]);
    }
    // pack (u, v) into the 2-channel output
    std::vector<UMat> uxy;
    uxy.push_back(dum.u1s[0]);
    uxy.push_back(dum.u2s[0]);
    merge(uxy, _flow);
    return true;
}
#endif
////////////////////////////////////////////////////////////
// buildFlowMap
// Parallel body that converts a flow field (u1, u2) into absolute remap
// coordinates (map1, map2) suitable for cv::remap. Rows are split across
// threads by parallel_for_.
struct BuildFlowMapBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;
    Mat_<float> u1;           // horizontal flow component (input)
    Mat_<float> u2;           // vertical flow component (input)
    mutable Mat_<float> map1; // output x-coordinates (mutable: written from const op())
    mutable Mat_<float> map2; // output y-coordinates
};
// For each pixel in the assigned row range, the remap target is the pixel's
// own coordinate displaced by the flow at that pixel.
void BuildFlowMapBody::operator() (const Range& range) const
{
    const int cols = u1.cols;
    for (int y = range.start; y < range.end; ++y)
    {
        const float* flowX = u1[y];
        const float* flowY = u2[y];
        float* dstX = map1[y];
        float* dstY = map2[y];
        for (int x = 0; x < cols; ++x)
        {
            dstX[x] = x + flowX[x];
            dstY[x] = y + flowY[x];
        }
    }
}
void buildFlowMap(const Mat_<float>& u1, const Mat_<float>& u2, Mat_<float>& map1, Mat_<float>& map2)
{
CV_DbgAssert( u2.size() == u1.size() );
CV_DbgAssert( map1.size() == u1.size() );
CV_DbgAssert( map2.size() == u1.size() );
BuildFlowMapBody body;
body.u1 = u1;
body.u2 = u2;
body.map1 = map1;
body.map2 = map2;
parallel_for_(Range(0, u1.rows), body);
}
////////////////////////////////////////////////////////////
// centeredGradient
// Parallel body computing central-difference derivatives of the image
// interior; the serial caller handles the border rows/columns.
struct CenteredGradientBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;

    Mat_<float> src;            // input image plane
    mutable Mat_<float> dx;     // output: d/dx (central differences)
    mutable Mat_<float> dy;     // output: d/dy (central differences)
};
void CenteredGradientBody::operator() (const Range& range) const
{
    // Central differences over the image interior only; the serial caller
    // fills in the first/last rows and columns with one-sided differences.
    const int lastCol = src.cols - 1;
    for (int row = range.start; row < range.end; ++row)
    {
        const float* above = src[row - 1];
        const float* cur   = src[row];
        const float* below = src[row + 1];
        float* gx = dx[row];
        float* gy = dy[row];
        for (int col = 1; col < lastCol; ++col)
        {
            gx[col] = 0.5f * (cur[col + 1] - cur[col - 1]);
            gy[col] = 0.5f * (below[col] - above[col]);
        }
    }
}
// Compute the centered (central-difference) gradient of src into dx/dy.
// The interior is processed in parallel; borders fall back to one-sided
// differences so dx/dy have the same size as src.
void centeredGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy)
{
    CV_DbgAssert( src.rows > 2 && src.cols > 2 );
    CV_DbgAssert( dx.size() == src.size() );
    CV_DbgAssert( dy.size() == src.size() );
    const int last_row = src.rows - 1;
    const int last_col = src.cols - 1;
    // compute the gradient on the center body of the image
    {
        CenteredGradientBody body;
        body.src = src;
        body.dx = dx;
        body.dy = dy;
        parallel_for_(Range(1, last_row), body);
    }
    // compute the gradient on the first and last rows
    // (vertical derivative degrades to a one-sided difference there)
    for (int x = 1; x < last_col; ++x)
    {
        dx(0, x) = 0.5f * (src(0, x + 1) - src(0, x - 1));
        dy(0, x) = 0.5f * (src(1, x) - src(0, x));
        dx(last_row, x) = 0.5f * (src(last_row, x + 1) - src(last_row, x - 1));
        dy(last_row, x) = 0.5f * (src(last_row, x) - src(last_row - 1, x));
    }
    // compute the gradient on the first and last columns
    // (horizontal derivative degrades to a one-sided difference there)
    for (int y = 1; y < last_row; ++y)
    {
        dx(y, 0) = 0.5f * (src(y, 1) - src(y, 0));
        dy(y, 0) = 0.5f * (src(y + 1, 0) - src(y - 1, 0));
        dx(y, last_col) = 0.5f * (src(y, last_col) - src(y, last_col - 1));
        dy(y, last_col) = 0.5f * (src(y + 1, last_col) - src(y - 1, last_col));
    }
    // compute the gradient at the four corners (both derivatives one-sided)
    dx(0, 0) = 0.5f * (src(0, 1) - src(0, 0));
    dy(0, 0) = 0.5f * (src(1, 0) - src(0, 0));
    dx(0, last_col) = 0.5f * (src(0, last_col) - src(0, last_col - 1));
    dy(0, last_col) = 0.5f * (src(1, last_col) - src(0, last_col));
    dx(last_row, 0) = 0.5f * (src(last_row, 1) - src(last_row, 0));
    dy(last_row, 0) = 0.5f * (src(last_row, 0) - src(last_row - 1, 0));
    dx(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row, last_col - 1));
    dy(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row - 1, last_col));
}
////////////////////////////////////////////////////////////
// forwardGradient
// Parallel body computing forward-difference derivatives; the serial caller
// zero-fills the last row/column where no forward neighbor exists.
struct ForwardGradientBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;

    Mat_<float> src;            // input image plane
    mutable Mat_<float> dx;     // output: src(x+1,y) - src(x,y)
    mutable Mat_<float> dy;     // output: src(x,y+1) - src(x,y)
};
void ForwardGradientBody::operator() (const Range& range) const
{
    // Forward differences: dx = src(x+1) - src(x), dy = src(y+1) - src(y).
    // The last row and column are handled serially by the caller.
    const int lastCol = src.cols - 1;
    for (int row = range.start; row < range.end; ++row)
    {
        const float* cur  = src[row];
        const float* next = src[row + 1];
        float* gx = dx[row];
        float* gy = dy[row];
        for (int col = 0; col < lastCol; ++col)
        {
            gx[col] = cur[col + 1] - cur[col];
            gy[col] = next[col] - cur[col];
        }
    }
}
void forwardGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy)
{
CV_DbgAssert( src.rows > 2 && src.cols > 2 );
CV_DbgAssert( dx.size() == src.size() );
CV_DbgAssert( dy.size() == src.size() );
const int last_row = src.rows - 1;
const int last_col = src.cols - 1;
// compute the gradient on the central body of the image
{
ForwardGradientBody body;
body.src = src;
body.dx = dx;
body.dy = dy;
parallel_for_(Range(0, last_row), body);
}
// compute the gradient on the last row
for (int x = 0; x < last_col; ++x)
{
dx(last_row, x) = src(last_row, x + 1) - src(last_row, x);
dy(last_row, x) = 0.0f;
}
// compute the gradient on the last column
for (int y = 0; y < last_row; ++y)
{
dx(y, last_col) = 0.0f;
dy(y, last_col) = src(y + 1, last_col) - src(y, last_col);
}
dx(last_row, last_col) = 0.0f;
dy(last_row, last_col) = 0.0f;
}
////////////////////////////////////////////////////////////
// divergence
// Parallel body computing the backward-difference divergence of the dual
// field (v1, v2); the serial caller fills the first row and column.
struct DivergenceBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;

    Mat_<float> v1;             // first component of the dual field
    Mat_<float> v2;             // second component of the dual field
    mutable Mat_<float> div;    // output: div(v1, v2)
};
void DivergenceBody::operator() (const Range& range) const
{
    // Backward-difference divergence for rows >= 1 and columns >= 1; the
    // caller handles the first row and the first column serially.
    const int width = v1.cols;
    for (int row = range.start; row < range.end; ++row)
    {
        const float* v1Cur = v1[row];
        const float* v2Up  = v2[row - 1];
        const float* v2Cur = v2[row];
        float* out = div[row];
        for (int col = 1; col < width; ++col)
        {
            const float dvx = v1Cur[col] - v1Cur[col - 1];
            const float dvy = v2Cur[col] - v2Up[col];
            out[col] = dvx + dvy;
        }
    }
}
// Compute the (backward-difference) divergence of the dual field (v1, v2)
// into div. The interior runs in parallel; the first row/column use the
// adjoint boundary conditions of the forward gradient.
void divergence(const Mat_<float>& v1, const Mat_<float>& v2, Mat_<float>& div)
{
    CV_DbgAssert( v1.rows > 2 && v1.cols > 2 );
    CV_DbgAssert( v2.size() == v1.size() );
    CV_DbgAssert( div.size() == v1.size() );
    {
        DivergenceBody body;
        body.v1 = v1;
        body.v2 = v2;
        body.div = div;
        parallel_for_(Range(1, v1.rows), body);
    }
    // compute the divergence on the first row (no row above: v2 term is v2 itself)
    for(int x = 1; x < v1.cols; ++x)
        div(0, x) = v1(0, x) - v1(0, x - 1) + v2(0, x);
    // compute the divergence on the first column (no column to the left)
    for (int y = 1; y < v1.rows; ++y)
        div(y, 0) = v1(y, 0) + v2(y, 0) - v2(y - 1, 0);
    // top-left corner: both backward neighbors are missing
    div(0, 0) = v1(0, 0) + v2(0, 0);
}
////////////////////////////////////////////////////////////
// calcGradRho
// Parallel body computing, per pixel, the squared gradient magnitude of the
// warped image and the constant part of the data-fidelity term rho.
struct CalcGradRhoBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;

    Mat_<float> I0;             // reference frame
    Mat_<float> I1w;            // second frame warped by the current flow
    Mat_<float> I1wx;           // warped horizontal derivative of I1
    Mat_<float> I1wy;           // warped vertical derivative of I1
    Mat_<float> u1;             // current flow, horizontal component
    Mat_<float> u2;             // current flow, vertical component
    mutable Mat_<float> grad;   // output: |grad I1w|^2
    mutable Mat_<float> rho_c;  // output: constant part of rho(u)
};
void CalcGradRhoBody::operator() (const Range& range) const
{
    const int width = I0.cols;
    for (int row = range.start; row < range.end; ++row)
    {
        const float* refRow  = I0[row];
        const float* warpRow = I1w[row];
        const float* gxRow   = I1wx[row];
        const float* gyRow   = I1wy[row];
        const float* flowX   = u1[row];
        const float* flowY   = u2[row];
        float* gradOut = grad[row];
        float* rhoOut  = rho_c[row];
        for (int col = 0; col < width; ++col)
        {
            const float gx = gxRow[col];
            const float gy = gyRow[col];
            // |grad I1w|^2 = Ix^2 + Iy^2
            gradOut[col] = gx * gx + gy * gy;
            // constant part of the rho function (data-fidelity residual)
            rhoOut[col] = warpRow[col] - gx * flowX[col] - gy * flowY[col] - refRow[col];
        }
    }
}
// Fill grad with |grad I1w|^2 and rho_c with the constant part of the
// data-fidelity term rho, given the reference frame I0, the warped second
// frame I1w, its warped derivatives I1wx/I1wy, and the current flow (u1, u2).
void calcGradRho(const Mat_<float>& I0, const Mat_<float>& I1w, const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2,
    Mat_<float>& grad, Mat_<float>& rho_c)
{
    CV_DbgAssert( I1w.size() == I0.size() );
    CV_DbgAssert( I1wx.size() == I0.size() );
    CV_DbgAssert( I1wy.size() == I0.size() );
    CV_DbgAssert( u1.size() == I0.size() );
    CV_DbgAssert( u2.size() == I0.size() );
    CV_DbgAssert( grad.size() == I0.size() );
    CV_DbgAssert( rho_c.size() == I0.size() );
    CalcGradRhoBody body;
    body.I0 = I0;
    body.I1w = I1w;
    body.I1wx = I1wx;
    body.I1wy = I1wy;
    body.u1 = u1;
    body.u2 = u2;
    body.grad = grad;
    body.rho_c = rho_c;
    parallel_for_(Range(0, I0.rows), body);
}
////////////////////////////////////////////////////////////
// estimateV
// Parallel body applying the TV-L1 thresholding operator TH to obtain the
// auxiliary variables (v1, v2) — and (v3) when the gamma term is active.
struct EstimateVBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;

    Mat_<float> I1wx;           // warped horizontal derivative of I1
    Mat_<float> I1wy;           // warped vertical derivative of I1
    Mat_<float> u1;             // current flow, horizontal component
    Mat_<float> u2;             // current flow, vertical component
    Mat_<float> u3;             // gamma (illumination) channel; valid only when gamma != 0
    Mat_<float> grad;           // |grad I1w|^2 per pixel
    Mat_<float> rho_c;          // constant part of rho(u)
    mutable Mat_<float> v1;     // output auxiliary variable (horizontal)
    mutable Mat_<float> v2;     // output auxiliary variable (vertical)
    mutable Mat_<float> v3;     // output auxiliary variable (gamma); valid only when gamma != 0
    float l_t;                  // lambda * theta
    float gamma;                // weight of the illumination term (0 disables it)
};
// Apply the pointwise thresholding operator TH: depending on where rho(u)
// falls relative to +/- l_t * |grad I1w|^2, step by +/- l_t along the gradient
// or project onto the zero level set of rho.
void EstimateVBody::operator() (const Range& range) const
{
    bool use_gamma = gamma != 0;
    for (int y = range.start; y < range.end; ++y)
    {
        const float* I1wxRow = I1wx[y];
        const float* I1wyRow = I1wy[y];
        const float* u1Row = u1[y];
        const float* u2Row = u2[y];
        const float* u3Row = use_gamma?u3[y]:NULL;
        const float* gradRow = grad[y];
        const float* rhoRow = rho_c[y];
        float* v1Row = v1[y];
        float* v2Row = v2[y];
        float* v3Row = use_gamma ? v3[y]:NULL;
        for (int x = 0; x < I1wx.cols; ++x)
        {
            // full residual rho(u) = rho_c + <grad I1w, u> (+ gamma*u3 if enabled)
            const float rho = use_gamma ? rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]) + gamma * u3Row[x] :
                                          rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]);
            float d1 = 0.0f;
            float d2 = 0.0f;
            float d3 = 0.0f;
            if (rho < -l_t * gradRow[x])
            {
                // residual strongly negative: step +l_t along the gradient
                d1 = l_t * I1wxRow[x];
                d2 = l_t * I1wyRow[x];
                if (use_gamma) d3 = l_t * gamma;
            }
            else if (rho > l_t * gradRow[x])
            {
                // residual strongly positive: step -l_t along the gradient
                d1 = -l_t * I1wxRow[x];
                d2 = -l_t * I1wyRow[x];
                if (use_gamma) d3 = -l_t * gamma;
            }
            else if (gradRow[x] > std::numeric_limits<float>::epsilon())
            {
                // small residual: project onto the zero level set of rho
                float fi = -rho / gradRow[x];
                d1 = fi * I1wxRow[x];
                d2 = fi * I1wyRow[x];
                if (use_gamma) d3 = fi * gamma;
            }
            // (if grad is ~0 the step stays zero and v == u)
            v1Row[x] = u1Row[x] + d1;
            v2Row[x] = u2Row[x] + d2;
            if (use_gamma) v3Row[x] = u3Row[x] + d3;
        }
    }
}
// Estimate the auxiliary variables (v1, v2[, v3]) with the TV-L1 thresholding
// operator TH (see EstimateVBody). The u3/v3 planes are used only when
// gamma != 0, in which case they must match the common size as well.
void estimateV(const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2, const Mat_<float>& u3, const Mat_<float>& grad, const Mat_<float>& rho_c,
    Mat_<float>& v1, Mat_<float>& v2, Mat_<float>& v3, float l_t, float gamma)
{
    CV_DbgAssert( I1wy.size() == I1wx.size() );
    CV_DbgAssert( u1.size() == I1wx.size() );
    CV_DbgAssert( u2.size() == I1wx.size() );
    CV_DbgAssert( grad.size() == I1wx.size() );
    CV_DbgAssert( rho_c.size() == I1wx.size() );
    CV_DbgAssert( v1.size() == I1wx.size() );
    CV_DbgAssert( v2.size() == I1wx.size() );

    bool use_gamma = gamma != 0;
    // The gamma planes are read/written by the body only when gamma != 0;
    // validate them in that case for consistency with the other planes.
    if (use_gamma)
    {
        CV_DbgAssert( u3.size() == I1wx.size() );
        CV_DbgAssert( v3.size() == I1wx.size() );
    }

    EstimateVBody body;
    body.I1wx = I1wx;
    body.I1wy = I1wy;
    body.u1 = u1;
    body.u2 = u2;
    if (use_gamma) body.u3 = u3;
    body.grad = grad;
    body.rho_c = rho_c;
    body.v1 = v1;
    body.v2 = v2;
    if (use_gamma) body.v3 = v3;
    body.l_t = l_t;
    body.gamma = gamma;
    parallel_for_(Range(0, I1wx.rows), body);
}
////////////////////////////////////////////////////////////
// estimateU
// Update the flow estimate u from the auxiliary variables v and the
// divergences of the dual variables: u = v + theta * div(p).
// Returns the sum of squared changes in u, used as the convergence criterion.
// The *3 planes participate only when gamma != 0.
float estimateU(const Mat_<float>& v1, const Mat_<float>& v2, const Mat_<float>& v3,
    const Mat_<float>& div_p1, const Mat_<float>& div_p2, const Mat_<float>& div_p3,
    Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3,
    float theta, float gamma)
{
    CV_DbgAssert( v2.size() == v1.size() );
    CV_DbgAssert( div_p1.size() == v1.size() );
    CV_DbgAssert( div_p2.size() == v1.size() );
    CV_DbgAssert( u1.size() == v1.size() );
    CV_DbgAssert( u2.size() == v1.size() );
    float error = 0.0f;
    bool use_gamma = gamma != 0;
    for (int y = 0; y < v1.rows; ++y)
    {
        const float* v1Row = v1[y];
        const float* v2Row = v2[y];
        const float* v3Row = use_gamma?v3[y]:NULL;
        const float* divP1Row = div_p1[y];
        const float* divP2Row = div_p2[y];
        const float* divP3Row = use_gamma?div_p3[y]:NULL;
        float* u1Row = u1[y];
        float* u2Row = u2[y];
        float* u3Row = use_gamma?u3[y]:NULL;
        for (int x = 0; x < v1.cols; ++x)
        {
            // remember the previous values to accumulate the squared change
            const float u1k = u1Row[x];
            const float u2k = u2Row[x];
            const float u3k = use_gamma?u3Row[x]:0;
            u1Row[x] = v1Row[x] + theta * divP1Row[x];
            u2Row[x] = v2Row[x] + theta * divP2Row[x];
            if (use_gamma) u3Row[x] = v3Row[x] + theta * divP3Row[x];
            error += use_gamma?(u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k) + (u3Row[x] - u3k) * (u3Row[x] - u3k):
                               (u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k);
        }
    }
    return error;
}
////////////////////////////////////////////////////////////
// estimateDualVariables
// Parallel body updating the dual variables p from the flow gradients
// (projected gradient ascent followed by reprojection onto the unit ball).
struct EstimateDualVariablesBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;

    Mat_<float> u1x;            // gradient of u1 (x component)
    Mat_<float> u1y;            // gradient of u1 (y component)
    Mat_<float> u2x;            // gradient of u2 (x component)
    Mat_<float> u2y;            // gradient of u2 (y component)
    Mat_<float> u3x;            // gradient of u3 (x component); used only when use_gamma
    Mat_<float> u3y;            // gradient of u3 (y component); used only when use_gamma
    mutable Mat_<float> p11;    // dual variable for u1, x component
    mutable Mat_<float> p12;    // dual variable for u1, y component
    mutable Mat_<float> p21;    // dual variable for u2, x component
    mutable Mat_<float> p22;    // dual variable for u2, y component
    mutable Mat_<float> p31;    // dual variable for u3, x component (gamma term)
    mutable Mat_<float> p32;    // dual variable for u3, y component (gamma term)
    float taut;                 // tau / theta
    bool use_gamma;             // whether the gamma (illumination) term is active
};
// Per-pixel dual update: p <- (p + taut * grad(u)) / (1 + taut * |grad(u)|),
// which is a gradient-ascent step followed by reprojection.
void EstimateDualVariablesBody::operator() (const Range& range) const
{
    for (int y = range.start; y < range.end; ++y)
    {
        const float* u1xRow = u1x[y];
        const float* u1yRow = u1y[y];
        const float* u2xRow = u2x[y];
        const float* u2yRow = u2y[y];
        // NOTE: these rows are fetched unconditionally; the caller asserts the
        // u3x/u3y buffers always have the right size, so this is safe even
        // when use_gamma is false (they are only dereferenced below when true).
        const float* u3xRow = u3x[y];
        const float* u3yRow = u3y[y];
        float* p11Row = p11[y];
        float* p12Row = p12[y];
        float* p21Row = p21[y];
        float* p22Row = p22[y];
        float* p31Row = p31[y];
        float* p32Row = p32[y];
        for (int x = 0; x < u1x.cols; ++x)
        {
            // magnitudes of the flow gradients
            const float g1 = static_cast<float>(hypot(u1xRow[x], u1yRow[x]));
            const float g2 = static_cast<float>(hypot(u2xRow[x], u2yRow[x]));
            const float ng1 = 1.0f + taut * g1;
            const float ng2 = 1.0f + taut * g2;
            p11Row[x] = (p11Row[x] + taut * u1xRow[x]) / ng1;
            p12Row[x] = (p12Row[x] + taut * u1yRow[x]) / ng1;
            p21Row[x] = (p21Row[x] + taut * u2xRow[x]) / ng2;
            p22Row[x] = (p22Row[x] + taut * u2yRow[x]) / ng2;
            if (use_gamma)
            {
                const float g3 = static_cast<float>(hypot(u3xRow[x], u3yRow[x]));
                const float ng3 = 1.0f + taut * g3;
                p31Row[x] = (p31Row[x] + taut * u3xRow[x]) / ng3;
                p32Row[x] = (p32Row[x] + taut * u3yRow[x]) / ng3;
            }
        }
    }
}
// Update all dual variables p from the flow gradients (u1x..u3y).
// All buffers must share the same size; the p31/p32 planes are only modified
// when use_gamma is true.
void estimateDualVariables(const Mat_<float>& u1x, const Mat_<float>& u1y,
    const Mat_<float>& u2x, const Mat_<float>& u2y,
    const Mat_<float>& u3x, const Mat_<float>& u3y,
    Mat_<float>& p11, Mat_<float>& p12,
    Mat_<float>& p21, Mat_<float>& p22,
    Mat_<float>& p31, Mat_<float>& p32,
    float taut, bool use_gamma)
{
    CV_DbgAssert( u1y.size() == u1x.size() );
    CV_DbgAssert( u2x.size() == u1x.size() );
    CV_DbgAssert( u3x.size() == u1x.size() );
    CV_DbgAssert( u2y.size() == u1x.size() );
    CV_DbgAssert( u3y.size() == u1x.size() );
    CV_DbgAssert( p11.size() == u1x.size() );
    CV_DbgAssert( p12.size() == u1x.size() );
    CV_DbgAssert( p21.size() == u1x.size() );
    CV_DbgAssert( p22.size() == u1x.size() );
    CV_DbgAssert( p31.size() == u1x.size() );
    CV_DbgAssert( p32.size() == u1x.size() );
    EstimateDualVariablesBody body;
    body.u1x = u1x;
    body.u1y = u1y;
    body.u2x = u2x;
    body.u2y = u2y;
    body.u3x = u3x;
    body.u3y = u3y;
    body.p11 = p11;
    body.p12 = p12;
    body.p21 = p21;
    body.p22 = p22;
    body.p31 = p31;
    body.p32 = p32;
    body.taut = taut;
    body.use_gamma = use_gamma;
    parallel_for_(Range(0, u1x.rows), body);
}
#ifdef HAVE_OPENCL
// OpenCL variant of procOneScale: run the TV-L1 warping/iteration loop for a
// single pyramid level on UMats. Returns false if any OpenCL kernel fails,
// letting the caller fall back to the CPU path.
bool OpticalFlowDual_TVL1::procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2)
{
    using namespace cv_ocl_tvl1flow;
    // convergence threshold, scaled by the number of pixels
    const double scaledEpsilon = epsilon * epsilon * I0.size().area();
    CV_DbgAssert(I1.size() == I0.size());
    CV_DbgAssert(I1.type() == I0.type());
    CV_DbgAssert(u1.empty() || u1.size() == I0.size());
    CV_DbgAssert(u2.size() == u1.size());
    if (u1.empty())
    {
        // start from a zero flow field when no initial flow is provided
        u1.create(I0.size(), CV_32FC1);
        u1.setTo(Scalar::all(0));
        u2.create(I0.size(), CV_32FC1);
        u2.setTo(Scalar::all(0));
    }
    // all working images are views into preallocated buffers in dum
    UMat I1x = dum.I1x_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat I1y = dum.I1y_buf(Rect(0, 0, I0.cols, I0.rows));
    if (!centeredGradient(I1, I1x, I1y))
        return false;
    UMat I1w = dum.I1w_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat I1wx = dum.I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat I1wy = dum.I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat grad = dum.grad_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat rho_c = dum.rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat p11 = dum.p11_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat p12 = dum.p12_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat p21 = dum.p21_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat p22 = dum.p22_buf(Rect(0, 0, I0.cols, I0.rows));
    // dual variables start at zero at every scale
    p11.setTo(Scalar::all(0));
    p12.setTo(Scalar::all(0));
    p21.setTo(Scalar::all(0));
    p22.setTo(Scalar::all(0));
    UMat diff = dum.diff_buf(Rect(0, 0, I0.cols, I0.rows));
    const float l_t = static_cast<float>(lambda * theta);
    const float taut = static_cast<float>(tau / theta);
    int n;
    for (int warpings = 0; warpings < warps; ++warpings)
    {
        // warp I1 (and its derivatives) by the current flow estimate
        if (!warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c))
            return false;
        double error = std::numeric_limits<double>::max();
        double prev_error = 0;
        for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer)
        {
            if (medianFiltering > 1) {
                cv::medianBlur(u1, u1, medianFiltering);
                cv::medianBlur(u2, u2, medianFiltering);
            }
            for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner)
            {
                // a tweak to compute the (expensive) error sum less frequently:
                // only on every other iteration, and only once the previous
                // error was already close to the threshold
                n = n_inner + n_outer*innerIterations;
                char calc_error = (n & 0x1) && (prev_error < scaledEpsilon);
                if (!estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
                               u1, u2, diff, l_t, static_cast<float>(theta), calc_error))
                    return false;
                if (calc_error)
                {
                    error = cv::sum(diff)[0];
                    prev_error = error;
                }
                else
                {
                    // keep iterating; pretend the error is still large
                    error = std::numeric_limits<double>::max();
                    prev_error -= scaledEpsilon;
                }
                if (!estimateDualVariables(u1, u2, p11, p12, p21, p22, taut))
                    return false;
            }
        }
    }
    return true;
}
#endif
// CPU implementation of one pyramid level of the TV-L1 method: alternate
// between warping I1 by the current flow and running the primal-dual
// iterations until convergence (or the iteration limits are reached).
// u3 is the optional illumination channel, used only when gamma != 0.
void OpticalFlowDual_TVL1::procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3)
{
    // convergence threshold, scaled by the number of pixels
    const float scaledEpsilon = static_cast<float>(epsilon * epsilon * I0.size().area());
    CV_DbgAssert( I1.size() == I0.size() );
    CV_DbgAssert( I1.type() == I0.type() );
    CV_DbgAssert( u1.size() == I0.size() );
    CV_DbgAssert( u2.size() == u1.size() );
    // all working images are views into preallocated buffers in dm
    Mat_<float> I1x = dm.I1x_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> I1y = dm.I1y_buf(Rect(0, 0, I0.cols, I0.rows));
    centeredGradient(I1, I1x, I1y);
    Mat_<float> flowMap1 = dm.flowMap1_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> flowMap2 = dm.flowMap2_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> I1w = dm.I1w_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> I1wx = dm.I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> I1wy = dm.I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> grad = dm.grad_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> rho_c = dm.rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> v1 = dm.v1_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> v2 = dm.v2_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> v3 = dm.v3_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p11 = dm.p11_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p12 = dm.p12_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p21 = dm.p21_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p22 = dm.p22_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p31 = dm.p31_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p32 = dm.p32_buf(Rect(0, 0, I0.cols, I0.rows));
    // dual variables start at zero at every scale
    p11.setTo(Scalar::all(0));
    p12.setTo(Scalar::all(0));
    p21.setTo(Scalar::all(0));
    p22.setTo(Scalar::all(0));
    bool use_gamma = gamma != 0.;
    if (use_gamma) p31.setTo(Scalar::all(0));
    if (use_gamma) p32.setTo(Scalar::all(0));
    Mat_<float> div_p1 = dm.div_p1_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> div_p2 = dm.div_p2_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> div_p3 = dm.div_p3_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u1x = dm.u1x_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u1y = dm.u1y_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u2x = dm.u2x_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u2y = dm.u2y_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u3x = dm.u3x_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u3y = dm.u3y_buf(Rect(0, 0, I0.cols, I0.rows));
    const float l_t = static_cast<float>(lambda * theta);
    const float taut = static_cast<float>(tau / theta);
    for (int warpings = 0; warpings < warps; ++warpings)
    {
        // compute the warping of the target image and its derivatives
        buildFlowMap(u1, u2, flowMap1, flowMap2);
        remap(I1, I1w, flowMap1, flowMap2, INTER_CUBIC);
        remap(I1x, I1wx, flowMap1, flowMap2, INTER_CUBIC);
        remap(I1y, I1wy, flowMap1, flowMap2, INTER_CUBIC);
        //calculate I1(x+u0) and its gradient
        calcGradRho(I0, I1w, I1wx, I1wy, u1, u2, grad, rho_c);
        float error = std::numeric_limits<float>::max();
        for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer)
        {
            if (medianFiltering > 1) {
                cv::medianBlur(u1, u1, medianFiltering);
                cv::medianBlur(u2, u2, medianFiltering);
            }
            for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner)
            {
                // estimate the values of the variable (v1, v2) (thresholding operator TH)
                estimateV(I1wx, I1wy, u1, u2, u3, grad, rho_c, v1, v2, v3, l_t, static_cast<float>(gamma));
                // compute the divergence of the dual variable (p1, p2, p3)
                divergence(p11, p12, div_p1);
                divergence(p21, p22, div_p2);
                if (use_gamma) divergence(p31, p32, div_p3);
                // estimate the values of the optical flow (u1, u2)
                error = estimateU(v1, v2, v3, div_p1, div_p2, div_p3, u1, u2, u3, static_cast<float>(theta), static_cast<float>(gamma));
                // compute the gradient of the optical flow (Du1, Du2)
                forwardGradient(u1, u1x, u1y);
                forwardGradient(u2, u2x, u2y);
                if (use_gamma) forwardGradient(u3, u3x, u3y);
                // estimate the values of the dual variable (p1, p2, p3)
                estimateDualVariables(u1x, u1y, u2x, u2y, u3x, u3y, p11, p12, p21, p22, p31, p32, taut, use_gamma);
            }
        }
    }
}
// Release all internal buffers so repeated calls on large frames do not keep
// peak memory alive between invocations.
void OpticalFlowDual_TVL1::collectGarbage()
{
    //dataMat structure dm
    dm.I0s.clear();
    dm.I1s.clear();
    dm.u1s.clear();
    dm.u2s.clear();
    dm.I1x_buf.release();
    dm.I1y_buf.release();
    dm.flowMap1_buf.release();
    dm.flowMap2_buf.release();
    dm.I1w_buf.release();
    dm.I1wx_buf.release();
    dm.I1wy_buf.release();
    dm.grad_buf.release();
    dm.rho_c_buf.release();
    dm.v1_buf.release();
    dm.v2_buf.release();
    dm.p11_buf.release();
    dm.p12_buf.release();
    dm.p21_buf.release();
    dm.p22_buf.release();
    dm.div_p1_buf.release();
    dm.div_p2_buf.release();
    dm.u1x_buf.release();
    dm.u1y_buf.release();
    dm.u2x_buf.release();
    dm.u2y_buf.release();
    // gamma-term buffers used by procOneScale when gamma != 0; these were
    // previously never released here (memory kept alive after collectGarbage)
    dm.v3_buf.release();
    dm.p31_buf.release();
    dm.p32_buf.release();
    dm.div_p3_buf.release();
    dm.u3x_buf.release();
    dm.u3y_buf.release();
#ifdef HAVE_OPENCL
    //dataUMat structure dum
    dum.I0s.clear();
    dum.I1s.clear();
    dum.u1s.clear();
    dum.u2s.clear();
    dum.I1x_buf.release();
    dum.I1y_buf.release();
    dum.I1w_buf.release();
    dum.I1wx_buf.release();
    dum.I1wy_buf.release();
    dum.grad_buf.release();
    dum.rho_c_buf.release();
    dum.p11_buf.release();
    dum.p12_buf.release();
    dum.p21_buf.release();
    dum.p22_buf.release();
    dum.diff_buf.release();
    dum.norm_buf.release();
#endif
}
} // namespace
// Factory: construct a TV-L1 optical flow solver with default parameters.
Ptr<DualTVL1OpticalFlow> cv::createOptFlow_DualTVL1()
{
    Ptr<DualTVL1OpticalFlow> solver = makePtr<OpticalFlowDual_TVL1>();
    return solver;
}
// Factory: construct a TV-L1 optical flow solver, forwarding every tuning
// parameter straight to the implementation class.
Ptr<DualTVL1OpticalFlow> cv::DualTVL1OpticalFlow::create(
    double tau, double lambda, double theta, int nscales, int warps,
    double epsilon, int innerIterations, int outerIterations, double scaleStep,
    double gamma, int medianFilter, bool useInitialFlow)
{
    return makePtr<OpticalFlowDual_TVL1>(
        tau, lambda, theta, nscales, warps, epsilon,
        innerIterations, outerIterations, scaleStep,
        gamma, medianFilter, useInitialFlow);
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include "opencv2/core/hal/intrin.hpp"
using namespace std;
namespace cv
{
// Implementation of the variational refinement step (used e.g. after DIS
// optical flow): refines an initial flow field by minimizing a data +
// smoothness energy with fixed-point iterations and Red-Black SOR.
class VariationalRefinementImpl CV_FINAL : public VariationalRefinement
{
  public:
    VariationalRefinementImpl();

    void calc(InputArray I0, InputArray I1, InputOutputArray flow) CV_OVERRIDE;
    void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) CV_OVERRIDE;
    void collectGarbage() CV_OVERRIDE;

  protected: //!< algorithm parameters
    int fixedPointIterations, sorIterations;
    float omega;                 //!< SOR relaxation factor
    float alpha, delta, gamma;   //!< smoothness, color and gradient term weights
    float zeta, epsilon;         //!< robust-penalty regularization constants

  public:
    int getFixedPointIterations() const CV_OVERRIDE { return fixedPointIterations; }
    void setFixedPointIterations(int val) CV_OVERRIDE { fixedPointIterations = val; }
    int getSorIterations() const CV_OVERRIDE { return sorIterations; }
    void setSorIterations(int val) CV_OVERRIDE { sorIterations = val; }
    float getOmega() const CV_OVERRIDE { return omega; }
    void setOmega(float val) CV_OVERRIDE { omega = val; }
    float getAlpha() const CV_OVERRIDE { return alpha; }
    void setAlpha(float val) CV_OVERRIDE { alpha = val; }
    float getDelta() const CV_OVERRIDE { return delta; }
    void setDelta(float val) CV_OVERRIDE { delta = val; }
    float getGamma() const CV_OVERRIDE { return gamma; }
    void setGamma(float val) CV_OVERRIDE { gamma = val; }

  protected: //!< internal buffers
    /* This struct defines a special data layout for Mat_<float>. Original buffer is split into two: one for "red"
     * elements (sum of indices is even) and one for "black" (sum of indices is odd) in a checkerboard pattern. It
     * allows for more efficient processing in SOR iterations, more natural SIMD vectorization and parallelization
     * (Red-Black SOR). Additionally, it simplifies border handling by adding repeated borders to both red and
     * black buffers.
     */
    struct RedBlackBuffer
    {
        Mat_<float> red;   //!< (i+j)%2==0
        Mat_<float> black; //!< (i+j)%2==1

        /* Width of even and odd rows may be different */
        int red_even_len, red_odd_len;
        int black_even_len, black_odd_len;

        void create(Size s);
        void release();
    };

    Mat_<float> Ix, Iy, Iz, Ixx, Ixy, Iyy, Ixz, Iyz;                            //!< image derivative buffers
    RedBlackBuffer Ix_rb, Iy_rb, Iz_rb, Ixx_rb, Ixy_rb, Iyy_rb, Ixz_rb, Iyz_rb; //!< corresponding red-black buffers

    RedBlackBuffer A11, A12, A22, b1, b2; //!< main linear system coefficients
    RedBlackBuffer weights;               //!< smoothness term weights in the current fixed point iteration

    Mat_<float> mapX, mapY;           //!< auxiliary buffers for remapping
    RedBlackBuffer tempW_u, tempW_v;  //!< flow buffers that are modified in each fixed point iteration
    RedBlackBuffer dW_u, dW_v;        //!< optical flow increment
    RedBlackBuffer W_u_rb, W_v_rb;    //!< red-black-buffer version of the input flow

  private: //!< private methods and parallel sections
    void splitCheckerboard(RedBlackBuffer &dst, Mat &src);
    void mergeCheckerboard(Mat &dst, RedBlackBuffer &src);
    void updateRepeatedBorders(RedBlackBuffer &dst);
    void warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v);
    void prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat &W_v);

    /* Parallelizing arbitrary operations with 3 input/output arguments */
    typedef void (VariationalRefinementImpl::*Op)(void *op1, void *op2, void *op3);
    struct ParallelOp_ParBody : public ParallelLoopBody
    {
        VariationalRefinementImpl *var;
        vector<Op> ops;
        vector<void *> op1s;
        vector<void *> op2s;
        vector<void *> op3s;

        ParallelOp_ParBody(VariationalRefinementImpl &_var, vector<Op> _ops, vector<void *> &_op1s,
                           vector<void *> &_op2s, vector<void *> &_op3s);
        void operator()(const Range &range) const CV_OVERRIDE;
    };

    // horizontal Sobel derivative followed by checkerboard split
    void gradHorizAndSplitOp(void *src, void *dst, void *dst_split)
    {
        Sobel(*(Mat *)src, *(Mat *)dst, -1, 1, 0, 1, 1, 0.00, BORDER_REPLICATE);
        splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst);
    }
    // vertical Sobel derivative followed by checkerboard split
    void gradVertAndSplitOp(void *src, void *dst, void *dst_split)
    {
        Sobel(*(Mat *)src, *(Mat *)dst, -1, 0, 1, 1, 1, 0.00, BORDER_REPLICATE);
        splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst);
    }
    // dst = (src1 + src2) / 2
    void averageOp(void *src1, void *src2, void *dst)
    {
        addWeighted(*(Mat *)src1, 0.5, *(Mat *)src2, 0.5, 0.0, *(Mat *)dst, CV_32F);
    }
    // dst = src1 - src2
    void subtractOp(void *src1, void *src2, void *dst)
    {
        subtract(*(Mat *)src1, *(Mat *)src2, *(Mat *)dst, noArray(), CV_32F);
    }

    struct ComputeDataTerm_ParBody : public ParallelLoopBody
    {
        VariationalRefinementImpl *var;
        int nstripes, stripe_sz;
        int h;
        RedBlackBuffer *dW_u, *dW_v;
        bool red_pass;

        ComputeDataTerm_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_dW_u,
                                RedBlackBuffer &_dW_v, bool _red_pass);
        void operator()(const Range &range) const CV_OVERRIDE;
    };

    struct ComputeSmoothnessTermHorPass_ParBody : public ParallelLoopBody
    {
        VariationalRefinementImpl *var;
        int nstripes, stripe_sz;
        int h;
        RedBlackBuffer *W_u, *W_v, *curW_u, *curW_v;
        bool red_pass;

        ComputeSmoothnessTermHorPass_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h,
                                             RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, RedBlackBuffer &_tempW_u,
                                             RedBlackBuffer &_tempW_v, bool _red_pass);
        void operator()(const Range &range) const CV_OVERRIDE;
    };

    struct ComputeSmoothnessTermVertPass_ParBody : public ParallelLoopBody
    {
        VariationalRefinementImpl *var;
        int nstripes, stripe_sz;
        int h;
        RedBlackBuffer *W_u, *W_v;
        bool red_pass;

        ComputeSmoothnessTermVertPass_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h,
                                              RedBlackBuffer &W_u, RedBlackBuffer &_W_v, bool _red_pass);
        void operator()(const Range &range) const CV_OVERRIDE;
    };

    struct RedBlackSOR_ParBody : public ParallelLoopBody
    {
        VariationalRefinementImpl *var;
        int nstripes, stripe_sz;
        int h;
        RedBlackBuffer *dW_u, *dW_v;
        bool red_pass;

        RedBlackSOR_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_dW_u,
                            RedBlackBuffer &_dW_v, bool _red_pass);
        void operator()(const Range &range) const CV_OVERRIDE;
    };
};
VariationalRefinementImpl::VariationalRefinementImpl()
{
    // Default tuning parameters for the variational refinement.
    fixedPointIterations = 5;   // outer fixed-point iterations
    sorIterations = 5;          // SOR sweeps per fixed-point iteration
    omega = 1.6f;               // SOR relaxation factor
    alpha = 20.0f;              // smoothness term weight
    delta = 5.0f;               // color constancy term weight
    gamma = 10.0f;              // gradient constancy term weight
    zeta = 0.1f;                // regularization constant
    epsilon = 0.001f;           // robust penalty constant
}
/* This function converts an input Mat into the RedBlackBuffer format, which involves
* splitting the input buffer into two and adding repeated borders. Assumes that enough
* memory in dst is already allocated.
*/
// Split src into the red/black checkerboard layout of dst, adding one pixel
// of repeated border on every side. dst must already be allocated (via
// RedBlackBuffer::create). Row i of src maps to row i+1 of dst.red/dst.black;
// column 0 and column buf_w-1 of each buffer row hold repeated border values.
void VariationalRefinementImpl::splitCheckerboard(RedBlackBuffer &dst, Mat &src)
{
    int buf_j, j;
    int buf_w = (int)ceil(src.cols / 2.0) + 2; //!< max width of red/black buffers with borders

    /* Rows of red and black buffers can have different actual width, some extra repeated values are
     * added for padding in such cases.
     */
    for (int i = 0; i < src.rows; i++)
    {
        float *src_buf = src.ptr<float>(i);
        float *r_buf = dst.red.ptr<float>(i + 1);
        float *b_buf = dst.black.ptr<float>(i + 1);
        // left border: repeat the first source value into both buffers
        r_buf[0] = b_buf[0] = src_buf[0];
        buf_j = 1;
        if (i % 2 == 0)
        {
            // even source row: columns 0,2,4,... are red; 1,3,5,... are black
            for (j = 0; j < src.cols - 1; j += 2)
            {
                r_buf[buf_j] = src_buf[j];
                b_buf[buf_j] = src_buf[j + 1];
                buf_j++;
            }
            if (j < src.cols)
                // odd width: the trailing column goes to red, duplicated into
                // black as padding so both rows have the same length
                r_buf[buf_j] = b_buf[buf_j] = src_buf[j];
            else
                j--;
        }
        else
        {
            // odd source row: the red/black roles of the columns swap
            for (j = 0; j < src.cols - 1; j += 2)
            {
                b_buf[buf_j] = src_buf[j];
                r_buf[buf_j] = src_buf[j + 1];
                buf_j++;
            }
            if (j < src.cols)
                r_buf[buf_j] = b_buf[buf_j] = src_buf[j];
            else
                j--;
        }
        // right border: repeat the last consumed source value
        r_buf[buf_w - 1] = b_buf[buf_w - 1] = src_buf[j];
    }

    /* Fill top and bottom borders: */
    // note the red/black swap: the row adjacent to the border has the
    // opposite parity, so its red values border the black buffer and vice versa
    {
        float *r_buf_border = dst.red.ptr<float>(dst.red.rows - 1);
        float *b_buf_border = dst.black.ptr<float>(dst.black.rows - 1);
        float *r_buf = dst.red.ptr<float>(dst.red.rows - 2);
        float *b_buf = dst.black.ptr<float>(dst.black.rows - 2);
        memcpy(r_buf_border, b_buf, buf_w * sizeof(float));
        memcpy(b_buf_border, r_buf, buf_w * sizeof(float));
    }
    {
        float *r_buf_border = dst.red.ptr<float>(0);
        float *b_buf_border = dst.black.ptr<float>(0);
        float *r_buf = dst.red.ptr<float>(1);
        float *b_buf = dst.black.ptr<float>(1);
        memcpy(r_buf_border, b_buf, buf_w * sizeof(float));
        memcpy(b_buf_border, r_buf, buf_w * sizeof(float));
    }
}
/* The inverse of splitCheckerboard: interleaves the red and black half-buffers back
 * into a single Mat, discarding the border padding. Assumes that enough memory in dst
 * is already allocated.
 */
void VariationalRefinementImpl::mergeCheckerboard(Mat &dst, RedBlackBuffer &src)
{
    for (int row = 0; row < dst.rows; row++)
    {
        float *red_row = src.red.ptr<float>(row + 1);   /* +1 skips the top border row */
        float *black_row = src.black.ptr<float>(row + 1);
        /* On even rows red elements occupy even columns; on odd rows the colors swap. */
        float *first = (row % 2 == 0) ? red_row : black_row;
        float *second = (row % 2 == 0) ? black_row : red_row;
        float *out = dst.ptr<float>(row);
        int src_pos = 1; /* skip the left border element */
        int col = 0;
        while (col < dst.cols - 1)
        {
            out[col] = first[src_pos];
            out[col + 1] = second[src_pos];
            col += 2;
            src_pos++;
        }
        if (col < dst.cols) /* odd width: one trailing element left */
            out[col] = first[src_pos];
    }
}
/* An auxiliary function that updates the borders. Used to enforce that border values repeat
 * the ones adjacent to the border.
 */
void VariationalRefinementImpl::updateRepeatedBorders(RedBlackBuffer &dst)
{
    int buf_w = dst.red.cols;
    for (int i = 0; i < dst.red.rows - 2; i++)
    {
        float *r_buf = dst.red.ptr<float>(i + 1);
        float *b_buf = dst.black.ptr<float>(i + 1);
        if (i % 2 == 0)
        {
            /* Even rows start with a red element, so the black row's left border
             * repeats the first red element. The right border is refreshed only in
             * the half whose row is shorter (possible for odd-width images). */
            b_buf[0] = r_buf[1];
            if (dst.red_even_len > dst.black_even_len)
                b_buf[dst.black_even_len + 1] = r_buf[dst.red_even_len];
            else
                r_buf[dst.red_even_len + 1] = b_buf[dst.black_even_len];
        }
        else
        {
            /* On odd rows the roles of red and black are swapped. */
            r_buf[0] = b_buf[1];
            if (dst.red_odd_len < dst.black_odd_len)
                r_buf[dst.red_odd_len + 1] = b_buf[dst.black_odd_len];
            else
                b_buf[dst.black_odd_len + 1] = r_buf[dst.red_odd_len];
        }
    }
    {
        /* Bottom border row: copy from the adjacent interior row, swapping red/black
         * because the checkerboard parity flips between adjacent rows. */
        float *r_buf_border = dst.red.ptr<float>(dst.red.rows - 1);
        float *b_buf_border = dst.black.ptr<float>(dst.black.rows - 1);
        float *r_buf = dst.red.ptr<float>(dst.red.rows - 2);
        float *b_buf = dst.black.ptr<float>(dst.black.rows - 2);
        memcpy(r_buf_border, b_buf, buf_w * sizeof(float));
        memcpy(b_buf_border, r_buf, buf_w * sizeof(float));
    }
    {
        /* Top border row: same swap, copying from the first interior row. */
        float *r_buf_border = dst.red.ptr<float>(0);
        float *b_buf_border = dst.black.ptr<float>(0);
        float *r_buf = dst.red.ptr<float>(1);
        float *b_buf = dst.black.ptr<float>(1);
        memcpy(r_buf_border, b_buf, buf_w * sizeof(float));
        memcpy(b_buf_border, r_buf, buf_w * sizeof(float));
    }
}
/* Allocates the two checkerboard halves for an image of size s, including a
 * one-element border on every side, and records the valid row lengths.
 */
void VariationalRefinementImpl::RedBlackBuffer::create(Size s)
{
    /* Allocate enough memory to include borders */
    const int w = (int)ceil(s.width / 2.0) + 2;
    red.create(s.height + 2, w);
    black.create(s.height + 2, w);
    const int full_len = w - 2;
    const int short_len = (s.width % 2 == 0) ? full_len : w - 3;
    /* For odd widths, rows that start with the "other" color hold one element less. */
    red_even_len = full_len;
    black_odd_len = full_len;
    red_odd_len = short_len;
    black_even_len = short_len;
}
/* Frees both checkerboard halves. */
void VariationalRefinementImpl::RedBlackBuffer::release()
{
    black.release();
    red.release();
}
/* Binds a list of member-function operations with their argument triples for parallel
 * execution: ops[i] will be invoked with (op1s[i], op2s[i], op3s[i]).
 */
VariationalRefinementImpl::ParallelOp_ParBody::ParallelOp_ParBody(VariationalRefinementImpl &_var, vector<Op> _ops,
                                                                  vector<void *> &_op1s, vector<void *> &_op2s,
                                                                  vector<void *> &_op3s)
    : var(&_var), ops(_ops), op1s(_op1s), op2s(_op2s), op3s(_op3s)
{
}
/* Executes each scheduled member-function op in [range.start, range.end) with its
 * three bound arguments. */
void VariationalRefinementImpl::ParallelOp_ParBody::operator()(const Range &range) const
{
    for (int idx = range.start; idx < range.end; idx++)
    {
        const Op op = ops[idx];
        (var->*op)(op1s[idx], op2s[idx], op3s[idx]);
    }
}
/* Warps src towards dst with the given flow field: builds absolute sampling maps
 * (pixel coordinate + flow) in the member buffers mapX/mapY and remaps with
 * bilinear interpolation, replicating the border.
 */
void VariationalRefinementImpl::warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v)
{
    const int rows = flow_u.rows, cols = flow_u.cols;
    for (int y = 0; y < rows; y++)
    {
        const float *u_row = flow_u.ptr<float>(y);
        const float *v_row = flow_v.ptr<float>(y);
        float *map_x_row = mapX.ptr<float>(y);
        float *map_y_row = mapY.ptr<float>(y);
        for (int x = 0; x < cols; x++)
        {
            map_x_row[x] = x + u_row[x];
            map_y_row[x] = y + v_row[x];
        }
    }
    remap(src, dst, mapX, mapY, INTER_LINEAR, BORDER_REPLICATE);
}
/* Allocates all intermediate buffers for the current frame size and precomputes the
 * spatial and temporal image derivatives (plus their red-black-split copies) that the
 * data term needs. I1 is first warped towards I0 with the current flow (W_u, W_v).
 * Derivative computations are dispatched as batches of member-function ops via
 * ParallelOp_ParBody: ops[i] reads op1s[i] and writes op2s[i]/op3s[i].
 */
void VariationalRefinementImpl::prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat &W_v)
{
    Size s = I0.size();
    /* Linear system coefficients and smoothness weights: */
    A11.create(s);
    A12.create(s);
    A22.create(s);
    b1.create(s);
    b2.create(s);
    weights.create(s);
    weights.red.setTo(0.0f);
    weights.black.setTo(0.0f);
    /* Flow buffers (current estimate, increment, red-black copies): */
    tempW_u.create(s);
    tempW_v.create(s);
    dW_u.create(s);
    dW_v.create(s);
    W_u_rb.create(s);
    W_v_rb.create(s);
    /* Image derivatives and their red-black-split versions: */
    Ix.create(s);
    Iy.create(s);
    Iz.create(s);
    Ixx.create(s);
    Ixy.create(s);
    Iyy.create(s);
    Ixz.create(s);
    Iyz.create(s);
    Ix_rb.create(s);
    Iy_rb.create(s);
    Iz_rb.create(s);
    Ixx_rb.create(s);
    Ixy_rb.create(s);
    Iyy_rb.create(s);
    Ixz_rb.create(s);
    Iyz_rb.create(s);
    mapX.create(s);
    mapY.create(s);
    /* Floating point warps work significantly better than fixed-point */
    Mat I1flt, warpedI;
    I1.convertTo(I1flt, CV_32F);
    warpImage(warpedI, I1flt, W_u, W_v);
    /* Computing an average of the current and warped next frames (to compute the derivatives on) and
     * temporal derivative Iz
     */
    Mat averagedI;
    {
        /* averagedI = avg(I0, warpedI);  Iz = warpedI - I0 */
        vector<void *> op1s;
        op1s.push_back((void *)&I0);
        op1s.push_back((void *)&warpedI);
        vector<void *> op2s;
        op2s.push_back((void *)&warpedI);
        op2s.push_back((void *)&I0);
        vector<void *> op3s;
        op3s.push_back((void *)&averagedI);
        op3s.push_back((void *)&Iz);
        vector<Op> ops;
        ops.push_back(&VariationalRefinementImpl::averageOp);
        ops.push_back(&VariationalRefinementImpl::subtractOp);
        parallel_for_(Range(0, 2), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s));
    }
    splitCheckerboard(Iz_rb, Iz);
    /* Computing first-order derivatives */
    {
        /* Ix, Iy from averagedI; Ixz, Iyz from Iz. Each op also writes the
         * red-black-split version (third argument). */
        vector<void *> op1s;
        op1s.push_back((void *)&averagedI);
        op1s.push_back((void *)&averagedI);
        op1s.push_back((void *)&Iz);
        op1s.push_back((void *)&Iz);
        vector<void *> op2s;
        op2s.push_back((void *)&Ix);
        op2s.push_back((void *)&Iy);
        op2s.push_back((void *)&Ixz);
        op2s.push_back((void *)&Iyz);
        vector<void *> op3s;
        op3s.push_back((void *)&Ix_rb);
        op3s.push_back((void *)&Iy_rb);
        op3s.push_back((void *)&Ixz_rb);
        op3s.push_back((void *)&Iyz_rb);
        vector<Op> ops;
        ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp);
        ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp);
        ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp);
        ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp);
        parallel_for_(Range(0, 4), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s));
    }
    /* Computing second-order derivatives */
    {
        /* Ixx, Ixy from Ix; Iyy from Iy, again with split copies. */
        vector<void *> op1s;
        op1s.push_back((void *)&Ix);
        op1s.push_back((void *)&Ix);
        op1s.push_back((void *)&Iy);
        vector<void *> op2s;
        op2s.push_back((void *)&Ixx);
        op2s.push_back((void *)&Ixy);
        op2s.push_back((void *)&Iyy);
        vector<void *> op3s;
        op3s.push_back((void *)&Ixx_rb);
        op3s.push_back((void *)&Ixy_rb);
        op3s.push_back((void *)&Iyy_rb);
        vector<Op> ops;
        ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp);
        ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp);
        ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp);
        parallel_for_(Range(0, 3), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s));
    }
}
/* Parallel body for the data-term computation over one checkerboard color;
 * _h is the number of image rows, _red_pass selects the red or black half. */
VariationalRefinementImpl::ComputeDataTerm_ParBody::ComputeDataTerm_ParBody(VariationalRefinementImpl &_var,
                                                                            int _nstripes, int _h,
                                                                            RedBlackBuffer &_dW_u,
                                                                            RedBlackBuffer &_dW_v, bool _red_pass)
    : var(&_var), nstripes(_nstripes), h(_h), dW_u(&_dW_u), dW_v(&_dW_v), red_pass(_red_pass)
{
    /* Rows handled by one parallel job (the last stripe may be smaller). */
    stripe_sz = (int)ceil(h / (double)nstripes);
}
/* This function computes parts of the main linear system coefficients A11,A12,A22,b1,b1
 * that correspond to the data term, which includes color and gradient constancy assumptions.
 */
void VariationalRefinementImpl::ComputeDataTerm_ParBody::operator()(const Range &range) const
{
    /* Each job covers a horizontal stripe of rows of one color (red or black). */
    int start_i = min(range.start * stripe_sz, h);
    int end_i = min(range.end * stripe_sz, h);
    float zeta_squared = var->zeta * var->zeta;
    float epsilon_squared = var->epsilon * var->epsilon;
    float gamma2 = var->gamma / 2;
    float delta2 = var->delta / 2;
    float *pIx, *pIy, *pIz;
    float *pIxx, *pIxy, *pIyy, *pIxz, *pIyz;
    float *pdU, *pdV;
    float *pa11, *pa12, *pa22, *pb1, *pb2;
    float derivNorm, derivNorm2;
    float Ik1z, Ik1zx, Ik1zy;
    float weight;
    int len;
    for (int i = start_i; i < end_i; i++)
    {
/* Sets up row pointers into one half-buffer; "+1" offsets skip the top border row
 * and the left border element. len is the number of valid elements in the row,
 * which depends on the row parity for odd-width images. */
#define INIT_ROW_POINTERS(color) \
    pIx = var->Ix_rb.color.ptr<float>(i + 1) + 1; \
    pIy = var->Iy_rb.color.ptr<float>(i + 1) + 1; \
    pIz = var->Iz_rb.color.ptr<float>(i + 1) + 1; \
    pIxx = var->Ixx_rb.color.ptr<float>(i + 1) + 1; \
    pIxy = var->Ixy_rb.color.ptr<float>(i + 1) + 1; \
    pIyy = var->Iyy_rb.color.ptr<float>(i + 1) + 1; \
    pIxz = var->Ixz_rb.color.ptr<float>(i + 1) + 1; \
    pIyz = var->Iyz_rb.color.ptr<float>(i + 1) + 1; \
    pa11 = var->A11.color.ptr<float>(i + 1) + 1; \
    pa12 = var->A12.color.ptr<float>(i + 1) + 1; \
    pa22 = var->A22.color.ptr<float>(i + 1) + 1; \
    pb1 = var->b1.color.ptr<float>(i + 1) + 1; \
    pb2 = var->b2.color.ptr<float>(i + 1) + 1; \
    pdU = dW_u->color.ptr<float>(i + 1) + 1; \
    pdV = dW_v->color.ptr<float>(i + 1) + 1; \
    if (i % 2 == 0) \
        len = var->Ix_rb.color##_even_len; \
    else \
        len = var->Ix_rb.color##_odd_len;
        if (red_pass)
        {
            INIT_ROW_POINTERS(red);
        }
        else
        {
            INIT_ROW_POINTERS(black);
        }
#undef INIT_ROW_POINTERS
        int j = 0;
#ifdef CV_SIMD128
        /* Vectorized version of the scalar loop below: processes four elements at a time. */
        v_float32x4 zeta_vec = v_setall_f32(zeta_squared);
        v_float32x4 eps_vec = v_setall_f32(epsilon_squared);
        v_float32x4 delta_vec = v_setall_f32(delta2);
        v_float32x4 gamma_vec = v_setall_f32(gamma2);
        v_float32x4 zero_vec = v_setall_f32(0.0f);
        v_float32x4 pIx_vec, pIy_vec, pIz_vec, pdU_vec, pdV_vec;
        v_float32x4 pIxx_vec, pIxy_vec, pIyy_vec, pIxz_vec, pIyz_vec;
        v_float32x4 derivNorm_vec, derivNorm2_vec, weight_vec;
        v_float32x4 Ik1z_vec, Ik1zx_vec, Ik1zy_vec;
        v_float32x4 pa11_vec, pa12_vec, pa22_vec, pb1_vec, pb2_vec;
        for (; j < len - 3; j += 4)
        {
            pIx_vec = v_load(pIx + j);
            pIy_vec = v_load(pIy + j);
            pIz_vec = v_load(pIz + j);
            pdU_vec = v_load(pdU + j);
            pdV_vec = v_load(pdV + j);
            derivNorm_vec = pIx_vec * pIx_vec + pIy_vec * pIy_vec + zeta_vec;
            Ik1z_vec = pIz_vec + pIx_vec * pdU_vec + pIy_vec * pdV_vec;
            weight_vec = (delta_vec / v_sqrt(Ik1z_vec * Ik1z_vec / derivNorm_vec + eps_vec)) / derivNorm_vec;
            pa11_vec = weight_vec * (pIx_vec * pIx_vec) + zeta_vec;
            pa12_vec = weight_vec * (pIx_vec * pIy_vec);
            pa22_vec = weight_vec * (pIy_vec * pIy_vec) + zeta_vec;
            pb1_vec = zero_vec - weight_vec * (pIz_vec * pIx_vec);
            pb2_vec = zero_vec - weight_vec * (pIz_vec * pIy_vec);
            pIxx_vec = v_load(pIxx + j);
            pIxy_vec = v_load(pIxy + j);
            pIyy_vec = v_load(pIyy + j);
            pIxz_vec = v_load(pIxz + j);
            pIyz_vec = v_load(pIyz + j);
            derivNorm_vec = pIxx_vec * pIxx_vec + pIxy_vec * pIxy_vec + zeta_vec;
            derivNorm2_vec = pIyy_vec * pIyy_vec + pIxy_vec * pIxy_vec + zeta_vec;
            Ik1zx_vec = pIxz_vec + pIxx_vec * pdU_vec + pIxy_vec * pdV_vec;
            Ik1zy_vec = pIyz_vec + pIxy_vec * pdU_vec + pIyy_vec * pdV_vec;
            weight_vec = gamma_vec / v_sqrt(Ik1zx_vec * Ik1zx_vec / derivNorm_vec +
                                            Ik1zy_vec * Ik1zy_vec / derivNorm2_vec + eps_vec);
            pa11_vec += weight_vec * (pIxx_vec * pIxx_vec / derivNorm_vec + pIxy_vec * pIxy_vec / derivNorm2_vec);
            pa12_vec += weight_vec * (pIxx_vec * pIxy_vec / derivNorm_vec + pIxy_vec * pIyy_vec / derivNorm2_vec);
            pa22_vec += weight_vec * (pIxy_vec * pIxy_vec / derivNorm_vec + pIyy_vec * pIyy_vec / derivNorm2_vec);
            pb1_vec -= weight_vec * (pIxx_vec * pIxz_vec / derivNorm_vec + pIxy_vec * pIyz_vec / derivNorm2_vec);
            pb2_vec -= weight_vec * (pIxy_vec * pIxz_vec / derivNorm_vec + pIyy_vec * pIyz_vec / derivNorm2_vec);
            v_store(pa11 + j, pa11_vec);
            v_store(pa12 + j, pa12_vec);
            v_store(pa22 + j, pa22_vec);
            v_store(pb1 + j, pb1_vec);
            v_store(pb2 + j, pb2_vec);
        }
#endif
        /* Scalar tail (the whole row when SIMD is unavailable): */
        for (; j < len; j++)
        {
            /* Step 1: Compute color constancy terms */
            /* Normalization factor:*/
            derivNorm = pIx[j] * pIx[j] + pIy[j] * pIy[j] + zeta_squared;
            /* Color constancy penalty (computed by Taylor expansion):*/
            Ik1z = pIz[j] + pIx[j] * pdU[j] + pIy[j] * pdV[j];
            /* Weight of the color constancy term in the current fixed-point iteration divided by derivNorm: */
            weight = (delta2 / sqrt(Ik1z * Ik1z / derivNorm + epsilon_squared)) / derivNorm;
            /* Add respective color constancy terms to the linear system coefficients: */
            pa11[j] = weight * (pIx[j] * pIx[j]) + zeta_squared;
            pa12[j] = weight * (pIx[j] * pIy[j]);
            pa22[j] = weight * (pIy[j] * pIy[j]) + zeta_squared;
            pb1[j] = -weight * (pIz[j] * pIx[j]);
            pb2[j] = -weight * (pIz[j] * pIy[j]);
            /* Step 2: Compute gradient constancy terms */
            /* Normalization factor for x gradient: */
            derivNorm = pIxx[j] * pIxx[j] + pIxy[j] * pIxy[j] + zeta_squared;
            /* Normalization factor for y gradient: */
            derivNorm2 = pIyy[j] * pIyy[j] + pIxy[j] * pIxy[j] + zeta_squared;
            /* Gradient constancy penalties (computed by Taylor expansion): */
            Ik1zx = pIxz[j] + pIxx[j] * pdU[j] + pIxy[j] * pdV[j];
            Ik1zy = pIyz[j] + pIxy[j] * pdU[j] + pIyy[j] * pdV[j];
            /* Weight of the gradient constancy term in the current fixed-point iteration: */
            weight = gamma2 / sqrt(Ik1zx * Ik1zx / derivNorm + Ik1zy * Ik1zy / derivNorm2 + epsilon_squared);
            /* Add respective gradient constancy components to the linear system coefficients: */
            pa11[j] += weight * (pIxx[j] * pIxx[j] / derivNorm + pIxy[j] * pIxy[j] / derivNorm2);
            pa12[j] += weight * (pIxx[j] * pIxy[j] / derivNorm + pIxy[j] * pIyy[j] / derivNorm2);
            pa22[j] += weight * (pIxy[j] * pIxy[j] / derivNorm + pIyy[j] * pIyy[j] / derivNorm2);
            pb1[j] += -weight * (pIxx[j] * pIxz[j] / derivNorm + pIxy[j] * pIyz[j] / derivNorm2);
            pb2[j] += -weight * (pIxy[j] * pIxz[j] / derivNorm + pIyy[j] * pIyz[j] / derivNorm2);
        }
    }
}
/* Parallel body for the horizontal smoothness-term pass; _W_u/_W_v hold the initial
 * flow, _tempW_u/_tempW_v the flow of the current fixed-point iteration. */
VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::ComputeSmoothnessTermHorPass_ParBody(
  VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_W_u, RedBlackBuffer &_W_v,
  RedBlackBuffer &_tempW_u, RedBlackBuffer &_tempW_v, bool _red_pass)
    : var(&_var), nstripes(_nstripes), h(_h), W_u(&_W_u), W_v(&_W_v), curW_u(&_tempW_u), curW_v(&_tempW_v),
      red_pass(_red_pass)
{
    /* Rows handled by one parallel job (the last stripe may be smaller). */
    stripe_sz = (int)ceil(h / (double)nstripes);
}
/* This function updates the linear system coefficients A11,A22,b1,b1 according to the
 * flow smoothness term and computes corresponding weights for the current fixed point iteration.
 * A11,A22,b1,b1 are updated only partially (horizontal pass). Doing both horizontal and vertical
 * passes in one loop complicates parallelization (different threads write to the same elements).
 */
void VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::operator()(const Range &range) const
{
    int start_i = min(range.start * stripe_sz, h);
    int end_i = min(range.end * stripe_sz, h);
    float epsilon_squared = var->epsilon * var->epsilon;
    float alpha2 = var->alpha / 2;
    float *pWeight;
    float *pA_u, *pA_u_next, *pA_v, *pA_v_next;
    float *pB_u, *pB_u_next, *pB_v, *pB_v_next;
    float *cW_u, *cW_u_next, *cW_u_next_row;
    float *cW_v, *cW_v_next, *cW_v_next_row;
    float *pW_u, *pW_u_next;
    float *pW_v, *pW_v_next;
    float ux, uy, vx, vy;
    int len;
    bool touches_right_border = true;
/* NOTE: the macro references the loop variable i of the for-loop below. Because
 * horizontally/vertically adjacent elements always have the opposite checkerboard
 * color, the (j+1) and next-row neighbors come from the other ("next") color buffer;
 * next_offs_even/next_offs_odd select the offset that aligns the neighbor rows. */
#define INIT_ROW_POINTERS(cur_color, next_color, next_offs_even, next_offs_odd, bool_default) \
    pWeight = var->weights.cur_color.ptr<float>(i + 1) + 1; \
    pA_u = var->A11.cur_color.ptr<float>(i + 1) + 1; \
    pB_u = var->b1.cur_color.ptr<float>(i + 1) + 1; \
    cW_u = curW_u->cur_color.ptr<float>(i + 1) + 1; \
    pW_u = W_u->cur_color.ptr<float>(i + 1) + 1; \
    pA_v = var->A22.cur_color.ptr<float>(i + 1) + 1; \
    pB_v = var->b2.cur_color.ptr<float>(i + 1) + 1; \
    cW_v = curW_v->cur_color.ptr<float>(i + 1) + 1; \
    pW_v = W_v->cur_color.ptr<float>(i + 1) + 1; \
    \
    cW_u_next_row = curW_u->next_color.ptr<float>(i + 2) + 1; \
    cW_v_next_row = curW_v->next_color.ptr<float>(i + 2) + 1; \
    \
    if (i % 2 == 0) \
    { \
        pA_u_next = var->A11.next_color.ptr<float>(i + 1) + next_offs_even; \
        pB_u_next = var->b1.next_color.ptr<float>(i + 1) + next_offs_even; \
        cW_u_next = curW_u->next_color.ptr<float>(i + 1) + next_offs_even; \
        pW_u_next = W_u->next_color.ptr<float>(i + 1) + next_offs_even; \
        pA_v_next = var->A22.next_color.ptr<float>(i + 1) + next_offs_even; \
        pB_v_next = var->b2.next_color.ptr<float>(i + 1) + next_offs_even; \
        cW_v_next = curW_v->next_color.ptr<float>(i + 1) + next_offs_even; \
        pW_v_next = W_v->next_color.ptr<float>(i + 1) + next_offs_even; \
        len = var->A11.cur_color##_even_len; \
        if (var->A11.cur_color##_even_len != var->A11.cur_color##_odd_len) \
            touches_right_border = bool_default; \
        else \
            touches_right_border = !bool_default; \
    } \
    else \
    { \
        pA_u_next = var->A11.next_color.ptr<float>(i + 1) + next_offs_odd; \
        pB_u_next = var->b1.next_color.ptr<float>(i + 1) + next_offs_odd; \
        cW_u_next = curW_u->next_color.ptr<float>(i + 1) + next_offs_odd; \
        pW_u_next = W_u->next_color.ptr<float>(i + 1) + next_offs_odd; \
        pA_v_next = var->A22.next_color.ptr<float>(i + 1) + next_offs_odd; \
        pB_v_next = var->b2.next_color.ptr<float>(i + 1) + next_offs_odd; \
        cW_v_next = curW_v->next_color.ptr<float>(i + 1) + next_offs_odd; \
        pW_v_next = W_v->next_color.ptr<float>(i + 1) + next_offs_odd; \
        len = var->A11.cur_color##_odd_len; \
        if (var->A11.cur_color##_even_len != var->A11.cur_color##_odd_len) \
            touches_right_border = !bool_default; \
        else \
            touches_right_border = bool_default; \
    }
    for (int i = start_i; i < end_i; i++)
    {
        if (red_pass)
        {
            INIT_ROW_POINTERS(red, black, 1, 2, true);
        }
        else
        {
            INIT_ROW_POINTERS(black, red, 2, 1, false);
        }
#undef INIT_ROW_POINTERS
#define COMPUTE \
    /* Gradients for the flow on the current fixed-point iteration: */ \
    ux = cW_u_next[j] - cW_u[j]; \
    vx = cW_v_next[j] - cW_v[j]; \
    uy = cW_u_next_row[j] - cW_u[j]; \
    vy = cW_v_next_row[j] - cW_v[j]; \
    /* Weight of the smoothness term in the current fixed-point iteration: */ \
    pWeight[j] = alpha2 / sqrt(ux * ux + vx * vx + uy * uy + vy * vy + epsilon_squared); \
    /* Gradients for initial raw flow multiplied by weight:*/ \
    ux = pWeight[j] * (pW_u_next[j] - pW_u[j]); \
    vx = pWeight[j] * (pW_v_next[j] - pW_v[j]);
#define UPDATE \
    pB_u[j] += ux; \
    pA_u[j] += pWeight[j]; \
    pB_v[j] += vx; \
    pA_v[j] += pWeight[j]; \
    pB_u_next[j] -= ux; \
    pA_u_next[j] += pWeight[j]; \
    pB_v_next[j] -= vx; \
    pA_v_next[j] += pWeight[j];
        int j = 0;
#ifdef CV_SIMD128
        /* Vectorized COMPUTE+UPDATE; the "len - 4" bound (one element tighter than the
         * usual "len - 3") leaves the final element for the border-aware scalar code below. */
        v_float32x4 alpha2_vec = v_setall_f32(alpha2);
        v_float32x4 eps_vec = v_setall_f32(epsilon_squared);
        v_float32x4 cW_u_vec, cW_v_vec;
        v_float32x4 pWeight_vec, ux_vec, vx_vec, uy_vec, vy_vec;
        for (; j < len - 4; j += 4)
        {
            cW_u_vec = v_load(cW_u + j);
            cW_v_vec = v_load(cW_v + j);
            ux_vec = v_load(cW_u_next + j) - cW_u_vec;
            vx_vec = v_load(cW_v_next + j) - cW_v_vec;
            uy_vec = v_load(cW_u_next_row + j) - cW_u_vec;
            vy_vec = v_load(cW_v_next_row + j) - cW_v_vec;
            pWeight_vec =
              alpha2_vec / v_sqrt(ux_vec * ux_vec + vx_vec * vx_vec + uy_vec * uy_vec + vy_vec * vy_vec + eps_vec);
            v_store(pWeight + j, pWeight_vec);
            ux_vec = pWeight_vec * (v_load(pW_u_next + j) - v_load(pW_u + j));
            vx_vec = pWeight_vec * (v_load(pW_v_next + j) - v_load(pW_v + j));
            v_store(pA_u + j, v_load(pA_u + j) + pWeight_vec);
            v_store(pA_v + j, v_load(pA_v + j) + pWeight_vec);
            v_store(pB_u + j, v_load(pB_u + j) + ux_vec);
            v_store(pB_v + j, v_load(pB_v + j) + vx_vec);
            v_store(pA_u_next + j, v_load(pA_u_next + j) + pWeight_vec);
            v_store(pA_v_next + j, v_load(pA_v_next + j) + pWeight_vec);
            v_store(pB_u_next + j, v_load(pB_u_next + j) - ux_vec);
            v_store(pB_v_next + j, v_load(pB_v_next + j) - vx_vec);
        }
#endif
        for (; j < len - 1; j++)
        {
            COMPUTE;
            UPDATE;
        }
        /* Omit the update on the rightmost elements */
        if (touches_right_border)
        {
            COMPUTE;
        }
        else
        {
            COMPUTE;
            UPDATE;
        }
    }
#undef COMPUTE
#undef UPDATE
}
/* Parallel body for the vertical smoothness-term pass (weights were already
 * computed by the horizontal pass). */
VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::ComputeSmoothnessTermVertPass_ParBody(
  VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, bool _red_pass)
    : var(&_var), nstripes(_nstripes), W_u(&_W_u), W_v(&_W_v), red_pass(_red_pass)
{
    /* Omit the last row in the vertical pass */
    h = _h - 1;
    stripe_sz = (int)ceil(h / (double)nstripes);
}
/* This function adds the last remaining terms to the linear system coefficients A11,A22,b1,b1. */
void VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::operator()(const Range &range) const
{
    int start_i = min(range.start * stripe_sz, h);
    int end_i = min(range.end * stripe_sz, h);
    float *pWeight;
    float *pA_u, *pA_u_next_row, *pA_v, *pA_v_next_row;
    float *pB_u, *pB_u_next_row, *pB_v, *pB_v_next_row;
    float *pW_u, *pW_u_next_row, *pW_v, *pW_v_next_row;
    float vy, uy;
    int len;
    for (int i = start_i; i < end_i; i++)
    {
/* Row pointers for the current color plus the vertically adjacent row, which always
 * lives in the other ("next") color buffer. */
#define INIT_ROW_POINTERS(cur_color, next_color) \
    pWeight = var->weights.cur_color.ptr<float>(i + 1) + 1; \
    pA_u = var->A11.cur_color.ptr<float>(i + 1) + 1; \
    pB_u = var->b1.cur_color.ptr<float>(i + 1) + 1; \
    pW_u = W_u->cur_color.ptr<float>(i + 1) + 1; \
    pA_v = var->A22.cur_color.ptr<float>(i + 1) + 1; \
    pB_v = var->b2.cur_color.ptr<float>(i + 1) + 1; \
    pW_v = W_v->cur_color.ptr<float>(i + 1) + 1; \
    \
    pA_u_next_row = var->A11.next_color.ptr<float>(i + 2) + 1; \
    pB_u_next_row = var->b1.next_color.ptr<float>(i + 2) + 1; \
    pW_u_next_row = W_u->next_color.ptr<float>(i + 2) + 1; \
    pA_v_next_row = var->A22.next_color.ptr<float>(i + 2) + 1; \
    pB_v_next_row = var->b2.next_color.ptr<float>(i + 2) + 1; \
    pW_v_next_row = W_v->next_color.ptr<float>(i + 2) + 1; \
    \
    if (i % 2 == 0) \
        len = var->A11.cur_color##_even_len; \
    else \
        len = var->A11.cur_color##_odd_len;
        if (red_pass)
        {
            INIT_ROW_POINTERS(red, black);
        }
        else
        {
            INIT_ROW_POINTERS(black, red);
        }
#undef INIT_ROW_POINTERS
        int j = 0;
#ifdef CV_SIMD128
        /* Vectorized version of the scalar loop below: */
        v_float32x4 pWeight_vec, uy_vec, vy_vec;
        for (; j < len - 3; j += 4)
        {
            pWeight_vec = v_load(pWeight + j);
            uy_vec = pWeight_vec * (v_load(pW_u_next_row + j) - v_load(pW_u + j));
            vy_vec = pWeight_vec * (v_load(pW_v_next_row + j) - v_load(pW_v + j));
            v_store(pA_u + j, v_load(pA_u + j) + pWeight_vec);
            v_store(pA_v + j, v_load(pA_v + j) + pWeight_vec);
            v_store(pB_u + j, v_load(pB_u + j) + uy_vec);
            v_store(pB_v + j, v_load(pB_v + j) + vy_vec);
            v_store(pA_u_next_row + j, v_load(pA_u_next_row + j) + pWeight_vec);
            v_store(pA_v_next_row + j, v_load(pA_v_next_row + j) + pWeight_vec);
            v_store(pB_u_next_row + j, v_load(pB_u_next_row + j) - uy_vec);
            v_store(pB_v_next_row + j, v_load(pB_v_next_row + j) - vy_vec);
        }
#endif
        for (; j < len; j++)
        {
            /* Vertical smoothness contribution to both the current row and the row below: */
            uy = pWeight[j] * (pW_u_next_row[j] - pW_u[j]);
            vy = pWeight[j] * (pW_v_next_row[j] - pW_v[j]);
            pB_u[j] += uy;
            pA_u[j] += pWeight[j];
            pB_v[j] += vy;
            pA_v[j] += pWeight[j];
            pB_u_next_row[j] -= uy;
            pA_u_next_row[j] += pWeight[j];
            pB_v_next_row[j] -= vy;
            pA_v_next_row[j] += pWeight[j];
        }
    }
}
/* Parallel body for one red-black SOR half-iteration updating the flow increment
 * (_dW_u, _dW_v) for the color selected by _red_pass. */
VariationalRefinementImpl::RedBlackSOR_ParBody::RedBlackSOR_ParBody(VariationalRefinementImpl &_var, int _nstripes,
                                                                    int _h, RedBlackBuffer &_dW_u,
                                                                    RedBlackBuffer &_dW_v, bool _red_pass)
    : var(&_var), nstripes(_nstripes), h(_h), dW_u(&_dW_u), dW_v(&_dW_v), red_pass(_red_pass)
{
    /* Rows handled by one parallel job (the last stripe may be smaller). */
    stripe_sz = (int)ceil(h / (double)nstripes);
}
/* This function implements the Red-Black SOR (successive-over relaxation) method for solving the main
 * linear system in the current fixed-point iteration.
 */
void VariationalRefinementImpl::RedBlackSOR_ParBody::operator()(const Range &range) const
{
    int start = min(range.start * stripe_sz, h);
    int end = min(range.end * stripe_sz, h);
    float *pa11, *pa12, *pa22, *pb1, *pb2, *pW, *pdu, *pdv;
    float *pW_next, *pdu_next, *pdv_next;
    float *pW_prev_row, *pdu_prev_row, *pdv_prev_row;
    float *pdu_next_row, *pdv_next_row;
    float sigmaU, sigmaV;
    int j, len;
    for (int i = start; i < end; i++)
    {
/* All four neighbors of an element live in the other ("next") color buffer; the
 * left neighbor is addressed as pd*_next[j - 1], which is why the next-color rows
 * are offset so that index j lines up with the right neighbor. */
#define INIT_ROW_POINTERS(cur_color, next_color, next_offs_even, next_offs_odd) \
    pW = var->weights.cur_color.ptr<float>(i + 1) + 1; \
    pa11 = var->A11.cur_color.ptr<float>(i + 1) + 1; \
    pa12 = var->A12.cur_color.ptr<float>(i + 1) + 1; \
    pa22 = var->A22.cur_color.ptr<float>(i + 1) + 1; \
    pb1 = var->b1.cur_color.ptr<float>(i + 1) + 1; \
    pb2 = var->b2.cur_color.ptr<float>(i + 1) + 1; \
    pdu = dW_u->cur_color.ptr<float>(i + 1) + 1; \
    pdv = dW_v->cur_color.ptr<float>(i + 1) + 1; \
    \
    pdu_next_row = dW_u->next_color.ptr<float>(i + 2) + 1; \
    pdv_next_row = dW_v->next_color.ptr<float>(i + 2) + 1; \
    \
    pW_prev_row = var->weights.next_color.ptr<float>(i) + 1; \
    pdu_prev_row = dW_u->next_color.ptr<float>(i) + 1; \
    pdv_prev_row = dW_v->next_color.ptr<float>(i) + 1; \
    \
    if (i % 2 == 0) \
    { \
        pW_next = var->weights.next_color.ptr<float>(i + 1) + next_offs_even; \
        pdu_next = dW_u->next_color.ptr<float>(i + 1) + next_offs_even; \
        pdv_next = dW_v->next_color.ptr<float>(i + 1) + next_offs_even; \
        len = var->A11.cur_color##_even_len; \
    } \
    else \
    { \
        pW_next = var->weights.next_color.ptr<float>(i + 1) + next_offs_odd; \
        pdu_next = dW_u->next_color.ptr<float>(i + 1) + next_offs_odd; \
        pdv_next = dW_v->next_color.ptr<float>(i + 1) + next_offs_odd; \
        len = var->A11.cur_color##_odd_len; \
    }
        if (red_pass)
        {
            INIT_ROW_POINTERS(red, black, 1, 2);
        }
        else
        {
            INIT_ROW_POINTERS(black, red, 2, 1);
        }
#undef INIT_ROW_POINTERS
        j = 0;
#ifdef CV_SIMD128
        /* Vectorized version of the scalar loop below. The [j - 1] left-neighbor access
         * is emulated with v_extract, carrying the previous vector across iterations.
         * Note that pdu_vec is updated before pdv_vec uses it, matching the scalar
         * in-place update order. */
        v_float32x4 pW_prev_vec = v_setall_f32(pW_next[-1]);
        v_float32x4 pdu_prev_vec = v_setall_f32(pdu_next[-1]);
        v_float32x4 pdv_prev_vec = v_setall_f32(pdv_next[-1]);
        v_float32x4 omega_vec = v_setall_f32(var->omega);
        v_float32x4 pW_vec, pW_next_vec, pW_prev_row_vec;
        v_float32x4 pdu_next_vec, pdu_prev_row_vec, pdu_next_row_vec;
        v_float32x4 pdv_next_vec, pdv_prev_row_vec, pdv_next_row_vec;
        v_float32x4 pW_shifted_vec, pdu_shifted_vec, pdv_shifted_vec;
        v_float32x4 pa12_vec, sigmaU_vec, sigmaV_vec, pdu_vec, pdv_vec;
        for (; j < len - 3; j += 4)
        {
            pW_vec = v_load(pW + j);
            pW_next_vec = v_load(pW_next + j);
            pW_prev_row_vec = v_load(pW_prev_row + j);
            pdu_next_vec = v_load(pdu_next + j);
            pdu_prev_row_vec = v_load(pdu_prev_row + j);
            pdu_next_row_vec = v_load(pdu_next_row + j);
            pdv_next_vec = v_load(pdv_next + j);
            pdv_prev_row_vec = v_load(pdv_prev_row + j);
            pdv_next_row_vec = v_load(pdv_next_row + j);
            pa12_vec = v_load(pa12 + j);
            pW_shifted_vec = v_reinterpret_as_f32(
              v_extract<3>(v_reinterpret_as_s32(pW_prev_vec), v_reinterpret_as_s32(pW_next_vec)));
            pdu_shifted_vec = v_reinterpret_as_f32(
              v_extract<3>(v_reinterpret_as_s32(pdu_prev_vec), v_reinterpret_as_s32(pdu_next_vec)));
            pdv_shifted_vec = v_reinterpret_as_f32(
              v_extract<3>(v_reinterpret_as_s32(pdv_prev_vec), v_reinterpret_as_s32(pdv_next_vec)));
            sigmaU_vec = pW_shifted_vec * pdu_shifted_vec + pW_vec * pdu_next_vec + pW_prev_row_vec * pdu_prev_row_vec +
                         pW_vec * pdu_next_row_vec;
            sigmaV_vec = pW_shifted_vec * pdv_shifted_vec + pW_vec * pdv_next_vec + pW_prev_row_vec * pdv_prev_row_vec +
                         pW_vec * pdv_next_row_vec;
            pdu_vec = v_load(pdu + j);
            pdv_vec = v_load(pdv + j);
            pdu_vec += omega_vec * ((sigmaU_vec + v_load(pb1 + j) - pdv_vec * pa12_vec) / v_load(pa11 + j) - pdu_vec);
            pdv_vec += omega_vec * ((sigmaV_vec + v_load(pb2 + j) - pdu_vec * pa12_vec) / v_load(pa22 + j) - pdv_vec);
            v_store(pdu + j, pdu_vec);
            v_store(pdv + j, pdv_vec);
            pW_prev_vec = pW_next_vec;
            pdu_prev_vec = pdu_next_vec;
            pdv_prev_vec = pdv_next_vec;
        }
#endif
        for (; j < len; j++)
        {
            /* Weighted sum of the four neighbor increments (Gauss-Seidel term): */
            sigmaU = pW_next[j - 1] * pdu_next[j - 1] + pW[j] * pdu_next[j] + pW_prev_row[j] * pdu_prev_row[j] +
                     pW[j] * pdu_next_row[j];
            sigmaV = pW_next[j - 1] * pdv_next[j - 1] + pW[j] * pdv_next[j] + pW_prev_row[j] * pdv_prev_row[j] +
                     pW[j] * pdv_next_row[j];
            /* Over-relaxed update; pdv uses the freshly updated pdu. */
            pdu[j] += var->omega * ((sigmaU + pb1[j] - pdv[j] * pa12[j]) / pa11[j] - pdu[j]);
            pdv[j] += var->omega * ((sigmaV + pb2[j] - pdu[j] * pa12[j]) / pa22[j] - pdv[j]);
        }
    }
}
/* Refines a dense CV_32FC2 flow field in-place: validates the inputs, splits the
 * flow into its horizontal/vertical components, runs calcUV on them, and merges
 * the refined components back into the flow matrix.
 */
void VariationalRefinementImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow)
{
    CV_Assert(!I0.empty() && I0.channels() == 1);
    CV_Assert(!I1.empty() && I1.channels() == 1);
    CV_Assert(I0.sameSize(I1));
    CV_Assert((I0.depth() == CV_8U && I1.depth() == CV_8U) || (I0.depth() == CV_32F && I1.depth() == CV_32F));
    CV_Assert(!flow.empty() && flow.depth() == CV_32F && flow.channels() == 2);
    CV_Assert(I0.sameSize(flow));
    Mat &flow_mat = flow.getMatRef();
    Mat components[2];
    split(flow_mat, components);
    calcUV(I0, I1, components[0], components[1]);
    merge(components, 2, flow_mat);
}
/* Refines the horizontal (flow_u) and vertical (flow_v) flow components in-place.
 * Runs fixedPointIterations outer iterations; in each one the linear system
 * coefficients are recomputed (data term + smoothness term, red then black halves)
 * and sorIterations red-black SOR steps solve for the flow increment (dW_u, dW_v).
 */
void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v)
{
    CV_Assert(!I0.empty() && I0.channels() == 1);
    CV_Assert(!I1.empty() && I1.channels() == 1);
    CV_Assert(I0.sameSize(I1));
    CV_Assert((I0.depth() == CV_8U && I1.depth() == CV_8U) || (I0.depth() == CV_32F && I1.depth() == CV_32F));
    CV_Assert(!flow_u.empty() && flow_u.depth() == CV_32F && flow_u.channels() == 1);
    CV_Assert(!flow_v.empty() && flow_v.depth() == CV_32F && flow_v.channels() == 1);
    CV_Assert(I0.sameSize(flow_u));
    CV_Assert(flow_u.sameSize(flow_v));
    int num_stripes = getNumThreads();
    Mat I0Mat = I0.getMat();
    Mat I1Mat = I1.getMat();
    Mat &W_u = flow_u.getMatRef();
    Mat &W_v = flow_v.getMatRef();
    /* Allocate buffers and precompute image derivatives: */
    prepareBuffers(I0Mat, I1Mat, W_u, W_v);
    /* Convert the input flow to the red-black layout; tempW holds the flow of the
     * current fixed-point iteration, initialized to the input flow: */
    splitCheckerboard(W_u_rb, W_u);
    splitCheckerboard(W_v_rb, W_v);
    W_u_rb.red.copyTo(tempW_u.red);
    W_u_rb.black.copyTo(tempW_u.black);
    W_v_rb.red.copyTo(tempW_v.red);
    W_v_rb.black.copyTo(tempW_v.black);
    /* Start from a zero flow increment: */
    dW_u.red.setTo(0.0f);
    dW_u.black.setTo(0.0f);
    dW_v.red.setTo(0.0f);
    dW_v.black.setTo(0.0f);
    for (int i = 0; i < fixedPointIterations; i++)
    {
        /* Rebuild the linear system coefficients for the current iteration
         * (each term is computed on the red half, then the black half): */
        parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true));
        parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false));
        parallel_for_(Range(0, num_stripes), ComputeSmoothnessTermHorPass_ParBody(
                                               *this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, tempW_u, tempW_v, true));
        parallel_for_(Range(0, num_stripes), ComputeSmoothnessTermHorPass_ParBody(
                                               *this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, tempW_u, tempW_v, false));
        parallel_for_(Range(0, num_stripes),
                      ComputeSmoothnessTermVertPass_ParBody(*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, true));
        parallel_for_(Range(0, num_stripes),
                      ComputeSmoothnessTermVertPass_ParBody(*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, false));
        /* Solve the system with alternating red/black SOR half-steps: */
        for (int j = 0; j < sorIterations; j++)
        {
            parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true));
            parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false));
        }
        /* Apply the increment and refresh the repeated borders: */
        tempW_u.red = W_u_rb.red + dW_u.red;
        tempW_u.black = W_u_rb.black + dW_u.black;
        updateRepeatedBorders(tempW_u);
        tempW_v.red = W_v_rb.red + dW_v.red;
        tempW_v.black = W_v_rb.black + dW_v.black;
        updateRepeatedBorders(tempW_v);
    }
    /* Convert the refined flow back to the plain Mat layout: */
    mergeCheckerboard(W_u, tempW_u);
    mergeCheckerboard(W_v, tempW_v);
}
/* Releases all intermediate buffers allocated by prepareBuffers(). */
void VariationalRefinementImpl::collectGarbage()
{
    Ix.release();
    Iy.release();
    Iz.release();
    Ixx.release();
    Ixy.release();
    Iyy.release();
    Ixz.release();
    Iyz.release();
    Ix_rb.release();
    Iy_rb.release();
    Iz_rb.release();
    Ixx_rb.release();
    Ixy_rb.release();
    Iyy_rb.release();
    Ixz_rb.release();
    Iyz_rb.release();
    A11.release();
    A12.release();
    A22.release();
    b1.release();
    b2.release();
    weights.release();
    mapX.release();
    mapY.release();
    tempW_u.release();
    tempW_v.release();
    dW_u.release();
    dW_v.release();
    W_u_rb.release();
    W_v_rb.release();
}
/* Factory method: creates a VariationalRefinement instance with default parameters. */
Ptr<VariationalRefinement> VariationalRefinement::create()
{ return makePtr<VariationalRefinementImpl>(); }
}
......@@ -7,12 +7,10 @@
// copy or use the software.
//
//
// License Agreement
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
......@@ -25,7 +23,7 @@
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
......@@ -46,72 +44,53 @@
#ifdef HAVE_OPENCL
namespace opencv_test {
namespace ocl {
namespace opencv_test { namespace {
/////////////////////////////////////////////////////////////////////////////////////////////////
// Optical_flow_tvl1
namespace
PARAM_TEST_CASE(OCL_DenseOpticalFlow_DIS, int)
{
IMPLEMENT_PARAM_CLASS(UseInitFlow, bool)
IMPLEMENT_PARAM_CLASS(MedianFiltering, int)
IMPLEMENT_PARAM_CLASS(ScaleStep, double)
}
int preset;
PARAM_TEST_CASE(OpticalFlowTVL1, UseInitFlow, MedianFiltering, ScaleStep)
{
bool useInitFlow;
int medianFiltering;
double scaleStep;
virtual void SetUp()
{
useInitFlow = GET_PARAM(0);
medianFiltering = GET_PARAM(1);
scaleStep = GET_PARAM(2);
preset = GET_PARAM(0);
}
};
OCL_TEST_P(OpticalFlowTVL1, Mat)
OCL_TEST_P(OCL_DenseOpticalFlow_DIS, Mat)
{
cv::Mat frame0 = readImage("optflow/RubberWhale1.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
Mat frame1, frame2, GT;
cv::Mat frame1 = readImage("optflow/RubberWhale2.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
frame1 = imread(TS::ptr()->get_data_path() + "optflow/RubberWhale1.png");
frame2 = imread(TS::ptr()->get_data_path() + "optflow/RubberWhale2.png");
cv::Mat flow; cv::UMat uflow;
CV_Assert(!frame1.empty() && !frame2.empty());
//create algorithm
cv::Ptr<cv::DualTVL1OpticalFlow> alg = cv::createOptFlow_DualTVL1();
cvtColor(frame1, frame1, COLOR_BGR2GRAY);
cvtColor(frame2, frame2, COLOR_BGR2GRAY);
//set parameters
alg->setScaleStep(scaleStep);
alg->setMedianFiltering(medianFiltering);
Ptr<DenseOpticalFlow> algo;
//create initial flow as result of algorithm calculation
if (useInitFlow)
// iterate over presets:
for (int i = 0; i < cvtest::ocl::test_loop_times; i++)
{
OCL_ON(alg->calc(frame0, frame1, uflow));
uflow.copyTo(flow);
}
Mat flow;
UMat ocl_flow;
//set flag to use initial flow as it is ready to use
alg->setUseInitialFlow(useInitFlow);
algo = DISOpticalFlow::create(preset);
OCL_OFF(algo->calc(frame1, frame2, flow));
OCL_ON(algo->calc(frame1, frame2, ocl_flow));
ASSERT_EQ(flow.rows, ocl_flow.rows);
ASSERT_EQ(flow.cols, ocl_flow.cols);
OCL_OFF(alg->calc(frame0, frame1, flow));
OCL_ON(alg->calc(frame0, frame1, uflow));
EXPECT_MAT_SIMILAR(flow, uflow, 1e-2);
EXPECT_MAT_SIMILAR(flow, ocl_flow, 6e-3);
}
}
OCL_INSTANTIATE_TEST_CASE_P(Video, OpticalFlowTVL1,
Combine(
Values(UseInitFlow(false), UseInitFlow(true)),
Values(MedianFiltering(3), MedianFiltering(-1)),
Values(ScaleStep(0.3),ScaleStep(0.5))
)
);
OCL_INSTANTIATE_TEST_CASE_P(Video, OCL_DenseOpticalFlow_DIS,
Values(DISOpticalFlow::PRESET_ULTRAFAST,
DISOpticalFlow::PRESET_FAST,
DISOpticalFlow::PRESET_MEDIUM));
} } // namespace opencv_test::ocl
}} // namespace
#endif // HAVE_OPENCL
......@@ -43,131 +43,105 @@
namespace opencv_test { namespace {
//#define DUMP
static string getDataDir() { return TS::ptr()->get_data_path(); }
// first four bytes, should be the same in little endian
const float FLO_TAG_FLOAT = 202021.25f; // check for this when READING the file
static string getRubberWhaleFrame1() { return getDataDir() + "optflow/RubberWhale1.png"; }
#ifdef DUMP
// binary file format for flow data specified here:
// http://vision.middlebury.edu/flow/data/
void writeOpticalFlowToFile(const Mat_<Point2f>& flow, const string& fileName)
{
const char FLO_TAG_STRING[] = "PIEH"; // use this when WRITING the file
ofstream file(fileName.c_str(), ios_base::binary);
static string getRubberWhaleFrame2() { return getDataDir() + "optflow/RubberWhale2.png"; }
file << FLO_TAG_STRING;
static string getRubberWhaleGroundTruth() { return getDataDir() + "optflow/RubberWhale.flo"; }
file.write((const char*) &flow.cols, sizeof(int));
file.write((const char*) &flow.rows, sizeof(int));
static bool isFlowCorrect(float u) { return !cvIsNaN(u) && (fabs(u) < 1e9); }
for (int i = 0; i < flow.rows; ++i)
{
for (int j = 0; j < flow.cols; ++j)
{
const Point2f u = flow(i, j);
file.write((const char*) &u.x, sizeof(float));
file.write((const char*) &u.y, sizeof(float));
}
}
}
#endif
static float calcRMSE(Mat flow1, Mat flow2)
{
float sum = 0;
int counter = 0;
const int rows = flow1.rows;
const int cols = flow1.cols;
// binary file format for flow data specified here:
// http://vision.middlebury.edu/flow/data/
void readOpticalFlowFromFile(Mat_<Point2f>& flow, const string& fileName)
for (int y = 0; y < rows; ++y)
{
std::ifstream file(fileName.c_str(), std::ios_base::binary);
float tag;
file.read((char*) &tag, sizeof(float));
CV_Assert( tag == FLO_TAG_FLOAT );
Size size;
file.read((char*) &size.width, sizeof(int));
file.read((char*) &size.height, sizeof(int));
flow.create(size);
for (int i = 0; i < flow.rows; ++i)
for (int x = 0; x < cols; ++x)
{
for (int j = 0; j < flow.cols; ++j)
{
Point2f u;
file.read((char*) &u.x, sizeof(float));
file.read((char*) &u.y, sizeof(float));
flow(i, j) = u;
}
}
file.close();
}
Vec2f flow1_at_point = flow1.at<Vec2f>(y, x);
Vec2f flow2_at_point = flow2.at<Vec2f>(y, x);
bool isFlowCorrect(Point2f u)
{
return !cvIsNaN(u.x) && !cvIsNaN(u.y) && (fabs(u.x) < 1e9) && (fabs(u.y) < 1e9);
}
float u1 = flow1_at_point[0];
float v1 = flow1_at_point[1];
float u2 = flow2_at_point[0];
float v2 = flow2_at_point[1];
void check(const Mat_<Point2f>& gold, const Mat_<Point2f>& flow, double threshold = 0.1, double expectedAccuracy = 0.95)
{
threshold = threshold*threshold;
size_t gold_counter = 0;
size_t valid_counter = 0;
for (int i = 0; i < gold.rows; ++i)
{
for (int j = 0; j < gold.cols; ++j)
if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2))
{
const Point2f u1 = gold(i, j);
const Point2f u2 = flow(i, j);
if (isFlowCorrect(u1))
{
gold_counter++;
if (isFlowCorrect(u2))
{
const Point2f diff = u1 - u2;
double err = diff.ddot(diff);
if (err <= threshold)
valid_counter++;
}
}
sum += (u1 - u2) * (u1 - u2) + (v1 - v2) * (v1 - v2);
counter++;
}
}
EXPECT_GE(valid_counter, expectedAccuracy * gold_counter);
}
return (float)sqrt(sum / (1e-9 + counter));
}
TEST(Video_calcOpticalFlowDual_TVL1, Regression)
bool readRubberWhale(Mat &dst_frame_1, Mat &dst_frame_2, Mat &dst_GT)
{
const string frame1_path = TS::ptr()->get_data_path() + "optflow/RubberWhale1.png";
const string frame2_path = TS::ptr()->get_data_path() + "optflow/RubberWhale2.png";
const string gold_flow_path = TS::ptr()->get_data_path() + "optflow/tvl1_flow.flo";
Mat frame1 = imread(frame1_path, IMREAD_GRAYSCALE);
Mat frame2 = imread(frame2_path, IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
ASSERT_FALSE(frame2.empty());
Mat_<Point2f> flow;
Ptr<DualTVL1OpticalFlow> tvl1 = cv::DualTVL1OpticalFlow::create();
const string frame1_path = getRubberWhaleFrame1();
const string frame2_path = getRubberWhaleFrame2();
const string gt_flow_path = getRubberWhaleGroundTruth();
dst_frame_1 = imread(frame1_path);
dst_frame_2 = imread(frame2_path);
dst_GT = readOpticalFlow(gt_flow_path);
if (dst_frame_1.empty() || dst_frame_2.empty() || dst_GT.empty())
return false;
else
return true;
}
tvl1->calc(frame1, frame2, flow);
TEST(DenseOpticalFlow_DIS, ReferenceAccuracy)
{
Mat frame1, frame2, GT;
ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));
int presets[] = {DISOpticalFlow::PRESET_ULTRAFAST, DISOpticalFlow::PRESET_FAST, DISOpticalFlow::PRESET_MEDIUM};
float target_RMSE[] = {0.86f, 0.74f, 0.49f};
cvtColor(frame1, frame1, COLOR_BGR2GRAY);
cvtColor(frame2, frame2, COLOR_BGR2GRAY);
#ifdef DUMP
writeOpticalFlowToFile(flow, gold_flow_path);
#else
Mat_<Point2f> gold;
readOpticalFlowFromFile(gold, gold_flow_path);
Ptr<DenseOpticalFlow> algo;
ASSERT_EQ(gold.rows, flow.rows);
ASSERT_EQ(gold.cols, flow.cols);
// iterate over presets:
for (int i = 0; i < 3; i++)
{
Mat flow;
algo = DISOpticalFlow::create(presets[i]);
algo->calc(frame1, frame2, flow);
ASSERT_EQ(GT.rows, flow.rows);
ASSERT_EQ(GT.cols, flow.cols);
EXPECT_LE(calcRMSE(GT, flow), target_RMSE[i]);
}
}
check(gold, flow);
#endif
// Regression test: VariationalRefinement, seeded with a zero flow field and
// fixed parameters, must reach at least the reference accuracy (RMSE vs. the
// RubberWhale ground-truth .flo) established when the test was written.
TEST(DenseOpticalFlow_VariationalRefinement, ReferenceAccuracy)
{
Mat frame1, frame2, GT;
// Load the RubberWhale pair and its ground-truth flow; skip hard on failure.
ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));
// Empirical upper bound on RMSE for this input with the settings below.
float target_RMSE = 0.86f;
// The algorithm expects single-channel input.
cvtColor(frame1, frame1, COLOR_BGR2GRAY);
cvtColor(frame2, frame2, COLOR_BGR2GRAY);
Ptr<VariationalRefinement> var_ref;
var_ref = VariationalRefinement::create();
// Fixed, deterministic parameter set so the RMSE threshold is meaningful.
var_ref->setAlpha(20.0f);
var_ref->setDelta(5.0f);
var_ref->setGamma(10.0f);
var_ref->setSorIterations(25);
var_ref->setFixedPointIterations(25);
// VariationalRefinement refines an existing flow; start from all-zero flow.
Mat flow(frame1.size(), CV_32FC2);
flow.setTo(0.0f);
var_ref->calc(frame1, frame2, flow);
// Output must match the ground-truth dimensions before comparing values.
ASSERT_EQ(GT.rows, flow.rows);
ASSERT_EQ(GT.cols, flow.cols);
EXPECT_LE(calcRMSE(GT, flow), target_RMSE);
}
}} // namespace
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
namespace opencv_test { namespace {
typedef tuple<Size> OFParams;
typedef TestWithParam<OFParams> DenseOpticalFlow_DIS;
typedef TestWithParam<OFParams> DenseOpticalFlow_VariationalRefinement;
// Checks that DISOpticalFlow produces bit-identical (within tight tolerances)
// results regardless of the number of threads used, across several randomized
// parameter combinations. Skipped in single-threaded environments where the
// comparison would be vacuous.
TEST_P(DenseOpticalFlow_DIS, MultithreadReproducibility)
{
// Max allowed per-pixel and mean difference between 1-thread and N-thread runs.
double MAX_DIF = 0.01;
double MAX_MEAN_DIF = 0.001;
int loopsCount = 2;
// Fixed seed: parameter randomization is reproducible between test runs.
RNG rng(0);
OFParams params = GetParam();
Size size = get<0>(params);
int nThreads = cv::getNumThreads();
if (nThreads == 1)
throw SkipTestException("Single thread environment");
for (int iter = 0; iter <= loopsCount; iter++)
{
// Random noise frames are sufficient: we only compare the algorithm to itself.
Mat frame1(size, CV_8U);
randu(frame1, 0, 255);
Mat frame2(size, CV_8U);
randu(frame2, 0, 255);
Ptr<DISOpticalFlow> algo = DISOpticalFlow::create();
// Randomize the main knobs; pstr < psz is required by the draw below.
int psz = rng.uniform(4, 16);
int pstr = rng.uniform(1, psz - 1);
int grad_iter = rng.uniform(1, 64);
int var_iter = rng.uniform(0, 10);
bool use_mean_normalization = !!rng.uniform(0, 2);
bool use_spatial_propagation = !!rng.uniform(0, 2);
algo->setFinestScale(0);
algo->setPatchSize(psz);
algo->setPatchStride(pstr);
algo->setGradientDescentIterations(grad_iter);
algo->setVariationalRefinementIterations(var_iter);
algo->setUseMeanNormalization(use_mean_normalization);
algo->setUseSpatialPropagation(use_spatial_propagation);
// Run once multi-threaded, once single-threaded, on identical inputs.
cv::setNumThreads(nThreads);
Mat resMultiThread;
algo->calc(frame1, frame2, resMultiThread);
cv::setNumThreads(1);
Mat resSingleThread;
algo->calc(frame1, frame2, resSingleThread);
EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF);
EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF * frame1.total());
// resulting flow should be within the frame bounds:
double min_val, max_val;
minMaxLoc(resMultiThread, &min_val, &max_val);
// Any valid displacement is bounded by the frame diagonal.
EXPECT_LE(abs(min_val), sqrt( static_cast<double>(size.height * size.height + size.width * size.width)) );
EXPECT_LE(abs(max_val), sqrt( static_cast<double>(size.height * size.height + size.width * size.width)) );
}
}
INSTANTIATE_TEST_CASE_P(FullSet, DenseOpticalFlow_DIS, Values(szODD, szQVGA));
// Same reproducibility check as the DIS test above, but for
// VariationalRefinement: single-threaded and multi-threaded runs with
// identical (randomized) parameters and an identical random initial flow
// must agree within tight tolerances.
TEST_P(DenseOpticalFlow_VariationalRefinement, MultithreadReproducibility)
{
// Max allowed per-pixel and mean difference between 1-thread and N-thread runs.
double MAX_DIF = 0.01;
double MAX_MEAN_DIF = 0.001;
// Magnitude of the random initial flow the refinement starts from.
float input_flow_rad = 5.0;
int loopsCount = 2;
// Fixed seed keeps the randomized parameters reproducible between runs.
RNG rng(0);
OFParams params = GetParam();
Size size = get<0>(params);
int nThreads = cv::getNumThreads();
if (nThreads == 1)
throw SkipTestException("Single thread environment");
for (int iter = 0; iter <= loopsCount; iter++)
{
// Random noise frames suffice: the algorithm is only compared to itself.
Mat frame1(size, CV_8U);
randu(frame1, 0, 255);
Mat frame2(size, CV_8U);
randu(frame2, 0, 255);
// VariationalRefinement refines an existing flow, so seed it with noise.
Mat flow(size, CV_32FC2);
randu(flow, -input_flow_rad, input_flow_rad);
Ptr<VariationalRefinement> var = VariationalRefinement::create();
var->setAlpha(rng.uniform(1.0f, 100.0f));
var->setGamma(rng.uniform(0.1f, 10.0f));
var->setDelta(rng.uniform(0.1f, 10.0f));
var->setSorIterations(rng.uniform(1, 20));
var->setFixedPointIterations(rng.uniform(1, 20));
var->setOmega(rng.uniform(1.01f, 1.99f));
// Both runs must start from a copy of the same initial flow.
cv::setNumThreads(nThreads);
Mat resMultiThread;
flow.copyTo(resMultiThread);
var->calc(frame1, frame2, resMultiThread);
cv::setNumThreads(1);
Mat resSingleThread;
flow.copyTo(resSingleThread);
var->calc(frame1, frame2, resSingleThread);
EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF);
EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF * frame1.total());
// resulting flow should be within the frame bounds:
double min_val, max_val;
minMaxLoc(resMultiThread, &min_val, &max_val);
// Any valid displacement is bounded by the frame diagonal.
EXPECT_LE(abs(min_val), sqrt( static_cast<double>(size.height * size.height + size.width * size.width)) );
EXPECT_LE(abs(max_val), sqrt( static_cast<double>(size.height * size.height + size.width * size.width)) );
}
}
INSTANTIATE_TEST_CASE_P(FullSet, DenseOpticalFlow_VariationalRefinement, Values(szODD, szQVGA));
}} // namespace
......@@ -6,5 +6,10 @@
#include "opencv2/ts.hpp"
#include "opencv2/video.hpp"
#include <opencv2/ts/ts_perf.hpp>
namespace opencv_test {
using namespace perf;
}
#endif
#include "opencv2/core/utility.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/video.hpp"
using namespace std;
using namespace cv;
// Print a one-line usage message for the DIS optical flow demo.
static void help()
{
    fprintf(stdout, "Usage: dis_optflow <video_file>\n");
}
// Demo entry point: reads frames from a video file, computes DIS optical flow
// between consecutive grayscale frames, and displays an HSV visualization
// (hue = direction, saturation = magnitude) alongside the original frame.
int main(int argc, char **argv)
{
VideoCapture cap;
// A video file path is required; bail out with usage info otherwise.
if (argc < 2)
{
help();
exit(1);
}
cap.open(argv[1]);
if(!cap.isOpened())
{
printf("ERROR: Cannot open file %s\n", argv[1]);
return -1;
}
Mat prevgray, gray, rgb, frame;
Mat flow, flow_uv[2];
Mat mag, ang;
Mat hsv_split[3], hsv;
char ret;
namedWindow("flow", 1);
namedWindow("orig", 1);
// MEDIUM preset: balanced speed/quality trade-off for live visualization.
Ptr<DenseOpticalFlow> algorithm = DISOpticalFlow::create(DISOpticalFlow::PRESET_MEDIUM);
while(true)
{
cap >> frame;
if (frame.empty())
break;
cvtColor(frame, gray, COLOR_BGR2GRAY);
// Flow needs two frames; skip visualization on the very first iteration.
if (!prevgray.empty())
{
algorithm->calc(prevgray, gray, flow);
// Split into u/v components; flip v so the displayed angle matches
// image coordinates (y grows downward).
split(flow, flow_uv);
multiply(flow_uv[1], -1, flow_uv[1]);
cartToPolar(flow_uv[0], flow_uv[1], mag, ang, true);
normalize(mag, mag, 0, 1, NORM_MINMAX);
// Build HSV image: hue = flow angle, saturation = normalized magnitude.
hsv_split[0] = ang;
hsv_split[1] = mag;
hsv_split[2] = Mat::ones(ang.size(), ang.type());
merge(hsv_split, 3, hsv);
cvtColor(hsv, rgb, COLOR_HSV2BGR);
imshow("flow", rgb);
imshow("orig", frame);
}
// Any key press stops the loop; 20 ms delay paces playback.
if ((ret = (char)waitKey(20)) > 0)
break;
// Reuse the current frame as the previous one for the next iteration.
std::swap(prevgray, gray);
}
return 0;
}
#include <iostream>
#include <fstream>
#include <opencv2/core/utility.hpp>
#include "opencv2/video.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui.hpp"
using namespace cv;
using namespace std;
// A flow vector is usable when both components are finite numbers
// below the 1e9 sentinel used for unknown/invalid flow.
inline bool isFlowCorrect(Point2f u)
{
    const bool x_ok = !cvIsNaN(u.x) && fabs(u.x) < 1e9;
    const bool y_ok = !cvIsNaN(u.y) && fabs(u.y) < 1e9;
    return x_ok && y_ok;
}
// Maps one flow vector (fx, fy), pre-normalized to roughly [-1, 1], to a BGR
// pixel: the vector's direction picks a hue from a piecewise-linear color
// wheel and its magnitude controls saturation (color coding in the style of
// the Middlebury optical flow benchmark). Vectors with magnitude > 1 are
// dimmed to mark them as out of range.
// NOTE(review): the lazy color-wheel initialization below is unguarded —
// presumably this is only called from a single thread; verify before reuse.
static Vec3b computeColor(float fx, float fy)
{
static bool first = true;
// relative lengths of color transitions:
// these are chosen based on perceptual similarity
// (e.g. one can distinguish more shades between red and yellow
// than between yellow and green)
const int RY = 15;
const int YG = 6;
const int GC = 4;
const int CB = 11;
const int BM = 13;
const int MR = 6;
const int NCOLS = RY + YG + GC + CB + BM + MR;
static Vec3i colorWheel[NCOLS];
// Build the wheel once: six linear ramps through R->Y->G->C->B->M->R.
if (first)
{
int k = 0;
for (int i = 0; i < RY; ++i, ++k)
colorWheel[k] = Vec3i(255, 255 * i / RY, 0);
for (int i = 0; i < YG; ++i, ++k)
colorWheel[k] = Vec3i(255 - 255 * i / YG, 255, 0);
for (int i = 0; i < GC; ++i, ++k)
colorWheel[k] = Vec3i(0, 255, 255 * i / GC);
for (int i = 0; i < CB; ++i, ++k)
colorWheel[k] = Vec3i(0, 255 - 255 * i / CB, 255);
for (int i = 0; i < BM; ++i, ++k)
colorWheel[k] = Vec3i(255 * i / BM, 0, 255);
for (int i = 0; i < MR; ++i, ++k)
colorWheel[k] = Vec3i(255, 0, 255 - 255 * i / MR);
first = false;
}
// Magnitude and angle of the vector; a is mapped from [-pi, pi] to [-1, 1].
const float rad = sqrt(fx * fx + fy * fy);
const float a = atan2(-fy, -fx) / (float)CV_PI;
// fk is a fractional index into the wheel; interpolate between k0 and k1.
const float fk = (a + 1.0f) / 2.0f * (NCOLS - 1);
const int k0 = static_cast<int>(fk);
const int k1 = (k0 + 1) % NCOLS;
const float f = fk - k0;
Vec3b pix;
// Blend each channel between the two neighboring wheel entries,
// then scale by magnitude. Output is written in BGR order (pix[2 - b]).
for (int b = 0; b < 3; b++)
{
const float col0 = colorWheel[k0][b] / 255.f;
const float col1 = colorWheel[k1][b] / 255.f;
float col = (1 - f) * col0 + f * col1;
if (rad <= 1)
col = 1 - rad * (1 - col); // increase saturation with radius
else
col *= .75; // out of range
pix[2 - b] = static_cast<uchar>(255.f * col);
}
return pix;
}
static void drawOpticalFlow(const Mat_<Point2f>& flow, Mat& dst, float maxmotion = -1)
{
dst.create(flow.size(), CV_8UC3);
dst.setTo(Scalar::all(0));
// determine motion range:
float maxrad = maxmotion;
if (maxmotion <= 0)
{
maxrad = 1;
for (int y = 0; y < flow.rows; ++y)
{
for (int x = 0; x < flow.cols; ++x)
{
Point2f u = flow(y, x);
if (!isFlowCorrect(u))
continue;
maxrad = max(maxrad, sqrt(u.x * u.x + u.y * u.y));
}
}
}
for (int y = 0; y < flow.rows; ++y)
{
for (int x = 0; x < flow.cols; ++x)
{
Point2f u = flow(y, x);
if (isFlowCorrect(u))
dst.at<Vec3b>(y, x) = computeColor(u.x / maxrad, u.y / maxrad);
}
}
}
// binary file format for flow data specified here:
// http://vision.middlebury.edu/flow/data/
static void writeOpticalFlowToFile(const Mat_<Point2f>& flow, const string& fileName)
{
static const char FLO_TAG_STRING[] = "PIEH";
ofstream file(fileName.c_str(), ios_base::binary);
file << FLO_TAG_STRING;
file.write((const char*) &flow.cols, sizeof(int));
file.write((const char*) &flow.rows, sizeof(int));
for (int i = 0; i < flow.rows; ++i)
{
for (int j = 0; j < flow.cols; ++j)
{
const Point2f u = flow(i, j);
file.write((const char*) &u.x, sizeof(float));
file.write((const char*) &u.y, sizeof(float));
}
}
}
int main(int argc, const char* argv[])
{
cv::CommandLineParser parser(argc, argv, "{help h || show help message}"
"{ @frame0 | | frame 0}{ @frame1 | | frame 1}{ @output | | output flow}");
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
string frame0_name = parser.get<string>("@frame0");
string frame1_name = parser.get<string>("@frame1");
string file = parser.get<string>("@output");
if (frame0_name.empty() || frame1_name.empty() || file.empty())
{
cerr << "Usage : " << argv[0] << " [<frame0>] [<frame1>] [<output_flow>]" << endl;
return -1;
}
Mat frame0 = imread(frame0_name, IMREAD_GRAYSCALE);
Mat frame1 = imread(frame1_name, IMREAD_GRAYSCALE);
if (frame0.empty())
{
cerr << "Can't open image [" << parser.get<string>("frame0") << "]" << endl;
return -1;
}
if (frame1.empty())
{
cerr << "Can't open image [" << parser.get<string>("frame1") << "]" << endl;
return -1;
}
if (frame1.size() != frame0.size())
{
cerr << "Images should be of equal sizes" << endl;
return -1;
}
Mat_<Point2f> flow;
Ptr<DualTVL1OpticalFlow> tvl1 = cv::DualTVL1OpticalFlow::create();
const double start = (double)getTickCount();
tvl1->calc(frame0, frame1, flow);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "calcOpticalFlowDual_TVL1 : " << timeSec << " sec" << endl;
Mat out;
drawOpticalFlow(flow, out);
if (!file.empty())
writeOpticalFlowToFile(flow, file);
imshow("Flow", out);
waitKey();
return 0;
}
#!/usr/bin/env python
'''
example to show optical flow estimation using DISOpticalFlow
USAGE: dis_opt_flow.py [<video_source>]
Keys:
1 - toggle HSV flow visualization
2 - toggle glitch
3 - toggle spatial propagation of flow vectors
4 - toggle temporal propagation of flow vectors
ESC - exit
'''
# Python 2/3 compatibility
from __future__ import print_function
import numpy as np
import cv2 as cv
import video
def draw_flow(img, flow, step=16):
    """Overlay the flow field on a BGR copy of a grayscale image.

    Samples the field every `step` pixels and draws each sampled vector as a
    green line segment with a green dot at its origin.
    """
    h, w = img.shape[:2]
    # Regular sampling grid over the image, offset by half a step.
    y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2, -1).astype(int)
    fx, fy = flow[y, x].T
    # One (start, end) point pair per sampled vector, rounded to pixels.
    segments = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2)
    segments = np.int32(segments + 0.5)
    vis = cv.cvtColor(img, cv.COLOR_GRAY2BGR)
    cv.polylines(vis, segments, 0, (0, 255, 0))
    for (x1, y1), _ in segments:
        cv.circle(vis, (x1, y1), 1, (0, 255, 0), -1)
    return vis
def draw_hsv(flow):
    """Visualize a dense flow field as a BGR image.

    Hue encodes flow direction, value encodes magnitude (scaled and clipped
    to 255); saturation is fixed at maximum.
    """
    h, w = flow.shape[:2]
    fx, fy = flow[:, :, 0], flow[:, :, 1]
    # Angle shifted into [0, 2*pi), then packed into OpenCV's 0..180 hue range.
    ang = np.arctan2(fy, fx) + np.pi
    mag = np.sqrt(fx * fx + fy * fy)
    hsv = np.zeros((h, w, 3), np.uint8)
    hsv[..., 0] = ang * (180 / np.pi / 2)
    hsv[..., 1] = 255
    hsv[..., 2] = np.minimum(mag * 4, 255)
    return cv.cvtColor(hsv, cv.COLOR_HSV2BGR)
def warp_flow(img, flow):
    """Backward-warp `img` by `flow` using bilinear remapping.

    Builds an absolute-coordinate sampling map from the negated flow and
    lets cv.remap pull each output pixel from its source location.
    """
    h, w = flow.shape[:2]
    # Negate the displacements, then add pixel coordinates to get the map.
    coords = -flow
    coords[:, :, 0] += np.arange(w)
    coords[:, :, 1] += np.arange(h)[:, np.newaxis]
    return cv.remap(img, coords, None, cv.INTER_LINEAR)
# Interactive demo loop: grab frames from a camera/video source, run
# DISOpticalFlow on consecutive grayscale frames, and show the flow with
# the visualizations above. Keys 1-4 toggle features, ESC quits.
if __name__ == '__main__':
import sys
print(__doc__)
# Optional CLI argument selects the video source; default 0 = first camera.
try:
fn = sys.argv[1]
except IndexError:
fn = 0
cam = video.create_capture(fn)
ret, prev = cam.read()
prevgray = cv.cvtColor(prev, cv.COLOR_BGR2GRAY)
# Feature toggles driven by the key handlers below.
show_hsv = False
show_glitch = False
use_spatial_propagation = False
use_temporal_propagation = True
cur_glitch = prev.copy()
inst = cv.DISOpticalFlow.create(cv.DISOPTICAL_FLOW_PRESET_MEDIUM)
inst.setUseSpatialPropagation(use_spatial_propagation)
flow = None
while True:
ret, img = cam.read()
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
# Temporal propagation: seed this frame's estimate with the previous flow.
if flow is not None and use_temporal_propagation:
#warp previous flow to get an initial approximation for the current flow:
flow = inst.calc(prevgray, gray, warp_flow(flow,flow))
else:
flow = inst.calc(prevgray, gray, None)
prevgray = gray
cv.imshow('flow', draw_flow(gray, flow))
if show_hsv:
cv.imshow('flow HSV', draw_hsv(flow))
# "Glitch" mode: keep warping the same frozen frame by the current flow.
if show_glitch:
cur_glitch = warp_flow(cur_glitch, flow)
cv.imshow('glitch', cur_glitch)
# Key handling; 27 == ESC exits the loop.
ch = 0xFF & cv.waitKey(5)
if ch == 27:
break
if ch == ord('1'):
show_hsv = not show_hsv
print('HSV flow visualization is', ['off', 'on'][show_hsv])
if ch == ord('2'):
show_glitch = not show_glitch
if show_glitch:
cur_glitch = img.copy()
print('glitch is', ['off', 'on'][show_glitch])
if ch == ord('3'):
use_spatial_propagation = not use_spatial_propagation
inst.setUseSpatialPropagation(use_spatial_propagation)
print('spatial propagation is', ['off', 'on'][use_spatial_propagation])
if ch == ord('4'):
use_temporal_propagation = not use_temporal_propagation
print('temporal propagation is', ['off', 'on'][use_temporal_propagation])
cv.destroyAllWindows()
......@@ -50,7 +50,7 @@ int main(int argc, const char* argv[])
const char* keys =
"{ h help | | print help message }"
"{ c camera | 0 | capture video from camera (device index starting from 0) }"
"{ a algorithm | fb | algorithm (supported: 'fb', 'tvl')}"
"{ a algorithm | fb | algorithm (supported: 'fb', 'dis')}"
"{ m cpu | | run without OpenCL }"
"{ v video | | use video as input }"
"{ o original | | use original frame size (do not resize to 640x480)}"
......@@ -84,11 +84,11 @@ int main(int argc, const char* argv[])
return 2;
}
cv::Ptr<cv::DenseOpticalFlow> alg;
Ptr<DenseOpticalFlow> alg;
if (algorithm == "fb")
alg = cv::FarnebackOpticalFlow::create();
else if (algorithm == "tvl")
alg = cv::DualTVL1OpticalFlow::create();
alg = FarnebackOpticalFlow::create();
else if (algorithm == "dis")
alg = DISOpticalFlow::create(DISOpticalFlow::PRESET_FAST);
else
{
cout << "Invalid algorithm: " << algorithm << endl;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment