Unverified Commit 4b632022 authored by Tobias Senst's avatar Tobias Senst Committed by GitHub

Merge pull request #2476 from tsenst:optimize_performance_rlof

Rework RLOF by using HAL universal instructions

* * distinguish between SSE2 and SSE 4.1 support
* SSE2 now implements non-parallelized _blendv functions and allows compiling with SSE2 instructions

* * add interface function to enable/disable M-estimator framework
* bugfix blendv functions

* * make use of _mm_store and _mm_load functions in blendv_ps and blendv_epi function to fix compiler error

* * implement substitute of _mm_cvtepi8_epi16 and _mm_cvtepi16_epi32 when compiling with less than SSE 4.1 support

* * implement substitute of _mm_abs_epi16 when compiling with less than SSE 3 support

* * move _mm_abs_epi16 to SSE4 req.

* * add HAL intrinsic functions for RLOF ICA
* first tests are OK

* * HAL intrinsic functions -> RLOF ICA is ready now

* * add HAL functions for RLOF illumination model

* * add HAL for PLK ILL

* * add HAL functions for plk ica

* * commit HAL functions for beplk ica

* * add HAL functions for berlof ica

* * add HAL functions for BEPLK ILL

* *remove unused #ifdef

* * remove white spaces and unused variables

* * remove not used variables

* + remove get4BitMask function which contained pure SSE instructions

* * remove buffIdx which has not been used but initialized

* * fix replacement of #ifdef CV_SIMD128 with #if CV_SIMD128
* rename useMEstimator to setUseMEstimator

* *remove whitespace

* * remove unused variable abss

* * remove unused value

* * rename W_BITS1 to W_BITS since they contain same value
parent 8ab145c4
......@@ -66,12 +66,12 @@ public:
RLOFOpticalFlowParameter()
:solverType(ST_BILINEAR)
,supportRegionType(SR_CROSS)
,normSigma0(3.2f)
,normSigma1(7.f)
,normSigma0(std::numeric_limits<float>::max())
,normSigma1(std::numeric_limits<float>::max())
,smallWinSize(9)
,largeWinSize(21)
,crossSegmentationThreshold(25)
,maxLevel(5)
,maxLevel(4)
,useInitialFlow(false)
,useIlluminationModel(true)
,useGlobalMotionPrior(true)
......@@ -90,13 +90,13 @@ public:
*/
float normSigma0;
/**< &sigma paramter of the shrinked Hampel norm introduced in @cite Senst2012. If
/**< &sigma parameter of the shrinked Hampel norm introduced in @cite Senst2012. If
* &sigma = std::numeric_limits<float>::max() the least-square estimator will be used
* instead of the M-estimator. Although the M-estimator is more robust against outliers in the support
* region, the least-square estimator can be faster to compute.
*/
float normSigma1;
/**< &sigma paramter of the shrinked Hampel norm introduced in @cite Senst2012. If
/**< &sigma parameter of the shrinked Hampel norm introduced in @cite Senst2012. If
* &sigma = std::numeric_limits<float>::max() the least-square estimator will be used
* instead of the M-estimator. Although the M-estimator is more robust against outliers in the support
* region, the least-square estimator can be faster to compute.
......@@ -151,6 +151,14 @@ public:
* See @cite Senst2016 for more details.
*/
//! @brief Enable M-estimator or disable and use least-square estimator.
/** Enables M-estimator by setting sigma parameters to (3.2, 7.0). Disabling M-estimator can reduce
* runtime, while enabling can improve the accuracy.
* @param val If true M-estimator is used. If false least-square estimator is used.
* @see setNormSigma0, setNormSigma1
*/
CV_WRAP void setUseMEstimator(bool val);
CV_WRAP void setSolverType(SolverType val);
CV_WRAP SolverType getSolverType() const;
......@@ -216,9 +224,8 @@ public:
* For the RLOF configuration see optflow::RLOFOpticalFlowParameter for further details.
* Parameters have been described in @cite Senst2012 @cite Senst2013 @cite Senst2014 and @cite Senst2016.
*
* @note SIMD parallelization is only available when compiling with SSE4.1. If the grid size is set to (1,1) and the
* forward backward threshold <= 0 that the dense optical flow field is purely.
* computed with the RLOF.
* @note If the grid size is set to (1,1) and the forward backward threshold <= 0 then a pixelwise dense optical flow field is
* computed by RLOF without using interpolation.
*
* @see optflow::calcOpticalFlowDenseRLOF(), optflow::RLOFOpticalFlowParameter
*/
......
......@@ -47,7 +47,7 @@ typedef tuple<std::string, int> INTERP_GRID_Dense_t;
typedef TestBaseWithParam<INTERP_GRID_Dense_t> INTERP_GRID_Dense;
PERF_TEST_P(INTERP_GRID_Dense, OpticalFlow_DenseRLOF,
testing::Combine(
testing::Values<std::string>("INTERP_EPIC", "INTERP_GEO"),
testing::Values<std::string>("INTERP_EPIC", "INTERP_GEO", "INTERP_RIC"),
testing::Values<int>(4,10))
)
{
......@@ -63,6 +63,8 @@ PERF_TEST_P(INTERP_GRID_Dense, OpticalFlow_DenseRLOF,
interp_type = INTERP_EPIC;
if (get<0>(GetParam()) == "INTERP_GEO")
interp_type = INTERP_GEO;
if (get<0>(GetParam()) == "INTERP_RIC")
interp_type = INTERP_RIC;
PERF_SAMPLE_BEGIN()
calcOpticalFlowDenseRLOF(frame1, frame2,flow, param, 1.0f, Size(get<1>(GetParam()), get<1>(GetParam())), interp_type);
PERF_SAMPLE_END()
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
......@@ -470,16 +470,32 @@ void calcLocalOpticalFlowCore(
{
if (param.useIlluminationModel)
{
cv::parallel_for_(cv::Range(0, npoints),
plk::radial::TrackerInvoker(
prevImage, derivI, currImage, tRGBPrevPyr, tRGBNextPyr,
prevPts, nextPts, &status[0], &err[0], &gainPts[0],
level, maxLevel, winSizes,
param.maxIteration,
param.useInitialFlow,
param.supportRegionType,
param.minEigenValue,
param.crossSegmentationThreshold));
if (param.solverType == SolverType::ST_STANDART)
{
cv::parallel_for_(cv::Range(0, npoints),
plk::radial::TrackerInvoker(
prevImage, derivI, currImage, tRGBPrevPyr, tRGBNextPyr,
prevPts, nextPts, &status[0], &err[0], &gainPts[0],
level, maxLevel, winSizes,
param.maxIteration,
param.useInitialFlow,
param.supportRegionType,
param.minEigenValue,
param.crossSegmentationThreshold));
}
else
{
cv::parallel_for_(cv::Range(0, npoints),
beplk::radial::TrackerInvoker(
prevImage, derivI, currImage, tRGBPrevPyr, tRGBNextPyr,
prevPts, nextPts, &status[0], &err[0], &gainPts[0],
level, maxLevel, winSizes,
param.maxIteration,
param.useInitialFlow,
param.supportRegionType,
param.crossSegmentationThreshold,
param.minEigenValue));
}
}
else
{
......
......@@ -6,6 +6,7 @@
#include "rlof/geo_interpolation.hpp"
#include "opencv2/ximgproc.hpp"
namespace cv {
namespace optflow {
......@@ -14,6 +15,19 @@ Ptr<RLOFOpticalFlowParameter> RLOFOpticalFlowParameter::create()
return Ptr<RLOFOpticalFlowParameter>(new RLOFOpticalFlowParameter);
}
// Toggles between the M-estimator and the least-square estimator by
// overwriting both sigma parameters of the norm.
//   val == true  -> normSigma0 = 3.2, normSigma1 = 7.0 (M-estimator)
//   val == false -> both sigmas are set to the largest finite float, which
//                   selects the least-square estimator.
void RLOFOpticalFlowParameter::setUseMEstimator(bool val)
{
    // Sentinel sigma value meaning "M-estimator disabled".
    const float disabledSigma = std::numeric_limits<float>::max();

    normSigma0 = val ? 3.2f : disabledSigma;
    normSigma1 = val ? 7.f : disabledSigma;
}
// Stores the solver type passed in by the caller.
void RLOFOpticalFlowParameter::setSolverType(SolverType val)
{
    solverType = val;
}
// Returns the currently stored solver type.
SolverType RLOFOpticalFlowParameter::getSolverType() const
{
    return solverType;
}
......@@ -198,7 +212,7 @@ public:
gd->setLambda(lambda);
gd->setFGSLambda(fgs_lambda);
gd->setFGSSigma(fgs_sigma);
gd->setUsePostProcessing(false);
gd->setUsePostProcessing(use_post_proc);
gd->interpolate(prevImage, filtered_prevPoints, currImage, filtered_currPoints, dense_flow);
}
else if (interp_type == InterpolationType::INTERP_RIC)
......@@ -209,7 +223,7 @@ public:
gd->setFGSSigma(fgs_sigma);
gd->setSuperpixelSize(sp_size);
gd->setSuperpixelMode(slic_type);
gd->setUseGlobalSmootherFilter(false);
gd->setUseGlobalSmootherFilter(use_post_proc);
gd->setUseVariationalRefinement(false);
gd->interpolate(prevImage, filtered_prevPoints, currImage, filtered_currPoints, dense_flow);
}
......@@ -225,6 +239,10 @@ public:
cv::bilateralFilter(vecMats[0], vecMats2[0], 5, 2, 20);
cv::bilateralFilter(vecMats[1], vecMats2[1], 5, 2, 20);
cv::merge(vecMats2, dense_flow);
if (use_post_proc)
{
ximgproc::fastGlobalSmootherFilter(prevImage, flow, flow, fgs_lambda, fgs_sigma);
}
}
if (use_variational_refinement)
{
......@@ -235,10 +253,6 @@ public:
variationalrefine->setOmega(1.9f);
variationalrefine->calc(prevGrey, currGrey, flow);
}
if (use_post_proc)
{
ximgproc::fastGlobalSmootherFilter(prevImage, flow, flow, fgs_lambda, fgs_sigma);
}
}
virtual void collectGarbage() CV_OVERRIDE
......
......@@ -196,6 +196,7 @@ TEST(SparseOpticalFlow, ReferenceAccuracy)
param->supportRegionType = SR_CROSS;
param->useIlluminationModel = true;
param->solverType = ST_BILINEAR;
param->setUseMEstimator(true);
algo->setRLOFOpticalFlowParameter(param);
algo->calc(frame1, frame2, prevPts, currPts, status, err);
EXPECT_LE(calcRMSE(prevPts, currPts, GT), 0.3f);
......@@ -216,8 +217,7 @@ TEST(SparseOpticalFlow, ReferenceAccuracy)
algo->calc(frame1, frame2, prevPts, currPts, status, err);
EXPECT_LE(calcRMSE(prevPts, currPts, GT), 0.27f);
param->normSigma0 = numeric_limits<float>::max();
param->normSigma1 = numeric_limits<float>::max();
param->setUseMEstimator(false);
param->useIlluminationModel = true;
param->solverType = ST_BILINEAR;
......@@ -250,6 +250,7 @@ TEST(DenseOpticalFlow_RLOF, ReferenceAccuracy)
Mat flow;
Ptr<DenseRLOFOpticalFlow> algo = DenseRLOFOpticalFlow::create();
Ptr<RLOFOpticalFlowParameter> param = Ptr<RLOFOpticalFlowParameter>(new RLOFOpticalFlowParameter);
param->setUseMEstimator(true);
param->supportRegionType = SR_CROSS;
param->solverType = ST_BILINEAR;
algo->setRLOFOpticalFlowParameter(param);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment