Merge pull request #2476 from tsenst:optimize_performance_rlof

Rework RLOF by using HAL universal instructions * * distinguish between SSE2 and SSE 4.1 support * SSE2 now implements non-parallelized _blendv functions and allows to compile with SSE2 instructions * * add interface function to enable/disable M-estimator framework * bugfix blendv functions * * make use of _mm_store and _mm_load functions in blendv_ps and blendv_epi function to fix compiler error * * implement substitute of _mm_cvtepi8_epi16 and _mm_cvtepi16_epi32 when compiling with less than SSE 4.1 support * * implement substitute of _mm_abs_epi16 when compiling with less than SSE 3 support * * move _mm_abs_epi16 to SSE4 req. * * add HAL intrinsic functions for RLOF ICA * first tests are OK * * HAL intrinsic functions -> RLOF ICA is ready now * * add HAL functions for RLOF illumination model * * add HAL for PLK ILL * * add HAL functions for plk ica * * commit HAL functions for beplk ica * * add HAL functions for berlof ica * * add HAL functions for BEPLK ILL * *remove unused #ifdef * * remove white spaces and unused variables * * remove not used variables * + remove get4BitMask function which contained pure SSE instructions * * remove buffIdx which has not been used but initialized * * fix replacement of #ifdef CV_SIMD128 with #if CV_SIMD128 * rename useMEstimator to setUseMEstimator * *remove whitespace * * remove unused variable abss * * remove unused value * * rename W_BITS1 to W_BITS since they contain same value

Merge pull request #2476 from tsenst:optimize_performance_rlof
Rework RLOF by using HAL universal instructions * * distinguish between SSE2 and SSE 4.1 support * SSE2 now implements non-parallelized _blendv functions and allows to compile with SSE2 instructions * * add interface function to enable/disable M-estimator framework * bugfix blendv functions * * make use of _mm_store and _mm_load functions in blendv_ps and blendv_epi function to fix compiler error * * implement substitute of _mm_cvtepi8_epi16 and _mm_cvtepi16_epi32 when compiling with less than SSE 4.1 support * * implement substitute of _mm_abs_epi16 when compiling with less than SSE 3 support * * move _mm_abs_epi16 to SSE4 req. * * add HAL intrinsic functions for RLOF ICA * first tests are OK * * HAL intrinsic functions -> RLOF ICA is ready now * * add HAL functions for RLOF illumination model * * add HAL for PLK ILL * * add HAL functions for plk ica * * commit HAL functions for beplk ica * * add HAL functions for berlof ica * * add HAL functions for BEPLK ILL * *remove unused #ifdef * * remove white spaces and unused variables * * remove not used variables * + remove get4BitMask function which contained pure SSE instructions * * remove buffIdx which has not been used but initialized * * fix replacement of #ifdef CV_SIMD128 with #if CV_SIMD128 * rename useMEstimator to setUseMEstimator * *remove whitespace * * remove unused variable abss * * remove unused value * * rename W_BITS1 to W_BITS since they contain same value
4b632022 · Tobias Senst · GitHub · 8ab145c4 · 4b632022 · 4b632022
Unverified Commit 4b632022 authored Mar 31, 2020 by Tobias Senst Committed by GitHub Mar 31, 2020
9 changed files
--- a/modules/optflow/include/opencv2/optflow/rlofflow.hpp
+++ b/modules/optflow/include/opencv2/optflow/rlofflow.hpp
@@ -66,12 +66,12 @@ public:
    RLOFOpticalFlowParameter()
        :solverType(ST_BILINEAR)
        ,supportRegionType(SR_CROSS)
-        ,normSigma0(3.2f)
+        ,normSigma0(std::numeric_limits<float>::max())
-        ,normSigma1(7.f)
+        ,normSigma1(std::numeric_limits<float>::max())
        ,smallWinSize(9)
        ,largeWinSize(21)
        ,crossSegmentationThreshold(25)
-        ,maxLevel(5)
+        ,maxLevel(4)
        ,useInitialFlow(false)
        ,useIlluminationModel(true)
        ,useGlobalMotionPrior(true)
@@ -90,13 +90,13 @@ public:
    */
    float normSigma0;
-    /**< &sigma paramter of the shrinked Hampel norm introduced in @cite Senst2012. If
+    /**< &sigma parameter of the shrinked Hampel norm introduced in @cite Senst2012. If
     * &sigma = std::numeric_limits<float>::max() the least-square estimator will be used
     * instead of the M-estimator. Although the M-estimator is more robust against outliers in the support
     * region, the least-square estimator can be faster to compute.
    */
    float normSigma1;
-    /**< &sigma paramter of the shrinked Hampel norm introduced in @cite Senst2012. If
+    /**< &sigma parameter of the shrinked Hampel norm introduced in @cite Senst2012. If
     * &sigma = std::numeric_limits<float>::max() the least-square estimator will be used
     * instead of the M-estimator. Although the M-estimator is more robust against outliers in the support
     * region, the least-square estimator can be faster to compute.
@@ -151,6 +151,14 @@ public:
     *   See @cite Senst2016 for more details.
    */
+    //! @brief Enable M-estimator or disable and use least-square estimator.
+    /** Enables M-estimator by setting sigma parameters to (3.2, 7.0). Disabling M-estimator can reduce
+     *  runtime, while enabling can improve the accuracy.
+     *  @param val If true M-estimator is used. If false least-square estimator is used.
+     *    @see setNormSigma0, setNormSigma1
+    */
+    CV_WRAP void setUseMEstimator(bool val);
    CV_WRAP void setSolverType(SolverType val);
    CV_WRAP SolverType getSolverType() const;
@@ -216,9 +224,8 @@ public:
 * For the RLOF configuration see optflow::RLOFOpticalFlowParameter for further details.
 * Parameters have been described in @cite Senst2012 @cite Senst2013 @cite Senst2014 and @cite Senst2016.
 *
- * @note SIMD parallelization is only available when compiling with SSE4.1. If the grid size is set to (1,1) and the
+ * @note If the grid size is set to (1,1) and the forward-backward threshold <= 0, then a pixelwise dense optical flow field is
- * forward backward threshold <= 0 that the dense optical flow field is purely.
+ * computed by RLOF without using interpolation.
- * computed with the RLOF.
 *
 * @see optflow::calcOpticalFlowDenseRLOF(), optflow::RLOFOpticalFlowParameter
 */

--- a/modules/optflow/perf/perf_rlof.cpp
+++ b/modules/optflow/perf/perf_rlof.cpp
@@ -47,7 +47,7 @@ typedef tuple<std::string, int> INTERP_GRID_Dense_t;
 typedef TestBaseWithParam<INTERP_GRID_Dense_t> INTERP_GRID_Dense;
 PERF_TEST_P(INTERP_GRID_Dense, OpticalFlow_DenseRLOF,
    testing::Combine(
-        testing::Values<std::string>("INTERP_EPIC", "INTERP_GEO"),
+        testing::Values<std::string>("INTERP_EPIC", "INTERP_GEO", "INTERP_RIC"),
        testing::Values<int>(4,10))
 )
 {
@@ -63,6 +63,8 @@ PERF_TEST_P(INTERP_GRID_Dense, OpticalFlow_DenseRLOF,
        interp_type = INTERP_EPIC;
    if (get<0>(GetParam()) == "INTERP_GEO")
        interp_type = INTERP_GEO;
+    if (get<0>(GetParam()) == "INTERP_RIC")
+        interp_type = INTERP_RIC;
    PERF_SAMPLE_BEGIN()
        calcOpticalFlowDenseRLOF(frame1, frame2,flow, param, 1.0f, Size(get<1>(GetParam()), get<1>(GetParam())), interp_type);
    PERF_SAMPLE_END()

--- a/modules/optflow/src/rlof/berlof_invoker.hpp
+++ b/modules/optflow/src/rlof/berlof_invoker.hpp
--- a/modules/optflow/src/rlof/plk_invoker.hpp
+++ b/modules/optflow/src/rlof/plk_invoker.hpp
--- a/modules/optflow/src/rlof/rlof_invoker.hpp
+++ b/modules/optflow/src/rlof/rlof_invoker.hpp
--- a/modules/optflow/src/rlof/rlof_invokerbase.hpp
+++ b/modules/optflow/src/rlof/rlof_invokerbase.hpp
--- a/modules/optflow/src/rlof/rlof_localflow.cpp
+++ b/modules/optflow/src/rlof/rlof_localflow.cpp
@@ -470,16 +470,32 @@ void calcLocalOpticalFlowCore(
        {
            if (param.useIlluminationModel)
            {
-                cv::parallel_for_(cv::Range(0, npoints),
+                if (param.solverType == SolverType::ST_STANDART)
-                    plk::radial::TrackerInvoker(
+                {
-                        prevImage, derivI, currImage, tRGBPrevPyr, tRGBNextPyr,
+                    cv::parallel_for_(cv::Range(0, npoints),
-                        prevPts, nextPts, &status[0], &err[0], &gainPts[0],
+                        plk::radial::TrackerInvoker(
-                        level, maxLevel, winSizes,
+                            prevImage, derivI, currImage, tRGBPrevPyr, tRGBNextPyr,
-                        param.maxIteration,
+                            prevPts, nextPts, &status[0], &err[0], &gainPts[0],
-                        param.useInitialFlow,
+                            level, maxLevel, winSizes,
-                        param.supportRegionType,
+                            param.maxIteration,
-                        param.minEigenValue,
+                            param.useInitialFlow,
-                        param.crossSegmentationThreshold));
+                            param.supportRegionType,
+                            param.minEigenValue,
+                            param.crossSegmentationThreshold));
+                }
+                else
+                {
+                    cv::parallel_for_(cv::Range(0, npoints),
+                        beplk::radial::TrackerInvoker(
+                            prevImage, derivI, currImage, tRGBPrevPyr, tRGBNextPyr,
+                            prevPts, nextPts, &status[0], &err[0], &gainPts[0],
+                            level, maxLevel, winSizes,
+                            param.maxIteration,
+                            param.useInitialFlow,
+                            param.supportRegionType,
+                            param.crossSegmentationThreshold,
+                            param.minEigenValue));
+                }
            }
            else
            {

--- a/modules/optflow/src/rlofflow.cpp
+++ b/modules/optflow/src/rlofflow.cpp
@@ -6,6 +6,7 @@
 #include "rlof/geo_interpolation.hpp"
 #include "opencv2/ximgproc.hpp"
 namespace cv {
 namespace optflow {
@@ -14,6 +15,19 @@ Ptr<RLOFOpticalFlowParameter> RLOFOpticalFlowParameter::create()
    return Ptr<RLOFOpticalFlowParameter>(new RLOFOpticalFlowParameter);
 }
+void RLOFOpticalFlowParameter::setUseMEstimator(bool val)
+{
+    if (val)
+    {
+        normSigma0 = 3.2f;
+        normSigma1 = 7.f;
+    }
+    else
+    {
+        normSigma0 = std::numeric_limits<float>::max();
+        normSigma1 = std::numeric_limits<float>::max();
+    }
+}
 void RLOFOpticalFlowParameter::setSolverType(SolverType val){ solverType = val;}
 SolverType RLOFOpticalFlowParameter::getSolverType() const { return solverType;}
@@ -198,7 +212,7 @@ public:
            gd->setLambda(lambda);
            gd->setFGSLambda(fgs_lambda);
            gd->setFGSSigma(fgs_sigma);
-            gd->setUsePostProcessing(false);
+            gd->setUsePostProcessing(use_post_proc);
            gd->interpolate(prevImage, filtered_prevPoints, currImage, filtered_currPoints, dense_flow);
        }
        else if (interp_type == InterpolationType::INTERP_RIC)
@@ -209,7 +223,7 @@ public:
            gd->setFGSSigma(fgs_sigma);
            gd->setSuperpixelSize(sp_size);
            gd->setSuperpixelMode(slic_type);
-            gd->setUseGlobalSmootherFilter(false);
+            gd->setUseGlobalSmootherFilter(use_post_proc);
            gd->setUseVariationalRefinement(false);
            gd->interpolate(prevImage, filtered_prevPoints, currImage, filtered_currPoints, dense_flow);
        }
@@ -225,6 +239,10 @@ public:
            cv::bilateralFilter(vecMats[0], vecMats2[0], 5, 2, 20);
            cv::bilateralFilter(vecMats[1], vecMats2[1], 5, 2, 20);
            cv::merge(vecMats2, dense_flow);
+            if (use_post_proc)
+            {
+                ximgproc::fastGlobalSmootherFilter(prevImage, flow, flow, fgs_lambda, fgs_sigma);
+            }
        }
        if (use_variational_refinement)
        {
@@ -235,10 +253,6 @@ public:
            variationalrefine->setOmega(1.9f);
            variationalrefine->calc(prevGrey, currGrey, flow);
        }
-        if (use_post_proc)
-        {
-            ximgproc::fastGlobalSmootherFilter(prevImage, flow, flow, fgs_lambda, fgs_sigma);
-        }
    }
    virtual void collectGarbage() CV_OVERRIDE

--- a/modules/optflow/test/test_OF_accuracy.cpp
+++ b/modules/optflow/test/test_OF_accuracy.cpp
@@ -196,6 +196,7 @@ TEST(SparseOpticalFlow, ReferenceAccuracy)
    param->supportRegionType = SR_CROSS;
    param->useIlluminationModel = true;
    param->solverType = ST_BILINEAR;
+    param->setUseMEstimator(true);
    algo->setRLOFOpticalFlowParameter(param);
    algo->calc(frame1, frame2, prevPts, currPts, status, err);
    EXPECT_LE(calcRMSE(prevPts, currPts, GT), 0.3f);
@@ -216,8 +217,7 @@ TEST(SparseOpticalFlow, ReferenceAccuracy)
    algo->calc(frame1, frame2, prevPts, currPts, status, err);
    EXPECT_LE(calcRMSE(prevPts, currPts, GT), 0.27f);
-    param->normSigma0 = numeric_limits<float>::max();
+    param->setUseMEstimator(false);
-    param->normSigma1 = numeric_limits<float>::max();
    param->useIlluminationModel = true;
    param->solverType = ST_BILINEAR;
@@ -250,6 +250,7 @@ TEST(DenseOpticalFlow_RLOF, ReferenceAccuracy)
    Mat flow;
    Ptr<DenseRLOFOpticalFlow> algo = DenseRLOFOpticalFlow::create();
    Ptr<RLOFOpticalFlowParameter> param = Ptr<RLOFOpticalFlowParameter>(new RLOFOpticalFlowParameter);
+    param->setUseMEstimator(true);
    param->supportRegionType = SR_CROSS;
    param->solverType = ST_BILINEAR;
    algo->setRLOFOpticalFlowParameter(param);