Commit dd9b2eb4 authored by Vadim Pisarevsky

Merge pull request #710 from VladX:optflow

parents 9a342b51 1764f924
@@ -43,6 +43,9 @@ the use of this software, even if advised of the possibility of such damage.
#include "opencv2/core.hpp"
#include "opencv2/video.hpp"
#include "opencv2/optflow/pcaflow.hpp"
#include "opencv2/optflow/sparse_matching_gpc.hpp"
/**
@defgroup optflow Optical Flow Algorithms
......
/*
By downloading, copying, installing or using the software you agree to this
license. If you do not agree to this license, do not download, install,
copy or use the software.
License Agreement
For Open Source Computer Vision Library
(3-clause BSD License)
Copyright (C) 2016, OpenCV Foundation, all rights reserved.
Third party copyrights are property of their respective owners.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the names of the copyright holders nor the names of the contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
This software is provided by the copyright holders and contributors "as is" and
any express or implied warranties, including, but not limited to, the implied
warranties of merchantability and fitness for a particular purpose are
disclaimed. In no event shall copyright holders or contributors be liable for
any direct, indirect, incidental, special, exemplary, or consequential damages
(including, but not limited to, procurement of substitute goods or services;
loss of use, data, or profits; or business interruption) however caused
and on any theory of liability, whether in contract, strict liability,
or tort (including negligence or otherwise) arising in any way out of
the use of this software, even if advised of the possibility of such damage.
*/
/*
Implementation of the PCAFlow algorithm from the following paper:
http://files.is.tue.mpg.de/black/papers/cvpr2015_pcaflow.pdf
@inproceedings{Wulff:CVPR:2015,
title = {Efficient Sparse-to-Dense Optical Flow Estimation using a Learned Basis and Layers},
author = {Wulff, Jonas and Black, Michael J.},
booktitle = { IEEE Conf. on Computer Vision and Pattern Recognition (CVPR) 2015},
month = jun,
year = {2015}
}
There are some key differences which distinguish this algorithm from the original PCAFlow (see paper):
- A Discrete Cosine Transform (DCT) basis is used instead of the basis extracted with PCA.
Reasoning: the DCT basis has comparable performance and requires no additional storage space.
This also avoids burdening the algorithm with a large amount of external input.
- Built-in OpenCV feature tracking is used instead of libviso.
*/
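To make the first point concrete, here is an illustrative sketch (not part of this commit) of projecting one flow component onto a truncated DCT basis, analogous to the get_w helper in learn_prior.py further below. The helper name is hypothetical; note that cv::dct expects even-sized single-channel float arrays.

#include "opencv2/core.hpp"

// Illustrative only: keep the low-frequency basisSize block of the 2D DCT
// of one flow component (u or v), discarding the rest of the spectrum.
cv::Mat truncatedDCT( const cv::Mat &flowComponent, cv::Size basisSize )
{
    CV_Assert( flowComponent.type() == CV_32F );
    cv::Mat coeffs;
    cv::dct( flowComponent, coeffs ); // forward 2D DCT of the whole field
    return coeffs( cv::Rect( 0, 0, basisSize.width, basisSize.height ) ).clone();
}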
#ifndef __OPENCV_OPTFLOW_PCAFLOW_HPP__
#define __OPENCV_OPTFLOW_PCAFLOW_HPP__
#include "opencv2/core.hpp"
#include "opencv2/video.hpp"
namespace cv
{
namespace optflow
{
/*
* This class can be used for imposing a learned prior on the resulting optical flow.
* The solution will be regularized according to this prior.
* You need to generate an appropriate prior file with the "learn_prior.py" script beforehand.
*/
class CV_EXPORTS_W PCAPrior
{
private:
Mat L1;
Mat L2;
Mat c1;
Mat c2;
public:
PCAPrior( const char *pathToPrior );
int getPadding() const { return L1.size().height; }
int getBasisSize() const { return L1.size().width; }
void fillConstraints( float *A1, float *A2, float *b1, float *b2 ) const;
};
class CV_EXPORTS_W OpticalFlowPCAFlow : public DenseOpticalFlow
{
protected:
const Ptr<const PCAPrior> prior;
const Size basisSize;
const float sparseRate; // (0 .. 0.1)
const float retainedCornersFraction; // [0 .. 1]
const float occlusionsThreshold;
const float dampingFactor;
const float claheClip;
bool useOpenCL;
public:
OpticalFlowPCAFlow( Ptr<const PCAPrior> _prior = Ptr<const PCAPrior>(), const Size _basisSize = Size( 18, 14 ),
float _sparseRate = 0.024, float _retainedCornersFraction = 0.2,
float _occlusionsThreshold = 0.0003, float _dampingFactor = 0.00002, float _claheClip = 14 );
void calc( InputArray I0, InputArray I1, InputOutputArray flow );
void collectGarbage();
private:
void findSparseFeatures( UMat &from, UMat &to, std::vector<Point2f> &features,
std::vector<Point2f> &predictedFeatures ) const;
void removeOcclusions( UMat &from, UMat &to, std::vector<Point2f> &features,
std::vector<Point2f> &predictedFeatures ) const;
void getSystem( OutputArray AOut, OutputArray b1Out, OutputArray b2Out, const std::vector<Point2f> &features,
const std::vector<Point2f> &predictedFeatures, const Size size );
void getSystem( OutputArray A1Out, OutputArray A2Out, OutputArray b1Out, OutputArray b2Out,
const std::vector<Point2f> &features, const std::vector<Point2f> &predictedFeatures,
const Size size );
OpticalFlowPCAFlow& operator=( const OpticalFlowPCAFlow& ); // make it non-assignable
};
CV_EXPORTS_W Ptr<DenseOpticalFlow> createOptFlow_PCAFlow();
}
}
#endif
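For orientation, a minimal usage sketch of the interface above; the file names are hypothetical and error handling is omitted:

#include "opencv2/optflow.hpp"
#include "opencv2/imgcodecs.hpp"

int main()
{
    // Hypothetical frame pair; any two same-size 8-bit images will do.
    cv::Mat from = cv::imread( "frame0.png" );
    cv::Mat to = cv::imread( "frame1.png" );
    cv::Mat_<cv::Point2f> flow;
    // To regularize with a learned prior instead, construct the algorithm as
    // makePtr<OpticalFlowPCAFlow>( makePtr<PCAPrior>( "prior.dat" ) ).
    cv::Ptr<cv::DenseOpticalFlow> pcaflow = cv::optflow::createOptFlow_PCAFlow();
    pcaflow->calc( from, to, flow );
    return 0;
}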
/*
By downloading, copying, installing or using the software you agree to this
license. If you do not agree to this license, do not download, install,
copy or use the software.
License Agreement
For Open Source Computer Vision Library
(3-clause BSD License)
Copyright (C) 2016, OpenCV Foundation, all rights reserved.
Third party copyrights are property of their respective owners.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the names of the copyright holders nor the names of the contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
This software is provided by the copyright holders and contributors "as is" and
any express or implied warranties, including, but not limited to, the implied
warranties of merchantability and fitness for a particular purpose are
disclaimed. In no event shall copyright holders or contributors be liable for
any direct, indirect, incidental, special, exemplary, or consequential damages
(including, but not limited to, procurement of substitute goods or services;
loss of use, data, or profits; or business interruption) however caused
and on any theory of liability, whether in contract, strict liability,
or tort (including negligence or otherwise) arising in any way out of
the use of this software, even if advised of the possibility of such damage.
*/
/*
Implementation of the Global Patch Collider algorithm from the following paper:
http://research.microsoft.com/en-us/um/people/pkohli/papers/wfrik_cvpr2016.pdf
@InProceedings{Wang_2016_CVPR,
author = {Wang, Shenlong and Ryan Fanello, Sean and Rhemann, Christoph and Izadi, Shahram and Kohli, Pushmeet},
title = {The Global Patch Collider},
booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2016}
}
*/
#ifndef __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__
#define __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__
#include "opencv2/core.hpp"
namespace cv
{
namespace optflow
{
struct CV_EXPORTS_W GPCPatchDescriptor
{
static const unsigned nFeatures = 18; // number of features in a patch descriptor
Vec< double, nFeatures > feature;
GPCPatchDescriptor( const Mat *imgCh, int i, int j );
};
typedef std::pair< GPCPatchDescriptor, GPCPatchDescriptor > GPCPatchSample;
typedef std::vector< GPCPatchSample > GPCSamplesVector;
/** @brief Class encapsulating training samples.
*/
class CV_EXPORTS_W GPCTrainingSamples
{
private:
GPCSamplesVector samples;
public:
/** @brief Extracts training samples from pairs of images and the corresponding ground truth flows.
* The sizes of all the provided vectors must be equal.
*/
static Ptr< GPCTrainingSamples > create( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo,
const std::vector< String > &gt );
size_t size() const { return samples.size(); }
operator GPCSamplesVector() const { return samples; }
operator GPCSamplesVector &() { return samples; }
};
class CV_EXPORTS_W GPCTree : public Algorithm
{
public:
struct Node
{
Vec< double, GPCPatchDescriptor::nFeatures > coef; // hyperplane coefficients
double rhs;
unsigned left;
unsigned right;
bool operator==( const Node &n ) const { return coef == n.coef && rhs == n.rhs && left == n.left && right == n.right; }
};
private:
typedef GPCSamplesVector::iterator SIter;
std::vector< Node > nodes;
bool trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth );
public:
void train( GPCSamplesVector &samples );
void write( FileStorage &fs ) const;
void read( const FileNode &fn );
static Ptr< GPCTree > create() { return makePtr< GPCTree >(); }
bool operator==( const GPCTree &t ) const { return nodes == t.nodes; }
};
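A hedged note on the split rule encoded by Node: each internal node stores a hyperplane (coef, rhs), and a patch descriptor is routed to one child or the other depending on which side of the hyperplane its feature vector falls. The traversal itself is in the implementation file, which is not shown in this excerpt, so the comparison direction below is an assumption:

// Illustrative only: route a descriptor through one node's test.
// Whether ">= rhs" selects the left child is an assumption; the actual
// convention is defined in the implementation file.
unsigned nextNode( const cv::optflow::GPCTree::Node &n,
                   const cv::optflow::GPCPatchDescriptor &d )
{
    return n.coef.dot( d.feature ) >= n.rhs ? n.left : n.right;
}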
template < int T > class CV_EXPORTS_W GPCForest : public Algorithm
{
private:
GPCTree tree[T];
public:
/** @brief Train the forest using the same sample set for every tree.
* Consider using the next method instead of this one for better quality.
*/
void train( GPCSamplesVector &samples )
{
for ( int i = 0; i < T; ++i )
tree[i].train( samples );
}
/** @brief Train the forest using an individual sample set for each tree.
* This generally yields better quality than the first method.
*/
void train( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo, const std::vector< String > &gt )
{
for ( int i = 0; i < T; ++i )
{
Ptr< GPCTrainingSamples > samples = GPCTrainingSamples::create( imagesFrom, imagesTo, gt ); // Create training set for the tree
tree[i].train( *samples );
}
}
void write( FileStorage &fs ) const
{
fs << "ntrees" << T << "trees"
<< "[";
for ( int i = 0; i < T; ++i )
{
fs << "{";
tree[i].write( fs );
fs << "}";
}
fs << "]";
}
void read( const FileNode &fn )
{
CV_Assert( T == (int)fn["ntrees"] );
FileNodeIterator it = fn["trees"].begin();
for ( int i = 0; i < T; ++i, ++it )
tree[i].read( *it );
}
static Ptr< GPCForest > create() { return makePtr< GPCForest >(); }
};
}
CV_EXPORTS void write( FileStorage &fs, const String &name, const optflow::GPCTree::Node &node );
CV_EXPORTS void read( const FileNode &fn, optflow::GPCTree::Node &node, optflow::GPCTree::Node );
}
#endif
#include "opencv2/optflow.hpp"
#include <iostream>
const int nTrees = 5;
int main( int argc, const char **argv )
{
int nSequences = argc - 1;
if ( nSequences <= 0 || nSequences % 3 != 0 )
{
std::cerr << "Usage: " << argv[0] << " ImageFrom1 ImageTo1 GroundTruth1 ... ImageFromN ImageToN GroundTruthN" << std::endl;
return 1;
}
nSequences /= 3;
std::vector< cv::String > img1, img2, gt;
for ( int i = 0; i < nSequences; ++i )
{
img1.push_back( argv[1 + i * 3] );
img2.push_back( argv[1 + i * 3 + 1] );
gt.push_back( argv[1 + i * 3 + 2] );
}
cv::Ptr< cv::optflow::GPCForest< nTrees > > forest = cv::optflow::GPCForest< nTrees >::create();
forest->train( img1, img2, gt );
forest->save( "forest.dump" );
return 0;
}
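The tool above only writes the forest. Below is a hedged sketch of loading it back, assuming FileStorage auto-detects the format of the dump and that Algorithm::save wraps the data in a single top-level node; the template argument must match the nTrees value used for training:

#include "opencv2/optflow.hpp"

cv::Ptr< cv::optflow::GPCForest< 5 > > loadForest( const char *path )
{
    cv::Ptr< cv::optflow::GPCForest< 5 > > forest = cv::optflow::GPCForest< 5 >::create();
    cv::FileStorage fs( path, cv::FileStorage::READ );
    forest->read( fs.getFirstTopLevelNode() ); // GPCForest::read checks ntrees
    return forest;
}

For example, loadForest( "forest.dump" ) would reload the forest trained by this tool.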
#include "opencv2/highgui.hpp"
#include "opencv2/video.hpp"
#include "opencv2/optflow.hpp"
#include "opencv2/core/ocl.hpp"
#include <fstream>
#include <limits>
@@ -11,11 +12,13 @@ using namespace optflow;
const String keys = "{help h usage ? | | print this message }"
"{@image1 | | image1 }"
"{@image2 | | image2 }"
"{@algorithm | | [farneback, simpleflow, tvl1, deepflow, sparsetodenseflow, DISflow_ultrafast, DISflow_fast, DISflow_medium] }"
"{@algorithm | | [farneback, simpleflow, tvl1, deepflow, sparsetodenseflow, pcaflow, DISflow_ultrafast, DISflow_fast, DISflow_medium] }"
"{@groundtruth | | path to the .flo file (optional), Middlebury format }"
"{m measure |endpoint| error measure - [endpoint or angular] }"
"{r region |all | region to compute stats about [all, discontinuities, untextured] }"
"{d display | | display additional info images (pauses program execution) }";
"{d display | | display additional info images (pauses program execution) }"
"{g gpu | | use OpenCL}"
"{prior | | path to a prior file for PCAFlow}";
inline bool isFlowCorrect( const Point2f u )
{
@@ -200,6 +203,7 @@ int main( int argc, char** argv )
String error_measure = parser.get<String>("measure");
String region = parser.get<String>("region");
bool display_images = parser.has("display");
const bool useGpu = parser.has("gpu");
if ( !parser.check() )
{
@@ -207,6 +211,9 @@ int main( int argc, char** argv )
return 0;
}
cv::ocl::setUseOpenCL(useGpu);
printf("OpenCL Enabled: %u\n", useGpu && cv::ocl::haveOpenCL());
Mat i1, i2;
Mat_<Point2f> flow, ground_truth;
Mat computed_errors;
@@ -252,6 +259,15 @@ int main( int argc, char** argv )
algorithm = createOptFlow_DeepFlow();
else if ( method == "sparsetodenseflow" )
algorithm = createOptFlow_SparseToDense();
else if ( method == "pcaflow" ) {
if ( parser.has("prior") ) {
String prior = parser.get<String>("prior");
printf("Using prior file: %s\n", prior.c_str());
algorithm = makePtr<OpticalFlowPCAFlow>(makePtr<PCAPrior>(prior.c_str()));
}
else
algorithm = createOptFlow_PCAFlow();
}
else if ( method == "DISflow_ultrafast" )
algorithm = createOptFlow_DIS(DISOpticalFlow::PRESET_ULTRAFAST);
else if (method == "DISflow_fast")
@@ -267,7 +283,12 @@ int main( int argc, char** argv )
double startTick, time;
startTick = (double) getTickCount(); // measure time
-algorithm->calc(i1, i2, flow);
+if (useGpu)
+    algorithm->calc(i1, i2, flow.getUMat(ACCESS_RW));
+else
+    algorithm->calc(i1, i2, flow);
time = ((double) getTickCount() - startTick) / getTickFrequency();
printf("\nTime [s]: %.3f\n", time);
if(display_images)
......
#!/usr/bin/env python
import os
import sys
import numpy as np
import cv2
import struct
import argparse
from math import sqrt
argparser = argparse.ArgumentParser(
    description='''Use this script to generate a prior for use with PCAFlow.
The basis size here must match the corresponding parameter of PCAFlow.
Gamma should be selected experimentally.''')
argparser.add_argument('-f',
                       '--files',
                       nargs='+',
                       help='List of optical flow .flo files for learning. '
                            'You can also pass a directory here and it will be '
                            'scanned recursively for .flo files.',
                       required=True)
argparser.add_argument('--width',
                       type=int,
                       help='Width (first dimension) of the basis',
                       default=18)
argparser.add_argument('--height',
                       type=int,
                       help='Height (second dimension) of the basis',
                       default=14)
argparser.add_argument('-o',
                       '--output',
                       help='Output file for the prior',
                       required=True)
argparser.add_argument('-g',
                       '--gamma',
                       type=float,
                       help='Amount of regularization. The greater this parameter, '
                            'the stronger the impact of the regularization.',
                       required=True)
args = argparser.parse_args()

basis_size = (args.height, args.width)
gamma = args.gamma
def find_flo(pp):
    f = []
    for p in pp:
        if os.path.isfile(p):
            f.append(p)
        else:
            for root, subdirs, files in os.walk(p):
                f += map(lambda x: os.path.join(root, x),
                         filter(lambda x: x.split('.')[-1] == 'flo', files))
    return list(set(f))
def load_flo(flo):
    with open(flo, 'rb') as f:
        magic = np.fromfile(f, np.float32, count=1)[0]
        if magic != 202021.25:
            raise ValueError('Magic number incorrect. Invalid .flo file: %s' % flo)
        w = np.fromfile(f, np.int32, count=1)[0]
        h = np.fromfile(f, np.int32, count=1)[0]
        print('Reading %dx%d flo file %s' % (w, h, flo))
        data = np.fromfile(f, np.float32, count=2 * w * h)
        # Reshape data into a 3D array (rows, columns, bands)
        flow = np.reshape(data, (h, w, 2))
        return flow[:, :, 0], flow[:, :, 1]
def get_w(m):
    s = m.shape
    w = cv2.dct(m)
    w *= 2.0 / sqrt(s[0] * s[1])
    #w[0,0] *= 0.5
    w[:, 0] *= sqrt(0.5)
    w[0, :] *= sqrt(0.5)
    w = w[0:basis_size[0], 0:basis_size[1]].transpose().flatten()
    return w
w1 = []
w2 = []

for flo in find_flo(args.files):
    x, y = load_flo(flo)
    w1.append(get_w(x))
    w2.append(get_w(y))

w1mean = sum(w1) / len(w1)
w2mean = sum(w2) / len(w2)

for i in range(len(w1)):
    w1[i] -= w1mean
for i in range(len(w2)):
    w2[i] -= w2mean

Q1 = sum([w1[i].reshape(-1, 1).dot(w1[i].reshape(1, -1))
          for i in range(len(w1))]) / len(w1)
Q2 = sum([w2[i].reshape(-1, 1).dot(w2[i].reshape(1, -1))
          for i in range(len(w2))]) / len(w2)
Q1 = np.matrix(Q1)
Q2 = np.matrix(Q2)

if len(w1) > 1:
    while True:
        try:
            L1 = np.linalg.cholesky(Q1)
            break
        except np.linalg.LinAlgError:
            # Q1 is not (numerically) positive definite: shift its spectrum
            # just past zero with a multiple of the identity and retry.
            mev = min(np.linalg.eig(Q1)[0]).real
            assert mev < 0
            print('Q1', mev)
            if -mev < 1e-6:
                mev = -1e-6
            Q1 += (-mev * 1.000001) * np.identity(Q1.shape[0])
    while True:
        try:
            L2 = np.linalg.cholesky(Q2)
            break
        except np.linalg.LinAlgError:
            mev = min(np.linalg.eig(Q2)[0]).real
            assert mev < 0
            print('Q2', mev)
            if -mev < 1e-6:
                mev = -1e-6
            Q2 += (-mev * 1.000001) * np.identity(Q2.shape[0])
else:
    L1 = np.identity(Q1.shape[0])
    L2 = np.identity(Q2.shape[0])

L1 = np.linalg.inv(L1) * gamma
L2 = np.linalg.inv(L2) * gamma

assert L1.shape == L2.shape
assert L1.shape[0] == L1.shape[1]

with open(args.output, 'wb') as f:
    f.write(struct.pack('I', L1.shape[0]))
    f.write(struct.pack('I', L1.shape[1]))
    for i in range(L1.shape[0]):
        for j in range(L1.shape[1]):
            f.write(struct.pack('f', L1[i, j]))
    for i in range(L2.shape[0]):
        for j in range(L2.shape[1]):
            f.write(struct.pack('f', L2[i, j]))
    b1 = L1.dot(w1mean.reshape(-1, 1))
    b2 = L2.dot(w2mean.reshape(-1, 1))
    assert L1.shape[0] == b1.shape[0]
    for i in range(b1.shape[0]):
        f.write(struct.pack('f', b1[i, 0]))
    for i in range(b2.shape[0]):
        f.write(struct.pack('f', b2[i, 0]))
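For reference, the binary layout written above (and presumably parsed by PCAPrior, whose implementation is not shown in this excerpt) is: two native-endian uint32 dimensions, then L1 and L2 in row-major order as 32-bit floats, then the vectors b1 and b2. A hedged C-style reader sketch:

#include <cstdio>
#include <vector>

// Hypothetical reader for the prior file produced by learn_prior.py:
// header = rows, cols as native-endian uint32 (struct.pack('I', ...));
// body = L1 then L2 (rows*cols floats each), then b1 and b2 (rows floats each).
struct PriorData
{
    unsigned rows, cols;
    std::vector< float > L1, L2, b1, b2;
};

static bool readPrior( const char *path, PriorData &p )
{
    FILE *f = fopen( path, "rb" );
    if ( !f )
        return false;
    bool ok = fread( &p.rows, sizeof( unsigned ), 1, f ) == 1 &&
              fread( &p.cols, sizeof( unsigned ), 1, f ) == 1;
    if ( ok )
    {
        p.L1.resize( (size_t)p.rows * p.cols );
        p.L2.resize( (size_t)p.rows * p.cols );
        p.b1.resize( p.rows );
        p.b2.resize( p.rows );
        ok = fread( p.L1.data(), sizeof( float ), p.L1.size(), f ) == p.L1.size() &&
             fread( p.L2.data(), sizeof( float ), p.L2.size(), f ) == p.L2.size() &&
             fread( p.b1.data(), sizeof( float ), p.b1.size(), f ) == p.b1.size() &&
             fread( p.b2.data(), sizeof( float ), p.b2.size(), f ) == p.b2.size();
    }
    fclose( f );
    return ok;
}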