Commit dd9b2eb4 authored by Vadim Pisarevsky

Merge pull request #710 from VladX:optflow

parents 9a342b51 1764f924
@@ -43,6 +43,9 @@ the use of this software, even if advised of the possibility of such damage.
#include "opencv2/core.hpp"
#include "opencv2/video.hpp"
#include "opencv2/optflow/pcaflow.hpp"
#include "opencv2/optflow/sparse_matching_gpc.hpp"
/**
@defgroup optflow Optical Flow Algorithms
......
/*
By downloading, copying, installing or using the software you agree to this
license. If you do not agree to this license, do not download, install,
copy or use the software.
License Agreement
For Open Source Computer Vision Library
(3-clause BSD License)
Copyright (C) 2016, OpenCV Foundation, all rights reserved.
Third party copyrights are property of their respective owners.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the names of the copyright holders nor the names of the contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
This software is provided by the copyright holders and contributors "as is" and
any express or implied warranties, including, but not limited to, the implied
warranties of merchantability and fitness for a particular purpose are
disclaimed. In no event shall copyright holders or contributors be liable for
any direct, indirect, incidental, special, exemplary, or consequential damages
(including, but not limited to, procurement of substitute goods or services;
loss of use, data, or profits; or business interruption) however caused
and on any theory of liability, whether in contract, strict liability,
or tort (including negligence or otherwise) arising in any way out of
the use of this software, even if advised of the possibility of such damage.
*/
/*
Implementation of the PCAFlow algorithm from the following paper:
http://files.is.tue.mpg.de/black/papers/cvpr2015_pcaflow.pdf
@inproceedings{Wulff:CVPR:2015,
title = {Efficient Sparse-to-Dense Optical Flow Estimation using a Learned Basis and Layers},
author = {Wulff, Jonas and Black, Michael J.},
booktitle = { IEEE Conf. on Computer Vision and Pattern Recognition (CVPR) 2015},
month = jun,
year = {2015}
}
There are some key differences which distinguish this algorithm from the original PCAFlow (see paper):
- A Discrete Cosine Transform (DCT) basis is used instead of the basis extracted with PCA.
Reasoning: the DCT basis has comparable performance and requires no additional storage space.
This also avoids burdening the algorithm with a large amount of external input.
- Built-in OpenCV feature tracking is used instead of libviso.
*/
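To make the first point concrete, here is an illustrative sketch (not part of this commit) of projecting one flow component onto a truncated DCT basis, analogous to the get_w helper in learn_prior.py further below. The helper name is hypothetical; note that cv::dct expects even-sized single-channel float arrays.

#include "opencv2/core.hpp"

// Illustrative only: keep the low-frequency basisSize block of the 2D DCT
// of one flow component (u or v), discarding the rest of the spectrum.
cv::Mat truncatedDCT( const cv::Mat &flowComponent, cv::Size basisSize )
{
    CV_Assert( flowComponent.type() == CV_32F );
    cv::Mat coeffs;
    cv::dct( flowComponent, coeffs ); // forward 2D DCT of the whole field
    return coeffs( cv::Rect( 0, 0, basisSize.width, basisSize.height ) ).clone();
}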
#ifndef __OPENCV_OPTFLOW_PCAFLOW_HPP__
#define __OPENCV_OPTFLOW_PCAFLOW_HPP__
#include "opencv2/core.hpp"
#include "opencv2/video.hpp"
namespace cv
{
namespace optflow
{
/*
* This class can be used for imposing a learned prior on the resulting optical flow.
* The solution will be regularized according to this prior.
* You need to generate an appropriate prior file with the "learn_prior.py" script beforehand.
*/
class CV_EXPORTS_W PCAPrior
{
private:
Mat L1;
Mat L2;
Mat c1;
Mat c2;
public:
PCAPrior( const char *pathToPrior );
int getPadding() const { return L1.size().height; }
int getBasisSize() const { return L1.size().width; }
void fillConstraints( float *A1, float *A2, float *b1, float *b2 ) const;
};
class CV_EXPORTS_W OpticalFlowPCAFlow : public DenseOpticalFlow
{
protected:
const Ptr<const PCAPrior> prior;
const Size basisSize;
const float sparseRate; // (0 .. 0.1)
const float retainedCornersFraction; // [0 .. 1]
const float occlusionsThreshold;
const float dampingFactor;
const float claheClip;
bool useOpenCL;
public:
OpticalFlowPCAFlow( Ptr<const PCAPrior> _prior = Ptr<const PCAPrior>(), const Size _basisSize = Size( 18, 14 ),
float _sparseRate = 0.024, float _retainedCornersFraction = 0.2,
float _occlusionsThreshold = 0.0003, float _dampingFactor = 0.00002, float _claheClip = 14 );
void calc( InputArray I0, InputArray I1, InputOutputArray flow );
void collectGarbage();
private:
void findSparseFeatures( UMat &from, UMat &to, std::vector<Point2f> &features,
std::vector<Point2f> &predictedFeatures ) const;
void removeOcclusions( UMat &from, UMat &to, std::vector<Point2f> &features,
std::vector<Point2f> &predictedFeatures ) const;
void getSystem( OutputArray AOut, OutputArray b1Out, OutputArray b2Out, const std::vector<Point2f> &features,
const std::vector<Point2f> &predictedFeatures, const Size size );
void getSystem( OutputArray A1Out, OutputArray A2Out, OutputArray b1Out, OutputArray b2Out,
const std::vector<Point2f> &features, const std::vector<Point2f> &predictedFeatures,
const Size size );
OpticalFlowPCAFlow& operator=( const OpticalFlowPCAFlow& ); // make it non-assignable
};
CV_EXPORTS_W Ptr<DenseOpticalFlow> createOptFlow_PCAFlow();
}
}
#endif
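For orientation, a minimal usage sketch of the interface above; the file names are hypothetical and error handling is omitted:

#include "opencv2/optflow.hpp"
#include "opencv2/imgcodecs.hpp"

int main()
{
    // Hypothetical frame pair; any two same-size 8-bit images will do.
    cv::Mat from = cv::imread( "frame0.png" );
    cv::Mat to = cv::imread( "frame1.png" );
    cv::Mat_<cv::Point2f> flow;
    // To regularize with a learned prior instead, construct the algorithm as
    // makePtr<OpticalFlowPCAFlow>( makePtr<PCAPrior>( "prior.dat" ) ).
    cv::Ptr<cv::DenseOpticalFlow> pcaflow = cv::optflow::createOptFlow_PCAFlow();
    pcaflow->calc( from, to, flow );
    return 0;
}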
/*
By downloading, copying, installing or using the software you agree to this
license. If you do not agree to this license, do not download, install,
copy or use the software.
License Agreement
For Open Source Computer Vision Library
(3-clause BSD License)
Copyright (C) 2016, OpenCV Foundation, all rights reserved.
Third party copyrights are property of their respective owners.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the names of the copyright holders nor the names of the contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
This software is provided by the copyright holders and contributors "as is" and
any express or implied warranties, including, but not limited to, the implied
warranties of merchantability and fitness for a particular purpose are
disclaimed. In no event shall copyright holders or contributors be liable for
any direct, indirect, incidental, special, exemplary, or consequential damages
(including, but not limited to, procurement of substitute goods or services;
loss of use, data, or profits; or business interruption) however caused
and on any theory of liability, whether in contract, strict liability,
or tort (including negligence or otherwise) arising in any way out of
the use of this software, even if advised of the possibility of such damage.
*/
/*
Implementation of the Global Patch Collider algorithm from the following paper:
http://research.microsoft.com/en-us/um/people/pkohli/papers/wfrik_cvpr2016.pdf
@InProceedings{Wang_2016_CVPR,
author = {Wang, Shenlong and Ryan Fanello, Sean and Rhemann, Christoph and Izadi, Shahram and Kohli, Pushmeet},
title = {The Global Patch Collider},
booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2016}
}
*/
#ifndef __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__
#define __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__
#include "opencv2/core.hpp"
namespace cv
{
namespace optflow
{
struct CV_EXPORTS_W GPCPatchDescriptor
{
static const unsigned nFeatures = 18; // number of features in a patch descriptor
Vec< double, nFeatures > feature;
GPCPatchDescriptor( const Mat *imgCh, int i, int j );
};
typedef std::pair< GPCPatchDescriptor, GPCPatchDescriptor > GPCPatchSample;
typedef std::vector< GPCPatchSample > GPCSamplesVector;
/** @brief Class encapsulating training samples.
*/
class CV_EXPORTS_W GPCTrainingSamples
{
private:
GPCSamplesVector samples;
public:
/** @brief Extracts training samples from pairs of images and the corresponding ground truth flows.
* The sizes of all the provided vectors must be equal.
*/
static Ptr< GPCTrainingSamples > create( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo,
const std::vector< String > &gt );
size_t size() const { return samples.size(); }
operator GPCSamplesVector() const { return samples; }
operator GPCSamplesVector &() { return samples; }
};
class CV_EXPORTS_W GPCTree : public Algorithm
{
public:
struct Node
{
Vec< double, GPCPatchDescriptor::nFeatures > coef; // hyperplane coefficients
double rhs;
unsigned left;
unsigned right;
bool operator==( const Node &n ) const { return coef == n.coef && rhs == n.rhs && left == n.left && right == n.right; }
};
private:
typedef GPCSamplesVector::iterator SIter;
std::vector< Node > nodes;
bool trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth );
public:
void train( GPCSamplesVector &samples );
void write( FileStorage &fs ) const;
void read( const FileNode &fn );
static Ptr< GPCTree > create() { return makePtr< GPCTree >(); }
bool operator==( const GPCTree &t ) const { return nodes == t.nodes; }
};
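A hedged note on the split rule encoded by Node: each internal node stores a hyperplane (coef, rhs), and a patch descriptor is routed to one child or the other depending on which side of the hyperplane its feature vector falls. The traversal itself is in the implementation file, which is not shown in this excerpt, so the comparison direction below is an assumption:

// Illustrative only: route a descriptor through one node's test.
// Whether ">= rhs" selects the left child is an assumption; the actual
// convention is defined in the implementation file.
unsigned nextNode( const cv::optflow::GPCTree::Node &n,
                   const cv::optflow::GPCPatchDescriptor &d )
{
    return n.coef.dot( d.feature ) >= n.rhs ? n.left : n.right;
}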
template < int T > class CV_EXPORTS_W GPCForest : public Algorithm
{
private:
GPCTree tree[T];
public:
/** @brief Train the forest using the same sample set for every tree.
* Consider using the next method instead of this one for better quality.
*/
void train( GPCSamplesVector &samples )
{
for ( int i = 0; i < T; ++i )
tree[i].train( samples );
}
/** @brief Train the forest using an individual sample set for each tree.
* This generally yields better quality than the first method.
*/
void train( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo, const std::vector< String > &gt )
{
for ( int i = 0; i < T; ++i )
{
Ptr< GPCTrainingSamples > samples = GPCTrainingSamples::create( imagesFrom, imagesTo, gt ); // Create training set for the tree
tree[i].train( *samples );
}
}
void write( FileStorage &fs ) const
{
fs << "ntrees" << T << "trees"
<< "[";
for ( int i = 0; i < T; ++i )
{
fs << "{";
tree[i].write( fs );
fs << "}";
}
fs << "]";
}
void read( const FileNode &fn )
{
CV_Assert( T == (int)fn["ntrees"] );
FileNodeIterator it = fn["trees"].begin();
for ( int i = 0; i < T; ++i, ++it )
tree[i].read( *it );
}
static Ptr< GPCForest > create() { return makePtr< GPCForest >(); }
};
}
CV_EXPORTS void write( FileStorage &fs, const String &name, const optflow::GPCTree::Node &node );
CV_EXPORTS void read( const FileNode &fn, optflow::GPCTree::Node &node, optflow::GPCTree::Node );
}
#endif
#include "opencv2/optflow.hpp"
#include <iostream>
const int nTrees = 5;
int main( int argc, const char **argv )
{
int nSequences = argc - 1;
if ( nSequences <= 0 || nSequences % 3 != 0 )
{
std::cerr << "Usage: " << argv[0] << " ImageFrom1 ImageTo1 GroundTruth1 ... ImageFromN ImageToN GroundTruthN" << std::endl;
return 1;
}
nSequences /= 3;
std::vector< cv::String > img1, img2, gt;
for ( int i = 0; i < nSequences; ++i )
{
img1.push_back( argv[1 + i * 3] );
img2.push_back( argv[1 + i * 3 + 1] );
gt.push_back( argv[1 + i * 3 + 2] );
}
cv::Ptr< cv::optflow::GPCForest< nTrees > > forest = cv::optflow::GPCForest< nTrees >::create();
forest->train( img1, img2, gt );
forest->save( "forest.dump" );
return 0;
}
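The tool above only writes the forest. Below is a hedged sketch of loading it back, assuming FileStorage auto-detects the format of the dump and that Algorithm::save wraps the data in a single top-level node; the template argument must match the nTrees value used for training:

#include "opencv2/optflow.hpp"

cv::Ptr< cv::optflow::GPCForest< 5 > > loadForest( const char *path )
{
    cv::Ptr< cv::optflow::GPCForest< 5 > > forest = cv::optflow::GPCForest< 5 >::create();
    cv::FileStorage fs( path, cv::FileStorage::READ );
    forest->read( fs.getFirstTopLevelNode() ); // GPCForest::read checks ntrees
    return forest;
}

For example, loadForest( "forest.dump" ) would reload the forest trained by this tool.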
#include "opencv2/highgui.hpp"
#include "opencv2/video.hpp"
#include "opencv2/optflow.hpp"
#include "opencv2/core/ocl.hpp"
#include <fstream>
#include <limits>
@@ -11,11 +12,13 @@ using namespace optflow;
const String keys = "{help h usage ? | | print this message }"
"{@image1 | | image1 }"
"{@image2 | | image2 }"
"{@algorithm | | [farneback, simpleflow, tvl1, deepflow, sparsetodenseflow, DISflow_ultrafast, DISflow_fast, DISflow_medium] }"
"{@algorithm | | [farneback, simpleflow, tvl1, deepflow, sparsetodenseflow, pcaflow, DISflow_ultrafast, DISflow_fast, DISflow_medium] }"
"{@groundtruth | | path to the .flo file (optional), Middlebury format }"
"{m measure |endpoint| error measure - [endpoint or angular] }"
"{r region |all | region to compute stats about [all, discontinuities, untextured] }"
"{d display | | display additional info images (pauses program execution) }";
"{d display | | display additional info images (pauses program execution) }"
"{g gpu | | use OpenCL}"
"{prior | | path to a prior file for PCAFlow}";
inline bool isFlowCorrect( const Point2f u )
{
@@ -200,6 +203,7 @@ int main( int argc, char** argv )
String error_measure = parser.get<String>("measure");
String region = parser.get<String>("region");
bool display_images = parser.has("display");
const bool useGpu = parser.has("gpu");
if ( !parser.check() )
{
@@ -207,6 +211,9 @@ int main( int argc, char** argv )
return 0;
}
cv::ocl::setUseOpenCL(useGpu);
printf("OpenCL Enabled: %u\n", useGpu && cv::ocl::haveOpenCL());
Mat i1, i2;
Mat_<Point2f> flow, ground_truth;
Mat computed_errors;
@@ -252,6 +259,15 @@ int main( int argc, char** argv )
algorithm = createOptFlow_DeepFlow();
else if ( method == "sparsetodenseflow" )
algorithm = createOptFlow_SparseToDense();
else if ( method == "pcaflow" ) {
if ( parser.has("prior") ) {
String prior = parser.get<String>("prior");
printf("Using prior file: %s\n", prior.c_str());
algorithm = makePtr<OpticalFlowPCAFlow>(makePtr<PCAPrior>(prior.c_str()));
}
else
algorithm = createOptFlow_PCAFlow();
}
else if ( method == "DISflow_ultrafast" )
algorithm = createOptFlow_DIS(DISOpticalFlow::PRESET_ULTRAFAST);
else if (method == "DISflow_fast")
@@ -267,7 +283,12 @@ int main( int argc, char** argv )
double startTick, time;
startTick = (double) getTickCount(); // measure time
-algorithm->calc(i1, i2, flow);
+if (useGpu)
+    algorithm->calc(i1, i2, flow.getUMat(ACCESS_RW));
+else
+    algorithm->calc(i1, i2, flow);
time = ((double) getTickCount() - startTick) / getTickFrequency();
printf("\nTime [s]: %.3f\n", time);
if(display_images)
......
#!/usr/bin/env python
import os
import sys
import numpy as np
import cv2
import struct
import argparse
from math import sqrt
argparser = argparse.ArgumentParser(
    description='''Use this script to generate a prior for use with PCAFlow.
The basis size here must match the corresponding parameter of PCAFlow.
Gamma should be selected experimentally.''')
argparser.add_argument('-f',
                       '--files',
                       nargs='+',
                       help='List of optical flow .flo files for learning. '
                            'You can also pass a directory here and it will be '
                            'scanned recursively for .flo files.',
                       required=True)
argparser.add_argument('--width',
                       type=int,
                       help='Width (first dimension) of the basis',
                       default=18)
argparser.add_argument('--height',
                       type=int,
                       help='Height (second dimension) of the basis',
                       default=14)
argparser.add_argument('-o',
                       '--output',
                       help='Output file for the prior',
                       required=True)
argparser.add_argument('-g',
                       '--gamma',
                       type=float,
                       help='Amount of regularization. The greater this parameter, '
                            'the stronger the impact of the regularization.',
                       required=True)
args = argparser.parse_args()

basis_size = (args.height, args.width)
gamma = args.gamma
def find_flo(pp):
    f = []
    for p in pp:
        if os.path.isfile(p):
            f.append(p)
        else:
            for root, subdirs, files in os.walk(p):
                f += map(lambda x: os.path.join(root, x),
                         filter(lambda x: x.split('.')[-1] == 'flo', files))
    return list(set(f))
def load_flo(flo):
    with open(flo, 'rb') as f:
        magic = np.fromfile(f, np.float32, count=1)[0]
        if magic != 202021.25:
            raise ValueError('Magic number incorrect. Invalid .flo file: %s' % flo)
        w = np.fromfile(f, np.int32, count=1)[0]
        h = np.fromfile(f, np.int32, count=1)[0]
        print('Reading %dx%d flo file %s' % (w, h, flo))
        data = np.fromfile(f, np.float32, count=2 * w * h)
        # Reshape data into a 3D array (rows, columns, bands)
        flow = np.reshape(data, (h, w, 2))
        return flow[:, :, 0], flow[:, :, 1]
def get_w(m):
    s = m.shape
    w = cv2.dct(m)
    w *= 2.0 / sqrt(s[0] * s[1])
    #w[0,0] *= 0.5
    w[:, 0] *= sqrt(0.5)
    w[0, :] *= sqrt(0.5)
    w = w[0:basis_size[0], 0:basis_size[1]].transpose().flatten()
    return w
w1 = []
w2 = []

for flo in find_flo(args.files):
    x, y = load_flo(flo)
    w1.append(get_w(x))
    w2.append(get_w(y))

w1mean = sum(w1) / len(w1)
w2mean = sum(w2) / len(w2)

for i in range(len(w1)):
    w1[i] -= w1mean
for i in range(len(w2)):
    w2[i] -= w2mean

Q1 = sum([w1[i].reshape(-1, 1).dot(w1[i].reshape(1, -1))
          for i in range(len(w1))]) / len(w1)
Q2 = sum([w2[i].reshape(-1, 1).dot(w2[i].reshape(1, -1))
          for i in range(len(w2))]) / len(w2)
Q1 = np.matrix(Q1)
Q2 = np.matrix(Q2)

if len(w1) > 1:
    while True:
        try:
            L1 = np.linalg.cholesky(Q1)
            break
        except np.linalg.LinAlgError:
            # Q1 is not (numerically) positive definite: shift its spectrum
            # just past zero with a multiple of the identity and retry.
            mev = min(np.linalg.eig(Q1)[0]).real
            assert mev < 0
            print('Q1', mev)
            if -mev < 1e-6:
                mev = -1e-6
            Q1 += (-mev * 1.000001) * np.identity(Q1.shape[0])
    while True:
        try:
            L2 = np.linalg.cholesky(Q2)
            break
        except np.linalg.LinAlgError:
            mev = min(np.linalg.eig(Q2)[0]).real
            assert mev < 0
            print('Q2', mev)
            if -mev < 1e-6:
                mev = -1e-6
            Q2 += (-mev * 1.000001) * np.identity(Q2.shape[0])
else:
    L1 = np.identity(Q1.shape[0])
    L2 = np.identity(Q2.shape[0])

L1 = np.linalg.inv(L1) * gamma
L2 = np.linalg.inv(L2) * gamma

assert L1.shape == L2.shape
assert L1.shape[0] == L1.shape[1]

with open(args.output, 'wb') as f:
    f.write(struct.pack('I', L1.shape[0]))
    f.write(struct.pack('I', L1.shape[1]))
    for i in range(L1.shape[0]):
        for j in range(L1.shape[1]):
            f.write(struct.pack('f', L1[i, j]))
    for i in range(L2.shape[0]):
        for j in range(L2.shape[1]):
            f.write(struct.pack('f', L2[i, j]))
    b1 = L1.dot(w1mean.reshape(-1, 1))
    b2 = L2.dot(w2mean.reshape(-1, 1))
    assert L1.shape[0] == b1.shape[0]
    for i in range(b1.shape[0]):
        f.write(struct.pack('f', b1[i, 0]))
    for i in range(b2.shape[0]):
        f.write(struct.pack('f', b2[i, 0]))
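For reference, the binary layout written above (and presumably parsed by PCAPrior, whose implementation is not shown in this excerpt) is: two native-endian uint32 dimensions, then L1 and L2 in row-major order as 32-bit floats, then the vectors b1 and b2. A hedged C-style reader sketch:

#include <cstdio>
#include <vector>

// Hypothetical reader for the prior file produced by learn_prior.py:
// header = rows, cols as native-endian uint32 (struct.pack('I', ...));
// body = L1 then L2 (rows*cols floats each), then b1 and b2 (rows floats each).
struct PriorData
{
    unsigned rows, cols;
    std::vector< float > L1, L2, b1, b2;
};

static bool readPrior( const char *path, PriorData &p )
{
    FILE *f = fopen( path, "rb" );
    if ( !f )
        return false;
    bool ok = fread( &p.rows, sizeof( unsigned ), 1, f ) == 1 &&
              fread( &p.cols, sizeof( unsigned ), 1, f ) == 1;
    if ( ok )
    {
        p.L1.resize( (size_t)p.rows * p.cols );
        p.L2.resize( (size_t)p.rows * p.cols );
        p.b1.resize( p.rows );
        p.b2.resize( p.rows );
        ok = fread( p.L1.data(), sizeof( float ), p.L1.size(), f ) == p.L1.size() &&
             fread( p.L2.data(), sizeof( float ), p.L2.size(), f ) == p.L2.size() &&
             fread( p.b1.data(), sizeof( float ), p.b1.size(), f ) == p.b1.size() &&
             fread( p.b2.data(), sizeof( float ), p.b2.size(), f ) == p.b2.size();
    }
    fclose( f );
    return ok;
}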