Commit ac62d70f authored by Vladislav Samsonov's avatar Vladislav Samsonov Committed by Maksim Shabunin

[GSoC] Implementation of the Global Patch Collider and demo for PCAFlow (#752)

* Minor fixes

* Start adding correspondence finding

* Added finding of correspondences using GPC

* New evaluation tool for GPC

* Changed default parameters

* Display ground truth in the evaluation tool

* Added training tool for MPI Sintel dataset

* Added the training tool for Middlebury dataset

* Added some OpenCL optimization

* Added explanatory notes

* Minor improvements: time measurements + small OpenCL optimization

* Added demos

* Fixed warnings

* Make parameter struct assignable

* Fix warning

* Proper command line argument usage

* Prettified training tool, added parameters

* Fixed VS warning

* Fixed VS warning

* Use compressed forest.yml.gz files by default to save space

* Added OpenCL flag to the evaluation tool

* Updated documentation

* Major speed and memory improvements:
1) Added new (optional) type of patch descriptors which are much faster. Retraining with option --descriptor-type=1 is required.
2) Got rid of hash table for descriptors, less memory usage.

* Fixed various floating point errors related to precision.
SIMD for dot product; forest traversal is a little bit faster now.

* Tolerant floating point comparison

* Triplets

* Added comment

* Choosing negative sample among nearest neighbors

* Fix warning

* Use parallel_for_() in critical places. Performance improvements.

* Simulated annealing heuristic

* Moved OpenCL kernel to separate file

* Moved implementation to source file

* Added basic accuracy tests for GPC and PCAFlow

* Fixing warnings

* Test accuracy constraints were too strict

* Test accuracy constraints were too strict

* Make tests more lightweight
parent 25575af6
@@ -52,3 +52,19 @@
   pages={25--36},
   year={2004}
 }
+@inproceedings{Wulff:CVPR:2015,
+  title = {Efficient Sparse-to-Dense Optical Flow Estimation using a Learned Basis and Layers},
+  author = {Wulff, Jonas and Black, Michael J.},
+  booktitle = {IEEE Conf. on Computer Vision and Pattern Recognition (CVPR)},
+  month = {June},
+  year = {2015}
+}
+@inproceedings{Wang_2016_CVPR,
+  author = {Wang, Shenlong and Ryan Fanello, Sean and Rhemann, Christoph and Izadi, Shahram and Kohli, Pushmeet},
+  title = {The Global Patch Collider},
+  booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+  month = {June},
+  year = {2016}
+}
@@ -43,9 +43,6 @@ the use of this software, even if advised of the possibility of such damage.
 #include "opencv2/core.hpp"
 #include "opencv2/video.hpp"
-#include "opencv2/optflow/pcaflow.hpp"
-#include "opencv2/optflow/sparse_matching_gpc.hpp"
 /**
 @defgroup optflow Optical Flow Algorithms
@@ -69,6 +66,9 @@ Functions reading and writing .flo files in "Middlebury" format, see: <http://vi
 */
+#include "opencv2/optflow/pcaflow.hpp"
+#include "opencv2/optflow/sparse_matching_gpc.hpp"
 namespace cv
 {
 namespace optflow
...
@@ -37,23 +37,19 @@ or tort (including negligence or otherwise) arising in any way out of
 the use of this software, even if advised of the possibility of such damage.
 */
-/*
-Implementation of the PCAFlow algorithm from the following paper:
-http://files.is.tue.mpg.de/black/papers/cvpr2015_pcaflow.pdf
-
-@inproceedings{Wulff:CVPR:2015,
-  title = {Efficient Sparse-to-Dense Optical Flow Estimation using a Learned Basis and Layers},
-  author = {Wulff, Jonas and Black, Michael J.},
-  booktitle = { IEEE Conf. on Computer Vision and Pattern Recognition (CVPR) 2015},
-  month = jun,
-  year = {2015}
-}
-
-There are some key differences which distinguish this algorithm from the original PCAFlow (see paper):
-- Discrete Cosine Transform basis is used instead of basis extracted with PCA.
-  Reasoning: DCT basis has comparable performance and it doesn't require additional storage space.
-  Also, this decision helps to avoid overloading the algorithm with a lot of external input.
-- Usage of built-in OpenCV feature tracking instead of libviso.
-*/
+/**
+ * @file pcaflow.hpp
+ * @author Vladislav Samsonov <vvladxx@gmail.com>
+ * @brief Implementation of the PCAFlow algorithm from the following paper:
+ * http://files.is.tue.mpg.de/black/papers/cvpr2015_pcaflow.pdf
+ *
+ * @cite Wulff:CVPR:2015
+ *
+ * There are some key differences which distinguish this algorithm from the original PCAFlow (see paper):
+ * - Discrete Cosine Transform basis is used instead of basis extracted with PCA.
+ *   Reasoning: DCT basis has comparable performance and it doesn't require additional storage space.
+ *   Also, this decision helps to avoid overloading the algorithm with a lot of external input.
+ * - Usage of built-in OpenCV feature tracking instead of libviso.
+ */
 #ifndef __OPENCV_OPTFLOW_PCAFLOW_HPP__
@@ -67,7 +63,10 @@ namespace cv
 namespace optflow
 {
-/*
+//! @addtogroup optflow
+//! @{
+
+/** @brief
  * This class can be used for imposing a learned prior on the resulting optical flow.
  * Solution will be regularized according to this prior.
  * You need to generate appropriate prior file with "learn_prior.py" script beforehand.
@@ -90,6 +89,8 @@ public:
   void fillConstraints( float *A1, float *A2, float *b1, float *b2 ) const;
 };
+/** @brief PCAFlow algorithm.
+ */
 class CV_EXPORTS_W OpticalFlowPCAFlow : public DenseOpticalFlow
 {
 protected:
@@ -103,6 +104,15 @@ protected:
   bool useOpenCL;
 public:
+  /** @brief Creates an instance of PCAFlow algorithm.
+   * @param _prior Learned prior or no prior (default). @see cv::optflow::PCAPrior
+   * @param _basisSize Number of basis vectors.
+   * @param _sparseRate Controls density of sparse matches.
+   * @param _retainedCornersFraction Retained corners fraction.
+   * @param _occlusionsThreshold Occlusion threshold.
+   * @param _dampingFactor Regularization term for solving least-squares. It is not related to the prior regularization.
+   * @param _claheClip Clip parameter for CLAHE.
+   */
   OpticalFlowPCAFlow( Ptr<const PCAPrior> _prior = Ptr<const PCAPrior>(), const Size _basisSize = Size( 18, 14 ),
                       float _sparseRate = 0.024, float _retainedCornersFraction = 0.2,
                       float _occlusionsThreshold = 0.0003, float _dampingFactor = 0.00002, float _claheClip = 14 );
@@ -127,7 +137,12 @@ private:
   OpticalFlowPCAFlow& operator=( const OpticalFlowPCAFlow& ); // make it non-assignable
 };
+/** @brief Creates an instance of PCAFlow
+ */
 CV_EXPORTS_W Ptr<DenseOpticalFlow> createOptFlow_PCAFlow();
+
+//! @}
 }
 }
...
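For orientation, here is a minimal usage sketch of the interface declared above. It is illustrative only and not part of the diff; the image paths are placeholders.

#include "opencv2/imgcodecs.hpp"
#include "opencv2/optflow.hpp"

int main()
{
  cv::Mat from = cv::imread( "frame0.png" ); // placeholder input frames
  cv::Mat to = cv::imread( "frame1.png" );
  cv::Mat flow;
  cv::Ptr< cv::DenseOpticalFlow > algo = cv::optflow::createOptFlow_PCAFlow();
  algo->calc( from, to, flow ); // dense flow: CV_32FC2, one (u, v) vector per pixel
  return 0;
}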
@@ -37,68 +37,135 @@ or tort (including negligence or otherwise) arising in any way out of
 the use of this software, even if advised of the possibility of such damage.
 */
-/*
-Implementation of the Global Patch Collider algorithm from the following paper:
-http://research.microsoft.com/en-us/um/people/pkohli/papers/wfrik_cvpr2016.pdf
-
-@InProceedings{Wang_2016_CVPR,
-  author = {Wang, Shenlong and Ryan Fanello, Sean and Rhemann, Christoph and Izadi, Shahram and Kohli, Pushmeet},
-  title = {The Global Patch Collider},
-  booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-  month = {June},
-  year = {2016}
-}
-*/
+/**
+ * @file sparse_matching_gpc.hpp
+ * @author Vladislav Samsonov <vvladxx@gmail.com>
+ * @brief Implementation of the Global Patch Collider.
+ *
+ * Implementation of the Global Patch Collider algorithm from the following paper:
+ * http://research.microsoft.com/en-us/um/people/pkohli/papers/wfrik_cvpr2016.pdf
+ *
+ * @cite Wang_2016_CVPR
+ */
 #ifndef __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__
 #define __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__
 #include "opencv2/core.hpp"
+#include "opencv2/core/hal/intrin.hpp"
+#include "opencv2/imgproc.hpp"
 namespace cv
 {
 namespace optflow
 {
+//! @addtogroup optflow
+//! @{
 struct CV_EXPORTS_W GPCPatchDescriptor
 {
-  static const unsigned nFeatures = 18; // number of features in a patch descriptor
+  static const unsigned nFeatures = 18; //!< number of features in a patch descriptor
   Vec< double, nFeatures > feature;
-  GPCPatchDescriptor( const Mat *imgCh, int i, int j );
+
+  double dot( const Vec< double, nFeatures > &coef ) const;
+
+  void markAsSeparated() { feature[0] = std::numeric_limits< double >::quiet_NaN(); }
+
+  bool isSeparated() const { return cvIsNaN( feature[0] ) != 0; }
 };
-typedef std::pair< GPCPatchDescriptor, GPCPatchDescriptor > GPCPatchSample;
+struct CV_EXPORTS_W GPCPatchSample
+{
+  GPCPatchDescriptor ref;
+  GPCPatchDescriptor pos;
+  GPCPatchDescriptor neg;
+
+  void getDirections( bool &refdir, bool &posdir, bool &negdir, const Vec< double, GPCPatchDescriptor::nFeatures > &coef, double rhs ) const;
+};
 typedef std::vector< GPCPatchSample > GPCSamplesVector;
/** @brief Descriptor types for the Global Patch Collider.
*/
enum GPCDescType
{
GPC_DESCRIPTOR_DCT = 0, //!< Better quality but slow
GPC_DESCRIPTOR_WHT //!< Worse quality but much faster
};
 /** @brief Class encapsulating training samples.
  */
 class CV_EXPORTS_W GPCTrainingSamples
 {
 private:
   GPCSamplesVector samples;
+  int descriptorType;
 public:
   /** @brief This function can be used to extract samples from a pair of images and a ground truth flow.
    * Sizes of all the provided vectors must be equal.
    */
   static Ptr< GPCTrainingSamples > create( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo,
-                                           const std::vector< String > &gt );
+                                           const std::vector< String > &gt, int descriptorType );
+
+  static Ptr< GPCTrainingSamples > create( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo, InputArrayOfArrays gt,
+                                           int descriptorType );
   size_t size() const { return samples.size(); }
-  operator GPCSamplesVector() const { return samples; }
+  int type() const { return descriptorType; }
   operator GPCSamplesVector &() { return samples; }
 };
/** @brief Class encapsulating training parameters.
*/
struct GPCTrainingParams
{
unsigned maxTreeDepth; //!< Maximum tree depth to stop partitioning.
int minNumberOfSamples; //!< Minimum number of samples in the node to stop partitioning.
int descriptorType; //!< Type of descriptors to use.
bool printProgress; //!< Print progress to stdout.
GPCTrainingParams( unsigned _maxTreeDepth = 20, int _minNumberOfSamples = 3, GPCDescType _descriptorType = GPC_DESCRIPTOR_DCT,
bool _printProgress = true )
: maxTreeDepth( _maxTreeDepth ), minNumberOfSamples( _minNumberOfSamples ), descriptorType( _descriptorType ),
printProgress( _printProgress )
{
CV_Assert( check() );
}
GPCTrainingParams( const GPCTrainingParams &params )
: maxTreeDepth( params.maxTreeDepth ), minNumberOfSamples( params.minNumberOfSamples ), descriptorType( params.descriptorType ),
printProgress( params.printProgress )
{
CV_Assert( check() );
}
bool check() const { return maxTreeDepth > 1 && minNumberOfSamples > 1; }
};
/** @brief Class encapsulating matching parameters.
*/
struct GPCMatchingParams
{
bool useOpenCL; //!< Whether to use OpenCL to speed up the matching.
GPCMatchingParams( bool _useOpenCL = false ) : useOpenCL( _useOpenCL ) {}
GPCMatchingParams( const GPCMatchingParams &params ) : useOpenCL( params.useOpenCL ) {}
};
+/** @brief Class for individual tree.
+ */
 class CV_EXPORTS_W GPCTree : public Algorithm
 {
 public:
   struct Node
   {
-    Vec< double, GPCPatchDescriptor::nFeatures > coef; // hyperplane coefficients
-    double rhs;
+    Vec< double, GPCPatchDescriptor::nFeatures > coef; //!< Hyperplane coefficients
+    double rhs;                                        //!< Bias term of the hyperplane
     unsigned left;
     unsigned right;
@@ -109,45 +176,100 @@ private:
   typedef GPCSamplesVector::iterator SIter;
   std::vector< Node > nodes;
+  GPCTrainingParams params;
   bool trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth );
 public:
-  void train( GPCSamplesVector &samples );
+  void train( GPCTrainingSamples &samples, const GPCTrainingParams params = GPCTrainingParams() );
   void write( FileStorage &fs ) const;
   void read( const FileNode &fn );
+  unsigned findLeafForPatch( const GPCPatchDescriptor &descr ) const;
   static Ptr< GPCTree > create() { return makePtr< GPCTree >(); }
   bool operator==( const GPCTree &t ) const { return nodes == t.nodes; }
+  int getDescriptorType() const { return params.descriptorType; }
 };
 template < int T > class CV_EXPORTS_W GPCForest : public Algorithm
 {
 private:
struct Trail
{
    unsigned leaf[T]; //!< Which leaf of each of the T trees the patch fell into.
Point2i coord; //!< Patch coordinates.
bool operator==( const Trail &trail ) const { return memcmp( leaf, trail.leaf, sizeof( leaf ) ) == 0; }
bool operator<( const Trail &trail ) const
{
for ( int i = 0; i < T - 1; ++i )
if ( leaf[i] != trail.leaf[i] )
return leaf[i] < trail.leaf[i];
return leaf[T - 1] < trail.leaf[T - 1];
}
};
class ParallelTrailsFilling : public ParallelLoopBody
{
private:
const GPCForest *forest;
const std::vector< GPCPatchDescriptor > *descr;
std::vector< Trail > *trails;
ParallelTrailsFilling &operator=( const ParallelTrailsFilling & );
public:
ParallelTrailsFilling( const GPCForest *_forest, const std::vector< GPCPatchDescriptor > *_descr, std::vector< Trail > *_trails )
: forest( _forest ), descr( _descr ), trails( _trails ){};
void operator()( const Range &range ) const
{
for ( int t = range.start; t < range.end; ++t )
for ( size_t i = 0; i < descr->size(); ++i )
trails->at( i ).leaf[t] = forest->tree[t].findLeafForPatch( descr->at( i ) );
}
};
   GPCTree tree[T];
 public:
   /** @brief Train the forest using one sample set for every tree.
    * Please, consider using the next method instead of this one for better quality.
    */
-  void train( GPCSamplesVector &samples )
+  void train( GPCTrainingSamples &samples, const GPCTrainingParams params = GPCTrainingParams() )
   {
     for ( int i = 0; i < T; ++i )
-      tree[i].train( samples );
+      tree[i].train( samples, params );
   }
   /** @brief Train the forest using individual samples for each tree.
    * It is generally better to use this instead of the first method.
    */
-  void train( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo, const std::vector< String > &gt )
+  void train( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo, const std::vector< String > &gt,
+              const GPCTrainingParams params = GPCTrainingParams() )
   {
     for ( int i = 0; i < T; ++i )
     {
-      Ptr< GPCTrainingSamples > samples = GPCTrainingSamples::create( imagesFrom, imagesTo, gt ); // Create training set for the tree
-      tree[i].train( *samples );
+      Ptr< GPCTrainingSamples > samples =
+          GPCTrainingSamples::create( imagesFrom, imagesTo, gt, params.descriptorType ); // Create training set for the tree
+      tree[i].train( *samples, params );
     }
   }
+
+  void train( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo, InputArrayOfArrays gt,
+              const GPCTrainingParams params = GPCTrainingParams() )
+  {
+    for ( int i = 0; i < T; ++i )
+    {
+      Ptr< GPCTrainingSamples > samples =
+          GPCTrainingSamples::create( imagesFrom, imagesTo, gt, params.descriptorType ); // Create training set for the tree
+      tree[i].train( *samples, params );
+    }
+  }
@@ -166,19 +288,93 @@
   void read( const FileNode &fn )
   {
-    CV_Assert( T == (int)fn["ntrees"] );
+    CV_Assert( T <= (int)fn["ntrees"] );
     FileNodeIterator it = fn["trees"].begin();
     for ( int i = 0; i < T; ++i, ++it )
       tree[i].read( *it );
   }
/** @brief Find correspondences between two images.
* @param[in] imgFrom First image in a sequence.
* @param[in] imgTo Second image in a sequence.
* @param[out] corr Output vector with pairs of corresponding points.
* @param[in] params Additional matching parameters for fine-tuning.
*/
void findCorrespondences( InputArray imgFrom, InputArray imgTo, std::vector< std::pair< Point2i, Point2i > > &corr,
const GPCMatchingParams params = GPCMatchingParams() ) const;
   static Ptr< GPCForest > create() { return makePtr< GPCForest >(); }
 };
class CV_EXPORTS_W GPCDetails
{
public:
static void dropOutliers( std::vector< std::pair< Point2i, Point2i > > &corr );
static void getAllDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams &mp,
int type );
static void getCoordinatesFromIndex( size_t index, Size sz, int &x, int &y );
};
template < int T >
void GPCForest< T >::findCorrespondences( InputArray imgFrom, InputArray imgTo, std::vector< std::pair< Point2i, Point2i > > &corr,
const GPCMatchingParams params ) const
{
CV_Assert( imgFrom.channels() == 3 );
CV_Assert( imgTo.channels() == 3 );
Mat from, to;
imgFrom.getMat().convertTo( from, CV_32FC3 );
imgTo.getMat().convertTo( to, CV_32FC3 );
cvtColor( from, from, COLOR_BGR2YCrCb );
cvtColor( to, to, COLOR_BGR2YCrCb );
Mat fromCh[3], toCh[3];
split( from, fromCh );
split( to, toCh );
std::vector< GPCPatchDescriptor > descr;
GPCDetails::getAllDescriptorsForImage( fromCh, descr, params, tree[0].getDescriptorType() );
std::vector< Trail > trailsFrom( descr.size() ), trailsTo( descr.size() );
for ( size_t i = 0; i < descr.size(); ++i )
GPCDetails::getCoordinatesFromIndex( i, from.size(), trailsFrom[i].coord.x, trailsFrom[i].coord.y );
parallel_for_( Range( 0, T ), ParallelTrailsFilling( this, &descr, &trailsFrom ) );
descr.clear();
GPCDetails::getAllDescriptorsForImage( toCh, descr, params, tree[0].getDescriptorType() );
for ( size_t i = 0; i < descr.size(); ++i )
GPCDetails::getCoordinatesFromIndex( i, to.size(), trailsTo[i].coord.x, trailsTo[i].coord.y );
parallel_for_( Range( 0, T ), ParallelTrailsFilling( this, &descr, &trailsTo ) );
std::sort( trailsFrom.begin(), trailsFrom.end() );
std::sort( trailsTo.begin(), trailsTo.end() );
for ( size_t i = 0; i < trailsFrom.size(); ++i )
{
bool uniq = true;
while ( i + 1 < trailsFrom.size() && trailsFrom[i] == trailsFrom[i + 1] )
++i, uniq = false;
if ( uniq )
{
typename std::vector< Trail >::const_iterator lb = std::lower_bound( trailsTo.begin(), trailsTo.end(), trailsFrom[i] );
if ( lb != trailsTo.end() && *lb == trailsFrom[i] && ( ( lb + 1 ) == trailsTo.end() || !( *lb == *( lb + 1 ) ) ) )
corr.push_back( std::make_pair( trailsFrom[i].coord, lb->coord ) );
}
}
GPCDetails::dropOutliers( corr );
} }
+//! @}
-}
+} // namespace optflow
 CV_EXPORTS void write( FileStorage &fs, const String &name, const optflow::GPCTree::Node &node );
 CV_EXPORTS void read( const FileNode &fn, optflow::GPCTree::Node &node, optflow::GPCTree::Node );
-}
+} // namespace cv
 #endif
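Tying the header together, a minimal train-and-match sketch (illustrative and not part of the commit; file names are placeholders and error handling is omitted):

#include "opencv2/imgcodecs.hpp"
#include "opencv2/optflow.hpp"

int main()
{
  // Placeholder training triplet: two frames plus ground truth flow in .flo format.
  std::vector< cv::String > from( 1, "from.png" ), to( 1, "to.png" ), gt( 1, "gt.flo" );
  cv::Ptr< cv::optflow::GPCForest< 5 > > forest = cv::optflow::GPCForest< 5 >::create();
  forest->train( from, to, gt ); // default GPCTrainingParams
  forest->save( "forest.yml.gz" );

  // Later: load the trained forest and match two new frames.
  forest = cv::Algorithm::load< cv::optflow::GPCForest< 5 > >( "forest.yml.gz" );
  std::vector< std::pair< cv::Point2i, cv::Point2i > > corr;
  forest->findCorrespondences( cv::imread( "from.png" ), cv::imread( "to.png" ), corr );
  return 0;
}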
#include "opencv2/core/ocl.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/optflow.hpp"
#include <fstream>
#include <iostream>
#include <stdio.h>
/* This tool finds correspondences between two images using Global Patch Collider
* and calculates error using provided ground truth flow.
*
* It will look for the file named "forest.yml.gz" with a learned forest.
* You can obtain the "forest.yml.gz" either by manually training it using another tool with *_train suffix
* or by downloading one of the files trained on some publicly available dataset from here:
*
* https://drive.google.com/open?id=0B7Hb8cfuzrIIZDFscXVYd0NBNFU
*/
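An example invocation might look as follows; the binary name is an assumption based on how OpenCV names built samples (example_<module>_<name>), not something stated in this commit:

example_optflow_gpc_evaluate from.png to.png gt.flo out.png --forest=forest.yml.gz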
using namespace cv;
const String keys = "{help h ? | | print this message}"
"{@image1 |<none> | image1}"
"{@image2 |<none> | image2}"
"{@groundtruth |<none> | path to the .flo file}"
"{@output | | output to a file instead of displaying, output image path}"
"{g gpu | | use OpenCL}"
"{f forest |forest.yml.gz| path to the forest.yml.gz}";
const int nTrees = 5;
static double normL2( const Point2f &v ) { return sqrt( v.x * v.x + v.y * v.y ); }
static Vec3d getFlowColor( const Point2f &f, const bool logScale = true, const double scaleDown = 5 )
{
if ( f.x == 0 && f.y == 0 )
return Vec3d( 0, 0, 1 );
double radius = normL2( f );
if ( logScale )
radius = log( radius + 1 );
radius /= scaleDown;
radius = std::min( 1.0, radius );
double angle = ( atan2( -f.y, -f.x ) + CV_PI ) * 180 / CV_PI;
return Vec3d( angle, radius, 1 );
}
static void displayFlow( InputArray _flow, OutputArray _img )
{
const Size sz = _flow.size();
Mat flow = _flow.getMat();
_img.create( sz, CV_32FC3 );
Mat img = _img.getMat();
for ( int i = 0; i < sz.height; ++i )
for ( int j = 0; j < sz.width; ++j )
img.at< Vec3f >( i, j ) = getFlowColor( flow.at< Point2f >( i, j ) );
cvtColor( img, img, COLOR_HSV2BGR );
}
static bool fileProbe( const char *name ) { return std::ifstream( name ).good(); }
int main( int argc, const char **argv )
{
CommandLineParser parser( argc, argv, keys );
parser.about( "Global Patch Collider evaluation tool" );
if ( parser.has( "help" ) )
{
parser.printMessage();
return 0;
}
String fromPath = parser.get< String >( 0 );
String toPath = parser.get< String >( 1 );
String gtPath = parser.get< String >( 2 );
String outPath = parser.get< String >( 3 );
const bool useOpenCL = parser.has( "gpu" );
String forestDumpPath = parser.get< String >( "forest" );
if ( !parser.check() )
{
parser.printErrors();
return 1;
}
if ( !fileProbe( forestDumpPath.c_str() ) )
{
std::cerr << "Can't open the file with a trained model: `" << forestDumpPath
<< "`.\nYou can obtain this file either by manually training the model using another tool with *_train suffix or by "
"downloading one of the files trained on some publicly available dataset from "
"here:\nhttps://drive.google.com/open?id=0B7Hb8cfuzrIIZDFscXVYd0NBNFU"
<< std::endl;
return 1;
}
ocl::setUseOpenCL( useOpenCL );
Ptr< optflow::GPCForest< nTrees > > forest = Algorithm::load< optflow::GPCForest< nTrees > >( forestDumpPath );
Mat from = imread( fromPath );
Mat to = imread( toPath );
Mat gt = optflow::readOpticalFlow( gtPath );
std::vector< std::pair< Point2i, Point2i > > corr;
TickMeter meter;
meter.start();
forest->findCorrespondences( from, to, corr, optflow::GPCMatchingParams( useOpenCL ) );
meter.stop();
std::cout << "Found " << corr.size() << " matches." << std::endl;
std::cout << "Time: " << meter.getTimeSec() << " sec." << std::endl;
double error = 0;
Mat dispErr = Mat::zeros( from.size(), CV_32FC3 );
dispErr = Scalar( 0, 0, 1 );
Mat disp = Mat::zeros( from.size(), CV_32FC3 );
disp = Scalar( 0, 0, 1 );
for ( size_t i = 0; i < corr.size(); ++i )
{
const Point2f a = corr[i].first;
const Point2f b = corr[i].second;
const Point2f c = a + gt.at< Point2f >( corr[i].first.y, corr[i].first.x );
error += normL2( b - c );
circle( disp, a, 3, getFlowColor( b - a ), -1 );
circle( dispErr, a, 3, getFlowColor( b - c, false, 32 ), -1 );
}
error /= corr.size();
std::cout << "Average endpoint error: " << error << " px." << std::endl;
cvtColor( disp, disp, COLOR_HSV2BGR );
cvtColor( dispErr, dispErr, COLOR_HSV2BGR );
Mat dispGroundTruth;
displayFlow( gt, dispGroundTruth );
if ( outPath.length() )
{
putText( disp, "Sparse matching: Global Patch Collider", Point2i( 24, 40 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
char buf[256];
sprintf( buf, "Average EPE: %.2f", error );
putText( disp, buf, Point2i( 24, 80 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
sprintf( buf, "Number of matches: %u", (unsigned)corr.size() );
putText( disp, buf, Point2i( 24, 120 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
disp *= 255;
imwrite( outPath, disp );
return 0;
}
namedWindow( "Correspondences", WINDOW_AUTOSIZE );
imshow( "Correspondences", disp );
namedWindow( "Error", WINDOW_AUTOSIZE );
imshow( "Error", dispErr );
namedWindow( "Ground truth", WINDOW_AUTOSIZE );
imshow( "Ground truth", dispGroundTruth );
waitKey( 0 );
return 0;
}
#include "opencv2/optflow.hpp" #include "opencv2/optflow.hpp"
#include <iostream> #include <iostream>
/* This tool trains the forest for the Global Patch Collider and stores output to the "forest.yml.gz".
*/
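An example invocation, matching the retraining note in the commit message; the binary name is again an assumption based on the samples' naming convention:

example_optflow_gpc_train --descriptor-type=1 --forest=forest.yml.gz from1.png to1.png gt1.flo from2.png to2.png gt2.flo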
using namespace cv;
const String keys = "{help h ? | | print this message}"
"{max-tree-depth | | Maximum tree depth to stop partitioning}"
"{min-samples | | Minimum number of samples in the node to stop partitioning}"
"{descriptor-type|0 | Descriptor type. Set to 0 for quality, 1 for speed.}"
"{print-progress | | Set to 0 to enable quiet mode, set to 1 to print progress}"
"{f forest |forest.yml.gz| Path where to store resulting forest. It is recommended to use .yml.gz extension.}";
 const int nTrees = 5;
+static void fillInputImagesFromCommandLine( std::vector< String > &img1, std::vector< String > &img2, std::vector< String > &gt, int argc,
+                                            const char **argv )
+{
+  for ( int i = 1, j = 0; i < argc; ++i )
+  {
+    if ( argv[i][0] == '-' )
+      continue;
+    if ( j % 3 == 0 )
+      img1.push_back( argv[i] );
+    if ( j % 3 == 1 )
+      img2.push_back( argv[i] );
+    if ( j % 3 == 2 )
+      gt.push_back( argv[i] );
+    ++j;
+  }
+}
+
 int main( int argc, const char **argv )
 {
-  int nSequences = argc - 1;
-  if ( nSequences <= 0 || nSequences % 3 != 0 )
-  {
-    std::cerr << "Usage: " << argv[0] << " ImageFrom1 ImageTo1 GroundTruth1 ... ImageFromN ImageToN GroundTruthN" << std::endl;
-    return 1;
-  }
-  nSequences /= 3;
-  std::vector< cv::String > img1, img2, gt;
-  for ( int i = 0; i < nSequences; ++i )
-  {
-    img1.push_back( argv[1 + i * 3] );
-    img2.push_back( argv[1 + i * 3 + 1] );
-    gt.push_back( argv[1 + i * 3 + 2] );
-  }
-  cv::Ptr< cv::optflow::GPCForest< nTrees > > forest = cv::optflow::GPCForest< nTrees >::create();
-  forest->train( img1, img2, gt );
-  forest->save( "forest.dump" );
+  CommandLineParser parser( argc, argv, keys );
+  parser.about( "Global Patch Collider training tool" );
+  std::vector< String > img1, img2, gt;
+  optflow::GPCTrainingParams params;
+
+  if ( parser.has( "max-tree-depth" ) )
+    params.maxTreeDepth = parser.get< unsigned >( "max-tree-depth" );
+  if ( parser.has( "min-samples" ) )
+    params.minNumberOfSamples = parser.get< unsigned >( "min-samples" );
+  if ( parser.has( "descriptor-type" ) )
+    params.descriptorType = parser.get< int >( "descriptor-type" );
+  if ( parser.has( "print-progress" ) )
+    params.printProgress = parser.get< unsigned >( "print-progress" ) != 0;
+
+  fillInputImagesFromCommandLine( img1, img2, gt, argc, argv );
+
+  if ( parser.has( "help" ) || img1.size() != img2.size() || img1.size() != gt.size() || img1.size() == 0 )
+  {
+    std::cerr << "\nUsage: " << argv[0] << " [params] ImageFrom1 ImageTo1 GroundTruth1 ... ImageFromN ImageToN GroundTruthN\n" << std::endl;
+    parser.printMessage();
+    return 1;
+  }
+
+  Ptr< optflow::GPCForest< nTrees > > forest = optflow::GPCForest< nTrees >::create();
+  forest->train( img1, img2, gt, params );
+  forest->save( parser.get< String >( "forest" ) );
   return 0;
 }
import argparse
import glob
import os
import subprocess
def execute(cmd):
popen = subprocess.Popen(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
for stdout_line in iter(popen.stdout.readline, ''):
print(stdout_line.rstrip())
for stderr_line in iter(popen.stderr.readline, ''):
print(stderr_line.rstrip())
popen.stdout.close()
popen.stderr.close()
return_code = popen.wait()
if return_code != 0:
raise subprocess.CalledProcessError(return_code, cmd)
def main():
parser = argparse.ArgumentParser(
description='Train Global Patch Collider using Middlebury dataset')
parser.add_argument(
'--bin_path',
help='Path to the training executable (example_optflow_gpc_train)',
required=True)
parser.add_argument('--dataset_path',
help='Path to the directory with frames',
required=True)
parser.add_argument('--gt_path',
help='Path to the directory with ground truth flow',
required=True)
parser.add_argument('--descriptor_type',
help='Descriptor type',
type=int,
default=0)
args = parser.parse_args()
seq = glob.glob(os.path.join(args.dataset_path, '*'))
seq.sort()
input_files = []
for s in seq:
if os.path.isdir(s):
seq_name = os.path.basename(s)
frames = glob.glob(os.path.join(s, 'frame*.png'))
frames.sort()
assert (len(frames) == 2)
assert (os.path.basename(frames[0]) == 'frame10.png')
assert (os.path.basename(frames[1]) == 'frame11.png')
gt_flow = os.path.join(args.gt_path, seq_name, 'flow10.flo')
if os.path.isfile(gt_flow):
input_files += [frames[0], frames[1], gt_flow]
execute([args.bin_path, '--descriptor-type=%d' % args.descriptor_type] + input_files)
if __name__ == '__main__':
main()
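An example invocation of this script (the script file name is not shown in the diff, so it is assumed here; 'other-data' and 'other-gt-flow' are the customary Middlebury folder names):

python gpc_train_middlebury.py --bin_path ./example_optflow_gpc_train --dataset_path middlebury/other-data --gt_path middlebury/other-gt-flow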
import argparse
import glob
import os
import subprocess
FRAME_DIST = 2
assert (FRAME_DIST >= 1)
def execute(cmd):
popen = subprocess.Popen(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
for stdout_line in iter(popen.stdout.readline, ''):
print(stdout_line.rstrip())
for stderr_line in iter(popen.stderr.readline, ''):
print(stderr_line.rstrip())
popen.stdout.close()
popen.stderr.close()
return_code = popen.wait()
if return_code != 0:
raise subprocess.CalledProcessError(return_code, cmd)
def main():
parser = argparse.ArgumentParser(
description='Train Global Patch Collider using MPI Sintel dataset')
parser.add_argument(
'--bin_path',
help='Path to the training executable (example_optflow_gpc_train)',
required=True)
parser.add_argument('--dataset_path',
help='Path to the directory with frames',
required=True)
parser.add_argument('--gt_path',
help='Path to the directory with ground truth flow',
required=True)
parser.add_argument('--descriptor_type',
help='Descriptor type',
type=int,
default=0)
args = parser.parse_args()
seq = glob.glob(os.path.join(args.dataset_path, '*'))
seq.sort()
input_files = []
for s in seq:
seq_name = os.path.basename(s)
frames = glob.glob(os.path.join(s, 'frame*.png'))
frames.sort()
for i in range(0, len(frames) - 1, FRAME_DIST):
gt_flow = os.path.join(args.gt_path, seq_name,
os.path.basename(frames[i])[0:-4] + '.flo')
assert (os.path.isfile(gt_flow))
input_files += [frames[i], frames[i + 1], gt_flow]
execute([args.bin_path, '--descriptor-type=%d' % args.descriptor_type] + input_files)
if __name__ == '__main__':
main()
#include "opencv2/core/ocl.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/optflow.hpp"
#include <fstream>
#include <iostream>
#include <stdio.h>
using namespace cv;
using optflow::OpticalFlowPCAFlow;
using optflow::PCAPrior;
const String keys = "{help h ? | | print this message}"
"{@image1 |<none>| image1}"
"{@image2 |<none>| image2}"
"{@groundtruth |<none>| path to the .flo file}"
"{@prior |<none>| path to a prior file for PCAFlow}"
"{@output |<none>| output image path}"
"{g gpu | | use OpenCL}";
static double normL2( const Point2f &v ) { return sqrt( v.x * v.x + v.y * v.y ); }
static bool fileProbe( const char *name ) { return std::ifstream( name ).good(); }
static Vec3d getFlowColor( const Point2f &f, const bool logScale = true, const double scaleDown = 5 )
{
if ( f.x == 0 && f.y == 0 )
return Vec3d( 0, 0, 1 );
double radius = normL2( f );
if ( logScale )
radius = log( radius + 1 );
radius /= scaleDown;
radius = std::min( 1.0, radius );
double angle = ( atan2( -f.y, -f.x ) + CV_PI ) * 180 / CV_PI;
return Vec3d( angle, radius, 1 );
}
static void displayFlow( InputArray _flow, OutputArray _img )
{
const Size sz = _flow.size();
Mat flow = _flow.getMat();
_img.create( sz, CV_32FC3 );
Mat img = _img.getMat();
for ( int i = 0; i < sz.height; ++i )
for ( int j = 0; j < sz.width; ++j )
img.at< Vec3f >( i, j ) = getFlowColor( flow.at< Point2f >( i, j ) );
cvtColor( img, img, COLOR_HSV2BGR );
}
static bool isFlowCorrect( const Point2f &u )
{
return !cvIsNaN( u.x ) && !cvIsNaN( u.y ) && ( fabs( u.x ) < 1e9 ) && ( fabs( u.y ) < 1e9 );
}
static double calcEPE( const Mat &f1, const Mat &f2 )
{
double sum = 0;
Size sz = f1.size();
size_t cnt = 0;
for ( int i = 0; i < sz.height; ++i )
for ( int j = 0; j < sz.width; ++j )
if ( isFlowCorrect( f1.at< Point2f >( i, j ) ) && isFlowCorrect( f2.at< Point2f >( i, j ) ) )
{
sum += normL2( f1.at< Point2f >( i, j ) - f2.at< Point2f >( i, j ) );
++cnt;
}
return sum / cnt;
}
static void displayResult( Mat &i1, Mat &i2, Mat &gt, Ptr< DenseOpticalFlow > &algo, OutputArray _img, const char *descr,
const bool useGpu = false )
{
Mat flow( i1.size[0], i1.size[1], CV_32FC2 );
TickMeter meter;
meter.start();
if ( useGpu )
algo->calc( i1, i2, flow.getUMat( ACCESS_RW ) );
else
algo->calc( i1, i2, flow );
meter.stop();
displayFlow( flow, _img );
Mat img = _img.getMat();
putText( img, descr, Point2i( 24, 40 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
char buf[256];
sprintf( buf, "Average EPE: %.2f", calcEPE( flow, gt ) );
putText( img, buf, Point2i( 24, 80 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
sprintf( buf, "Time: %.2fs", meter.getTimeSec() );
putText( img, buf, Point2i( 24, 120 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
}
static void displayGT( InputArray _flow, OutputArray _img, const char *descr )
{
displayFlow( _flow, _img );
Mat img = _img.getMat();
putText( img, descr, Point2i( 24, 40 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
}
int main( int argc, const char **argv )
{
CommandLineParser parser( argc, argv, keys );
parser.about( "PCAFlow demonstration" );
if ( parser.has( "help" ) )
{
parser.printMessage();
return 0;
}
String img1 = parser.get< String >( 0 );
String img2 = parser.get< String >( 1 );
String groundtruth = parser.get< String >( 2 );
String prior = parser.get< String >( 3 );
String outimg = parser.get< String >( 4 );
const bool useGpu = parser.has( "gpu" );
if ( !parser.check() )
{
parser.printErrors();
return 1;
}
if ( !fileProbe( prior.c_str() ) )
{
std::cerr << "Can't open the file with prior! Check the provided path: " << prior << std::endl;
return 1;
}
cv::ocl::setUseOpenCL( useGpu );
Mat i1 = imread( img1 );
Mat i2 = imread( img2 );
Mat gt = optflow::readOpticalFlow( groundtruth );
Mat i1g, i2g;
cvtColor( i1, i1g, COLOR_BGR2GRAY );
cvtColor( i2, i2g, COLOR_BGR2GRAY );
Mat pcaflowDisp, pcaflowpriDisp, farnebackDisp, gtDisp;
{
Ptr< DenseOpticalFlow > pcaflow = makePtr< OpticalFlowPCAFlow >( makePtr< PCAPrior >( prior.c_str() ) );
displayResult( i1, i2, gt, pcaflow, pcaflowpriDisp, "PCAFlow with prior", useGpu );
}
{
Ptr< DenseOpticalFlow > pcaflow = makePtr< OpticalFlowPCAFlow >();
displayResult( i1, i2, gt, pcaflow, pcaflowDisp, "PCAFlow without prior", useGpu );
}
{
Ptr< DenseOpticalFlow > farneback = optflow::createOptFlow_Farneback();
displayResult( i1g, i2g, gt, farneback, farnebackDisp, "Farneback", useGpu );
}
displayGT( gt, gtDisp, "Ground truth" );
Mat disp1, disp2;
vconcat( pcaflowpriDisp, farnebackDisp, disp1 );
vconcat( pcaflowDisp, gtDisp, disp2 );
hconcat( disp1, disp2, disp1 );
disp1 *= 255;
imwrite( outimg, disp1 );
return 0;
}
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Itseez, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Vladislav Samsonov, vvladxx@gmail.com
__kernel void getPatchDescriptor(
__global const uchar* imgCh0, int ic0step, int ic0off,
__global const uchar* imgCh1, int ic1step, int ic1off,
__global const uchar* imgCh2, int ic2step, int ic2off,
__global uchar* out, int outstep, int outoff,
const int gh, const int gw, const int PR )
{
const int i = get_global_id(0);
const int j = get_global_id(1);
if (i >= gh || j >= gw)
return;
__global double* desc = (__global double*)(out + (outstep * (i * gw + j) + outoff));
const int patchRadius = PR * 2;
float patch[PATCH_RADIUS_DOUBLED][PATCH_RADIUS_DOUBLED];
for (int i0 = 0; i0 < patchRadius; ++i0) {
__global const float* ch0Row = (__global const float*)(imgCh0 + (ic0step * (i + i0) + ic0off + j * sizeof(float)));
for (int j0 = 0; j0 < patchRadius; ++j0)
patch[i0][j0] = ch0Row[j0];
}
#pragma unroll
for (int n0 = 0; n0 < 4; ++n0) {
#pragma unroll
for (int n1 = 0; n1 < 4; ++n1) {
double sum = 0;
for (int i0 = 0; i0 < patchRadius; ++i0)
for (int j0 = 0; j0 < patchRadius; ++j0)
sum += patch[i0][j0] * cos(CV_PI * (i0 + 0.5) * n0 / patchRadius) * cos(CV_PI * (j0 + 0.5) * n1 / patchRadius);
desc[n0 * 4 + n1] = sum / PR;
}
}
for (int k = 0; k < 4; ++k) {
desc[k] *= SQRT2_INV;
desc[k * 4] *= SQRT2_INV;
}
double sum = 0;
for (int i0 = 0; i0 < patchRadius; ++i0) {
__global const float* ch1Row = (__global const float*)(imgCh1 + (ic1step * (i + i0) + ic1off + j * sizeof(float)));
for (int j0 = 0; j0 < patchRadius; ++j0)
sum += ch1Row[j0];
}
desc[16] = sum / patchRadius;
sum = 0;
for (int i0 = 0; i0 < patchRadius; ++i0) {
__global const float* ch2Row = (__global const float*)(imgCh2 + (ic2step * (i + i0) + ic2off + j * sizeof(float)));
for (int j0 = 0; j0 < patchRadius; ++j0)
sum += ch2Row[j0];
}
desc[17] = sum / patchRadius;
}
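For reference (a standard formula, not spelled out in the commit): the unrolled loops above compute the top-left 4x4 block of the 2-D DCT-II of the luma patch. With R = PR the patch radius and p the 2R x 2R patch,

d_{n_0 n_1} = \frac{1}{R} \sum_{i=0}^{2R-1} \sum_{j=0}^{2R-1} p_{ij} \cos\frac{\pi (i + 0.5) n_0}{2R} \cos\frac{\pi (j + 0.5) n_1}{2R}, \qquad 0 \le n_0, n_1 < 4,

after which the n_0 = 0 row and n_1 = 0 column are scaled by 1/\sqrt{2}, and the last two descriptor entries are the chroma-plane sums scaled by 1/(2R).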
@@ -41,8 +41,22 @@
 //M*/
 #include "opencv2/core/core_c.h"
+#include "opencv2/core/private.hpp"
+#include "opencv2/flann/miniflann.hpp"
 #include "opencv2/highgui.hpp"
 #include "precomp.hpp"
+#include "opencl_kernels_optflow.hpp"
/* Disable "from double to float" and "from size_t to int" warnings.
* Fixing these would make the code look ugly by introducing explicit cast all around.
* Here these warning are pointless anyway.
*/
#ifdef _MSC_VER
#pragma warning( disable : 4244 4267 4838 )
#endif
#ifdef __clang__
#pragma clang diagnostic ignored "-Wshorten-64-to-32"
#endif
 namespace cv
 {
@@ -51,12 +65,23 @@ namespace optflow
 namespace
 {
-const int patchRadius = 10;
-const double thresholdMagnitudeFrac = 0.6666666666;
+#define PATCH_RADIUS 10
+#define PATCH_RADIUS_DOUBLED 20
+#define SQRT2_INV 0.7071067811865475
+
+const int patchRadius = PATCH_RADIUS;
 const int globalIters = 3;
 const int localIters = 500;
-const int minNumberOfSamples = 2;
-//const bool debugOutput = true;
+const double thresholdOutliers = 0.98;
+const double thresholdMagnitudeFrac = 0.8;
+const double epsTolerance = 1e-12;
+const unsigned scoreGainPos = 5;
+const unsigned scoreGainNeg = 1;
+const unsigned negSearchKNN = 5;
+const double simulatedAnnealingTemperatureCoef = 200.0;
+const double sigmaGrowthRate = 0.2;
+
+RNG rng;
 struct Magnitude
 {
@@ -79,9 +104,9 @@ struct PartitionPredicate1
   bool operator()( const GPCPatchSample &sample ) const
   {
-    const bool direction1 = ( coef.dot( sample.first.feature ) < rhs );
-    const bool direction2 = ( coef.dot( sample.second.feature ) < rhs );
-    return direction1 == false && direction1 == direction2;
+    bool refdir, posdir, negdir;
+    sample.getDirections( refdir, posdir, negdir, coef, rhs );
+    return refdir == false && ( posdir == false || negdir == true );
   }
 };
@@ -94,20 +119,276 @@ struct PartitionPredicate2
   bool operator()( const GPCPatchSample &sample ) const
   {
-    const bool direction1 = ( coef.dot( sample.first.feature ) < rhs );
-    const bool direction2 = ( coef.dot( sample.second.feature ) < rhs );
-    return direction1 != direction2;
+    bool refdir, posdir, negdir;
+    sample.getDirections( refdir, posdir, negdir, coef, rhs );
+    return refdir != posdir && refdir == negdir;
   }
 };
+
+struct CompareWithTolerance
+{
+  double val;
+
+  CompareWithTolerance( double _val ) : val( _val ) {};
+
+  bool operator()( const double &elem ) const
+  {
+    const double diff = ( val + elem == 0 ) ? std::abs( val - elem ) : std::abs( ( val - elem ) / ( val + elem ) );
+    return diff <= epsTolerance;
+  }
+};
 float normL2Sqr( const Vec2f &v ) { return v[0] * v[0] + v[1] * v[1]; }
+int normL2Sqr( const Point2i &v ) { return v.x * v.x + v.y * v.y; }
 bool checkBounds( int i, int j, Size sz )
 {
   return i >= patchRadius && j >= patchRadius && i + patchRadius < sz.height && j + patchRadius < sz.width;
 }
-void getTrainingSamples( const Mat &from, const Mat &to, const Mat &gt, GPCSamplesVector &samples )
+void getDCTPatchDescriptor( GPCPatchDescriptor &patchDescr, const Mat *imgCh, int i, int j )
 {
Rect roi( j - patchRadius, i - patchRadius, 2 * patchRadius, 2 * patchRadius );
Mat freqDomain;
dct( imgCh[0]( roi ), freqDomain );
double *feature = patchDescr.feature.val;
feature[0] = freqDomain.at< float >( 0, 0 );
feature[1] = freqDomain.at< float >( 0, 1 );
feature[2] = freqDomain.at< float >( 0, 2 );
feature[3] = freqDomain.at< float >( 0, 3 );
feature[4] = freqDomain.at< float >( 1, 0 );
feature[5] = freqDomain.at< float >( 1, 1 );
feature[6] = freqDomain.at< float >( 1, 2 );
feature[7] = freqDomain.at< float >( 1, 3 );
feature[8] = freqDomain.at< float >( 2, 0 );
feature[9] = freqDomain.at< float >( 2, 1 );
feature[10] = freqDomain.at< float >( 2, 2 );
feature[11] = freqDomain.at< float >( 2, 3 );
feature[12] = freqDomain.at< float >( 3, 0 );
feature[13] = freqDomain.at< float >( 3, 1 );
feature[14] = freqDomain.at< float >( 3, 2 );
feature[15] = freqDomain.at< float >( 3, 3 );
feature[16] = cv::sum( imgCh[1]( roi ) )[0] / ( 2 * patchRadius );
feature[17] = cv::sum( imgCh[2]( roi ) )[0] / ( 2 * patchRadius );
}
double sumInt( const Mat &integ, int i, int j, int h, int w )
{
return integ.at< double >( i + h, j + w ) - integ.at< double >( i + h, j ) - integ.at< double >( i, j + w ) + integ.at< double >( i, j );
}
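sumInt is the standard integral-image identity, stated here for clarity (not from the diff): if I is the integral image of f, the sum of f over the h x w box with top-left corner (i, j) is

\sum_{p=i}^{i+h-1} \sum_{q=j}^{j+w-1} f(p, q) = I(i+h, j+w) - I(i+h, j) - I(i, j+w) + I(i, j).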
void getWHTPatchDescriptor( GPCPatchDescriptor &patchDescr, const Mat *imgCh, int i, int j )
{
i -= patchRadius;
j -= patchRadius;
const int k = 2 * patchRadius;
const double s = sumInt( imgCh[0], i, j, k, k );
double *feature = patchDescr.feature.val;
feature[0] = s;
feature[1] = s - 2 * sumInt( imgCh[0], i, j + k / 2, k, k / 2 );
feature[2] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k, k / 2 );
feature[3] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k, k / 4 ) - 2 * sumInt( imgCh[0], i, j + 3 * k / 4, k, k / 4 );
feature[4] = s - 2 * sumInt( imgCh[0], i + k / 2, j, k / 2, k );
feature[5] = s - 2 * sumInt( imgCh[0], i, j + k / 2, k / 2, k / 2 ) - 2 * sumInt( imgCh[0], i + k / 2, j, k / 2, k / 2 );
feature[6] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k / 2, k / 2 ) - 2 * sumInt( imgCh[0], i + k / 2, j, k / 2, k / 4 ) -
2 * sumInt( imgCh[0], i + k / 2, j + 3 * k / 4, k / 2, k / 4 );
feature[7] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k / 2, k / 4 ) - 2 * sumInt( imgCh[0], i, j + 3 * k / 4, k / 2, k / 4 ) -
2 * sumInt( imgCh[0], i + k / 2, j, k / 2, k / 4 ) - 2 * sumInt( imgCh[0], i + k / 2, j + k / 2, k / 2, k / 4 );
feature[8] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 2, k );
feature[9] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 2, k / 2 ) - 2 * sumInt( imgCh[0], i, j + k / 2, k / 4, k / 2 ) -
2 * sumInt( imgCh[0], i + 3 * k / 4, j + k / 2, k / 4, k / 2 );
feature[10] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 2, k / 4 ) - 2 * sumInt( imgCh[0], i + k / 4, j + 3 * k / 4, k / 2, k / 4 ) -
2 * sumInt( imgCh[0], i, j + k / 4, k / 4, k / 2 ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j + k / 4, k / 4, k / 2 );
feature[11] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i, j + 3 * k / 4, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + k / 4, j, k / 2, k / 4 ) - 2 * sumInt( imgCh[0], i + k / 4, j + k / 2, k / 2, k / 4 ) -
2 * sumInt( imgCh[0], i + 3 * k / 4, j + k / 4, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + 3 * k / 4, j + 3 * k / 4, k / 4, k / 4 );
feature[12] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 4, k ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j, k / 4, k );
feature[13] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 4, k / 2 ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j, k / 4, k / 2 ) -
2 * sumInt( imgCh[0], i, j + k / 2, k / 4, k / 2 ) - 2 * sumInt( imgCh[0], i + k / 2, j + k / 2, k / 4, k / 2 );
feature[14] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i, j + k / 4, k / 4, k / 2 ) - 2 * sumInt( imgCh[0], i + k / 2, j + k / 4, k / 4, k / 2 ) -
2 * sumInt( imgCh[0], i + k / 4, j + 3 * k / 4, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + 3 * k / 4, j + 3 * k / 4, k / 4, k / 4 );
feature[15] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i, j + 3 * k / 4, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + k / 4, j, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i + k / 4, j + k / 2, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + k / 2, j + k / 4, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + k / 2, j + 3 * k / 4, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + 3 * k / 4, j + k / 2, k / 4, k / 4 );
feature[16] = sumInt( imgCh[1], i, j, k, k );
feature[17] = sumInt( imgCh[2], i, j, k, k );
patchDescr.feature /= patchRadius;
}
class ParallelDCTFiller : public ParallelLoopBody
{
private:
const Size sz;
const Mat *imgCh;
std::vector< GPCPatchDescriptor > *descr;
ParallelDCTFiller &operator=( const ParallelDCTFiller & );
public:
ParallelDCTFiller( const Size &_sz, const Mat *_imgCh, std::vector< GPCPatchDescriptor > *_descr )
: sz( _sz ), imgCh( _imgCh ), descr( _descr ){};
void operator()( const Range &range ) const
{
for ( int i = range.start; i < range.end; ++i )
{
int x, y;
GPCDetails::getCoordinatesFromIndex( i, sz, x, y );
getDCTPatchDescriptor( descr->at( i ), imgCh, y, x );
}
}
};
#ifdef HAVE_OPENCL
bool ocl_getAllDCTDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr )
{
const Size sz = imgCh[0].size();
ocl::Kernel kernel( "getPatchDescriptor", ocl::optflow::sparse_matching_gpc_oclsrc,
format( "-DPATCH_RADIUS_DOUBLED=%d -DCV_PI=%f -DSQRT2_INV=%f", PATCH_RADIUS_DOUBLED, CV_PI, SQRT2_INV ) );
size_t globSize[] = {sz.height - 2 * patchRadius, sz.width - 2 * patchRadius};
UMat out( globSize[0] * globSize[1], GPCPatchDescriptor::nFeatures, CV_64F );
if (
kernel
.args( cv::ocl::KernelArg::ReadOnlyNoSize( imgCh[0].getUMat( ACCESS_READ ) ),
cv::ocl::KernelArg::ReadOnlyNoSize( imgCh[1].getUMat( ACCESS_READ ) ),
cv::ocl::KernelArg::ReadOnlyNoSize( imgCh[2].getUMat( ACCESS_READ ) ),
cv::ocl::KernelArg::WriteOnlyNoSize( out ),
(int)globSize[0], (int)globSize[1], (int)patchRadius )
.run( 2, globSize, 0, true ) == false )
return false;
Mat cpuOut = out.getMat( 0 );
for ( int i = 0; i + 2 * patchRadius < sz.height; ++i )
for ( int j = 0; j + 2 * patchRadius < sz.width; ++j )
descr.push_back( *cpuOut.ptr< GPCPatchDescriptor >( i * globSize[1] + j ) );
return true;
}
#endif
void getAllDCTDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams &mp )
{
const Size sz = imgCh[0].size();
descr.reserve( ( sz.height - 2 * patchRadius ) * ( sz.width - 2 * patchRadius ) );
(void)mp; // Fix unused parameter warning in case OpenCL is not available
CV_OCL_RUN( mp.useOpenCL, ocl_getAllDCTDescriptorsForImage( imgCh, descr ) )
descr.resize( ( sz.height - 2 * patchRadius ) * ( sz.width - 2 * patchRadius ) );
parallel_for_( Range( 0, descr.size() ), ParallelDCTFiller( sz, imgCh, &descr ) );
}
class ParallelWHTFiller : public ParallelLoopBody
{
private:
const Size sz;
const Mat *imgChInt;
std::vector< GPCPatchDescriptor > *descr;
ParallelWHTFiller &operator=( const ParallelWHTFiller & );
public:
ParallelWHTFiller( const Size &_sz, const Mat *_imgChInt, std::vector< GPCPatchDescriptor > *_descr )
: sz( _sz ), imgChInt( _imgChInt ), descr( _descr ){};
void operator()( const Range &range ) const
{
for ( int i = range.start; i < range.end; ++i )
{
int x, y;
GPCDetails::getCoordinatesFromIndex( i, sz, x, y );
getWHTPatchDescriptor( descr->at( i ), imgChInt, y, x );
}
}
};
void getAllWHTDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams & )
{
const Size sz = imgCh[0].size();
descr.resize( ( sz.height - 2 * patchRadius ) * ( sz.width - 2 * patchRadius ) );
Mat imgChInt[3];
integral( imgCh[0], imgChInt[0], CV_64F );
integral( imgCh[1], imgChInt[1], CV_64F );
integral( imgCh[2], imgChInt[2], CV_64F );
parallel_for_( Range( 0, descr.size() ), ParallelWHTFiller( sz, imgChInt, &descr ) );
}
void buildIndex( OutputArray featuresOut, flann::Index &index, const Mat *imgCh,
void ( *getAllDescrFn )( const Mat *, std::vector< GPCPatchDescriptor > &, const GPCMatchingParams & ) )
{
std::vector< GPCPatchDescriptor > descriptors;
getAllDescrFn( imgCh, descriptors, GPCMatchingParams() );
featuresOut.create( descriptors.size(), GPCPatchDescriptor::nFeatures, CV_32F );
Mat features = featuresOut.getMat();
for ( size_t i = 0; i < descriptors.size(); ++i )
*features.ptr< Vec< float, GPCPatchDescriptor::nFeatures > >( i ) = descriptors[i].feature;
cv::flann::KDTreeIndexParams indexParams;
index.build( features, indexParams, cvflann::FLANN_DIST_L2 );
}
void getTriplet( const Magnitude &mag, const Mat &gt, const Mat *fromCh, const Mat *toCh, GPCSamplesVector &samples, flann::Index &index,
void ( *getDescFn )( GPCPatchDescriptor &, const Mat *, int, int ) )
{
const Size sz = gt.size();
const int i0 = mag.i;
const int j0 = mag.j;
const int i1 = i0 + cvRound( gt.at< Vec2f >( i0, j0 )[1] );
const int j1 = j0 + cvRound( gt.at< Vec2f >( i0, j0 )[0] );
if ( checkBounds( i1, j1, sz ) )
{
GPCPatchSample ps;
getDescFn( ps.ref, fromCh, i0, j0 );
getDescFn( ps.pos, toCh, i1, j1 );
ps.neg.markAsSeparated();
Matx< float, 1, GPCPatchDescriptor::nFeatures > ref32;
Matx< int, 1, negSearchKNN > indices;
int maxDist = 0;
for ( unsigned i = 0; i < GPCPatchDescriptor::nFeatures; ++i )
ref32( 0, i ) = ps.ref.feature[i];
index.knnSearch( ref32, indices, noArray(), negSearchKNN );
for ( unsigned i = 0; i < negSearchKNN; ++i )
{
int i2, j2;
GPCDetails::getCoordinatesFromIndex( indices( 0, i ), sz, j2, i2 );
const int dist = ( i2 - i1 ) * ( i2 - i1 ) + ( j2 - j1 ) * ( j2 - j1 );
if ( maxDist < dist )
{
maxDist = dist;
getDescFn( ps.neg, toCh, i2, j2 );
}
}
samples.push_back( ps );
}
}
+void getTrainingSamples( const Mat &from, const Mat &to, const Mat &gt, GPCSamplesVector &samples, const int type )
 {
   const Size sz = gt.size();
   std::vector< Magnitude > mag;
@@ -116,33 +397,53 @@ void getTrainingSamples( const Mat &from, const Mat &to, const Mat &gt, GPCSampl
     for ( int j = patchRadius; j + patchRadius < sz.width; ++j )
       mag.push_back( Magnitude( normL2Sqr( gt.at< Vec2f >( i, j ) ), i, j ) );
   size_t n = size_t( mag.size() * thresholdMagnitudeFrac ); // As suggested in the paper, we discard part of the training samples
                                                             // with a small displacement and train to better distinguish hard pairs.
   std::nth_element( mag.begin(), mag.begin() + n, mag.end() );
   mag.resize( n );
   std::random_shuffle( mag.begin(), mag.end() );
   n /= patchRadius;
   mag.resize( n );
-  Mat fromCh[3], toCh[3];
-  split( from, fromCh );
-  split( to, toCh );
-
-  for ( size_t k = 0; k < n; ++k )
-  {
-    int i0 = mag[k].i;
-    int j0 = mag[k].j;
-    int i1 = i0 + cvRound( gt.at< Vec2f >( i0, j0 )[1] );
-    int j1 = j0 + cvRound( gt.at< Vec2f >( i0, j0 )[0] );
-    if ( checkBounds( i1, j1, sz ) )
-      samples.push_back( std::make_pair( GPCPatchDescriptor( fromCh, i0, j0 ), GPCPatchDescriptor( toCh, i1, j1 ) ) );
-  }
+  if ( type == GPC_DESCRIPTOR_DCT )
+  {
+    Mat fromCh[3], toCh[3];
+    split( from, fromCh );
+    split( to, toCh );
+
+    Mat allDescriptors;
+    flann::Index index;
+    buildIndex( allDescriptors, index, toCh, getAllDCTDescriptorsForImage );
+
+    for ( size_t k = 0; k < n; ++k )
+      getTriplet( mag[k], gt, fromCh, toCh, samples, index, getDCTPatchDescriptor );
+  }
+  else if ( type == GPC_DESCRIPTOR_WHT )
+  {
+    Mat fromCh[3], toCh[3], fromChInt[3], toChInt[3];
+    split( from, fromCh );
+    split( to, toCh );
+    integral( fromCh[0], fromChInt[0], CV_64F );
+    integral( fromCh[1], fromChInt[1], CV_64F );
+    integral( fromCh[2], fromChInt[2], CV_64F );
+    integral( toCh[0], toChInt[0], CV_64F );
+    integral( toCh[1], toChInt[1], CV_64F );
+    integral( toCh[2], toChInt[2], CV_64F );
+
+    Mat allDescriptors;
+    flann::Index index;
+    buildIndex( allDescriptors, index, toCh, getAllWHTDescriptorsForImage );
+
+    for ( size_t k = 0; k < n; ++k )
+      getTriplet( mag[k], gt, fromChInt, toChInt, samples, index, getWHTPatchDescriptor );
+  }
+  else
+    CV_Error( CV_StsBadArg, "Unknown descriptor type" );
 }
 /* Sample random number from Cauchy distribution. */
 double getRandomCauchyScalar()
 {
-  static RNG rng;
   return tan( rng.uniform( -1.54, 1.54 ) ); // I intentionally used the value slightly less than PI/2 to enforce strictly
                                             // zero probability for large numbers. Resulting PDF for Cauchy has
                                             // truncated "tails".
@@ -155,41 +456,65 @@ void getRandomCauchyVector( Vec< double, GPCPatchDescriptor::nFeatures > &v )
   for ( unsigned i = 0; i < GPCPatchDescriptor::nFeatures; ++i )
     v[i] = getRandomCauchyScalar();
 }
 
+double getRobustMedian( double m ) { return m < 0 ? m * ( 1.0 + epsTolerance ) : m * ( 1.0 - epsTolerance ); }
+
 }
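The Cauchy sampler above is the inverse-CDF method: if U is uniform on (-pi/2, pi/2), then tan(U) is standard Cauchy, and clipping the interval to (-1.54, 1.54) truncates the heavy tails. An equivalent sketch with the standard library (illustrative only, std::mt19937 in place of cv::RNG):

    #include <cmath>
    #include <random>

    double truncatedCauchy( std::mt19937 &gen )
    {
      // 1.54 < pi/2 ~ 1.5708, so arbitrarily large outputs get exactly zero probability.
      std::uniform_real_distribution< double > angle( -1.54, 1.54 );
      return std::tan( angle( gen ) );
    }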
-GPCPatchDescriptor::GPCPatchDescriptor( const Mat *imgCh, int i, int j )
-{
-  Rect roi( j - patchRadius, i - patchRadius, 2 * patchRadius, 2 * patchRadius );
-  Mat freqDomain;
-  dct( imgCh[0]( roi ), freqDomain );
-
-  feature[0] = freqDomain.at< float >( 0, 0 );
-  feature[1] = freqDomain.at< float >( 0, 1 );
-  feature[2] = freqDomain.at< float >( 0, 2 );
-  feature[3] = freqDomain.at< float >( 0, 3 );
-
-  feature[4] = freqDomain.at< float >( 1, 0 );
-  feature[5] = freqDomain.at< float >( 1, 1 );
-  feature[6] = freqDomain.at< float >( 1, 2 );
-  feature[7] = freqDomain.at< float >( 1, 3 );
-
-  feature[8] = freqDomain.at< float >( 2, 0 );
-  feature[9] = freqDomain.at< float >( 2, 1 );
-  feature[10] = freqDomain.at< float >( 2, 2 );
-  feature[11] = freqDomain.at< float >( 2, 3 );
-
-  feature[12] = freqDomain.at< float >( 3, 0 );
-  feature[13] = freqDomain.at< float >( 3, 1 );
-  feature[14] = freqDomain.at< float >( 3, 2 );
-  feature[15] = freqDomain.at< float >( 3, 3 );
-
-  feature[16] = cv::sum( imgCh[1]( roi ) )[0] / ( 2 * patchRadius );
-  feature[17] = cv::sum( imgCh[2]( roi ) )[0] / ( 2 * patchRadius );
-}
+double GPCPatchDescriptor::dot( const Vec< double, nFeatures > &coef ) const
+{
+#if CV_SIMD128_64F
+  v_float64x2 sum = v_setzero_f64();
+  for ( unsigned i = 0; i < nFeatures; i += 2 )
+  {
+    v_float64x2 x = v_load_aligned( &feature.val[i] );
+    v_float64x2 y = v_load_aligned( &coef.val[i] );
+    sum = v_muladd( x, y, sum );
+  }
+#if CV_SSE2
+  __m128d sumrev = _mm_shuffle_pd( sum.val, sum.val, _MM_SHUFFLE2( 0, 1 ) );
+  return _mm_cvtsd_f64( _mm_add_pd( sum.val, sumrev ) );
+#else
+  double CV_DECL_ALIGNED( 16 ) buf[2];
+  v_store_aligned( buf, sum );
+  return OPENCV_HAL_ADD( buf[0], buf[1] );
+#endif
+#else
+  return feature.dot( coef );
+#endif
+}
+
+void GPCPatchSample::getDirections( bool &refdir, bool &posdir, bool &negdir, const Vec< double, GPCPatchDescriptor::nFeatures > &coef, double rhs ) const
+{
+  refdir = ( ref.dot( coef ) < rhs );
+  posdir = pos.isSeparated() ? ( !refdir ) : ( pos.dot( coef ) < rhs );
+  negdir = neg.isSeparated() ? ( !refdir ) : ( neg.dot( coef ) < rhs );
+}
+
+void GPCDetails::getAllDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams &mp,
+                                            int type )
+{
+  if ( type == GPC_DESCRIPTOR_DCT )
+    getAllDCTDescriptorsForImage( imgCh, descr, mp );
+  else if ( type == GPC_DESCRIPTOR_WHT )
+    getAllWHTDescriptorsForImage( imgCh, descr, mp );
+  else
+    CV_Error( CV_StsBadArg, "Unknown descriptor type" );
+}
+
+void GPCDetails::getCoordinatesFromIndex( size_t index, Size sz, int &x, int &y )
+{
+  const size_t stride = sz.width - patchRadius * 2;
+  y = int( index / stride );
+  x = int( index - y * stride + patchRadius );
+  y += patchRadius;
+}
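The SIMD branch of dot() above processes two doubles per step with v_muladd and finishes with a horizontal add. A scalar model of the same computation (illustrative, not from the patch):

    // Two partial sums advance in lock-step over even/odd indices, mirroring the two
    // lanes of v_float64x2; the final addition is the "horizontal add" done by
    // _mm_shuffle_pd + _mm_add_pd (or OPENCV_HAL_ADD) in the real code.
    double dotTwoLane( const double *a, const double *b, unsigned n ) // n must be even
    {
      double lane0 = 0.0, lane1 = 0.0;
      for ( unsigned i = 0; i < n; i += 2 )
      {
        lane0 += a[i] * b[i];
        lane1 += a[i + 1] * b[i + 1];
      }
      return lane0 + lane1;
    }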
 bool GPCTree::trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth )
 {
-  if ( std::distance( begin, end ) < minNumberOfSamples )
+  const int nSamples = (int)std::distance( begin, end );
+
+  if ( nSamples < params.minNumberOfSamples || depth >= params.maxTreeDepth )
     return false;
 
   if ( nodeId >= nodes.size() )
@@ -200,6 +525,7 @@ bool GPCTree::trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth )
   // Select the best hyperplane
   unsigned globalBestScore = 0;
   std::vector< double > values;
+  values.reserve( nSamples * 2 );
 
   for ( int j = 0; j < globalIters; ++j )
   { // Global search step
@@ -209,50 +535,81 @@ bool GPCTree::trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth )
     for ( int i = 0; i < localIters; ++i )
     { // Local search step
-      double randomModification = getRandomCauchyScalar();
+      double randomModification = getRandomCauchyScalar() * ( 1.0 + sigmaGrowthRate * int( i / GPCPatchDescriptor::nFeatures ) );
       const int pos = i % GPCPatchDescriptor::nFeatures;
       std::swap( coef[pos], randomModification );
 
       values.clear();
       for ( SIter iter = begin; iter != end; ++iter )
-      {
-        values.push_back( coef.dot( iter->first.feature ) );
-        values.push_back( coef.dot( iter->second.feature ) );
-      }
+        values.push_back( iter->ref.dot( coef ) );
 
-      std::nth_element( values.begin(), values.begin() + values.size() / 2, values.end() );
-      const double median = values[values.size() / 2];
+      std::nth_element( values.begin(), values.begin() + nSamples / 2, values.end() );
+      double median = values[nSamples / 2];
+
+      // Skip an obviously malformed division. This may happen when there is a large number of equal samples.
+      // Most likely this won't happen with samples collected from a good dataset.
+      // It does happen when the dataset contains plain (or close to plain) images.
+      if ( std::count_if( values.begin(), values.end(), CompareWithTolerance( median ) ) > std::max( 1, nSamples / 4 ) )
+        continue;
+
+      median = getRobustMedian( median );
 
-      unsigned correct = 0;
+      unsigned score = 0;
       for ( SIter iter = begin; iter != end; ++iter )
       {
-        const bool direction = ( coef.dot( iter->first.feature ) < median );
-        if ( direction == ( coef.dot( iter->second.feature ) < median ) )
-          ++correct;
+        bool refdir, posdir, negdir;
+        iter->getDirections( refdir, posdir, negdir, coef, median );
+        if ( refdir == posdir )
+          score += scoreGainPos;
+        if ( refdir != negdir )
+          score += scoreGainNeg;
       }
 
-      if ( correct > localBestScore )
-        localBestScore = correct;
+      if ( score > localBestScore )
+        localBestScore = score;
       else
-        coef[pos] = randomModification;
+      {
+        const double beta = simulatedAnnealingTemperatureCoef * std::sqrt( i ) / ( nSamples * ( scoreGainPos + scoreGainNeg ) );
+        if ( rng.uniform( 0.0, 1.0 ) > std::exp( -beta * ( localBestScore - score ) ) )
+          coef[pos] = randomModification;
+      }
 
-      if ( correct > globalBestScore )
+      if ( score > globalBestScore )
       {
-        globalBestScore = correct;
+        globalBestScore = score;
         node.coef = coef;
         node.rhs = median;
-
-        /*if ( debugOutput )
-        {
-          printf( "[%u] Updating weights: correct %.2f (%u/%ld)\n", depth, double( correct ) / std::distance( begin, end ), correct,
-                  std::distance( begin, end ) );
-          for ( unsigned k = 0; k < GPCPatchDescriptor::nFeatures; ++k )
-            printf( "%.3f ", coef[k] );
-          printf( "\n" );
-        }*/
       }
     }
   }
 
+  if ( globalBestScore == 0 )
+    return false;
+
+  if ( params.printProgress )
+  {
+    const int maxScore = nSamples * ( scoreGainPos + scoreGainNeg );
+    const double correctRatio = double( globalBestScore ) / maxScore;
+    printf( "[%u] Correct %.2f (%u/%d)\nWeights:", depth, correctRatio, globalBestScore, maxScore );
+    for ( unsigned k = 0; k < GPCPatchDescriptor::nFeatures; ++k )
+      printf( " %.3f", node.coef[k] );
+    printf( "\n" );
+  }
+
+  for ( SIter iter = begin; iter != end; ++iter )
+  {
+    bool refdir, posdir, negdir;
+    iter->getDirections( refdir, posdir, negdir, node.coef, node.rhs );
+    // We shouldn't account for the positive sample in the scoring if it was separated before, so mark it as separated.
+    // After all, we can't bring back samples which were separated from the reference on earlier levels.
+    if ( refdir != posdir )
+      iter->pos.markAsSeparated();
+    // The same for the negative sample.
+    if ( refdir != negdir )
+      iter->neg.markAsSeparated();
+    // If both positive and negative were separated before, such a triplet makes no sense on deeper levels. We discard it.
+  }
+
   // Partition vector with samples according to the hyperplane in QuickSort-like manner.
   // Unlike QuickSort, we need to partition it into 3 parts (left subtree samples; undefined samples; right subtree
   // samples), so we call it two times.
@@ -260,16 +617,21 @@ bool GPCTree::trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth )
   SIter rightBegin =
       std::partition( leftEnd, end, PartitionPredicate2( node.coef, node.rhs ) ); // Separate undefined samples from right subtree samples.
 
-  node.left = ( trainNode( nodeId * 2 + 1, begin, leftEnd, depth + 1 ) ) ? unsigned(nodeId * 2 + 1) : 0;
-  node.right = ( trainNode( nodeId * 2 + 2, rightBegin, end, depth + 1 ) ) ? unsigned(nodeId * 2 + 2) : 0;
+  node.left = ( trainNode( nodeId * 2 + 1, begin, leftEnd, depth + 1 ) ) ? unsigned( nodeId * 2 + 1 ) : 0;
+  node.right = ( trainNode( nodeId * 2 + 2, rightBegin, end, depth + 1 ) ) ? unsigned( nodeId * 2 + 2 ) : 0;
 
   return true;
 }
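The else-branch above is a Metropolis-style acceptance rule: a perturbation that worsens the score is nevertheless kept with probability exp(-beta * (localBestScore - score)), and since beta grows like sqrt(i), the local search cools from exploration into plain hill-climbing. The rule in isolation (illustrative sketch, std::mt19937 in place of cv::RNG):

    #include <cmath>
    #include <random>

    // Returns true if a move from bestScore down to score should be accepted anyway.
    bool acceptWorseMove( unsigned bestScore, unsigned score, int iteration,
                          double temperatureCoef, double maxScore, std::mt19937 &gen )
    {
      const double beta = temperatureCoef * std::sqrt( (double)iteration ) / maxScore; // "cooling" schedule
      std::uniform_real_distribution< double > u( 0.0, 1.0 );
      return u( gen ) <= std::exp( -beta * double( bestScore - score ) );
    }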
-void GPCTree::train( GPCSamplesVector &samples )
+void GPCTree::train( GPCTrainingSamples &samples, const GPCTrainingParams _params )
 {
+  if ( _params.descriptorType != samples.type() )
+    CV_Error( CV_StsBadArg, "Descriptor type mismatch! Check that samples are collected with the same descriptor type." );
+
+  nodes.clear();
   nodes.reserve( samples.size() * 2 - 1 ); // set upper bound for the possible number of nodes so all subsequent resize() will be no-op
-  trainNode( 0, samples.begin(), samples.end(), 0 );
+  params = _params;
+  GPCSamplesVector &sv = samples;
+  trainNode( 0, sv.begin(), sv.end(), 0 );
 }
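Putting the reworked API together, a typical call sequence looks like the tests added at the bottom of this commit (the GPCTrainingParams argument meanings are inferred from the fields read in trainNode(); consult the header for the authoritative signature):

    using namespace cv;
    using namespace cv::optflow;

    std::vector< Mat > from, to, gt; // frame pairs and ground-truth flow, filled elsewhere
    Ptr< GPCForest< 5 > > forest = GPCForest< 5 >::create();
    forest->train( from, to, gt,
                   GPCTrainingParams( 8 /* maxTreeDepth */, 3 /* minNumberOfSamples */,
                                      GPC_DESCRIPTOR_DCT, false /* printProgress */ ) );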
 void GPCTree::write( FileStorage &fs ) const
@@ -277,17 +639,39 @@ void GPCTree::write( FileStorage &fs ) const
   if ( nodes.empty() )
     CV_Error( CV_StsBadArg, "Tree has not been trained" );
   fs << "nodes" << nodes;
+  fs << "dtype" << (int)params.descriptorType;
+}
+
+void GPCTree::read( const FileNode &fn )
+{
+  fn["nodes"] >> nodes;
+  fn["dtype"] >> (int &)params.descriptorType;
 }
 
-void GPCTree::read( const FileNode &fn ) { fn["nodes"] >> nodes; }
+unsigned GPCTree::findLeafForPatch( const GPCPatchDescriptor &descr ) const
+{
+  unsigned id = 0, prevId;
+  do
+  {
+    prevId = id;
+    if ( descr.dot( nodes[id].coef ) < nodes[id].rhs )
+      id = nodes[id].right;
+    else
+      id = nodes[id].left;
+  } while ( id );
+  return prevId;
+}
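findLeafForPatch() is what turns the forest into a "collider": every patch descends each of the T trees, and only patches landing in the same leaf of every tree are considered a candidate match (the hashing idea of "The Global Patch Collider", Wang et al., CVPR 2016). A rough sketch of the per-patch key (illustrative only; GPCForest's actual matching bookkeeping may differ):

    #include <array>

    template < int T >
    std::array< unsigned, T > leafSignature( const cv::optflow::GPCTree ( &trees )[T],
                                             const cv::optflow::GPCPatchDescriptor &descr )
    {
      std::array< unsigned, T > key;
      for ( int t = 0; t < T; ++t )
        key[t] = trees[t].findLeafForPatch( descr ); // id of the leaf reached in tree t
      return key; // two patches "collide" iff their keys agree in all T trees
    }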
 Ptr< GPCTrainingSamples > GPCTrainingSamples::create( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo,
-                                                      const std::vector< String > &gt )
+                                                      const std::vector< String > &gt, int _descriptorType )
 {
   CV_Assert( imagesFrom.size() == imagesTo.size() );
   CV_Assert( imagesFrom.size() == gt.size() );
 
   Ptr< GPCTrainingSamples > ts = makePtr< GPCTrainingSamples >();
+  ts->descriptorType = _descriptorType;
   for ( size_t i = 0; i < imagesFrom.size(); ++i )
   {
     Mat from = imread( imagesFrom[i] );
@@ -304,12 +688,69 @@ Ptr< GPCTrainingSamples > GPCTrainingSamples::create( const std::vector< String
     cvtColor( from, from, COLOR_BGR2YCrCb );
     cvtColor( to, to, COLOR_BGR2YCrCb );
 
-    getTrainingSamples( from, to, gtFlow, ts->samples );
+    getTrainingSamples( from, to, gtFlow, ts->samples, ts->descriptorType );
   }
 
   return ts;
 }
 
+Ptr< GPCTrainingSamples > GPCTrainingSamples::create( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo,
+                                                      InputArrayOfArrays gt, int _descriptorType )
+{
+  CV_Assert( imagesFrom.total() == imagesTo.total() );
+  CV_Assert( imagesFrom.total() == gt.total() );
+
+  Ptr< GPCTrainingSamples > ts = makePtr< GPCTrainingSamples >();
+  ts->descriptorType = _descriptorType;
+  for ( size_t i = 0; i < imagesFrom.total(); ++i )
+  {
+    Mat from = imagesFrom.getMat( static_cast<int>( i ) );
+    Mat to = imagesTo.getMat( static_cast<int>( i ) );
+    Mat gtFlow = gt.getMat( static_cast<int>( i ) );
+
+    CV_Assert( from.size == to.size );
+    CV_Assert( from.size == gtFlow.size );
+    CV_Assert( from.channels() == 3 );
+    CV_Assert( to.channels() == 3 );
+
+    from.convertTo( from, CV_32FC3 );
+    to.convertTo( to, CV_32FC3 );
+    cvtColor( from, from, COLOR_BGR2YCrCb );
+    cvtColor( to, to, COLOR_BGR2YCrCb );
+
+    getTrainingSamples( from, to, gtFlow, ts->samples, ts->descriptorType );
+  }
+
+  return ts;
+}
+
+void GPCDetails::dropOutliers( std::vector< std::pair< Point2i, Point2i > > &corr )
+{
+  std::vector< float > mag( corr.size() );
+
+  for ( size_t i = 0; i < corr.size(); ++i )
+    mag[i] = normL2Sqr( corr[i].first - corr[i].second );
+
+  const size_t threshold = size_t( mag.size() * thresholdOutliers );
+  std::nth_element( mag.begin(), mag.begin() + threshold, mag.end() );
+  const float percentile = mag[threshold];
+  size_t i = 0, j = 0;
+
+  while ( i < corr.size() )
+  {
+    if ( normL2Sqr( corr[i].first - corr[i].second ) <= percentile )
+    {
+      corr[j] = corr[i];
+      ++j;
+    }
+    ++i;
+  }
+
+  corr.resize( j );
+}
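dropOutliers() above is a percentile filter: std::nth_element selects the thresholdOutliers-quantile of the squared displacement magnitudes in linear time, and every correspondence whose displacement exceeds that cutoff is compacted away in place. The same pattern on plain floats (illustrative sketch; assumes 0 < frac < 1 and a non-empty vector):

    #include <algorithm>
    #include <vector>

    void keepBelowPercentile( std::vector< float > &v, double frac )
    {
      std::vector< float > tmp( v );
      const size_t k = size_t( tmp.size() * frac );
      std::nth_element( tmp.begin(), tmp.begin() + k, tmp.end() ); // O(n) selection, no full sort
      const float cutoff = tmp[k];
      v.erase( std::remove_if( v.begin(), v.end(),
                               [cutoff]( float x ) { return x > cutoff; } ),
               v.end() );
    }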
 } // namespace optflow
 
 void write( FileStorage &fs, const String &name, const optflow::GPCTree::Node &node )
...
@@ -49,8 +49,16 @@ using namespace optflow;
 static string getDataDir() { return TS::ptr()->get_data_path(); }
 
+static string getRubberWhaleFrame1() { return getDataDir() + "optflow/RubberWhale1.png"; }
+
+static string getRubberWhaleFrame2() { return getDataDir() + "optflow/RubberWhale2.png"; }
+
+static string getRubberWhaleGroundTruth() { return getDataDir() + "optflow/RubberWhale.flo"; }
+
 static bool isFlowCorrect(float u) { return !cvIsNaN(u) && (fabs(u) < 1e9); }
+static bool isFlowCorrect(double u) { return !cvIsNaN(u) && (fabs(u) < 1e9); }
 
 static float calcRMSE(Mat flow1, Mat flow2)
 {
   float sum = 0;
@@ -80,11 +88,36 @@ static float calcRMSE(Mat flow1, Mat flow2)
   return (float)sqrt(sum / (1e-9 + counter));
 }
 
+static float calcAvgEPE(vector< pair<Point2i, Point2i> > corr, Mat flow)
+{
+  double sum = 0;
+  int counter = 0;
+
+  for (size_t i = 0; i < corr.size(); ++i)
+  {
+    Vec2f flow1_at_point = Point2f(corr[i].second - corr[i].first);
+    Vec2f flow2_at_point = flow.at<Vec2f>(corr[i].first.y, corr[i].first.x);
+
+    double u1 = (double)flow1_at_point[0];
+    double v1 = (double)flow1_at_point[1];
+    double u2 = (double)flow2_at_point[0];
+    double v2 = (double)flow2_at_point[1];
+
+    if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2))
+    {
+      sum += sqrt((u1 - u2) * (u1 - u2) + (v1 - v2) * (v1 - v2));
+      counter++;
+    }
+  }
+
+  return (float)(sum / counter);
+}
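For reference, calcAvgEPE implements the standard average endpoint error over the sparse matches: EPE = (1/N) * sum_i sqrt((u1_i - u2_i)^2 + (v1_i - v2_i)^2), where (u1, v1) is the displacement implied by a found correspondence and (u2, v2) is the ground-truth flow at the same pixel; NaN or implausibly large vectors are excluded from the average.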
 bool readRubberWhale(Mat &dst_frame_1, Mat &dst_frame_2, Mat &dst_GT)
 {
-  const string frame1_path = getDataDir() + "optflow/RubberWhale1.png";
-  const string frame2_path = getDataDir() + "optflow/RubberWhale2.png";
-  const string gt_flow_path = getDataDir() + "optflow/RubberWhale.flo";
+  const string frame1_path = getRubberWhaleFrame1();
+  const string frame2_path = getRubberWhaleFrame2();
+  const string gt_flow_path = getRubberWhaleGroundTruth();
 
   dst_frame_1 = imread(frame1_path);
   dst_frame_2 = imread(frame2_path);
@@ -188,3 +221,65 @@ TEST(DenseOpticalFlow_VariationalRefinement, ReferenceAccuracy)
   ASSERT_EQ(GT.cols, flow.cols);
   EXPECT_LE(calcRMSE(GT, flow), target_RMSE);
 }
+
+TEST(DenseOpticalFlow_PCAFlow, ReferenceAccuracy)
+{
+  Mat frame1, frame2, GT;
+  ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));
+
+  const float target_RMSE = 0.55f;
+
+  Mat flow;
+  Ptr<DenseOpticalFlow> algo = createOptFlow_PCAFlow();
+  algo->calc(frame1, frame2, flow);
+
+  ASSERT_EQ(GT.rows, flow.rows);
+  ASSERT_EQ(GT.cols, flow.cols);
+  EXPECT_LE(calcRMSE(GT, flow), target_RMSE);
+}
+
+TEST(DenseOpticalFlow_GlobalPatchColliderDCT, ReferenceAccuracy)
+{
+  Mat frame1, frame2, GT;
+  ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));
+
+  const Size sz = frame1.size() / 2;
+  frame1 = frame1(Rect(0, 0, sz.width, sz.height));
+  frame2 = frame2(Rect(0, 0, sz.width, sz.height));
+  GT = GT(Rect(0, 0, sz.width, sz.height));
+
+  vector<Mat> img1, img2, gt;
+  vector< pair<Point2i, Point2i> > corr;
+  img1.push_back(frame1);
+  img2.push_back(frame2);
+  gt.push_back(GT);
+
+  Ptr< GPCForest<5> > forest = GPCForest<5>::create();
+  forest->train(img1, img2, gt, GPCTrainingParams(8, 3, GPC_DESCRIPTOR_DCT, false));
+  forest->findCorrespondences(frame1, frame2, corr);
+
+  ASSERT_LE(7500U, corr.size());
+  ASSERT_LE(calcAvgEPE(corr, GT), 0.5f);
+}
+
+TEST(DenseOpticalFlow_GlobalPatchColliderWHT, ReferenceAccuracy)
+{
+  Mat frame1, frame2, GT;
+  ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));
+
+  const Size sz = frame1.size() / 2;
+  frame1 = frame1(Rect(0, 0, sz.width, sz.height));
+  frame2 = frame2(Rect(0, 0, sz.width, sz.height));
+  GT = GT(Rect(0, 0, sz.width, sz.height));
+
+  vector<Mat> img1, img2, gt;
+  vector< pair<Point2i, Point2i> > corr;
+  img1.push_back(frame1);
+  img2.push_back(frame2);
+  gt.push_back(GT);
+
+  Ptr< GPCForest<5> > forest = GPCForest<5>::create();
+  forest->train(img1, img2, gt, GPCTrainingParams(8, 3, GPC_DESCRIPTOR_WHT, false));
+  forest->findCorrespondences(frame1, frame2, corr);
+
+  ASSERT_LE(7000U, corr.size());
+  ASSERT_LE(calcAvgEPE(corr, GT), 0.5f);
+}