Commit ac62d70f authored by Vladislav Samsonov, committed by Maksim Shabunin

[GSoC] Implementation of the Global Patch Collider and demo for PCAFlow (#752)

* Minor fixes

* Start adding correspondence finding

* Added finding of correspondences using GPC

* New evaluation tool for GPC

* Changed default parameters

* Display ground truth in the evaluation tool

* Added training tool for MPI Sintel dataset

* Added the training tool for Middlebury dataset

* Added some OpenCL optimization

* Added explanatory notes

* Minor improvements: time measurements + little ocl optimization

* Added demos

* Fixed warnings

* Make parameter struct assignable

* Fix warning

* Proper command line argument usage

* Prettified training tool, added parameters

* Fixed VS warning

* Fixed VS warning

* Use compressed forest.yml.gz files by default to save space

* Added OpenCL flag to the evaluation tool

* Updated documentation

* Major speed and memory improvements:
1) Added new (optional) type of patch descriptors which are much faster. Retraining with option --descriptor-type=1 is required.
2) Got rid of hash table for descriptors, less memory usage.

* Fixed various floating point errors related to precision.
SIMD for dot product, forest traversing is a little bit faster now.

* Tolerant floating point comparison

* Triplets

* Added comment

* Choosing negative sample among nearest neighbors

* Fix warning

* Use parallel_for_() in critical places. Performance improvements.

* Simulated annealing heuristic

* Moved OpenCL kernel to separate file

* Moved implementation to source file

* Added basic accuracy tests for GPC and PCAFlow

* Fixing warnings

* Test accuracy constraints were too strict

* Test accuracy constraints were too strict

* Make tests more lightweight
parent 25575af6
......@@ -52,3 +52,19 @@
pages={25--36},
year={2004}
}
@inproceedings{Wulff:CVPR:2015,
title = {Efficient Sparse-to-Dense Optical Flow Estimation using a Learned Basis and Layers},
author = {Wulff, Jonas and Black, Michael J.},
booktitle = { IEEE Conf. on Computer Vision and Pattern Recognition (CVPR) 2015},
month = {June},
year = {2015}
}
@inproceedings{Wang_2016_CVPR,
author = {Wang, Shenlong and Ryan Fanello, Sean and Rhemann, Christoph and Izadi, Shahram and Kohli, Pushmeet},
title = {The Global Patch Collider},
booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2016}
}
......@@ -43,9 +43,6 @@ the use of this software, even if advised of the possibility of such damage.
#include "opencv2/core.hpp"
#include "opencv2/video.hpp"
#include "opencv2/optflow/pcaflow.hpp"
#include "opencv2/optflow/sparse_matching_gpc.hpp"
/**
@defgroup optflow Optical Flow Algorithms
......@@ -69,6 +66,9 @@ Functions reading and writing .flo files in "Middlebury" format, see: <http://vi
*/
#include "opencv2/optflow/pcaflow.hpp"
#include "opencv2/optflow/sparse_matching_gpc.hpp"
namespace cv
{
namespace optflow
......
......@@ -37,23 +37,19 @@ or tort (including negligence or otherwise) arising in any way out of
the use of this software, even if advised of the possibility of such damage.
*/
/*
Implementation of the PCAFlow algorithm from the following paper:
http://files.is.tue.mpg.de/black/papers/cvpr2015_pcaflow.pdf
@inproceedings{Wulff:CVPR:2015,
title = {Efficient Sparse-to-Dense Optical Flow Estimation using a Learned Basis and Layers},
author = {Wulff, Jonas and Black, Michael J.},
booktitle = { IEEE Conf. on Computer Vision and Pattern Recognition (CVPR) 2015},
month = jun,
year = {2015}
}
There are some key differences which distinguish this algorithm from the original PCAFlow (see paper):
- Discrete Cosine Transform basis is used instead of basis extracted with PCA.
Reasoning: DCT basis has comparable performance and it doesn't require additional storage space.
Also, this decision helps to avoid overloading the algorithm with a lot of external input.
- Usage of built-in OpenCV feature tracking instead of libviso.
/**
* @file pcaflow.hpp
* @author Vladislav Samsonov <vvladxx@gmail.com>
* @brief Implementation of the PCAFlow algorithm from the following paper:
* http://files.is.tue.mpg.de/black/papers/cvpr2015_pcaflow.pdf
*
* @cite Wulff:CVPR:2015
*
* There are some key differences which distinguish this algorithm from the original PCAFlow (see paper):
* - Discrete Cosine Transform basis is used instead of basis extracted with PCA.
* Reasoning: DCT basis has comparable performance and it doesn't require additional storage space.
* Also, this decision helps to avoid overloading the algorithm with a lot of external input.
* - Usage of built-in OpenCV feature tracking instead of libviso.
*/
#ifndef __OPENCV_OPTFLOW_PCAFLOW_HPP__
......@@ -67,7 +63,10 @@ namespace cv
namespace optflow
{
/*
//! @addtogroup optflow
//! @{
/** @brief
* This class can be used for imposing a learned prior on the resulting optical flow.
* The solution will be regularized according to this prior.
* You need to generate an appropriate prior file with the "learn_prior.py" script beforehand.
......@@ -90,6 +89,8 @@ public:
void fillConstraints( float *A1, float *A2, float *b1, float *b2 ) const;
};
/** @brief PCAFlow algorithm.
*/
class CV_EXPORTS_W OpticalFlowPCAFlow : public DenseOpticalFlow
{
protected:
......@@ -103,6 +104,15 @@ protected:
bool useOpenCL;
public:
/** @brief Creates an instance of PCAFlow algorithm.
* @param _prior Learned prior or no prior (default). @see cv::optflow::PCAPrior
* @param _basisSize Number of basis vectors.
* @param _sparseRate Controls density of sparse matches.
* @param _retainedCornersFraction Fraction of the detected corners to retain.
* @param _occlusionsThreshold Threshold for detecting occlusions.
* @param _dampingFactor Regularization term for solving least-squares. It is not related to the prior regularization.
* @param _claheClip Clip parameter for CLAHE.
*/
OpticalFlowPCAFlow( Ptr<const PCAPrior> _prior = Ptr<const PCAPrior>(), const Size _basisSize = Size( 18, 14 ),
float _sparseRate = 0.024, float _retainedCornersFraction = 0.2,
float _occlusionsThreshold = 0.0003, float _dampingFactor = 0.00002, float _claheClip = 14 );
......@@ -127,7 +137,12 @@ private:
OpticalFlowPCAFlow& operator=( const OpticalFlowPCAFlow& ); // make it non-assignable
};
/** @brief Creates an instance of PCAFlow
*/
CV_EXPORTS_W Ptr<DenseOpticalFlow> createOptFlow_PCAFlow();
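/* A minimal usage sketch (not part of the original header; the image and prior
 * file names are hypothetical, and the prior file is assumed to be produced by
 * the "learn_prior.py" script):
 *
 *   Mat from = imread( "frame0.png" ), to = imread( "frame1.png" );
 *   Mat flow( from.size(), CV_32FC2 );
 *   Ptr<DenseOpticalFlow> algo = optflow::createOptFlow_PCAFlow(); // no prior
 *   algo->calc( from, to, flow );
 *   // With a learned prior:
 *   Ptr<DenseOpticalFlow> algoPrior =
 *       makePtr<optflow::OpticalFlowPCAFlow>( makePtr<optflow::PCAPrior>( "prior.dat" ) );
 *   algoPrior->calc( from, to, flow );
 */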
//! @}
}
}
......
......@@ -37,68 +37,135 @@ or tort (including negligence or otherwise) arising in any way out of
the use of this software, even if advised of the possibility of such damage.
*/
/*
Implementation of the Global Patch Collider algorithm from the following paper:
http://research.microsoft.com/en-us/um/people/pkohli/papers/wfrik_cvpr2016.pdf
@InProceedings{Wang_2016_CVPR,
author = {Wang, Shenlong and Ryan Fanello, Sean and Rhemann, Christoph and Izadi, Shahram and Kohli, Pushmeet},
title = {The Global Patch Collider},
booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2016}
}
*/
/**
* @file sparse_matching_gpc.hpp
* @author Vladislav Samsonov <vvladxx@gmail.com>
* @brief Implementation of the Global Patch Collider.
*
* Implementation of the Global Patch Collider algorithm from the following paper:
* http://research.microsoft.com/en-us/um/people/pkohli/papers/wfrik_cvpr2016.pdf
*
* @cite Wang_2016_CVPR
*/
#ifndef __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__
#define __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__
#include "opencv2/core.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "opencv2/imgproc.hpp"
namespace cv
{
namespace optflow
{
//! @addtogroup optflow
//! @{
struct CV_EXPORTS_W GPCPatchDescriptor
{
static const unsigned nFeatures = 18; // number of features in a patch descriptor
static const unsigned nFeatures = 18; //!< number of features in a patch descriptor
Vec< double, nFeatures > feature;
GPCPatchDescriptor( const Mat *imgCh, int i, int j );
double dot( const Vec< double, nFeatures > &coef ) const;
void markAsSeparated() { feature[0] = std::numeric_limits< double >::quiet_NaN(); }
bool isSeparated() const { return cvIsNaN( feature[0] ) != 0; }
};
struct CV_EXPORTS_W GPCPatchSample
{
GPCPatchDescriptor ref;
GPCPatchDescriptor pos;
GPCPatchDescriptor neg;
void getDirections( bool &refdir, bool &posdir, bool &negdir, const Vec< double, GPCPatchDescriptor::nFeatures > &coef, double rhs ) const;
};
typedef std::pair< GPCPatchDescriptor, GPCPatchDescriptor > GPCPatchSample;
typedef std::vector< GPCPatchSample > GPCSamplesVector;
/** @brief Descriptor types for the Global Patch Collider.
*/
enum GPCDescType
{
GPC_DESCRIPTOR_DCT = 0, //!< Better quality but slow
GPC_DESCRIPTOR_WHT //!< Worse quality but much faster
};
/** @brief Class encapsulating training samples.
*/
class CV_EXPORTS_W GPCTrainingSamples
{
private:
GPCSamplesVector samples;
int descriptorType;
public:
/** @brief This function can be used to extract samples from a pair of images and a ground truth flow.
* Sizes of all the provided vectors must be equal.
*/
static Ptr< GPCTrainingSamples > create( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo,
const std::vector< String > &gt );
const std::vector< String > &gt, int descriptorType );
static Ptr< GPCTrainingSamples > create( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo, InputArrayOfArrays gt,
int descriptorType );
size_t size() const { return samples.size(); }
operator GPCSamplesVector() const { return samples; }
int type() const { return descriptorType; }
operator GPCSamplesVector &() { return samples; }
};
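/* A minimal sketch of collecting training samples (the file names are hypothetical):
 *
 *   std::vector<String> from( 1, "from.png" ), to( 1, "to.png" ), gt( 1, "gt.flo" );
 *   Ptr<GPCTrainingSamples> ts = GPCTrainingSamples::create( from, to, gt, GPC_DESCRIPTOR_DCT );
 *   // ts now holds ts->size() patch triplets ready for GPCTree/GPCForest training.
 */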
/** @brief Class encapsulating training parameters.
*/
struct GPCTrainingParams
{
unsigned maxTreeDepth; //!< Maximum tree depth to stop partitioning.
int minNumberOfSamples; //!< Minimum number of samples in the node to stop partitioning.
int descriptorType; //!< Type of descriptors to use.
bool printProgress; //!< Print progress to stdout.
GPCTrainingParams( unsigned _maxTreeDepth = 20, int _minNumberOfSamples = 3, GPCDescType _descriptorType = GPC_DESCRIPTOR_DCT,
bool _printProgress = true )
: maxTreeDepth( _maxTreeDepth ), minNumberOfSamples( _minNumberOfSamples ), descriptorType( _descriptorType ),
printProgress( _printProgress )
{
CV_Assert( check() );
}
GPCTrainingParams( const GPCTrainingParams &params )
: maxTreeDepth( params.maxTreeDepth ), minNumberOfSamples( params.minNumberOfSamples ), descriptorType( params.descriptorType ),
printProgress( params.printProgress )
{
CV_Assert( check() );
}
bool check() const { return maxTreeDepth > 1 && minNumberOfSamples > 1; }
};
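/* Example of non-default training parameters (a sketch; the values are arbitrary):
 *
 *   GPCTrainingParams params( 16, 5, GPC_DESCRIPTOR_WHT, false );
 *   // maxTreeDepth = 16, minNumberOfSamples = 5, fast WHT descriptors, quiet mode.
 */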
/** @brief Class encapsulating matching parameters.
*/
struct GPCMatchingParams
{
bool useOpenCL; //!< Whether to use OpenCL to speed up the matching.
GPCMatchingParams( bool _useOpenCL = false ) : useOpenCL( _useOpenCL ) {}
GPCMatchingParams( const GPCMatchingParams &params ) : useOpenCL( params.useOpenCL ) {}
};
/** @brief Class for an individual tree.
*/
class CV_EXPORTS_W GPCTree : public Algorithm
{
public:
struct Node
{
Vec< double, GPCPatchDescriptor::nFeatures > coef; // hyperplane coefficients
double rhs;
Vec< double, GPCPatchDescriptor::nFeatures > coef; //!< Hyperplane coefficients
double rhs; //!< Bias term of the hyperplane
unsigned left;
unsigned right;
......@@ -109,45 +176,100 @@ private:
typedef GPCSamplesVector::iterator SIter;
std::vector< Node > nodes;
GPCTrainingParams params;
bool trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth );
public:
void train( GPCSamplesVector &samples );
void train( GPCTrainingSamples &samples, const GPCTrainingParams params = GPCTrainingParams() );
void write( FileStorage &fs ) const;
void read( const FileNode &fn );
unsigned findLeafForPatch( const GPCPatchDescriptor &descr ) const;
static Ptr< GPCTree > create() { return makePtr< GPCTree >(); }
bool operator==( const GPCTree &t ) const { return nodes == t.nodes; }
int getDescriptorType() const { return params.descriptorType; }
};
template < int T > class CV_EXPORTS_W GPCForest : public Algorithm
{
private:
struct Trail
{
unsigned leaf[T]; //!< Leaf of each of the T trees into which the patch fell.
Point2i coord; //!< Patch coordinates.
bool operator==( const Trail &trail ) const { return memcmp( leaf, trail.leaf, sizeof( leaf ) ) == 0; }
bool operator<( const Trail &trail ) const
{
for ( int i = 0; i < T - 1; ++i )
if ( leaf[i] != trail.leaf[i] )
return leaf[i] < trail.leaf[i];
return leaf[T - 1] < trail.leaf[T - 1];
}
};
class ParallelTrailsFilling : public ParallelLoopBody
{
private:
const GPCForest *forest;
const std::vector< GPCPatchDescriptor > *descr;
std::vector< Trail > *trails;
ParallelTrailsFilling &operator=( const ParallelTrailsFilling & );
public:
ParallelTrailsFilling( const GPCForest *_forest, const std::vector< GPCPatchDescriptor > *_descr, std::vector< Trail > *_trails )
: forest( _forest ), descr( _descr ), trails( _trails ){};
void operator()( const Range &range ) const
{
for ( int t = range.start; t < range.end; ++t )
for ( size_t i = 0; i < descr->size(); ++i )
trails->at( i ).leaf[t] = forest->tree[t].findLeafForPatch( descr->at( i ) );
}
};
GPCTree tree[T];
public:
/** @brief Train the forest using one sample set for every tree.
* Please consider using the next method instead of this one for better quality.
*/
void train( GPCSamplesVector &samples )
void train( GPCTrainingSamples &samples, const GPCTrainingParams params = GPCTrainingParams() )
{
for ( int i = 0; i < T; ++i )
tree[i].train( samples );
tree[i].train( samples, params );
}
/** @brief Train the forest using individual samples for each tree.
* It is generally better to use this instead of the first method.
*/
void train( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo, const std::vector< String > &gt )
void train( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo, const std::vector< String > &gt,
const GPCTrainingParams params = GPCTrainingParams() )
{
for ( int i = 0; i < T; ++i )
{
Ptr< GPCTrainingSamples > samples = GPCTrainingSamples::create( imagesFrom, imagesTo, gt ); // Create training set for the tree
tree[i].train( *samples );
Ptr< GPCTrainingSamples > samples =
GPCTrainingSamples::create( imagesFrom, imagesTo, gt, params.descriptorType ); // Create training set for the tree
tree[i].train( *samples, params );
}
}
void train( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo, InputArrayOfArrays gt,
const GPCTrainingParams params = GPCTrainingParams() )
{
for ( int i = 0; i < T; ++i )
{
Ptr< GPCTrainingSamples > samples =
GPCTrainingSamples::create( imagesFrom, imagesTo, gt, params.descriptorType ); // Create training set for the tree
tree[i].train( *samples, params );
}
}
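/* A minimal training sketch (the dataset paths are hypothetical):
 *
 *   std::vector<String> from, to, gt; // fill with N paths each: frame pairs + ground truth .flo
 *   Ptr< GPCForest<5> > forest = GPCForest<5>::create();
 *   forest->train( from, to, gt ); // samples are re-collected for every tree
 *   forest->save( "forest.yml.gz" );
 */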
......@@ -166,19 +288,93 @@ public:
void read( const FileNode &fn )
{
CV_Assert( T == (int)fn["ntrees"] );
CV_Assert( T <= (int)fn["ntrees"] );
FileNodeIterator it = fn["trees"].begin();
for ( int i = 0; i < T; ++i, ++it )
tree[i].read( *it );
}
/** @brief Find correspondences between two images.
* @param[in] imgFrom First image in a sequence.
* @param[in] imgTo Second image in a sequence.
* @param[out] corr Output vector with pairs of corresponding points.
* @param[in] params Additional matching parameters for fine-tuning.
*/
void findCorrespondences( InputArray imgFrom, InputArray imgTo, std::vector< std::pair< Point2i, Point2i > > &corr,
const GPCMatchingParams params = GPCMatchingParams() ) const;
static Ptr< GPCForest > create() { return makePtr< GPCForest >(); }
};
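/* A minimal matching sketch (the model and image paths are hypothetical):
 *
 *   Ptr< GPCForest<5> > forest = Algorithm::load< GPCForest<5> >( "forest.yml.gz" );
 *   Mat from = imread( "from.png" ), to = imread( "to.png" ); // 3-channel BGR
 *   std::vector< std::pair<Point2i, Point2i> > corr;
 *   forest->findCorrespondences( from, to, corr, GPCMatchingParams( true ) ); // with OpenCL
 */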
class CV_EXPORTS_W GPCDetails
{
public:
static void dropOutliers( std::vector< std::pair< Point2i, Point2i > > &corr );
static void getAllDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams &mp,
int type );
static void getCoordinatesFromIndex( size_t index, Size sz, int &x, int &y );
};
template < int T >
void GPCForest< T >::findCorrespondences( InputArray imgFrom, InputArray imgTo, std::vector< std::pair< Point2i, Point2i > > &corr,
const GPCMatchingParams params ) const
{
CV_Assert( imgFrom.channels() == 3 );
CV_Assert( imgTo.channels() == 3 );
Mat from, to;
imgFrom.getMat().convertTo( from, CV_32FC3 );
imgTo.getMat().convertTo( to, CV_32FC3 );
cvtColor( from, from, COLOR_BGR2YCrCb );
cvtColor( to, to, COLOR_BGR2YCrCb );
Mat fromCh[3], toCh[3];
split( from, fromCh );
split( to, toCh );
std::vector< GPCPatchDescriptor > descr;
GPCDetails::getAllDescriptorsForImage( fromCh, descr, params, tree[0].getDescriptorType() );
std::vector< Trail > trailsFrom( descr.size() ), trailsTo( descr.size() );
for ( size_t i = 0; i < descr.size(); ++i )
GPCDetails::getCoordinatesFromIndex( i, from.size(), trailsFrom[i].coord.x, trailsFrom[i].coord.y );
parallel_for_( Range( 0, T ), ParallelTrailsFilling( this, &descr, &trailsFrom ) );
descr.clear();
GPCDetails::getAllDescriptorsForImage( toCh, descr, params, tree[0].getDescriptorType() );
for ( size_t i = 0; i < descr.size(); ++i )
GPCDetails::getCoordinatesFromIndex( i, to.size(), trailsTo[i].coord.x, trailsTo[i].coord.y );
parallel_for_( Range( 0, T ), ParallelTrailsFilling( this, &descr, &trailsTo ) );
std::sort( trailsFrom.begin(), trailsFrom.end() );
std::sort( trailsTo.begin(), trailsTo.end() );
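// Match only trails that are unique in both images: for every trail occurring exactly
// once in the first image, binary-search for an equal trail in the second image and
// accept the pair only if that trail occurs exactly once there as well.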
for ( size_t i = 0; i < trailsFrom.size(); ++i )
{
bool uniq = true;
while ( i + 1 < trailsFrom.size() && trailsFrom[i] == trailsFrom[i + 1] )
++i, uniq = false;
if ( uniq )
{
typename std::vector< Trail >::const_iterator lb = std::lower_bound( trailsTo.begin(), trailsTo.end(), trailsFrom[i] );
if ( lb != trailsTo.end() && *lb == trailsFrom[i] && ( ( lb + 1 ) == trailsTo.end() || !( *lb == *( lb + 1 ) ) ) )
corr.push_back( std::make_pair( trailsFrom[i].coord, lb->coord ) );
}
}
GPCDetails::dropOutliers( corr );
}
//! @}
} // namespace optflow
CV_EXPORTS void write( FileStorage &fs, const String &name, const optflow::GPCTree::Node &node );
CV_EXPORTS void read( const FileNode &fn, optflow::GPCTree::Node &node, optflow::GPCTree::Node );
}
} // namespace cv
#endif
#include "opencv2/core/ocl.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/optflow.hpp"
#include <fstream>
#include <iostream>
#include <stdio.h>
/* This tool finds correspondences between two images using the Global Patch Collider
* and calculates error using provided ground truth flow.
*
* It will look for the file named "forest.yml.gz" with a learned forest.
* You can obtain the "forest.yml.gz" either by manually training it using the tool with the *_train suffix
* or by downloading one of the files trained on some publicly available dataset from here:
*
* https://drive.google.com/open?id=0B7Hb8cfuzrIIZDFscXVYd0NBNFU
*/
using namespace cv;
const String keys = "{help h ? | | print this message}"
"{@image1 |<none> | image1}"
"{@image2 |<none> | image2}"
"{@groundtruth |<none> | path to the .flo file}"
"{@output | | output to a file instead of displaying, output image path}"
"{g gpu | | use OpenCL}"
"{f forest |forest.yml.gz| path to the forest.yml.gz}";
const int nTrees = 5;
static double normL2( const Point2f &v ) { return sqrt( v.x * v.x + v.y * v.y ); }
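// getFlowColor() below maps a flow vector to an HSV triple: hue encodes direction,
// saturation encodes (optionally log-scaled) magnitude clipped to 1, and zero flow
// maps to white.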
static Vec3d getFlowColor( const Point2f &f, const bool logScale = true, const double scaleDown = 5 )
{
if ( f.x == 0 && f.y == 0 )
return Vec3d( 0, 0, 1 );
double radius = normL2( f );
if ( logScale )
radius = log( radius + 1 );
radius /= scaleDown;
radius = std::min( 1.0, radius );
double angle = ( atan2( -f.y, -f.x ) + CV_PI ) * 180 / CV_PI;
return Vec3d( angle, radius, 1 );
}
static void displayFlow( InputArray _flow, OutputArray _img )
{
const Size sz = _flow.size();
Mat flow = _flow.getMat();
_img.create( sz, CV_32FC3 );
Mat img = _img.getMat();
for ( int i = 0; i < sz.height; ++i )
for ( int j = 0; j < sz.width; ++j )
img.at< Vec3f >( i, j ) = getFlowColor( flow.at< Point2f >( i, j ) );
cvtColor( img, img, COLOR_HSV2BGR );
}
static bool fileProbe( const char *name ) { return std::ifstream( name ).good(); }
int main( int argc, const char **argv )
{
CommandLineParser parser( argc, argv, keys );
parser.about( "Global Patch Collider evaluation tool" );
if ( parser.has( "help" ) )
{
parser.printMessage();
return 0;
}
String fromPath = parser.get< String >( 0 );
String toPath = parser.get< String >( 1 );
String gtPath = parser.get< String >( 2 );
String outPath = parser.get< String >( 3 );
const bool useOpenCL = parser.has( "gpu" );
String forestDumpPath = parser.get< String >( "forest" );
if ( !parser.check() )
{
parser.printErrors();
return 1;
}
if ( !fileProbe( forestDumpPath.c_str() ) )
{
std::cerr << "Can't open the file with a trained model: `" << forestDumpPath
<< "`.\nYou can obtain this file either by manually training the model using another tool with *_train suffix or by "
"downloading one of the files trained on some publicly available dataset from "
"here:\nhttps://drive.google.com/open?id=0B7Hb8cfuzrIIZDFscXVYd0NBNFU"
<< std::endl;
return 1;
}
ocl::setUseOpenCL( useOpenCL );
Ptr< optflow::GPCForest< nTrees > > forest = Algorithm::load< optflow::GPCForest< nTrees > >( forestDumpPath );
Mat from = imread( fromPath );
Mat to = imread( toPath );
Mat gt = optflow::readOpticalFlow( gtPath );
std::vector< std::pair< Point2i, Point2i > > corr;
TickMeter meter;
meter.start();
forest->findCorrespondences( from, to, corr, optflow::GPCMatchingParams( useOpenCL ) );
meter.stop();
std::cout << "Found " << corr.size() << " matches." << std::endl;
std::cout << "Time: " << meter.getTimeSec() << " sec." << std::endl;
double error = 0;
Mat dispErr = Mat::zeros( from.size(), CV_32FC3 );
dispErr = Scalar( 0, 0, 1 );
Mat disp = Mat::zeros( from.size(), CV_32FC3 );
disp = Scalar( 0, 0, 1 );
for ( size_t i = 0; i < corr.size(); ++i )
{
const Point2f a = corr[i].first;
const Point2f b = corr[i].second;
const Point2f c = a + gt.at< Point2f >( corr[i].first.y, corr[i].first.x );
error += normL2( b - c );
circle( disp, a, 3, getFlowColor( b - a ), -1 );
circle( dispErr, a, 3, getFlowColor( b - c, false, 32 ), -1 );
}
error /= corr.size();
std::cout << "Average endpoint error: " << error << " px." << std::endl;
cvtColor( disp, disp, COLOR_HSV2BGR );
cvtColor( dispErr, dispErr, COLOR_HSV2BGR );
Mat dispGroundTruth;
displayFlow( gt, dispGroundTruth );
if ( outPath.length() )
{
putText( disp, "Sparse matching: Global Patch Collider", Point2i( 24, 40 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
char buf[256];
sprintf( buf, "Average EPE: %.2f", error );
putText( disp, buf, Point2i( 24, 80 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
sprintf( buf, "Number of matches: %u", (unsigned)corr.size() );
putText( disp, buf, Point2i( 24, 120 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
disp *= 255;
imwrite( outPath, disp );
return 0;
}
namedWindow( "Correspondences", WINDOW_AUTOSIZE );
imshow( "Correspondences", disp );
namedWindow( "Error", WINDOW_AUTOSIZE );
imshow( "Error", dispErr );
namedWindow( "Ground truth", WINDOW_AUTOSIZE );
imshow( "Ground truth", dispGroundTruth );
waitKey( 0 );
return 0;
}
#include "opencv2/optflow.hpp"
#include <iostream>
/* This tool trains the forest for the Global Patch Collider and stores the resulting forest to a file ("forest.yml.gz" by default).
*/
using namespace cv;
const String keys = "{help h ? | | print this message}"
"{max-tree-depth | | Maximum tree depth to stop partitioning}"
"{min-samples | | Minimum number of samples in the node to stop partitioning}"
"{descriptor-type|0 | Descriptor type. Set to 0 for quality, 1 for speed.}"
"{print-progress | | Set to 0 to enable quiet mode, set to 1 to print progress}"
"{f forest |forest.yml.gz| Path where to store resulting forest. It is recommended to use .yml.gz extension.}";
const int nTrees = 5;
int main( int argc, const char **argv )
static void fillInputImagesFromCommandLine( std::vector< String > &img1, std::vector< String > &img2, std::vector< String > &gt, int argc,
const char **argv )
{
int nSequences = argc - 1;
if ( nSequences <= 0 || nSequences % 3 != 0 )
for ( int i = 1, j = 0; i < argc; ++i )
{
std::cerr << "Usage: " << argv[0] << " ImageFrom1 ImageTo1 GroundTruth1 ... ImageFromN ImageToN GroundTruthN" << std::endl;
return 1;
if ( argv[i][0] == '-' )
continue;
if ( j % 3 == 0 )
img1.push_back( argv[i] );
if ( j % 3 == 1 )
img2.push_back( argv[i] );
if ( j % 3 == 2 )
gt.push_back( argv[i] );
++j;
}
}
nSequences /= 3;
std::vector< cv::String > img1, img2, gt;
int main( int argc, const char **argv )
{
CommandLineParser parser( argc, argv, keys );
parser.about( "Global Patch Collider training tool" );
std::vector< String > img1, img2, gt;
optflow::GPCTrainingParams params;
for ( int i = 0; i < nSequences; ++i )
if ( parser.has( "max-tree-depth" ) )
params.maxTreeDepth = parser.get< unsigned >( "max-tree-depth" );
if ( parser.has( "min-samples" ) )
params.minNumberOfSamples = parser.get< unsigned >( "min-samples" );
if ( parser.has( "descriptor-type" ) )
params.descriptorType = parser.get< int >( "descriptor-type" );
if ( parser.has( "print-progress" ) )
params.printProgress = parser.get< unsigned >( "print-progress" ) != 0;
fillInputImagesFromCommandLine( img1, img2, gt, argc, argv );
if ( parser.has( "help" ) || img1.size() != img2.size() || img1.size() != gt.size() || img1.size() == 0 )
{
img1.push_back( argv[1 + i * 3] );
img2.push_back( argv[1 + i * 3 + 1] );
gt.push_back( argv[1 + i * 3 + 2] );
std::cerr << "\nUsage: " << argv[0] << " [params] ImageFrom1 ImageTo1 GroundTruth1 ... ImageFromN ImageToN GroundTruthN\n" << std::endl;
parser.printMessage();
return 1;
}
cv::Ptr< cv::optflow::GPCForest< nTrees > > forest = cv::optflow::GPCForest< nTrees >::create();
forest->train( img1, img2, gt );
forest->save( "forest.dump" );
Ptr< optflow::GPCForest< nTrees > > forest = optflow::GPCForest< nTrees >::create();
forest->train( img1, img2, gt, params );
forest->save( parser.get< String >( "forest" ) );
return 0;
}
import argparse
import glob
import os
import subprocess
def execute(cmd):
popen = subprocess.Popen(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
for stdout_line in iter(popen.stdout.readline, ''):
print(stdout_line.rstrip())
for stderr_line in iter(popen.stderr.readline, ''):
print(stderr_line.rstrip())
popen.stdout.close()
popen.stderr.close()
return_code = popen.wait()
if return_code != 0:
raise subprocess.CalledProcessError(return_code, cmd)
def main():
parser = argparse.ArgumentParser(
description='Train Global Patch Collider using Middlebury dataset')
parser.add_argument(
'--bin_path',
help='Path to the training executable (example_optflow_gpc_train)',
required=True)
parser.add_argument('--dataset_path',
help='Path to the directory with frames',
required=True)
parser.add_argument('--gt_path',
help='Path to the directory with ground truth flow',
required=True)
parser.add_argument('--descriptor_type',
help='Descriptor type',
type=int,
default=0)
args = parser.parse_args()
seq = glob.glob(os.path.join(args.dataset_path, '*'))
seq.sort()
input_files = []
for s in seq:
if os.path.isdir(s):
seq_name = os.path.basename(s)
frames = glob.glob(os.path.join(s, 'frame*.png'))
frames.sort()
assert (len(frames) == 2)
assert (os.path.basename(frames[0]) == 'frame10.png')
assert (os.path.basename(frames[1]) == 'frame11.png')
gt_flow = os.path.join(args.gt_path, seq_name, 'flow10.flo')
if os.path.isfile(gt_flow):
input_files += [frames[0], frames[1], gt_flow]
execute([args.bin_path, '--descriptor-type=%d' % args.descriptor_type] + input_files)
if __name__ == '__main__':
main()
import argparse
import glob
import os
import subprocess
FRAME_DIST = 2
assert (FRAME_DIST >= 1)
def execute(cmd):
popen = subprocess.Popen(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
for stdout_line in iter(popen.stdout.readline, ''):
print(stdout_line.rstrip())
for stderr_line in iter(popen.stderr.readline, ''):
print(stderr_line.rstrip())
popen.stdout.close()
popen.stderr.close()
return_code = popen.wait()
if return_code != 0:
raise subprocess.CalledProcessError(return_code, cmd)
def main():
parser = argparse.ArgumentParser(
description='Train Global Patch Collider using MPI Sintel dataset')
parser.add_argument(
'--bin_path',
help='Path to the training executable (example_optflow_gpc_train)',
required=True)
parser.add_argument('--dataset_path',
help='Path to the directory with frames',
required=True)
parser.add_argument('--gt_path',
help='Path to the directory with ground truth flow',
required=True)
parser.add_argument('--descriptor_type',
help='Descriptor type',
type=int,
default=0)
args = parser.parse_args()
seq = glob.glob(os.path.join(args.dataset_path, '*'))
seq.sort()
input_files = []
for s in seq:
seq_name = os.path.basename(s)
frames = glob.glob(os.path.join(s, 'frame*.png'))
frames.sort()
for i in range(0, len(frames) - 1, FRAME_DIST):
gt_flow = os.path.join(args.gt_path, seq_name,
os.path.basename(frames[i])[0:-4] + '.flo')
assert (os.path.isfile(gt_flow))
input_files += [frames[i], frames[i + 1], gt_flow]
execute([args.bin_path, '--descriptor-type=%d' % args.descriptor_type] + input_files)
if __name__ == '__main__':
main()
#include "opencv2/core/ocl.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/optflow.hpp"
#include <fstream>
#include <iostream>
#include <stdio.h>
using namespace cv;
using optflow::OpticalFlowPCAFlow;
using optflow::PCAPrior;
const String keys = "{help h ? | | print this message}"
"{@image1 |<none>| image1}"
"{@image2 |<none>| image2}"
"{@groundtruth |<none>| path to the .flo file}"
"{@prior |<none>| path to a prior file for PCAFlow}"
"{@output |<none>| output image path}"
"{g gpu | | use OpenCL}";
static double normL2( const Point2f &v ) { return sqrt( v.x * v.x + v.y * v.y ); }
static bool fileProbe( const char *name ) { return std::ifstream( name ).good(); }
static Vec3d getFlowColor( const Point2f &f, const bool logScale = true, const double scaleDown = 5 )
{
if ( f.x == 0 && f.y == 0 )
return Vec3d( 0, 0, 1 );
double radius = normL2( f );
if ( logScale )
radius = log( radius + 1 );
radius /= scaleDown;
radius = std::min( 1.0, radius );
double angle = ( atan2( -f.y, -f.x ) + CV_PI ) * 180 / CV_PI;
return Vec3d( angle, radius, 1 );
}
static void displayFlow( InputArray _flow, OutputArray _img )
{
const Size sz = _flow.size();
Mat flow = _flow.getMat();
_img.create( sz, CV_32FC3 );
Mat img = _img.getMat();
for ( int i = 0; i < sz.height; ++i )
for ( int j = 0; j < sz.width; ++j )
img.at< Vec3f >( i, j ) = getFlowColor( flow.at< Point2f >( i, j ) );
cvtColor( img, img, COLOR_HSV2BGR );
}
static bool isFlowCorrect( const Point2f &u )
{
return !cvIsNaN( u.x ) && !cvIsNaN( u.y ) && ( fabs( u.x ) < 1e9 ) && ( fabs( u.y ) < 1e9 );
}
static double calcEPE( const Mat &f1, const Mat &f2 )
{
double sum = 0;
Size sz = f1.size();
size_t cnt = 0;
for ( int i = 0; i < sz.height; ++i )
for ( int j = 0; j < sz.width; ++j )
if ( isFlowCorrect( f1.at< Point2f >( i, j ) ) && isFlowCorrect( f2.at< Point2f >( i, j ) ) )
{
sum += normL2( f1.at< Point2f >( i, j ) - f2.at< Point2f >( i, j ) );
++cnt;
}
return sum / cnt;
}
static void displayResult( Mat &i1, Mat &i2, Mat &gt, Ptr< DenseOpticalFlow > &algo, OutputArray _img, const char *descr,
const bool useGpu = false )
{
Mat flow( i1.size[0], i1.size[1], CV_32FC2 );
TickMeter meter;
meter.start();
if ( useGpu )
algo->calc( i1, i2, flow.getUMat( ACCESS_RW ) );
else
algo->calc( i1, i2, flow );
meter.stop();
displayFlow( flow, _img );
Mat img = _img.getMat();
putText( img, descr, Point2i( 24, 40 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
char buf[256];
sprintf( buf, "Average EPE: %.2f", calcEPE( flow, gt ) );
putText( img, buf, Point2i( 24, 80 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
sprintf( buf, "Time: %.2fs", meter.getTimeSec() );
putText( img, buf, Point2i( 24, 120 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
}
static void displayGT( InputArray _flow, OutputArray _img, const char *descr )
{
displayFlow( _flow, _img );
Mat img = _img.getMat();
putText( img, descr, Point2i( 24, 40 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
}
int main( int argc, const char **argv )
{
CommandLineParser parser( argc, argv, keys );
parser.about( "PCAFlow demonstration" );
if ( parser.has( "help" ) )
{
parser.printMessage();
return 0;
}
String img1 = parser.get< String >( 0 );
String img2 = parser.get< String >( 1 );
String groundtruth = parser.get< String >( 2 );
String prior = parser.get< String >( 3 );
String outimg = parser.get< String >( 4 );
const bool useGpu = parser.has( "gpu" );
if ( !parser.check() )
{
parser.printErrors();
return 1;
}
if ( !fileProbe( prior.c_str() ) )
{
std::cerr << "Can't open the file with prior! Check the provided path: " << prior << std::endl;
return 1;
}
cv::ocl::setUseOpenCL( useGpu );
Mat i1 = imread( img1 );
Mat i2 = imread( img2 );
Mat gt = optflow::readOpticalFlow( groundtruth );
Mat i1g, i2g;
cvtColor( i1, i1g, COLOR_BGR2GRAY );
cvtColor( i2, i2g, COLOR_BGR2GRAY );
Mat pcaflowDisp, pcaflowpriDisp, farnebackDisp, gtDisp;
{
Ptr< DenseOpticalFlow > pcaflow = makePtr< OpticalFlowPCAFlow >( makePtr< PCAPrior >( prior.c_str() ) );
displayResult( i1, i2, gt, pcaflow, pcaflowpriDisp, "PCAFlow with prior", useGpu );
}
{
Ptr< DenseOpticalFlow > pcaflow = makePtr< OpticalFlowPCAFlow >();
displayResult( i1, i2, gt, pcaflow, pcaflowDisp, "PCAFlow without prior", useGpu );
}
{
Ptr< DenseOpticalFlow > farneback = optflow::createOptFlow_Farneback();
displayResult( i1g, i2g, gt, farneback, farnebackDisp, "Farneback", useGpu );
}
displayGT( gt, gtDisp, "Ground truth" );
Mat disp1, disp2;
vconcat( pcaflowpriDisp, farnebackDisp, disp1 );
vconcat( pcaflowDisp, gtDisp, disp2 );
hconcat( disp1, disp2, disp1 );
disp1 *= 255;
imwrite( outimg, disp1 );
return 0;
}
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Itseez, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Vladislav Samsonov, vvladxx@gmail.com
__kernel void getPatchDescriptor(
__global const uchar* imgCh0, int ic0step, int ic0off,
__global const uchar* imgCh1, int ic1step, int ic1off,
__global const uchar* imgCh2, int ic2step, int ic2off,
__global uchar* out, int outstep, int outoff,
const int gh, const int gw, const int PR )
{
const int i = get_global_id(0);
const int j = get_global_id(1);
if (i >= gh || j >= gw)
return;
__global double* desc = (__global double*)(out + (outstep * (i * gw + j) + outoff));
const int patchRadius = PR * 2;
float patch[PATCH_RADIUS_DOUBLED][PATCH_RADIUS_DOUBLED];
for (int i0 = 0; i0 < patchRadius; ++i0) {
__global const float* ch0Row = (__global const float*)(imgCh0 + (ic0step * (i + i0) + ic0off + j * sizeof(float)));
for (int j0 = 0; j0 < patchRadius; ++j0)
patch[i0][j0] = ch0Row[j0];
}
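// The unrolled loops below compute the 4x4 low-frequency block of the DCT-II of the
// luma patch: desc[n0 * 4 + n1] = sum_{i0, j0} patch[i0][j0] *
// cos(CV_PI * (i0 + 0.5) * n0 / N) * cos(CV_PI * (j0 + 0.5) * n1 / N), where N = 2 * PR.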
#pragma unroll
for (int n0 = 0; n0 < 4; ++n0) {
#pragma unroll
for (int n1 = 0; n1 < 4; ++n1) {
double sum = 0;
for (int i0 = 0; i0 < patchRadius; ++i0)
for (int j0 = 0; j0 < patchRadius; ++j0)
sum += patch[i0][j0] * cos(CV_PI * (i0 + 0.5) * n0 / patchRadius) * cos(CV_PI * (j0 + 0.5) * n1 / patchRadius);
desc[n0 * 4 + n1] = sum / PR;
}
}
for (int k = 0; k < 4; ++k) {
desc[k] *= SQRT2_INV;
desc[k * 4] *= SQRT2_INV;
}
double sum = 0;
for (int i0 = 0; i0 < patchRadius; ++i0) {
__global const float* ch1Row = (__global const float*)(imgCh1 + (ic1step * (i + i0) + ic1off + j * sizeof(float)));
for (int j0 = 0; j0 < patchRadius; ++j0)
sum += ch1Row[j0];
}
desc[16] = sum / patchRadius;
sum = 0;
for (int i0 = 0; i0 < patchRadius; ++i0) {
__global const float* ch2Row = (__global const float*)(imgCh2 + (ic2step * (i + i0) + ic2off + j * sizeof(float)));
for (int j0 = 0; j0 < patchRadius; ++j0)
sum += ch2Row[j0];
}
desc[17] = sum / patchRadius;
}
......@@ -41,8 +41,22 @@
//M*/
#include "opencv2/core/core_c.h"
#include "opencv2/core/private.hpp"
#include "opencv2/flann/miniflann.hpp"
#include "opencv2/highgui.hpp"
#include "precomp.hpp"
#include "opencl_kernels_optflow.hpp"
/* Disable "from double to float" and "from size_t to int" warnings.
* Fixing these would make the code look ugly by introducing explicit casts all around.
* Here these warnings are pointless anyway.
*/
#ifdef _MSC_VER
#pragma warning( disable : 4244 4267 4838 )
#endif
#ifdef __clang__
#pragma clang diagnostic ignored "-Wshorten-64-to-32"
#endif
namespace cv
{
......@@ -51,12 +65,23 @@ namespace optflow
namespace
{
const int patchRadius = 10;
const double thresholdMagnitudeFrac = 0.6666666666;
#define PATCH_RADIUS 10
#define PATCH_RADIUS_DOUBLED 20
#define SQRT2_INV 0.7071067811865475
const int patchRadius = PATCH_RADIUS;
const int globalIters = 3;
const int localIters = 500;
const int minNumberOfSamples = 2;
//const bool debugOutput = true;
const double thresholdOutliers = 0.98;
const double thresholdMagnitudeFrac = 0.8;
const double epsTolerance = 1e-12;
const unsigned scoreGainPos = 5;
const unsigned scoreGainNeg = 1;
const unsigned negSearchKNN = 5;
const double simulatedAnnealingTemperatureCoef = 200.0;
const double sigmaGrowthRate = 0.2;
RNG rng;
struct Magnitude
{
......@@ -79,9 +104,9 @@ struct PartitionPredicate1
bool operator()( const GPCPatchSample &sample ) const
{
const bool direction1 = ( coef.dot( sample.first.feature ) < rhs );
const bool direction2 = ( coef.dot( sample.second.feature ) < rhs );
return direction1 == false && direction1 == direction2;
bool refdir, posdir, negdir;
sample.getDirections( refdir, posdir, negdir, coef, rhs );
return refdir == false && ( posdir == false || negdir == true );
}
};
......@@ -94,20 +119,276 @@ struct PartitionPredicate2
bool operator()( const GPCPatchSample &sample ) const
{
const bool direction1 = ( coef.dot( sample.first.feature ) < rhs );
const bool direction2 = ( coef.dot( sample.second.feature ) < rhs );
return direction1 != direction2;
bool refdir, posdir, negdir;
sample.getDirections( refdir, posdir, negdir, coef, rhs );
return refdir != posdir && refdir == negdir;
}
};
struct CompareWithTolerance
{
double val;
CompareWithTolerance( double _val ) : val( _val ) {};
bool operator()( const double &elem ) const
{
const double diff = ( val + elem == 0 ) ? std::abs( val - elem ) : std::abs( ( val - elem ) / ( val + elem ) );
return diff <= epsTolerance;
}
};
float normL2Sqr( const Vec2f &v ) { return v[0] * v[0] + v[1] * v[1]; }
int normL2Sqr( const Point2i &v ) { return v.x * v.x + v.y * v.y; }
bool checkBounds( int i, int j, Size sz )
{
return i >= patchRadius && j >= patchRadius && i + patchRadius < sz.height && j + patchRadius < sz.width;
}
void getTrainingSamples( const Mat &from, const Mat &to, const Mat &gt, GPCSamplesVector &samples )
void getDCTPatchDescriptor( GPCPatchDescriptor &patchDescr, const Mat *imgCh, int i, int j )
{
Rect roi( j - patchRadius, i - patchRadius, 2 * patchRadius, 2 * patchRadius );
Mat freqDomain;
dct( imgCh[0]( roi ), freqDomain );
double *feature = patchDescr.feature.val;
feature[0] = freqDomain.at< float >( 0, 0 );
feature[1] = freqDomain.at< float >( 0, 1 );
feature[2] = freqDomain.at< float >( 0, 2 );
feature[3] = freqDomain.at< float >( 0, 3 );
feature[4] = freqDomain.at< float >( 1, 0 );
feature[5] = freqDomain.at< float >( 1, 1 );
feature[6] = freqDomain.at< float >( 1, 2 );
feature[7] = freqDomain.at< float >( 1, 3 );
feature[8] = freqDomain.at< float >( 2, 0 );
feature[9] = freqDomain.at< float >( 2, 1 );
feature[10] = freqDomain.at< float >( 2, 2 );
feature[11] = freqDomain.at< float >( 2, 3 );
feature[12] = freqDomain.at< float >( 3, 0 );
feature[13] = freqDomain.at< float >( 3, 1 );
feature[14] = freqDomain.at< float >( 3, 2 );
feature[15] = freqDomain.at< float >( 3, 3 );
feature[16] = cv::sum( imgCh[1]( roi ) )[0] / ( 2 * patchRadius );
feature[17] = cv::sum( imgCh[2]( roi ) )[0] / ( 2 * patchRadius );
}
double sumInt( const Mat &integ, int i, int j, int h, int w )
{
return integ.at< double >( i + h, j + w ) - integ.at< double >( i + h, j ) - integ.at< double >( i, j + w ) + integ.at< double >( i, j );
}
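// sumInt() evaluates the sum over an h x w window in O(1) via the integral image
// identity. getWHTPatchDescriptor() below combines such window sums with +/-1 signs
// to obtain Walsh-Hadamard-like coefficients of the patch.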
void getWHTPatchDescriptor( GPCPatchDescriptor &patchDescr, const Mat *imgCh, int i, int j )
{
i -= patchRadius;
j -= patchRadius;
const int k = 2 * patchRadius;
const double s = sumInt( imgCh[0], i, j, k, k );
double *feature = patchDescr.feature.val;
feature[0] = s;
feature[1] = s - 2 * sumInt( imgCh[0], i, j + k / 2, k, k / 2 );
feature[2] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k, k / 2 );
feature[3] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k, k / 4 ) - 2 * sumInt( imgCh[0], i, j + 3 * k / 4, k, k / 4 );
feature[4] = s - 2 * sumInt( imgCh[0], i + k / 2, j, k / 2, k );
feature[5] = s - 2 * sumInt( imgCh[0], i, j + k / 2, k / 2, k / 2 ) - 2 * sumInt( imgCh[0], i + k / 2, j, k / 2, k / 2 );
feature[6] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k / 2, k / 2 ) - 2 * sumInt( imgCh[0], i + k / 2, j, k / 2, k / 4 ) -
2 * sumInt( imgCh[0], i + k / 2, j + 3 * k / 4, k / 2, k / 4 );
feature[7] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k / 2, k / 4 ) - 2 * sumInt( imgCh[0], i, j + 3 * k / 4, k / 2, k / 4 ) -
2 * sumInt( imgCh[0], i + k / 2, j, k / 2, k / 4 ) - 2 * sumInt( imgCh[0], i + k / 2, j + k / 2, k / 2, k / 4 );
feature[8] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 2, k );
feature[9] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 2, k / 2 ) - 2 * sumInt( imgCh[0], i, j + k / 2, k / 4, k / 2 ) -
2 * sumInt( imgCh[0], i + 3 * k / 4, j + k / 2, k / 4, k / 2 );
feature[10] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 2, k / 4 ) - 2 * sumInt( imgCh[0], i + k / 4, j + 3 * k / 4, k / 2, k / 4 ) -
2 * sumInt( imgCh[0], i, j + k / 4, k / 4, k / 2 ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j + k / 4, k / 4, k / 2 );
feature[11] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i, j + 3 * k / 4, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + k / 4, j, k / 2, k / 4 ) - 2 * sumInt( imgCh[0], i + k / 4, j + k / 2, k / 2, k / 4 ) -
2 * sumInt( imgCh[0], i + 3 * k / 4, j + k / 4, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + 3 * k / 4, j + 3 * k / 4, k / 4, k / 4 );
feature[12] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 4, k ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j, k / 4, k );
feature[13] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 4, k / 2 ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j, k / 4, k / 2 ) -
2 * sumInt( imgCh[0], i, j + k / 2, k / 4, k / 2 ) - 2 * sumInt( imgCh[0], i + k / 2, j + k / 2, k / 4, k / 2 );
feature[14] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i, j + k / 4, k / 4, k / 2 ) - 2 * sumInt( imgCh[0], i + k / 2, j + k / 4, k / 4, k / 2 ) -
2 * sumInt( imgCh[0], i + k / 4, j + 3 * k / 4, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + 3 * k / 4, j + 3 * k / 4, k / 4, k / 4 );
feature[15] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i, j + 3 * k / 4, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + k / 4, j, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i + k / 4, j + k / 2, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + k / 2, j + k / 4, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + k / 2, j + 3 * k / 4, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j, k / 4, k / 4 ) -
2 * sumInt( imgCh[0], i + 3 * k / 4, j + k / 2, k / 4, k / 4 );
feature[16] = sumInt( imgCh[1], i, j, k, k );
feature[17] = sumInt( imgCh[2], i, j, k, k );
patchDescr.feature /= patchRadius;
}
class ParallelDCTFiller : public ParallelLoopBody
{
private:
const Size sz;
const Mat *imgCh;
std::vector< GPCPatchDescriptor > *descr;
ParallelDCTFiller &operator=( const ParallelDCTFiller & );
public:
ParallelDCTFiller( const Size &_sz, const Mat *_imgCh, std::vector< GPCPatchDescriptor > *_descr )
: sz( _sz ), imgCh( _imgCh ), descr( _descr ){};
void operator()( const Range &range ) const
{
for ( int i = range.start; i < range.end; ++i )
{
int x, y;
GPCDetails::getCoordinatesFromIndex( i, sz, x, y );
getDCTPatchDescriptor( descr->at( i ), imgCh, y, x );
}
}
};
#ifdef HAVE_OPENCL
bool ocl_getAllDCTDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr )
{
const Size sz = imgCh[0].size();
ocl::Kernel kernel( "getPatchDescriptor", ocl::optflow::sparse_matching_gpc_oclsrc,
format( "-DPATCH_RADIUS_DOUBLED=%d -DCV_PI=%f -DSQRT2_INV=%f", PATCH_RADIUS_DOUBLED, CV_PI, SQRT2_INV ) );
size_t globSize[] = {sz.height - 2 * patchRadius, sz.width - 2 * patchRadius};
UMat out( globSize[0] * globSize[1], GPCPatchDescriptor::nFeatures, CV_64F );
if (
kernel
.args( cv::ocl::KernelArg::ReadOnlyNoSize( imgCh[0].getUMat( ACCESS_READ ) ),
cv::ocl::KernelArg::ReadOnlyNoSize( imgCh[1].getUMat( ACCESS_READ ) ),
cv::ocl::KernelArg::ReadOnlyNoSize( imgCh[2].getUMat( ACCESS_READ ) ),
cv::ocl::KernelArg::WriteOnlyNoSize( out ),
(int)globSize[0], (int)globSize[1], (int)patchRadius )
.run( 2, globSize, 0, true ) == false )
return false;
Mat cpuOut = out.getMat( 0 );
for ( int i = 0; i + 2 * patchRadius < sz.height; ++i )
for ( int j = 0; j + 2 * patchRadius < sz.width; ++j )
descr.push_back( *cpuOut.ptr< GPCPatchDescriptor >( i * globSize[1] + j ) );
return true;
}
#endif
void getAllDCTDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams &mp )
{
const Size sz = imgCh[0].size();
descr.reserve( ( sz.height - 2 * patchRadius ) * ( sz.width - 2 * patchRadius ) );
(void)mp; // Fix unused parameter warning in case OpenCL is not available
CV_OCL_RUN( mp.useOpenCL, ocl_getAllDCTDescriptorsForImage( imgCh, descr ) )
descr.resize( ( sz.height - 2 * patchRadius ) * ( sz.width - 2 * patchRadius ) );
parallel_for_( Range( 0, descr.size() ), ParallelDCTFiller( sz, imgCh, &descr ) );
}
class ParallelWHTFiller : public ParallelLoopBody
{
private:
const Size sz;
const Mat *imgChInt;
std::vector< GPCPatchDescriptor > *descr;
ParallelWHTFiller &operator=( const ParallelWHTFiller & );
public:
ParallelWHTFiller( const Size &_sz, const Mat *_imgChInt, std::vector< GPCPatchDescriptor > *_descr )
: sz( _sz ), imgChInt( _imgChInt ), descr( _descr ){};
void operator()( const Range &range ) const
{
for ( int i = range.start; i < range.end; ++i )
{
int x, y;
GPCDetails::getCoordinatesFromIndex( i, sz, x, y );
getWHTPatchDescriptor( descr->at( i ), imgChInt, y, x );
}
}
};
void getAllWHTDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams & )
{
const Size sz = imgCh[0].size();
descr.resize( ( sz.height - 2 * patchRadius ) * ( sz.width - 2 * patchRadius ) );
Mat imgChInt[3];
integral( imgCh[0], imgChInt[0], CV_64F );
integral( imgCh[1], imgChInt[1], CV_64F );
integral( imgCh[2], imgChInt[2], CV_64F );
parallel_for_( Range( 0, descr.size() ), ParallelWHTFiller( sz, imgChInt, &descr ) );
}
void buildIndex( OutputArray featuresOut, flann::Index &index, const Mat *imgCh,
void ( *getAllDescrFn )( const Mat *, std::vector< GPCPatchDescriptor > &, const GPCMatchingParams & ) )
{
std::vector< GPCPatchDescriptor > descriptors;
getAllDescrFn( imgCh, descriptors, GPCMatchingParams() );
featuresOut.create( descriptors.size(), GPCPatchDescriptor::nFeatures, CV_32F );
Mat features = featuresOut.getMat();
for ( size_t i = 0; i < descriptors.size(); ++i )
*features.ptr< Vec< float, GPCPatchDescriptor::nFeatures > >( i ) = descriptors[i].feature;
cv::flann::KDTreeIndexParams indexParams;
index.build( features, indexParams, cvflann::FLANN_DIST_L2 );
}
void getTriplet( const Magnitude &mag, const Mat &gt, const Mat *fromCh, const Mat *toCh, GPCSamplesVector &samples, flann::Index &index,
void ( *getDescFn )( GPCPatchDescriptor &, const Mat *, int, int ) )
{
const Size sz = gt.size();
const int i0 = mag.i;
const int j0 = mag.j;
const int i1 = i0 + cvRound( gt.at< Vec2f >( i0, j0 )[1] );
const int j1 = j0 + cvRound( gt.at< Vec2f >( i0, j0 )[0] );
if ( checkBounds( i1, j1, sz ) )
{
GPCPatchSample ps;
getDescFn( ps.ref, fromCh, i0, j0 );
getDescFn( ps.pos, toCh, i1, j1 );
ps.neg.markAsSeparated();
Matx< float, 1, GPCPatchDescriptor::nFeatures > ref32;
Matx< int, 1, negSearchKNN > indices;
int maxDist = 0;
for ( unsigned i = 0; i < GPCPatchDescriptor::nFeatures; ++i )
ref32( 0, i ) = ps.ref.feature[i];
index.knnSearch( ref32, indices, noArray(), negSearchKNN );
for ( unsigned i = 0; i < negSearchKNN; ++i )
{
int i2, j2;
GPCDetails::getCoordinatesFromIndex( indices( 0, i ), sz, j2, i2 );
const int dist = ( i2 - i1 ) * ( i2 - i1 ) + ( j2 - j1 ) * ( j2 - j1 );
if ( maxDist < dist )
{
maxDist = dist;
getDescFn( ps.neg, toCh, i2, j2 );
}
}
samples.push_back( ps );
}
}
void getTrainingSamples( const Mat &from, const Mat &to, const Mat &gt, GPCSamplesVector &samples, const int type )
{
const Size sz = gt.size();
std::vector< Magnitude > mag;
......@@ -116,33 +397,53 @@ void getTrainingSamples( const Mat &from, const Mat &to, const Mat &gt, GPCSampl
for ( int j = patchRadius; j + patchRadius < sz.width; ++j )
mag.push_back( Magnitude( normL2Sqr( gt.at< Vec2f >( i, j ) ), i, j ) );
size_t n = size_t(mag.size() * thresholdMagnitudeFrac); // As suggested in the paper, we discard part of the training samples
// with a small displacement and train to better distinguish hard pairs.
size_t n = size_t( mag.size() * thresholdMagnitudeFrac ); // As suggested in the paper, we discard part of the training samples
// with a small displacement and train to better distinguish hard pairs.
std::nth_element( mag.begin(), mag.begin() + n, mag.end() );
mag.resize( n );
std::random_shuffle( mag.begin(), mag.end() );
n /= patchRadius;
mag.resize( n );
Mat fromCh[3], toCh[3];
split( from, fromCh );
split( to, toCh );
if ( type == GPC_DESCRIPTOR_DCT )
{
Mat fromCh[3], toCh[3];
split( from, fromCh );
split( to, toCh );
Mat allDescriptors;
flann::Index index;
buildIndex( allDescriptors, index, toCh, getAllDCTDescriptorsForImage );
for ( size_t k = 0; k < n; ++k )
for ( size_t k = 0; k < n; ++k )
getTriplet( mag[k], gt, fromCh, toCh, samples, index, getDCTPatchDescriptor );
}
else if ( type == GPC_DESCRIPTOR_WHT )
{
int i0 = mag[k].i;
int j0 = mag[k].j;
int i1 = i0 + cvRound( gt.at< Vec2f >( i0, j0 )[1] );
int j1 = j0 + cvRound( gt.at< Vec2f >( i0, j0 )[0] );
if ( checkBounds( i1, j1, sz ) )
samples.push_back( std::make_pair( GPCPatchDescriptor( fromCh, i0, j0 ), GPCPatchDescriptor( toCh, i1, j1 ) ) );
Mat fromCh[3], toCh[3], fromChInt[3], toChInt[3];
split( from, fromCh );
split( to, toCh );
integral( fromCh[0], fromChInt[0], CV_64F );
integral( fromCh[1], fromChInt[1], CV_64F );
integral( fromCh[2], fromChInt[2], CV_64F );
integral( toCh[0], toChInt[0], CV_64F );
integral( toCh[1], toChInt[1], CV_64F );
integral( toCh[2], toChInt[2], CV_64F );
Mat allDescriptors;
flann::Index index;
buildIndex( allDescriptors, index, toCh, getAllWHTDescriptorsForImage );
for ( size_t k = 0; k < n; ++k )
getTriplet( mag[k], gt, fromChInt, toChInt, samples, index, getWHTPatchDescriptor );
}
else
CV_Error( CV_StsBadArg, "Unknown descriptor type" );
}
/* Sample a random number from the Cauchy distribution. */
double getRandomCauchyScalar()
{
static RNG rng;
return tan( rng.uniform( -1.54, 1.54 ) ); // I intentionally used a value slightly less than PI/2 to enforce strictly
// zero probability for large numbers. The resulting PDF of the Cauchy distribution has
// truncated "tails".
......@@ -155,41 +456,65 @@ void getRandomCauchyVector( Vec< double, GPCPatchDescriptor::nFeatures > &v )
for ( unsigned i = 0; i < GPCPatchDescriptor::nFeatures; ++i )
v[i] = getRandomCauchyScalar();
}
double getRobustMedian( double m ) { return m < 0 ? m * ( 1.0 + epsTolerance ) : m * ( 1.0 - epsTolerance ); }
}
GPCPatchDescriptor::GPCPatchDescriptor( const Mat *imgCh, int i, int j )
double GPCPatchDescriptor::dot( const Vec< double, nFeatures > &coef ) const
{
Rect roi( j - patchRadius, i - patchRadius, 2 * patchRadius, 2 * patchRadius );
Mat freqDomain;
dct( imgCh[0]( roi ), freqDomain );
feature[0] = freqDomain.at< float >( 0, 0 );
feature[1] = freqDomain.at< float >( 0, 1 );
feature[2] = freqDomain.at< float >( 0, 2 );
feature[3] = freqDomain.at< float >( 0, 3 );
feature[4] = freqDomain.at< float >( 1, 0 );
feature[5] = freqDomain.at< float >( 1, 1 );
feature[6] = freqDomain.at< float >( 1, 2 );
feature[7] = freqDomain.at< float >( 1, 3 );
#if CV_SIMD128_64F
v_float64x2 sum = v_setzero_f64();
for ( unsigned i = 0; i < nFeatures; i += 2 )
{
v_float64x2 x = v_load_aligned( &feature.val[i] );
v_float64x2 y = v_load_aligned( &coef.val[i] );
sum = v_muladd( x, y, sum );
}
#if CV_SSE2
__m128d sumrev = _mm_shuffle_pd( sum.val, sum.val, _MM_SHUFFLE2( 0, 1 ) );
return _mm_cvtsd_f64( _mm_add_pd( sum.val, sumrev ) );
#else
double CV_DECL_ALIGNED( 16 ) buf[2];
v_store_aligned( buf, sum );
return OPENCV_HAL_ADD( buf[0], buf[1] );
#endif
#else
return feature.dot( coef );
#endif
}
feature[8] = freqDomain.at< float >( 2, 0 );
feature[9] = freqDomain.at< float >( 2, 1 );
feature[10] = freqDomain.at< float >( 2, 2 );
feature[11] = freqDomain.at< float >( 2, 3 );
void GPCPatchSample::getDirections( bool &refdir, bool &posdir, bool &negdir, const Vec< double, GPCPatchDescriptor::nFeatures > &coef, double rhs ) const
{
refdir = ( ref.dot( coef ) < rhs );
posdir = pos.isSeparated() ? ( !refdir ) : ( pos.dot( coef ) < rhs );
negdir = neg.isSeparated() ? ( !refdir ) : ( neg.dot( coef ) < rhs );
}
feature[12] = freqDomain.at< float >( 3, 0 );
feature[13] = freqDomain.at< float >( 3, 1 );
feature[14] = freqDomain.at< float >( 3, 2 );
feature[15] = freqDomain.at< float >( 3, 3 );
void GPCDetails::getAllDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams &mp,
int type )
{
if ( type == GPC_DESCRIPTOR_DCT )
getAllDCTDescriptorsForImage( imgCh, descr, mp );
else if ( type == GPC_DESCRIPTOR_WHT )
getAllWHTDescriptorsForImage( imgCh, descr, mp );
else
CV_Error( CV_StsBadArg, "Unknown descriptor type" );
}
feature[16] = cv::sum( imgCh[1]( roi ) )[0] / ( 2 * patchRadius );
feature[17] = cv::sum( imgCh[2]( roi ) )[0] / ( 2 * patchRadius );
void GPCDetails::getCoordinatesFromIndex( size_t index, Size sz, int &x, int &y )
{
const size_t stride = sz.width - patchRadius * 2;
y = int( index / stride );
x = int( index - y * stride + patchRadius );
y += patchRadius;
}
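// Inverse mapping of getCoordinatesFromIndex(), for reference:
// index = ( y - patchRadius ) * ( sz.width - patchRadius * 2 ) + ( x - patchRadius ).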
bool GPCTree::trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth )
{
if ( std::distance( begin, end ) < minNumberOfSamples )
const int nSamples = (int)std::distance( begin, end );
if ( nSamples < params.minNumberOfSamples || depth >= params.maxTreeDepth )
return false;
if ( nodeId >= nodes.size() )
......@@ -200,6 +525,7 @@ bool GPCTree::trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth )
// Select the best hyperplane
unsigned globalBestScore = 0;
std::vector< double > values;
values.reserve( nSamples * 2 );
for ( int j = 0; j < globalIters; ++j )
{ // Global search step
......@@ -209,50 +535,81 @@ bool GPCTree::trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth )
for ( int i = 0; i < localIters; ++i )
{ // Local search step
double randomModification = getRandomCauchyScalar();
double randomModification = getRandomCauchyScalar() * ( 1.0 + sigmaGrowthRate * int( i / GPCPatchDescriptor::nFeatures ) );
const int pos = i % GPCPatchDescriptor::nFeatures;
std::swap( coef[pos], randomModification );
values.clear();
for ( SIter iter = begin; iter != end; ++iter )
{
values.push_back( coef.dot( iter->first.feature ) );
values.push_back( coef.dot( iter->second.feature ) );
}
values.push_back( iter->ref.dot( coef ) );
std::nth_element( values.begin(), values.begin() + nSamples / 2, values.end() );
double median = values[nSamples / 2];
// Skip an obviously degenerate split. This may happen if a large number of samples are equal.
// Most likely this won't happen with samples collected from a good dataset.
// It happens when the dataset contains plain (or close to plain) images.
if ( std::count_if( values.begin(), values.end(), CompareWithTolerance( median ) ) > std::max( 1, nSamples / 4 ) )
continue;
std::nth_element( values.begin(), values.begin() + values.size() / 2, values.end() );
const double median = values[values.size() / 2];
unsigned correct = 0;
median = getRobustMedian( median );
unsigned score = 0;
for ( SIter iter = begin; iter != end; ++iter )
{
const bool direction = ( coef.dot( iter->first.feature ) < median );
if ( direction == ( coef.dot( iter->second.feature ) < median ) )
++correct;
bool refdir, posdir, negdir;
iter->getDirections( refdir, posdir, negdir, coef, median );
if ( refdir == posdir )
score += scoreGainPos;
if ( refdir != negdir )
score += scoreGainNeg;
}
if ( correct > localBestScore )
localBestScore = correct;
if ( score > localBestScore )
localBestScore = score;
else
coef[pos] = randomModification;
{
const double beta = simulatedAnnealingTemperatureCoef * std::sqrt( i ) / ( nSamples * ( scoreGainPos + scoreGainNeg ) );
if ( rng.uniform( 0.0, 1.0 ) > std::exp( -beta * ( localBestScore - score) ) )
coef[pos] = randomModification;
}
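// Simulated annealing acceptance rule: a hyperplane that scores worse than the local
// best is still kept with probability exp( -beta * ( localBestScore - score ) ), and
// beta grows with the iteration number, making bad moves less likely over time.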
if ( score > globalBestScore )
{
globalBestScore = score;
node.coef = coef;
node.rhs = median;
}
}
}
if ( globalBestScore == 0 )
return false;
if ( params.printProgress )
{
const int maxScore = nSamples * ( scoreGainPos + scoreGainNeg );
const double correctRatio = double( globalBestScore ) / maxScore;
printf( "[%u] Correct %.2f (%u/%d)\nWeights:", depth, correctRatio, globalBestScore, maxScore );
for ( unsigned k = 0; k < GPCPatchDescriptor::nFeatures; ++k )
printf( " %.3f", node.coef[k] );
printf( "\n" );
}
for ( SIter iter = begin; iter != end; ++iter )
{
bool refdir, posdir, negdir;
iter->getDirections( refdir, posdir, negdir, node.coef, node.rhs );
// We shouldn't account for positive sample in the scoring in case it was separated before. So mark it as separated.
// After all, we can't bring back samples which were separated from reference on early levels.
if ( refdir != posdir )
iter->pos.markAsSeparated();
// The same for negative sample.
if ( refdir != negdir )
iter->neg.markAsSeparated();
// If both positive and negative were separated before then such triplet doesn't make sense on deeper levels. We discard it.
}
// Partition vector with samples according to the hyperplane in QuickSort-like manner.
// Unlike QuickSort, we need to partition it into 3 parts (left subtree samples; undefined samples; right subtree
// samples), so we call it two times.
SIter leftEnd = std::partition( begin, end, PartitionPredicate1( node.coef, node.rhs ) ); // Separate left subtree samples from others.
SIter rightBegin =
std::partition( leftEnd, end, PartitionPredicate2( node.coef, node.rhs ) ); // Separate undefined samples from right subtree samples.
node.left = ( trainNode( nodeId * 2 + 1, begin, leftEnd, depth + 1 ) ) ? unsigned( nodeId * 2 + 1 ) : 0;
node.right = ( trainNode( nodeId * 2 + 2, rightBegin, end, depth + 1 ) ) ? unsigned( nodeId * 2 + 2 ) : 0;
return true;
}
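// The else-branch in the local search above is a simulated annealing acceptance rule: a
// perturbation that lowers the score still survives with probability
// exp( -beta * ( localBestScore - score ) ), and beta grows with the iteration number, so
// early iterations explore while later ones turn greedy. A minimal standalone sketch of the
// same rule (illustrative only; reuses the file-scope simulatedAnnealingTemperatureCoef and rng):
static bool acceptWorseCandidate( double scoreGap, int iter, double maxScore )
{
const double beta = simulatedAnnealingTemperatureCoef * std::sqrt( (double)iter ) / maxScore;
return rng.uniform( 0.0, 1.0 ) <= std::exp( -beta * scoreGap );
}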
void GPCTree::train( GPCTrainingSamples &samples, const GPCTrainingParams _params )
{
if ( _params.descriptorType != samples.type() )
CV_Error( CV_StsBadArg, "Descriptor type mismatch! Check that samples are collected with the same descriptor type." );
nodes.clear();
nodes.reserve( samples.size() * 2 - 1 ); // set upper bound for the possible number of nodes so all subsequent resize() will be no-op
params = _params;
GPCSamplesVector &sv = samples;
trainNode( 0, sv.begin(), sv.end(), 0 );
}
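// Usage sketch (illustrative only; paths are placeholders): collecting samples for one image
// pair and training a single tree, with the same parameter set the accuracy tests use below.
static void exampleTrainSingleTree()
{
std::vector< String > from( 1, "from.png" ), to( 1, "to.png" ), gt( 1, "gt.flo" );
Ptr< GPCTrainingSamples > samples = GPCTrainingSamples::create( from, to, gt, GPC_DESCRIPTOR_DCT );
GPCTree tree;
tree.train( *samples, GPCTrainingParams( 8, 3, GPC_DESCRIPTOR_DCT, false ) );
}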
void GPCTree::write( FileStorage &fs ) const
{
if ( nodes.empty() )
CV_Error( CV_StsBadArg, "Tree has not been trained" );
fs << "nodes" << nodes;
fs << "dtype" << (int)params.descriptorType;
}
void GPCTree::read( const FileNode &fn )
{
fn["nodes"] >> nodes;
fn["dtype"] >> (int &)params.descriptorType;
}
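// Note: reading through the (int &) cast writes the stored integer directly into the enum
// field; this assumes the enum is represented as a plain int, mirroring the (int) cast used
// when writing the "dtype" entry above.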
unsigned GPCTree::findLeafForPatch( const GPCPatchDescriptor &descr ) const
{
unsigned id = 0, prevId;
do
{
prevId = id;
if ( descr.dot( nodes[id].coef ) < nodes[id].rhs )
id = nodes[id].right;
else
id = nodes[id].left;
} while ( id );
return prevId;
}
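// Note: nodes[] stores an implicit binary tree -- trainNode() places the children of node i
// at indices 2 * i + 1 and 2 * i + 2, and a child id of 0 means "no child", so the traversal
// above terminates at the last visited node, which is the leaf for this patch.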
Ptr< GPCTrainingSamples > GPCTrainingSamples::create( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo,
const std::vector< String > &gt, int _descriptorType )
{
CV_Assert( imagesFrom.size() == imagesTo.size() );
CV_Assert( imagesFrom.size() == gt.size() );
Ptr< GPCTrainingSamples > ts = makePtr< GPCTrainingSamples >();
ts->descriptorType = _descriptorType;
for ( size_t i = 0; i < imagesFrom.size(); ++i )
{
Mat from = imread( imagesFrom[i] );
Mat to = imread( imagesTo[i] );
Mat gtFlow = readOpticalFlow( gt[i] );
CV_Assert( from.size == to.size );
CV_Assert( from.size == gtFlow.size );
CV_Assert( from.channels() == 3 );
CV_Assert( to.channels() == 3 );
from.convertTo( from, CV_32FC3 );
to.convertTo( to, CV_32FC3 );
cvtColor( from, from, COLOR_BGR2YCrCb );
cvtColor( to, to, COLOR_BGR2YCrCb );
getTrainingSamples( from, to, gtFlow, ts->samples, ts->descriptorType );
}
return ts;
}
Ptr< GPCTrainingSamples > GPCTrainingSamples::create( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo,
InputArrayOfArrays gt, int _descriptorType )
{
CV_Assert( imagesFrom.total() == imagesTo.total() );
CV_Assert( imagesFrom.total() == gt.total() );
Ptr< GPCTrainingSamples > ts = makePtr< GPCTrainingSamples >();
ts->descriptorType = _descriptorType;
for ( size_t i = 0; i < imagesFrom.total(); ++i )
{
Mat from = imagesFrom.getMat( static_cast<int>( i ) );
Mat to = imagesTo.getMat( static_cast<int>( i ) );
Mat gtFlow = gt.getMat( static_cast<int>( i ) );
CV_Assert( from.size == to.size );
CV_Assert( from.size == gtFlow.size );
CV_Assert( from.channels() == 3 );
CV_Assert( to.channels() == 3 );
from.convertTo( from, CV_32FC3 );
to.convertTo( to, CV_32FC3 );
cvtColor( from, from, COLOR_BGR2YCrCb );
cvtColor( to, to, COLOR_BGR2YCrCb );
getTrainingSamples( from, to, gtFlow, ts->samples, ts->descriptorType );
}
return ts;
}
void GPCDetails::dropOutliers( std::vector< std::pair< Point2i, Point2i > > &corr )
{
std::vector< float > mag( corr.size() );
for ( size_t i = 0; i < corr.size(); ++i )
mag[i] = normL2Sqr( corr[i].first - corr[i].second );
const size_t threshold = size_t( mag.size() * thresholdOutliers );
std::nth_element( mag.begin(), mag.begin() + threshold, mag.end() );
const float percentile = mag[threshold];
size_t i = 0, j = 0;
while ( i < corr.size() )
{
if ( normL2Sqr( corr[i].first - corr[i].second ) <= percentile )
{
corr[j] = corr[i];
++j;
}
++i;
}
corr.resize( j );
}
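// Note: nth_element() places the thresholdOutliers quantile of the squared correspondence
// lengths at mag[threshold] in linear time; the loop above then compacts corr in place,
// keeping only the pairs whose squared displacement does not exceed that percentile.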
} // namespace optflow
// FileStorage serialization helpers for optflow::GPCTree::Node (definitions elided here).
void write( FileStorage &fs, const String &name, const optflow::GPCTree::Node &node );

// Optical flow accuracy tests follow.

using namespace optflow;
static string getDataDir() { return TS::ptr()->get_data_path(); }
static string getRubberWhaleFrame1() { return getDataDir() + "optflow/RubberWhale1.png"; }
static string getRubberWhaleFrame2() { return getDataDir() + "optflow/RubberWhale2.png"; }
static string getRubberWhaleGroundTruth() { return getDataDir() + "optflow/RubberWhale.flo"; }
static bool isFlowCorrect(float u) { return !cvIsNaN(u) && (fabs(u) < 1e9); }
static bool isFlowCorrect(double u) { return !cvIsNaN(u) && (fabs(u) < 1e9); }
static float calcRMSE(Mat flow1, Mat flow2)
{
float sum = 0;
int counter = 0;
for (int y = 0; y < flow1.rows; ++y)
for (int x = 0; x < flow1.cols; ++x)
{
Vec2f flow1_at_point = flow1.at<Vec2f>(y, x);
Vec2f flow2_at_point = flow2.at<Vec2f>(y, x);
float u1 = flow1_at_point[0];
float v1 = flow1_at_point[1];
float u2 = flow2_at_point[0];
float v2 = flow2_at_point[1];
if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2))
{
sum += (u1 - u2) * (u1 - u2) + (v1 - v2) * (v1 - v2);
counter++;
}
}
return (float)sqrt(sum / (1e-9 + counter));
}
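// Average endpoint error (EPE): the mean Euclidean distance between the displacement implied
// by each correspondence and the reference flow sampled at the correspondence's start point.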
static float calcAvgEPE(vector< pair<Point2i, Point2i> > corr, Mat flow)
{
double sum = 0;
int counter = 0;
for (size_t i = 0; i < corr.size(); ++i)
{
Vec2f flow1_at_point = Point2f(corr[i].second - corr[i].first);
Vec2f flow2_at_point = flow.at<Vec2f>(corr[i].first.y, corr[i].first.x);
double u1 = (double)flow1_at_point[0];
double v1 = (double)flow1_at_point[1];
double u2 = (double)flow2_at_point[0];
double v2 = (double)flow2_at_point[1];
if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2))
{
sum += sqrt((u1 - u2) * (u1 - u2) + (v1 - v2) * (v1 - v2));
counter++;
}
}
return (float)(sum / counter);
}
bool readRubberWhale(Mat &dst_frame_1, Mat &dst_frame_2, Mat &dst_GT)
{
const string frame1_path = getRubberWhaleFrame1();
const string frame2_path = getRubberWhaleFrame2();
const string gt_flow_path = getRubberWhaleGroundTruth();
dst_frame_1 = imread(frame1_path);
dst_frame_2 = imread(frame2_path);
dst_GT = readOpticalFlow(gt_flow_path);
if (dst_frame_1.empty() || dst_frame_2.empty() || dst_GT.empty())
return false;
return true;
}

TEST(DenseOpticalFlow_VariationalRefinement, ReferenceAccuracy)
{
// ... (test setup elided: frames, ground truth, target_RMSE and the refined flow are prepared) ...
ASSERT_EQ(GT.rows, flow.rows);
ASSERT_EQ(GT.cols, flow.cols);
EXPECT_LE(calcRMSE(GT, flow), target_RMSE);
}
TEST(DenseOpticalFlow_PCAFlow, ReferenceAccuracy)
{
Mat frame1, frame2, GT;
ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));
const float target_RMSE = 0.55f;
Mat flow;
Ptr<DenseOpticalFlow> algo = createOptFlow_PCAFlow();
algo->calc(frame1, frame2, flow);
ASSERT_EQ(GT.rows, flow.rows);
ASSERT_EQ(GT.cols, flow.cols);
EXPECT_LE(calcRMSE(GT, flow), target_RMSE);
}
TEST(DenseOpticalFlow_GlobalPatchColliderDCT, ReferenceAccuracy)
{
Mat frame1, frame2, GT;
ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));
const Size sz = frame1.size() / 2;
frame1 = frame1(Rect(0, 0, sz.width, sz.height));
frame2 = frame2(Rect(0, 0, sz.width, sz.height));
GT = GT(Rect(0, 0, sz.width, sz.height));
vector<Mat> img1, img2, gt;
vector< pair<Point2i, Point2i> > corr;
img1.push_back(frame1);
img2.push_back(frame2);
gt.push_back(GT);
Ptr< GPCForest<5> > forest = GPCForest<5>::create();
forest->train(img1, img2, gt, GPCTrainingParams(8, 3, GPC_DESCRIPTOR_DCT, false));
forest->findCorrespondences(frame1, frame2, corr);
ASSERT_LE(7500U, corr.size());
ASSERT_LE(calcAvgEPE(corr, GT), 0.5f);
}
TEST(DenseOpticalFlow_GlobalPatchColliderWHT, ReferenceAccuracy)
{
Mat frame1, frame2, GT;
ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));
const Size sz = frame1.size() / 2;
frame1 = frame1(Rect(0, 0, sz.width, sz.height));
frame2 = frame2(Rect(0, 0, sz.width, sz.height));
GT = GT(Rect(0, 0, sz.width, sz.height));
vector<Mat> img1, img2, gt;
vector< pair<Point2i, Point2i> > corr;
img1.push_back(frame1);
img2.push_back(frame2);
gt.push_back(GT);
Ptr< GPCForest<5> > forest = GPCForest<5>::create();
forest->train(img1, img2, gt, GPCTrainingParams(8, 3, GPC_DESCRIPTOR_WHT, false));
forest->findCorrespondences(frame1, frame2, corr);
ASSERT_LE(7000U, corr.size());
ASSERT_LE(calcAvgEPE(corr, GT), 0.5f);
}