Commit 7f93d951 authored by Vladislav Samsonov's avatar Vladislav Samsonov

Added training part of the Global Patch Collider

parent 3dbbad12
......@@ -44,6 +44,7 @@ the use of this software, even if advised of the possibility of such damage.
#include "opencv2/video.hpp"
#include "opencv2/optflow/pcaflow.hpp"
#include "opencv2/optflow/sparse_matching_gpc.hpp"
/**
@defgroup optflow Optical Flow Algorithms
......
/*
By downloading, copying, installing or using the software you agree to this
license. If you do not agree to this license, do not download, install,
copy or use the software.
License Agreement
For Open Source Computer Vision Library
(3-clause BSD License)
Copyright (C) 2016, OpenCV Foundation, all rights reserved.
Third party copyrights are property of their respective owners.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the names of the copyright holders nor the names of the contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
This software is provided by the copyright holders and contributors "as is" and
any express or implied warranties, including, but not limited to, the implied
warranties of merchantability and fitness for a particular purpose are
disclaimed. In no event shall copyright holders or contributors be liable for
any direct, indirect, incidental, special, exemplary, or consequential damages
(including, but not limited to, procurement of substitute goods or services;
loss of use, data, or profits; or business interruption) however caused
and on any theory of liability, whether in contract, strict liability,
or tort (including negligence or otherwise) arising in any way out of
the use of this software, even if advised of the possibility of such damage.
*/
/*
Implementation of the Global Patch Collider algorithm from the following paper:
http://research.microsoft.com/en-us/um/people/pkohli/papers/wfrik_cvpr2016.pdf
@InProceedings{Wang_2016_CVPR,
author = {Wang, Shenlong and Ryan Fanello, Sean and Rhemann, Christoph and Izadi, Shahram and Kohli, Pushmeet},
title = {The Global Patch Collider},
booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2016}
}
*/
#ifndef __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__
#define __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__
#include "opencv2/core.hpp"
namespace cv
{
namespace optflow
{
struct CV_EXPORTS_W GPCPatchDescriptor
{
static const unsigned nFeatures = 18; // number of features in a patch descriptor
Vec<double, nFeatures> feature;
GPCPatchDescriptor( const Mat *imgCh, int i, int j );
};
typedef std::pair<GPCPatchDescriptor, GPCPatchDescriptor> GPCPatchSample;
typedef std::vector<GPCPatchSample> GPCSamplesVector;
class CV_EXPORTS_W GPCTree : public Algorithm
{
public:
struct Node
{
Vec<double, GPCPatchDescriptor::nFeatures> coef; // hyperplane coefficients
double rhs;
unsigned left;
unsigned right;
bool operator==( const Node &n ) const { return coef == n.coef && rhs == n.rhs && left == n.left && right == n.right; }
};
private:
typedef GPCSamplesVector::iterator SIter;
std::vector<Node> nodes;
bool trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth );
public:
void train( GPCSamplesVector &samples );
void write( FileStorage &fs ) const;
void read( const FileNode &fn );
static Ptr<GPCTree> create() { return makePtr<GPCTree>(); }
bool operator==( const GPCTree &t ) const { return nodes == t.nodes; }
};
template <int T> class CV_EXPORTS_W GPCForest : public Algorithm
{
private:
GPCTree tree[T];
public:
void train( GPCSamplesVector &samples )
{
for ( int i = 0; i < T; ++i )
tree[i].train( samples );
}
void write( FileStorage &fs ) const
{
fs << "ntrees" << T << "trees"
<< "[";
for ( int i = 0; i < T; ++i )
{
fs << "{";
tree[i].write( fs );
fs << "}";
}
fs << "]";
}
void read( const FileNode &fn )
{
CV_Assert( T == (int)fn["ntrees"] );
FileNodeIterator it = fn["trees"].begin();
for ( int i = 0; i < T; ++i, ++it )
tree[i].read( *it );
}
static Ptr<GPCForest> create() { return makePtr<GPCForest>(); }
};
/** @brief Class encapsulating training samples.
*/
class CV_EXPORTS_W GPCTrainingSamples
{
private:
GPCSamplesVector samples;
public:
/** @brief This function can be used to extract samples from a pair of images and a ground truth flow.
* Sizes of all the provided vectors must be equal.
*/
static Ptr<GPCTrainingSamples> create( const std::vector<String> &imagesFrom, const std::vector<String> &imagesTo,
const std::vector<String> &gt );
size_t size() const { return samples.size(); }
operator GPCSamplesVector() const { return samples; }
operator GPCSamplesVector &() { return samples; }
};
}
CV_EXPORTS void write( FileStorage &fs, const String &name, const optflow::GPCTree::Node &node );
CV_EXPORTS void read( const FileNode &fn, optflow::GPCTree::Node &node, optflow::GPCTree::Node );
}
#endif
#include "opencv2/optflow.hpp"
#include <iostream>
const int nTrees = 5;
int main( int argc, const char **argv )
{
int nSequences = argc - 1;
if ( nSequences <= 0 || nSequences % 3 != 0 )
{
std::cerr << "Usage: " << argv[0] << " ImageFrom1 ImageTo1 GroundTruth1 ... ImageFromN ImageToN GroundTruthN" << std::endl;
return 1;
}
nSequences /= 3;
std::vector<cv::String> img1, img2, gt;
for ( int i = 0; i < nSequences; ++i )
{
img1.push_back( argv[1 + i * 3] );
img2.push_back( argv[1 + i * 3 + 1] );
gt.push_back( argv[1 + i * 3 + 2] );
}
cv::Ptr<cv::optflow::GPCTrainingSamples> ts = cv::optflow::GPCTrainingSamples::create( img1, img2, gt );
std::cout << "Got " << ts->size() << " samples." << std::endl;
cv::Ptr< cv::optflow::GPCForest<nTrees> > forest = cv::optflow::GPCForest<nTrees>::create();
forest->train( *ts );
forest->save( "forest.dump" );
return 0;
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/core_c.h"
#include "opencv2/highgui.hpp"
#include "precomp.hpp"
namespace cv
{
namespace optflow
{
namespace
{
const int patchRadius = 10;
const double thresholdMagnitudeFrac = 0.6666666666;
const int globalIters = 3;
const int localIters = 500;
const unsigned minNumberOfSamples = 2;
const bool debugOutput = true;
struct Magnitude
{
float val;
int i;
int j;
Magnitude( float _val, int _i, int _j ) : val( _val ), i( _i ), j( _j ) {}
Magnitude() {}
bool operator<( const Magnitude &m ) { return val > m.val; }
};
struct PartitionPredicate1
{
Vec<double, GPCPatchDescriptor::nFeatures> coef;
double rhs;
PartitionPredicate1( const Vec<double, GPCPatchDescriptor::nFeatures> &_coef, double _rhs ) : coef( _coef ), rhs( _rhs ) {}
bool operator()( const GPCPatchSample &sample ) const
{
const bool direction1 = ( coef.dot( sample.first.feature ) < rhs );
const bool direction2 = ( coef.dot( sample.second.feature ) < rhs );
return direction1 == false && direction1 == direction2;
}
};
struct PartitionPredicate2
{
Vec<double, GPCPatchDescriptor::nFeatures> coef;
double rhs;
PartitionPredicate2( const Vec<double, GPCPatchDescriptor::nFeatures> &_coef, double _rhs ) : coef( _coef ), rhs( _rhs ) {}
bool operator()( const GPCPatchSample &sample ) const
{
const bool direction1 = ( coef.dot( sample.first.feature ) < rhs );
const bool direction2 = ( coef.dot( sample.second.feature ) < rhs );
return direction1 != direction2;
}
};
float normL2Sqr( const Vec2f &v ) { return v[0] * v[0] + v[1] * v[1]; }
bool checkBounds( int i, int j, Size sz )
{
return i >= patchRadius && j >= patchRadius && i + patchRadius < sz.height && j + patchRadius < sz.width;
}
void getTrainingSamples( const Mat &from, const Mat &to, const Mat &gt, GPCSamplesVector &samples )
{
const Size sz = gt.size();
std::vector<Magnitude> mag;
for ( int i = patchRadius; i + patchRadius < sz.height; ++i )
for ( int j = patchRadius; j + patchRadius < sz.width; ++j )
mag.push_back( Magnitude( normL2Sqr( gt.at<Vec2f>( i, j ) ), i, j ) );
size_t n = mag.size() * thresholdMagnitudeFrac;
std::nth_element( mag.begin(), mag.begin() + n, mag.end() );
mag.resize( n );
std::random_shuffle( mag.begin(), mag.end() );
n /= patchRadius;
mag.resize( n );
Mat fromCh[3], toCh[3];
split( from, fromCh );
split( to, toCh );
for ( size_t k = 0; k < n; ++k )
{
int i0 = mag[k].i;
int j0 = mag[k].j;
int i1 = i0 + cvRound( gt.at<Vec2f>( i0, j0 )[1] );
int j1 = j0 + cvRound( gt.at<Vec2f>( i0, j0 )[0] );
if ( checkBounds( i1, j1, sz ) )
samples.push_back( std::make_pair( GPCPatchDescriptor( fromCh, i0, j0 ), GPCPatchDescriptor( toCh, i1, j1 ) ) );
}
}
/* Sample random number from Cauchy distribution. */
double getRandomCauchyScalar()
{
static RNG rng;
return tan( rng.uniform( -1.54, 1.54 ) ); // I intentionally used the value slightly less than PI/2 to enforce strictly
// zero probability for large numbers. Resulting PDF for Cauchy has
// truncated "tails".
}
/* Sample random vector from Cauchy distribution (pointwise, i.e. vector whose components are independent random
* variables from Cauchy distribution) */
void getRandomCauchyVector( Vec<double, GPCPatchDescriptor::nFeatures> &v )
{
for ( unsigned i = 0; i < GPCPatchDescriptor::nFeatures; ++i )
v[i] = getRandomCauchyScalar();
}
}
GPCPatchDescriptor::GPCPatchDescriptor( const Mat *imgCh, int i, int j )
{
Rect roi( j - patchRadius, i - patchRadius, 2 * patchRadius, 2 * patchRadius );
Mat freqDomain;
dct( imgCh[0]( roi ), freqDomain );
feature[0] = freqDomain.at<float>( 0, 0 );
feature[1] = freqDomain.at<float>( 0, 1 );
feature[2] = freqDomain.at<float>( 0, 2 );
feature[3] = freqDomain.at<float>( 0, 3 );
feature[4] = freqDomain.at<float>( 1, 0 );
feature[5] = freqDomain.at<float>( 1, 1 );
feature[6] = freqDomain.at<float>( 1, 2 );
feature[7] = freqDomain.at<float>( 1, 3 );
feature[8] = freqDomain.at<float>( 2, 0 );
feature[9] = freqDomain.at<float>( 2, 1 );
feature[10] = freqDomain.at<float>( 2, 2 );
feature[11] = freqDomain.at<float>( 2, 3 );
feature[12] = freqDomain.at<float>( 3, 0 );
feature[13] = freqDomain.at<float>( 3, 1 );
feature[14] = freqDomain.at<float>( 3, 2 );
feature[15] = freqDomain.at<float>( 3, 3 );
feature[16] = cv::sum( imgCh[1]( roi ) )[0] / ( 2 * patchRadius );
feature[17] = cv::sum( imgCh[2]( roi ) )[0] / ( 2 * patchRadius );
}
bool GPCTree::trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth )
{
if ( std::distance( begin, end ) < minNumberOfSamples )
return false;
if ( nodeId >= nodes.size() )
nodes.resize( nodeId + 1 );
Node &node = nodes[nodeId];
// Select the best hyperplane
unsigned globalBestScore = 0;
std::vector<double> values;
for ( int j = 0; j < globalIters; ++j )
{ // Global search step
Vec<double, GPCPatchDescriptor::nFeatures> coef;
unsigned localBestScore = 0;
getRandomCauchyVector( coef );
for ( int i = 0; i < localIters; ++i )
{ // Local search step
double randomModification = getRandomCauchyScalar();
const int pos = i % GPCPatchDescriptor::nFeatures;
std::swap( coef[pos], randomModification );
values.clear();
for ( SIter iter = begin; iter != end; ++iter )
{
values.push_back( coef.dot( iter->first.feature ) );
values.push_back( coef.dot( iter->second.feature ) );
}
std::nth_element( values.begin(), values.begin() + values.size() / 2, values.end() );
const double median = values[values.size() / 2];
unsigned correct = 0;
for ( SIter iter = begin; iter != end; ++iter )
{
const bool direction = ( coef.dot( iter->first.feature ) < median );
if ( direction == ( coef.dot( iter->second.feature ) < median ) )
++correct;
}
if ( correct > localBestScore )
localBestScore = correct;
else
coef[pos] = randomModification;
if ( correct > globalBestScore )
{
globalBestScore = correct;
node.coef = coef;
node.rhs = median;
if ( debugOutput )
{
printf( "[%u] Updating weights: correct %.2f (%u/%ld)\n", depth, double( correct ) / std::distance( begin, end ), correct,
std::distance( begin, end ) );
for ( unsigned k = 0; k < GPCPatchDescriptor::nFeatures; ++k )
printf( "%.3f ", coef[k] );
printf( "\n" );
}
}
}
}
// Partition vector with samples according to the hyperplane in QuickSort-like manner.
// Unlike QuickSort, we need to partition it into 3 parts (left subtree samples; undefined samples; right subtree
// samples), so we call it two times.
SIter leftEnd = std::partition( begin, end, PartitionPredicate1( node.coef, node.rhs ) ); // Separate left subtree samples from others.
SIter rightBegin =
std::partition( leftEnd, end, PartitionPredicate2( node.coef, node.rhs ) ); // Separate undefined samples from right subtree samples.
node.left = ( trainNode( nodeId * 2 + 1, begin, leftEnd, depth + 1 ) ) ? nodeId * 2 + 1 : 0;
node.right = ( trainNode( nodeId * 2 + 2, rightBegin, end, depth + 1 ) ) ? nodeId * 2 + 2 : 0;
return true;
}
void GPCTree::train( GPCSamplesVector &samples )
{
nodes.reserve( samples.size() * 2 - 1 ); // set upper bound for the possible number of nodes so all subsequent resize() will be no-op
trainNode( 0, samples.begin(), samples.end(), 0 );
}
void GPCTree::write( FileStorage &fs ) const
{
if ( nodes.empty() )
CV_Error( CV_StsBadArg, "Tree have not been trained" );
fs << "nodes" << nodes;
}
void GPCTree::read( const FileNode &fn ) { fn["nodes"] >> nodes; }
Ptr<GPCTrainingSamples> GPCTrainingSamples::create( const std::vector<String> &imagesFrom, const std::vector<String> &imagesTo,
const std::vector<String> &gt )
{
CV_Assert( imagesFrom.size() == imagesTo.size() );
CV_Assert( imagesFrom.size() == gt.size() );
Ptr<GPCTrainingSamples> ts = makePtr<GPCTrainingSamples>();
for ( size_t i = 0; i < imagesFrom.size(); ++i )
{
Mat from = imread( imagesFrom[i] );
Mat to = imread( imagesTo[i] );
Mat gtFlow = readOpticalFlow( gt[i] );
CV_Assert( from.size == to.size );
CV_Assert( from.size == gtFlow.size );
CV_Assert( from.channels() == 3 );
CV_Assert( to.channels() == 3 );
from.convertTo( from, CV_32FC3 );
to.convertTo( to, CV_32FC3 );
cvtColor( from, from, COLOR_BGR2YCrCb );
cvtColor( to, to, COLOR_BGR2YCrCb );
getTrainingSamples( from, to, gtFlow, ts->samples );
}
return ts;
}
} // namespace optflow
void write( FileStorage &fs, const String &name, const optflow::GPCTree::Node &node )
{
cv::internal::WriteStructContext ws( fs, name, CV_NODE_SEQ + CV_NODE_FLOW );
for ( unsigned i = 0; i < optflow::GPCPatchDescriptor::nFeatures; ++i )
write( fs, node.coef[i] );
write( fs, node.rhs );
write( fs, (int)node.left );
write( fs, (int)node.right );
}
void read( const FileNode &fn, optflow::GPCTree::Node &node, optflow::GPCTree::Node )
{
FileNodeIterator it = fn.begin();
for ( unsigned i = 0; i < optflow::GPCPatchDescriptor::nFeatures; ++i )
it >> node.coef[i];
it >> node.rhs >> (int &)node.left >> (int &)node.right;
}
} // namespace cv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment