Commit d611fb61 authored by P. Druzhkov

Gradient Boosting Trees (CvGBTrees) added to opencv ml. Test for all CvGBTrees public methods added.
parent d7880076
@@ -183,6 +183,7 @@ CV_INLINE CvParamLattice cvDefaultParamLattice( void )
#define CV_TYPE_NAME_ML_ANN_MLP "opencv-ml-ann-mlp"
#define CV_TYPE_NAME_ML_CNN "opencv-ml-cnn"
#define CV_TYPE_NAME_ML_RTREES "opencv-ml-random-trees"
#define CV_TYPE_NAME_ML_GBT "opencv-ml-gradient-boosting-trees"
#define CV_TRAIN_ERROR 0
#define CV_TEST_ERROR 1
@@ -1359,6 +1360,532 @@ protected:
};
/****************************************************************************************\
* Gradient Boosted Trees *
\****************************************************************************************/
// DataType: STRUCT CvGBTreesParams
// Parameters of the GBT (Gradient Boosted Trees) model, including single
// tree settings and ensemble parameters.
//
// weak_count - count of trees in the ensemble
// loss_function_type - loss function used for ensemble training
// subsample_portion - portion of the whole training set used for
// training every single tree.
// subsample_portion is in (0.0, 1.0].
// subsample_portion == 1.0 means the whole dataset is
// used on each step. The count of samples used on each
// step is computed as
// int(total_samples_count * subsample_portion).
// shrinkage - regularization parameter.
// Each tree prediction is multiplied by the shrinkage
// value.
struct CV_EXPORTS CvGBTreesParams : public CvDTreeParams
{
int weak_count;
int loss_function_type;
float subsample_portion;
float shrinkage;
CvGBTreesParams();
CvGBTreesParams( int loss_function_type, int weak_count, float shrinkage,
float subsample_portion, int max_depth, bool use_surrogates );
};
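/*
// Example (a minimal sketch; values are illustrative): parameters for an
// ensemble of 100 trees of depth 3 with shrinkage 0.05, each tree trained
// on a random 80% subsample of the training set:
//
// CvGBTreesParams params( CvGBTrees::SQUARED_LOSS, 100, 0.05f, 0.8f,
// 3, false );
*/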
// DataType: CLASS CvGBTrees
// Gradient Boosting Trees (GBT) algorithm implementation.
//
// data - training dataset
// params - parameters of the CvGBTrees
// weak - array[0..(class_count-1)] of CvSeq
// for storing tree ensembles
// orig_response - original responses of the training set samples
// sum_response - predictions of the current model on the training dataset.
// This matrix is updated on every iteration.
// sum_response_tmp - predictions of the model on the training set on the next
// step. On every iteration values of sum_response_tmp are
// computed from sum_response values. When the current
// step is complete, sum_response values become equal to
// sum_response_tmp.
// sample_idx - indices of samples used for training the ensemble.
// The CvGBTrees training procedure takes a set of samples
// (train_data) and a set of responses (responses).
// Only pairs (train_data[i], responses[i]), where i is
// in sample_idx, are used for training the ensemble.
// subsample_train - indices of samples used for training a single decision
// tree on the current step. These indices are relative
// to sample_idx, so that pairs
// (train_data[sample_idx[i]], responses[sample_idx[i]])
// are used for training a decision tree.
// The training set is randomly split
// into two parts (subsample_train and subsample_test)
// on every iteration according to the subsample_portion
// parameter.
// subsample_test - relative indices of samples from the training set
// which are not used for training a tree on the current
// step.
// missing - mask of the missing values in the training set. This
// matrix has the same size as train_data. 1 - missing
// value, 0 - not a missing value.
// class_labels - output class labels map.
// rng - random number generator. Used for splitting the
// training set.
// class_count - count of output classes.
// class_count == 1 in the case of regression,
// and > 1 in the case of classification.
// delta - Huber loss function parameter.
// base_value - starting point of the gradient descent procedure.
// The model prediction is
// f(x) = f_0 + sum_{i=1..weak_count-1}(f_i(x)), where
// f_0 is the base value.
class CV_EXPORTS CvGBTrees : public CvStatModel
{
public:
/*
// DataType: ENUM
// Loss functions implemented in CvGBTrees.
//
// SQUARED_LOSS
// problem: regression
// loss = (x - x')^2
//
// ABSOLUTE_LOSS
// problem: regression
// loss = abs(x - x')
//
// HUBER_LOSS
// problem: regression
// loss = delta*( abs(x - x') - delta/2), if abs(x - x') > delta
// 1/2*(x - x')^2, if abs(x - x') <= delta,
// where delta is the alpha-quantile of the absolute residuals on
// the training set (alpha = 0.2 in this implementation, see
// find_gradient).
//
// DEVIANCE_LOSS
// problem: classification
// multinomial deviance (negative log-likelihood of the softmax
// model); the gradient computed in find_gradient is
// y_k - exp(f_k)/sum_j(exp(f_j)).
//
*/
enum {SQUARED_LOSS=0, ABSOLUTE_LOSS, HUBER_LOSS=3, DEVIANCE_LOSS};
/*
// Default constructor. Creates a model only (without training).
// Should be followed by one form of the train(...) function.
//
// API
// CvGBTrees();
// INPUT
// OUTPUT
// RESULT
*/
CvGBTrees();
/*
// Full form constructor. Creates a gradient boosting model and does the
// train.
//
// API
// CvGBTrees( const CvMat* _train_data, int _tflag,
const CvMat* _responses, const CvMat* _var_idx=0,
const CvMat* _sample_idx=0, const CvMat* _var_type=0,
const CvMat* _missing_mask=0,
CvGBTreesParams params=CvGBTreesParams() );
// INPUT
// _train_data - a set of input feature vectors.
// size of matrix is
// <count of samples> x <variables count>
// or <variables count> x <count of samples>
// depending on the _tflag parameter.
// matrix values are float.
// _tflag - a flag showing how samples are stored in the
// _train_data matrix: row by row (tflag=CV_ROW_SAMPLE)
// or column by column (tflag=CV_COL_SAMPLE).
// _responses - a vector of responses corresponding to the samples
// in _train_data.
// _var_idx - indices of used variables. zero value means that all
// variables are active.
// _sample_idx - indices of used samples. zero value means that all
// samples from _train_data are in the training set.
// _var_type - a vector of length <variables count> giving every
// variable a type: CV_VAR_CATEGORICAL or CV_VAR_ORDERED.
// _var_type = 0 means all variables are numerical.
// _missing_mask - a mask of missing values in _train_data.
// _missing_mask = 0 means that there are no missing
// values.
// params - parameters of GTB algorithm.
// OUTPUT
// RESULT
*/
CvGBTrees( const CvMat* _train_data, int _tflag,
const CvMat* _responses, const CvMat* _var_idx=0,
const CvMat* _sample_idx=0, const CvMat* _var_type=0,
const CvMat* _missing_mask=0,
CvGBTreesParams params=CvGBTreesParams() );
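/*
// Example (a minimal sketch; train_data and responses are illustrative
// CV_32F matrices holding one sample per row):
//
// CvGBTrees gbt( train_data, CV_ROW_SAMPLE, responses, 0, 0, 0, 0,
// CvGBTreesParams( CvGBTrees::SQUARED_LOSS, 100, 0.1f,
// 0.8f, 3, false ) );
*/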
/*
// Destructor.
*/
virtual ~CvGBTrees();
/*
// Gradient tree boosting model training
//
// API
// virtual bool train( const CvMat* _train_data, int _tflag,
const CvMat* _responses, const CvMat* _var_idx=0,
const CvMat* _sample_idx=0, const CvMat* _var_type=0,
const CvMat* _missing_mask=0,
CvGBTreesParams params=CvGBTreesParams(),
bool update=false );
// INPUT
// _train_data - a set of input feature vectors.
// size of matrix is
// <count of samples> x <variables count>
// or <variables count> x <count of samples>
// depending on the _tflag parameter.
// matrix values are float.
// _tflag - a flag showing how samples are stored in the
// _train_data matrix: row by row (tflag=CV_ROW_SAMPLE)
// or column by column (tflag=CV_COL_SAMPLE).
// _responses - a vector of responses corresponding to the samples
// in _train_data.
// _var_idx - indices of used variables. zero value means that all
// variables are active.
// _sample_idx - indices of used samples. zero value means that all
// samples from _train_data are in the training set.
// _var_type - a vector of length <variables count> giving every
// variable a type: CV_VAR_CATEGORICAL or CV_VAR_ORDERED.
// _var_type = 0 means all variables are numerical.
// _missing_mask - a mask of missing values in _train_data.
// _missing_mask = 0 means that there are no missing
// values.
// params - parameters of GTB algorithm.
// update - is not supported now. (!)
// OUTPUT
// RESULT
// Error state.
*/
virtual bool train( const CvMat* _train_data, int _tflag,
const CvMat* _responses, const CvMat* _var_idx=0,
const CvMat* _sample_idx=0, const CvMat* _var_type=0,
const CvMat* _missing_mask=0,
CvGBTreesParams params=CvGBTreesParams(),
bool update=false );
/*
// Gradient tree boosting model training
//
// API
// virtual bool train( CvMLData* data,
CvGBTreesParams params=CvGBTreesParams(),
bool update=false ) {return false;};
// INPUT
// data - training set.
// params - parameters of GTB algorithm.
// update - is not supported now. (!)
// OUTPUT
// RESULT
// Error state.
*/
virtual bool train( CvMLData* data,
CvGBTreesParams params=CvGBTreesParams(),
bool update=false );
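/*
// Example (a minimal sketch, assuming a comma-separated file whose response
// is stored in column 13, as in the regression test below):
//
// CvMLData mldata;
// mldata.set_delimiter(',');
// mldata.read_csv( "housing_.data" );
// mldata.set_response_idx( 13 );
// CvGBTrees gbt;
// gbt.train( &mldata, CvGBTreesParams() );
*/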
/*
// Response value prediction
//
// API
// virtual float predict( const CvMat* _sample, const CvMat* _missing=0,
CvMat* weak_responses=0, CvSlice slice = CV_WHOLE_SEQ,
int k=-1 ) const;
// INPUT
// _sample - input sample of the same type as in the training set.
// _missing - missing values mask. _missing=0 if there are no
// missing values in _sample vector.
// weak_responses - predictions of all of the trees.
// not implemented (!)
// slice - part of the ensemble used for prediction.
// slice = CV_WHOLE_SEQ when all trees are used.
// k - index of the tree ensemble used.
// k is in {-1,0,1,..,<count of output classes>-1}.
// in the case of a classification problem
// <count of output classes> ensembles are built,
// one per class.
// If k = -1, the ordinary prediction is the result;
// otherwise the function gives the prediction of the
// k-th ensemble only.
// OUTPUT
// RESULT
// Predicted value.
*/
virtual float predict( const CvMat* _sample, const CvMat* _missing=0,
CvMat* weak_responses=0, CvSlice slice = CV_WHOLE_SEQ,
int k=-1 ) const;
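/*
// Example (a minimal sketch; sample and missing_mask are illustrative
// single-row CvMat's prepared the same way as the training data):
//
// float y = gbt.predict( sample, missing_mask ); // whole ensemble
// float y10 = gbt.predict( sample, missing_mask, 0,
// cvSlice(0, 10) ); // first 10 trees only
*/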
/*
// Delete all temporary data.
//
// API
// virtual void clear();
// INPUT
// OUTPUT
// deletes data, weak, orig_response, sum_response,
// weak_eval, subsample_train, subsample_test,
// sample_idx, missing, class_labels
// delta = 0.0
// RESULT
*/
virtual void clear();
/*
// Compute error on the train/test set.
//
// API
// virtual float calc_error( CvMLData* _data, int type,
// std::vector<float> *resp = 0 );
//
// INPUT
// data - dataset
// type - defines which error to compute: train (CV_TRAIN_ERROR) or
// test (CV_TEST_ERROR).
// OUTPUT
// resp - vector of predictions
// RESULT
// Error value.
*/
virtual float calc_error( CvMLData* _data, int type,
std::vector<float> *resp = 0 );
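/*
// Example (a minimal sketch, assuming mldata has a train/test split set
// via CvTrainTestSplit, as in the test below):
//
// std::vector<float> preds;
// float test_err = gbt.calc_error( &mldata, CV_TEST_ERROR, &preds );
*/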
/*
//
// Write parameters of the gtb model and data. Write learned model.
//
// API
// virtual void write( CvFileStorage* fs, const char* name ) const;
//
// INPUT
// fs - file storage to write parameters to.
// name - model name.
// OUTPUT
// RESULT
*/
virtual void write( CvFileStorage* fs, const char* name ) const;
/*
//
// Read parameters of the gtb model and data. Read learned model.
//
// API
// virtual void read( CvFileStorage* fs, CvFileNode* node );
//
// INPUT
// fs - file storage to read parameters from.
// node - file node.
// OUTPUT
// RESULT
*/
virtual void read( CvFileStorage* fs, CvFileNode* node );
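/*
// Example (a minimal sketch): write() and read() are normally invoked
// through the generic CvStatModel interface, as the test below does:
//
// gbt.save( "gbt_model.yml" ); // calls write()
// CvGBTrees gbt2;
// gbt2.load( "gbt_model.yml" ); // calls read()
*/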
protected:
/*
// Compute the gradient vector components.
//
// API
// virtual void find_gradient( const int k = 0);
// INPUT
// k - index of the current tree ensemble (used for the
// classification problem).
// OUTPUT
// changes components of data->responses
// which correspond to samples used for training
// on the current step.
// RESULT
*/
virtual void find_gradient( const int k = 0);
/*
//
// Change values in tree leaves according to the used loss function.
//
// API
// virtual void change_values(CvDTree* tree, const int k = 0);
//
// INPUT
// tree - decision tree to change.
// k - index of the current tree ensemble (used for the
// classification problem).
// OUTPUT
// changes 'value' fields of the trees' leaves.
// changes sum_response_tmp.
// RESULT
*/
virtual void change_values(CvDTree* tree, const int k = 0);
/*
//
// Find optimal constant prediction value according to the used loss
// function.
// The goal is to find a constant which gives the minimal total loss
// on the _Idx samples.
//
// API
// virtual float find_optimal_value( const CvMat* _Idx );
//
// INPUT
// _Idx - indices of the samples from the training set.
// OUTPUT
// RESULT
// optimal constant value.
*/
virtual float find_optimal_value( const CvMat* _Idx );
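/*
// Note: as implemented below, the optimal constant is the mean residual
// for SQUARED_LOSS, the median residual for ABSOLUTE_LOSS, a one-step
// estimate around the median for HUBER_LOSS, and a single Newton step
// for DEVIANCE_LOSS.
*/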
/*
//
// Randomly split the whole training set in two parts according
// to params.portion.
//
// API
// virtual void do_subsample();
//
// INPUT
// OUTPUT
// subsample_train - indices of samples used for training
// subsample_test - indices of samples used for test
// RESULT
*/
virtual void do_subsample();
/*
//
// Internal recursive function collecting the leaves of a subtree into an array.
//
// API
// void leaves_get( CvDTreeNode** leaves, int& count, CvDTreeNode* node );
//
// INPUT
// node - root of the currently traversed subtree.
// OUTPUT
// count - count of leaves in the subtree.
// leaves - array of pointers to leaves.
// RESULT
*/
void leaves_get( CvDTreeNode** leaves, int& count, CvDTreeNode* node );
/*
//
// Get leaves of the tree.
//
// API
// CvDTreeNode** GetLeaves( const CvDTree* dtree, int& len );
//
// INPUT
// dtree - decision tree.
// OUTPUT
// len - count of the leaves.
// RESULT
// CvDTreeNode** - array of pointers to leaves.
*/
CvDTreeNode** GetLeaves( const CvDTree* dtree, int& len );
/*
//
// Tells whether the problem is regression or classification.
//
// API
// bool problem_type();
//
// INPUT
// OUTPUT
// RESULT
// false if it is a classification problem,
// true - if regression.
*/
virtual bool problem_type() const;
/*
//
// Write parameters of the gtb model.
//
// API
// virtual void write_params( CvFileStorage* fs ) const;
//
// INPUT
// fs - file storage to write parameters to.
// OUTPUT
// RESULT
*/
virtual void write_params( CvFileStorage* fs ) const;
/*
//
// Read parameters of the gtb model and data.
//
// API
// virtual void read_params( CvFileStorage* fs );
//
// INPUT
// fs - file storage to read parameters from.
// OUTPUT
// params - parameters of the gtb model.
// data - contains information about the structure
// of the data set (count of variables,
// their types, etc.).
// class_labels - output class labels map.
// RESULT
*/
virtual void read_params( CvFileStorage* fs, CvFileNode* fnode );
CvDTreeTrainData* data;
CvGBTreesParams params;
CvSeq** weak;
CvMat* orig_response;
CvMat* sum_response;
CvMat* sum_response_tmp;
CvMat* weak_eval;
CvMat* sample_idx;
CvMat* subsample_train;
CvMat* subsample_test;
CvMat* missing;
CvMat* class_labels;
CvRNG rng;
int class_count;
float delta;
float base_value;
};
/****************************************************************************************\
* Artificial Neural Networks (ANN) *
\****************************************************************************************/
@@ -1936,6 +2463,8 @@ typedef CvBoostTree BoostTree;
typedef CvBoost Boost;
typedef CvANN_MLP_TrainParams ANN_MLP_TrainParams;
typedef CvANN_MLP NeuralNet_MLP;
typedef CvGBTreesParams GradientBoostingTreesParams;
typedef CvGBTrees GradientBoostingTrees;
}
......
#include "precomp.hpp"
#include <string>
#include <time.h>
using namespace std;
#define pCvSeq CvSeq*
#define pCvDTreeNode CvDTreeNode*
#define CV_CMP_FLOAT(a,b) ((a) < (b))
static CV_IMPLEMENT_QSORT_EX( icvSortFloat, float, CV_CMP_FLOAT, float)
//===========================================================================
string ToString(int i)
{
stringstream tmp;
tmp << i;
return tmp.str();
}
//===========================================================================
int get_len(const CvMat* mat)
{
return (mat->cols > mat->rows) ? mat->cols : mat->rows;
}
//===========================================================================
//----------------------------- CvGBTreesParams -----------------------------
//===========================================================================
CvGBTreesParams::CvGBTreesParams()
: CvDTreeParams( 3, 10, 0, true, 10, 0, false, false, 0 )
{
weak_count = 50;
loss_function_type = CvGBTrees::SQUARED_LOSS;
subsample_portion = 1.0f;
shrinkage = 1.0f;
}
//===========================================================================
CvGBTreesParams::CvGBTreesParams( int _loss_function_type, int _weak_count,
float _shrinkage, float _subsample_portion,
int _max_depth, bool _use_surrogates )
: CvDTreeParams( 3, 10, 0, true, 10, 0, false, false, 0 )
{
loss_function_type = _loss_function_type;
weak_count = _weak_count;
shrinkage = _shrinkage;
subsample_portion = _subsample_portion;
max_depth = _max_depth;
use_surrogates = _use_surrogates;
}
//===========================================================================
//------------------------------- CvGBTrees ---------------------------------
//===========================================================================
CvGBTrees::CvGBTrees()
{
data = 0;
weak = 0;
default_model_name = "my_boost_tree";
orig_response = sum_response = sum_response_tmp = 0;
weak_eval = subsample_train = subsample_test = 0;
missing = sample_idx = 0;
class_labels = 0;
class_count = 1;
delta = 0.0f;
clear();
}
//===========================================================================
void CvGBTrees::clear()
{
if( weak )
{
CvSeqReader reader;
CvSlice slice = CV_WHOLE_SEQ;
int weak_count = cvSliceLength( slice, weak[class_count-1] );
CvDTree* tree;
//data->shared = false;
for (int i=0; i<class_count; ++i)
{
if ((weak[i]) && (weak_count))
{
cvStartReadSeq( weak[i], &reader );
cvSetSeqReaderPos( &reader, slice.start_index );
for (int j=0; j<weak_count; ++j)
{
CV_READ_SEQ_ELEM( tree, reader );
//tree->clear();
delete tree;
tree = 0;
}
}
}
for (int i=0; i<class_count; ++i)
if (weak[i]) cvReleaseMemStorage( &(weak[i]->storage) );
delete[] weak;
}
if (data)
{
data->shared = false;
delete data;
}
weak = 0;
data = 0;
delta = 0.0f;
cvReleaseMat( &orig_response );
cvReleaseMat( &sum_response );
cvReleaseMat( &sum_response_tmp );
cvReleaseMat( &weak_eval );
cvReleaseMat( &subsample_train );
cvReleaseMat( &subsample_test );
cvReleaseMat( &sample_idx );
cvReleaseMat( &missing );
cvReleaseMat( &class_labels );
}
//===========================================================================
CvGBTrees::~CvGBTrees()
{
clear();
}
//===========================================================================
CvGBTrees::CvGBTrees( const CvMat* _train_data, int _tflag,
const CvMat* _responses, const CvMat* _var_idx,
const CvMat* _sample_idx, const CvMat* _var_type,
const CvMat* _missing_mask, CvGBTreesParams _params )
{
weak = 0;
data = 0;
default_model_name = "my_boost_tree";
orig_response = sum_response = sum_response_tmp = 0;
weak_eval = subsample_train = subsample_test = 0;
missing = sample_idx = 0;
class_labels = 0;
class_count = 1;
delta = 0.0f;
train( _train_data, _tflag, _responses, _var_idx, _sample_idx,
_var_type, _missing_mask, _params );
}
//===========================================================================
bool CvGBTrees::problem_type() const
{
switch (params.loss_function_type)
{
case DEVIANCE_LOSS: return false;
default: return true;
}
}
//===========================================================================
bool
CvGBTrees::train( CvMLData* data, CvGBTreesParams params, bool update )
{
bool result;
result = train ( data->get_values(), CV_ROW_SAMPLE,
data->get_responses(), data->get_var_idx(),
data->get_train_sample_idx(), data->get_var_types(),
data->get_missing(), params, update);
//update is not supported
return result;
}
//===========================================================================
bool
CvGBTrees::train( const CvMat* _train_data, int _tflag,
const CvMat* _responses, const CvMat* _var_idx,
const CvMat* _sample_idx, const CvMat* _var_type,
const CvMat* _missing_mask,
CvGBTreesParams _params, bool _update ) //update is not supported
{
CvMemStorage* storage = 0;
params = _params;
bool is_regression = problem_type();
clear();
int len = get_len(_responses);
CvMat* new_responses = cvCreateMat( len, 1, CV_32F);
cvZero(new_responses);
data = new CvDTreeTrainData( _train_data, _tflag, new_responses, _var_idx,
_sample_idx, _var_type, _missing_mask, _params, true, true );
if (_missing_mask)
{
missing = cvCreateMat(_missing_mask->rows, _missing_mask->cols,
_missing_mask->type);
cvCopy( _missing_mask, missing);
}
orig_response = cvCreateMat( _responses->rows, _responses->cols,
_responses->type );
cvCopy( _responses, orig_response);
orig_response->step = CV_ELEM_SIZE(_responses->type);
if (!is_regression)
{
int max_label = -1;
for (int i=0; i<get_len(orig_response); ++i)
if (max_label < orig_response->data.fl[i])
max_label = int(orig_response->data.fl[i]);
max_label++;
class_labels = cvCreateMat(1, max_label, CV_32S);
cvZero(class_labels);
for (int i=0; i<get_len(orig_response); ++i)
class_labels->data.i[int(orig_response->data.fl[i])] = 1;
class_count = 0;
for (int i=0; i<max_label; ++i)
if (class_labels->data.i[i])
class_labels->data.i[i] = ++class_count;
}
data->is_classifier = false;
if (_sample_idx)
{
sample_idx = cvCreateMat( _sample_idx->rows, _sample_idx->cols,
_sample_idx->type );
cvCopy( _sample_idx, sample_idx);
icvSortFloat(sample_idx->data.fl, get_len(sample_idx), 0);
}
else
{
int n = (_tflag == CV_ROW_SAMPLE) ? _train_data->rows
: _train_data->cols;
sample_idx = cvCreateMat( 1, n, CV_32S );
for (int i=0; i<n; ++i)
sample_idx->data.i[i] = i;
}
sum_response = cvCreateMat(class_count, len, CV_32F);
sum_response_tmp = cvCreateMat(class_count, len, CV_32F);
cvZero(sum_response);
delta = 0.0f;
if (is_regression) base_value = find_optimal_value(sample_idx);
else base_value = 0.0f;
cvSet( sum_response, cvScalar(base_value) );
weak = new pCvSeq[class_count];
for (int i=0; i<class_count; ++i)
{
storage = cvCreateMemStorage();
weak[i] = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvDTree*), storage );
storage = 0;
}
// subsample params and data
rng = CvRNG(time(0));
int samples_count = get_len(sample_idx);
//if ( params.subsample_portion > 1) params.subsample_portion = 1;
//if ( params.subsample_portion < 0) params.subsample_portion = 1;
params.subsample_portion = params.subsample_portion <= FLT_EPSILON ||
1 - params.subsample_portion <= FLT_EPSILON
? 1 : params.subsample_portion;
int train_sample_count = cvFloor(params.subsample_portion * samples_count);
if (train_sample_count == 0)
train_sample_count = samples_count;
int test_sample_count = samples_count - train_sample_count;
int* idx_data = new int[samples_count];
subsample_train = cvCreateMatHeader( 1, train_sample_count, CV_32SC1 );
*subsample_train = cvMat( 1, train_sample_count, CV_32SC1, idx_data );
if (test_sample_count)
{
subsample_test = cvCreateMatHeader( 1, test_sample_count, CV_32SC1 );
*subsample_test = cvMat( 1, test_sample_count, CV_32SC1,
idx_data + train_sample_count );
}
// training procedure
for ( int i=0; i < params.weak_count; ++i )
{
for ( int m=0; m < class_count; ++m )
{
do_subsample();
find_gradient(m);
CvDTree* tree = new CvDTree;
tree->train( data, subsample_train );
change_values(tree, m);
if (subsample_test)
{
CvMat x;
CvMat x_miss;
int* sample_data = sample_idx->data.i;
int* subsample_data = subsample_test->data.i;
int s_step = (sample_idx->cols > sample_idx->rows) ? 1
: sample_idx->step/CV_ELEM_SIZE(sample_idx->type);
for (int j=0; j<get_len(subsample_test); ++j)
{
for (int k=0; k<class_count; ++k)
{
int idx = *(sample_data + subsample_data[j]*s_step);
float res = 0.0f;
cvGetRow( data->train_data, &x, idx);
if (missing)
{
cvGetRow( missing, &x_miss, idx);
res = (float)tree->predict(&x, &x_miss)->value;
}
else
{
res = (float)tree->predict(&x)->value;
}
sum_response_tmp->data.fl[idx + k*len] =
sum_response->data.fl[idx + k*len] +
params.shrinkage * res;
}
}
}
cvSeqPush( weak[m], &tree );
tree = 0;
} // m=0..class_count
// swap sum_response and sum_response_tmp so that sum_response
// accumulates the predictions including this iteration's trees
CvMat* tmp;
tmp = sum_response_tmp;
sum_response_tmp = sum_response;
sum_response = tmp;
tmp = 0;
} // i=0..params.weak_count
delete[] idx_data;
cvReleaseMat(&new_responses);
data->free_train_data();
return true;
} // CvGBTrees::train(...)
//===========================================================================
float Sign(float x)
{
if (x<0.0f) return -1.0f;
else if (x>0.0f) return 1.0f;
return 0.0f;
}
//===========================================================================
void CvGBTrees::find_gradient(const int k)
{
int* sample_data = sample_idx->data.i;
int* subsample_data = subsample_train->data.i;
float* grad_data = data->responses->data.fl;
float* resp_data = orig_response->data.fl;
float* current_data = sum_response->data.fl;
switch (params.loss_function_type)
// loss_function_type in
// {SQUARED_LOSS, ABSOLUTE_LOSS, HUBER_LOSS, DEVIANCE_LOSS}
{
case SQUARED_LOSS:
{
for (int i=0; i<get_len(subsample_train); ++i)
{
int s_step = (sample_idx->cols > sample_idx->rows) ? 1
: sample_idx->step/CV_ELEM_SIZE(sample_idx->type);
int idx = *(sample_data + subsample_data[i]*s_step);
grad_data[idx] = resp_data[idx] - current_data[idx];
}
}; break;
case ABSOLUTE_LOSS:
{
for (int i=0; i<get_len(subsample_train); ++i)
{
int s_step = (sample_idx->cols > sample_idx->rows) ? 1
: sample_idx->step/CV_ELEM_SIZE(sample_idx->type);
int idx = *(sample_data + subsample_data[i]*s_step);
grad_data[idx] = Sign(resp_data[idx] - current_data[idx]);
}
}; break;
case HUBER_LOSS:
{
float alpha = 0.2f;
int n = get_len(subsample_train);
int s_step = (sample_idx->cols > sample_idx->rows) ? 1
: sample_idx->step/CV_ELEM_SIZE(sample_idx->type);
float* residuals = new float[n];
for (int i=0; i<n; ++i)
{
int idx = *(sample_data + subsample_data[i]*s_step);
residuals[i] = fabs(resp_data[idx] - current_data[idx]);
}
icvSortFloat(residuals, n, 0.0f);
int q = int(ceil(n*alpha));
if (q >= n) q = n - 1; // guard against an out-of-range index for tiny n
delta = residuals[q];
for (int i=0; i<n; ++i)
{
int idx = *(sample_data + subsample_data[i]*s_step);
float r = resp_data[idx] - current_data[idx];
grad_data[idx] = (fabs(r) > delta) ? delta*Sign(r) : r;
}
delete[] residuals;
}; break;
case DEVIANCE_LOSS:
{
for (int i=0; i<get_len(subsample_train); ++i)
{
long double exp_fk = 0;
long double exp_sfi = 0;
int s_step = (sample_idx->cols > sample_idx->rows) ? 1
: sample_idx->step/CV_ELEM_SIZE(sample_idx->type);
int idx = *(sample_data + subsample_data[i]*s_step);
for (int j=0; j<class_count; ++j)
{
long double res;
res = current_data[idx + j*sum_response->cols];
res = expl(res);
if (j == k) exp_fk = res;
exp_sfi += res;
}
int orig_label = int(resp_data[idx]);
grad_data[idx] = (float)(!(k-class_labels->data.i[orig_label]+1)) -
(float)(exp_fk / exp_sfi);
}
}; break;
default: break;
}
} // CvGBTrees::find_gradient(...)
//===========================================================================
void CvGBTrees::change_values(CvDTree* tree, const int _k)
{
CvDTreeNode** predictions = new pCvDTreeNode[get_len(subsample_train)];
int* sample_data = sample_idx->data.i;
int* subsample_data = subsample_train->data.i;
int s_step = (sample_idx->cols > sample_idx->rows) ? 1
: sample_idx->step/CV_ELEM_SIZE(sample_idx->type);
CvMat x;
CvMat miss_x;
for (int i=0; i<get_len(subsample_train); ++i)
{
int idx = *(sample_data + subsample_data[i]*s_step);
cvGetRow( data->train_data, &x, idx);
if (missing)
{
cvGetRow( missing, &miss_x, idx);
predictions[i] = tree->predict(&x, &miss_x);
}
else
predictions[i] = tree->predict(&x);
}
CvDTreeNode** leaves;
int leaves_count = 0;
leaves = GetLeaves( tree, leaves_count);
for (int i=0; i<leaves_count; ++i)
{
int samples_in_leaf = 0;
for (int j=0; j<get_len(subsample_train); ++j)
{
if (leaves[i] == predictions[j]) samples_in_leaf++;
}
if (!samples_in_leaf) // should not happen, but guard against empty leaves
{
leaves[i]->value = 0.0;
continue;
}
CvMat* leaf_idx = cvCreateMat(1, samples_in_leaf, CV_32S);
int* leaf_idx_data = leaf_idx->data.i;
for (int j=0; j<get_len(subsample_train); ++j)
{
int idx = *(sample_data + subsample_data[j]*s_step);
if (leaves[i] == predictions[j])
*leaf_idx_data++ = idx;
}
float value = find_optimal_value(leaf_idx);
leaves[i]->value = value;
leaf_idx_data = leaf_idx->data.i;
int len = sum_response_tmp->cols;
for (int j=0; j<get_len(leaf_idx); ++j)
{
int idx = leaf_idx_data[j];
sum_response_tmp->data.fl[idx + _k*len] =
sum_response->data.fl[idx + _k*len] +
params.shrinkage * value;
}
leaf_idx_data = 0;
cvReleaseMat(&leaf_idx);
}
// releasing the memory
for (int i=0; i<get_len(subsample_train); ++i)
{
predictions[i] = 0;
}
delete[] predictions;
for (int i=0; i<leaves_count; ++i)
{
leaves[i] = 0;
}
delete[] leaves;
}
//===========================================================================
/*
void CvGBTrees::change_values(CvDTree* tree, const int _k)
{
CvDTreeNode** leaves;
int leaves_count = 0;
leaves = GetLeaves( tree, leaves_count);
for (int i=0; i<leaves_count; ++i)
{
int n = leaves[i]->sample_count;
int* leaf_idx_data = new int[n];
data->get_sample_indices(leaves[i], leaf_idx_data);
CvMat* leaf_idx = 0;
cvInitMatHeader(leaf_idx, n, 1, CV_32S, leaf_idx_data);
float value = find_optimal_value(leaf_idx);
leaves[i]->value = value;
int len = sum_response_tmp->cols;
for (int j=0; j<n; ++j)
{
int idx = leaf_idx_data[j] + _k*len;
sum_response_tmp->data.fl[idx] = sum_response->data.fl[idx] +
params.shrinkage * value;
}
leaf_idx_data = 0;
cvReleaseMat(&leaf_idx);
}
// releasing the memory
for (int i=0; i<leaves_count; ++i)
{
leaves[i] = 0;
}
delete[] leaves;
} //change_values(...);
*/
//===========================================================================
float CvGBTrees::find_optimal_value( const CvMat* _Idx )
{
long double gamma = (long double)0.0;
int* idx = _Idx->data.i;
float* resp_data = orig_response->data.fl;
float* cur_data = sum_response->data.fl;
int n = get_len(_Idx);
switch (params.loss_function_type)
// SQUARED_LOSS=0, ABSOLUTE_LOSS=1, HUBER_LOSS=3, DEVIANCE_LOSS=4
{
case SQUARED_LOSS:
{
for (int i=0; i<n; ++i)
gamma += resp_data[idx[i]] - cur_data[idx[i]];
gamma /= (long double)n;
}; break;
case ABSOLUTE_LOSS:
{
float* residuals = new float[n];
for (int i=0; i<n; ++i)
residuals[i] = resp_data[idx[i]] - cur_data[idx[i]];
icvSortFloat(residuals, n, 0.0f);
if (n % 2)
gamma = residuals[n/2];
else gamma = (residuals[n/2-1] + residuals[n/2]) / 2.0f;
delete[] residuals;
}; break;
case HUBER_LOSS:
{
float* residuals = new float[n];
for (int i=0; i<n; ++i)
residuals[i] = resp_data[idx[i]] - cur_data[idx[i]];
icvSortFloat(residuals, n, 0.0f);
int n_half = n >> 1;
float r_median = (n == n_half<<1) ?
(residuals[n_half-1] + residuals[n_half]) / 2.0f :
residuals[n_half];
for (int i=0; i<n; ++i)
{
float dif = residuals[i] - r_median;
gamma += (delta < fabs(dif)) ? Sign(dif)*delta : dif;
}
gamma /= (long double)n;
gamma += r_median;
delete[] residuals;
}; break;
case DEVIANCE_LOSS:
{
float* grad_data = data->responses->data.fl;
long double tmp1 = 0;
long double tmp2 = 0;
long double tmp = 0;
for (int i=0; i<n; ++i)
{
tmp = grad_data[idx[i]];
tmp1 += tmp;
tmp2 += fabs(tmp)*(1-fabs(tmp));
};
if (tmp2 == 0)
{
tmp2 = 1;
}
gamma = ((long double)(class_count-1)) / (long double)class_count * (tmp1/tmp2);
}; break;
default: break;
}
return float(gamma);
} // CvGBTrees::find_optimal_value
//===========================================================================
void CvGBTrees::leaves_get( CvDTreeNode** leaves, int& count, CvDTreeNode* node )
{
if (node->left != NULL) leaves_get(leaves, count, node->left);
if (node->right != NULL) leaves_get(leaves, count, node->right);
if ((node->left == NULL) && (node->right == NULL))
leaves[count++] = node;
}
//---------------------------------------------------------------------------
CvDTreeNode** CvGBTrees::GetLeaves( const CvDTree* dtree, int& len )
{
len = 0;
CvDTreeNode** leaves = new pCvDTreeNode[1 << params.max_depth];
leaves_get(leaves, len, const_cast<pCvDTreeNode>(dtree->get_root()));
return leaves;
}
//===========================================================================
void CvGBTrees::do_subsample()
{
int n = get_len(sample_idx);
int* idx = subsample_train->data.i;
for (int i = 0; i < n; i++ )
idx[i] = i;
// subsample_train and subsample_test are views into one shared index
// buffer, so shuffling it with random transpositions splits the training
// set: the first train_sample_count entries form the train subsample and
// the remaining entries form the test subsample.
if (subsample_test)
for (int i = 0; i < n; i++)
{
int a = cvRandInt( &rng ) % n;
int b = cvRandInt( &rng ) % n;
int t;
CV_SWAP( idx[a], idx[b], t );
}
/*
int n = get_len(sample_idx);
if (subsample_train == 0)
subsample_train = cvCreateMat(1, n, CV_32S);
int* subsample_data = subsample_train->data.i;
for (int i=0; i<n; ++i)
subsample_data[i] = i;
subsample_test = 0;
*/
}
//===========================================================================
float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
CvMat* weak_responses, CvSlice slice, int k) const
{
float result = 0.0f;
if (!weak) return 0.0f;
float* sum = new float[class_count];
for (int i=0; i<class_count; ++i)
sum[i] = base_value;
CvSeqReader reader;
int weak_count = cvSliceLength( slice, weak[class_count-1] );
CvDTree* tree;
for (int i=0; i<class_count; ++i)
{
if ((weak[i]) && (weak_count))
{
cvStartReadSeq( weak[i], &reader );
cvSetSeqReaderPos( &reader, slice.start_index );
for (int j=0; j<weak_count; ++j)
{
CV_READ_SEQ_ELEM( tree, reader );
sum[i] += params.shrinkage *
(float)(tree->predict(_sample, _missing)->value);
}
}
}
if (class_count == 1)
{
result = sum[0];
delete[] sum;
return result;
}
if ((k>=0) && (k<class_count))
{
result = sum[k];
delete[] sum;
return result;
}
float max = sum[0];
int class_label = 0;
for (int i=1; i<class_count; ++i)
if (sum[i] > max)
{
max = sum[i];
class_label = i;
}
delete[] sum;
int orig_class_label = -1;
for (int i=0; i<get_len(class_labels); ++i)
if (class_labels->data.i[i] == class_label+1)
orig_class_label = i;
return float(orig_class_label);
}
//===========================================================================
void CvGBTrees::write_params( CvFileStorage* fs ) const
{
CV_FUNCNAME( "CvGBTrees::write_params" );
__BEGIN__;
const char* loss_function_type_str =
params.loss_function_type == SQUARED_LOSS ? "SquaredLoss" :
params.loss_function_type == ABSOLUTE_LOSS ? "AbsoluteLoss" :
params.loss_function_type == HUBER_LOSS ? "HuberLoss" :
params.loss_function_type == DEVIANCE_LOSS ? "DevianceLoss" : 0;
if( loss_function_type_str )
cvWriteString( fs, "loss_function", loss_function_type_str );
else
cvWriteInt( fs, "loss_function", params.loss_function_type );
cvWriteInt( fs, "ensemble_length", params.weak_count );
cvWriteReal( fs, "shrinkage", params.shrinkage );
cvWriteReal( fs, "subsample_portion", params.subsample_portion );
//cvWriteInt( fs, "max_tree_depth", params.max_depth );
//cvWriteString( fs, "use_surrogate_splits", params.use_surrogates ? "true" : "false");
if (class_labels) cvWrite( fs, "class_labels", class_labels);
data->is_classifier = !problem_type();
data->write_params( fs );
data->is_classifier = 0;
__END__;
}
//===========================================================================
void CvGBTrees::read_params( CvFileStorage* fs, CvFileNode* fnode )
{
CV_FUNCNAME( "CvGBTrees::read_params" );
__BEGIN__;
CvFileNode* temp;
if( !fnode || !CV_NODE_IS_MAP(fnode->tag) )
return;
data = new CvDTreeTrainData();
CV_CALL( data->read_params(fs, fnode));
data->shared = true;
params.max_depth = data->params.max_depth;
params.min_sample_count = data->params.min_sample_count;
params.max_categories = data->params.max_categories;
params.priors = data->params.priors;
params.regression_accuracy = data->params.regression_accuracy;
params.use_surrogates = data->params.use_surrogates;
temp = cvGetFileNodeByName( fs, fnode, "loss_function" );
if( !temp )
EXIT;
if( temp && CV_NODE_IS_STRING(temp->tag) )
{
const char* loss_function_type_str = cvReadString( temp, "" );
params.loss_function_type = strcmp( loss_function_type_str, "SquaredLoss" ) == 0 ? SQUARED_LOSS :
strcmp( loss_function_type_str, "AbsoluteLoss" ) == 0 ? ABSOLUTE_LOSS :
strcmp( loss_function_type_str, "HuberLoss" ) == 0 ? HUBER_LOSS :
strcmp( loss_function_type_str, "DevianceLoss" ) == 0 ? DEVIANCE_LOSS : -1;
}
else
params.loss_function_type = cvReadInt( temp, -1 );
if( params.loss_function_type < SQUARED_LOSS || params.loss_function_type > DEVIANCE_LOSS || params.loss_function_type == 2)
CV_ERROR( CV_StsBadArg, "Unknown loss function" );
params.weak_count = cvReadIntByName( fs, fnode, "ensemble_length" );
params.shrinkage = (float)cvReadRealByName( fs, fnode, "shrinkage", 0.1 );
params.subsample_portion = (float)cvReadRealByName( fs, fnode, "subsample_portion", 1.0 );
if (data->is_classifier)
{
class_labels = (CvMat*)cvReadByName( fs, fnode, "class_labels" );
if( class_labels && !CV_IS_MAT(class_labels))
CV_ERROR( CV_StsParseError, "class_labels must stored as a matrix");
}
data->is_classifier = 0;
__END__;
}
void CvGBTrees::write( CvFileStorage* fs, const char* name ) const
{
CV_FUNCNAME( "CvGBTrees::write" );
__BEGIN__;
CvSeqReader reader;
int i;
std::string s;
cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_ML_GBT );
if( !weak )
CV_ERROR( CV_StsBadArg, "The model has not been trained yet" );
write_params( fs );
cvWriteReal( fs, "base_value", base_value);
cvWriteInt( fs, "class_count", class_count);
for ( int j=0; j < class_count; ++j )
{
s = "trees_";
s += ToString(j);
cvStartWriteStruct( fs, s.c_str(), CV_NODE_SEQ );
cvStartReadSeq( weak[j], &reader );
for( i = 0; i < weak[j]->total; i++ )
{
CvDTree* tree;
CV_READ_SEQ_ELEM( tree, reader );
cvStartWriteStruct( fs, 0, CV_NODE_MAP );
tree->write( fs );
cvEndWriteStruct( fs );
}
cvEndWriteStruct( fs );
}
cvEndWriteStruct( fs );
__END__;
}
//===========================================================================
void CvGBTrees::read( CvFileStorage* fs, CvFileNode* node )
{
CV_FUNCNAME( "CvGBTrees::read" );
__BEGIN__;
CvSeqReader reader;
CvFileNode* trees_fnode;
CvMemStorage* storage;
int i, ntrees;
std::string s;
clear();
read_params( fs, node );
if( !data )
EXIT;
base_value = (float)cvReadRealByName( fs, node, "base_value", 0.0 );
class_count = cvReadIntByName( fs, node, "class_count", 1 );
weak = new pCvSeq[class_count];
for (int j=0; j<class_count; ++j)
{
s = "trees_";
s += ToString(j);
trees_fnode = cvGetFileNodeByName( fs, node, s.c_str() );
if( !trees_fnode || !CV_NODE_IS_SEQ(trees_fnode->tag) )
CV_ERROR( CV_StsParseError, "<trees_x> tag is missing" );
cvStartReadSeq( trees_fnode->data.seq, &reader );
ntrees = trees_fnode->data.seq->total;
if( ntrees != params.weak_count )
CV_ERROR( CV_StsUnmatchedSizes,
"The number of trees stored does not match <ntrees> tag value" );
CV_CALL( storage = cvCreateMemStorage() );
weak[j] = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvDTree*), storage );
for( i = 0; i < ntrees; i++ )
{
CvDTree* tree = new CvDTree();
CV_CALL(tree->read( fs, (CvFileNode*)reader.ptr, data ));
CV_NEXT_SEQ_ELEM( reader.seq->elem_size, reader );
cvSeqPush( weak[j], &tree );
}
}
__END__;
}
//===========================================================================
// type in {CV_TRAIN_ERROR, CV_TEST_ERROR}
float
CvGBTrees::calc_error( CvMLData* _data, int type, std::vector<float> *resp )
{
float err = 0;
const CvMat* values = _data->get_values();
const CvMat* response = _data->get_responses();
const CvMat* missing = _data->get_missing();
const CvMat* sample_idx = (type == CV_TEST_ERROR) ?
_data->get_test_sample_idx() :
_data->get_train_sample_idx();
//const CvMat* var_types = _data->get_var_types();
int* sidx = sample_idx ? sample_idx->data.i : 0;
int r_step = CV_IS_MAT_CONT(response->type) ?
1 : response->step / CV_ELEM_SIZE(response->type);
//bool is_classifier =
// var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL;
int sample_count = sample_idx ? sample_idx->cols : 0;
sample_count = (type == CV_TRAIN_ERROR && sample_count == 0) ?
values->rows :
sample_count;
float* pred_resp = 0;
if( resp && (sample_count > 0) )
{
resp->resize( sample_count );
pred_resp = &((*resp)[0]);
}
if ( !problem_type() )
{
for( int i = 0; i < sample_count; i++ )
{
CvMat sample, miss;
int si = sidx ? sidx[i] : i;
cvGetRow( values, &sample, si );
if( missing )
cvGetRow( missing, &miss, si );
float r = (float)predict( &sample, missing ? &miss : 0 );
if( pred_resp )
pred_resp[i] = r;
int d = fabs((double)r - response->data.fl[si*r_step]) <= FLT_EPSILON ? 0 : 1;
err += d;
}
err = sample_count ? err / (float)sample_count * 100 : -FLT_MAX;
}
else
{
for( int i = 0; i < sample_count; i++ )
{
CvMat sample, miss;
int si = sidx ? sidx[i] : i;
cvGetRow( values, &sample, si );
if( missing )
cvGetRow( missing, &miss, si );
float r = (float)predict( &sample, missing ? &miss : 0 );
if( pred_resp )
pred_resp[i] = r;
float d = r - response->data.fl[si*r_step];
err += d*d;
}
err = sample_count ? err / (float)sample_count : -FLT_MAX;
}
return err;
}
#include "mltest.h"
#include <string>
#include <fstream>
#include <iostream>
using namespace std;
class CV_GBTreesTest : public CvTest
{
public:
CV_GBTreesTest();
~CV_GBTreesTest();
protected:
void run(int);
int TestTrainPredict(int test_num);
int TestSaveLoad();
int checkPredictError(int test_num);
int checkLoadSave();
//string model_file_name1;
//string model_file_name2;
char model_file_name1[50];
char model_file_name2[50];
string* datasets;
string data_path;
CvMLData* data;
CvGBTrees* gtb;
vector<float> test_resps1;
vector<float> test_resps2;
};
int _get_len(const CvMat* mat)
{
return (mat->cols > mat->rows) ? mat->cols : mat->rows;
}
CV_GBTreesTest::CV_GBTreesTest() :
CvTest( "CvGBTrees_test",
"all public methods (train, predict, save, load)" )
{
datasets = 0;
data = 0;
gtb = 0;
}
CV_GBTreesTest::~CV_GBTreesTest()
{
if (data)
delete data;
delete[] datasets;
}
int CV_GBTreesTest::TestTrainPredict(int test_num)
{
int code = CvTS::OK;
int weak_count = 200;
float shrinkage = 0.1f;
float subsample_portion = 0.5f;
int max_depth = 5;
bool use_surrogates = true;
int loss_function_type = 0;
switch (test_num)
{
case (1) : loss_function_type = CvGBTrees::SQUARED_LOSS; break;
case (2) : loss_function_type = CvGBTrees::ABSOLUTE_LOSS; break;
case (3) : loss_function_type = CvGBTrees::HUBER_LOSS; break;
case (0) : loss_function_type = CvGBTrees::DEVIANCE_LOSS; break;
default :
{
ts->printf( CvTS::LOG, "Bad test_num value in CV_GBTreesTest::TestTrainPredict(..) function." );
return CvTS::FAIL_BAD_ARG_CHECK;
}
}
int dataset_num = test_num == 0 ? 0 : 1;
if (!data)
{
data = new CvMLData();
data->set_delimiter(',');
if (data->read_csv(datasets[dataset_num].c_str()))
{
ts->printf( CvTS::LOG, "File reading error." );
return CvTS::FAIL_INVALID_TEST_DATA;
}
if (test_num == 0)
{
data->set_response_idx(57);
data->set_var_types("ord[0-56],cat[57]");
}
else
{
data->set_response_idx(13);
data->set_var_types("ord[0-2,4-13],cat[3]");
subsample_portion = 0.7f;
}
int train_sample_count = cvFloor(_get_len(data->get_responses())*0.5f);
CvTrainTestSplit spl( train_sample_count );
data->set_train_test_split( &spl );
}
data->mix_train_and_test_idx();
if (gtb) delete gtb;
gtb = new CvGBTrees();
bool tmp_code = true;
tmp_code = gtb->train(data, CvGBTreesParams(loss_function_type, weak_count,
shrinkage, subsample_portion,
max_depth, use_surrogates));
if (!tmp_code)
{
ts->printf( CvTS::LOG, "Model training was failed.");
return CvTS::FAIL_INVALID_OUTPUT;
}
code = checkPredictError(test_num);
return code;
}
int CV_GBTreesTest::checkPredictError(int test_num)
{
if (!gtb)
return CvTS::FAIL_GENERIC;
float mean[] = {5.3555f, 11.2241f, 11.9212f, 12.0848f};
float sigma[] = {0.362127f, 3.4906f, 3.4906f, 3.64994f};
float current_error = gtb->calc_error(data, CV_TEST_ERROR);
if ( fabs( current_error - mean[test_num]) > 6*sigma[test_num] )
{
ts->printf( CvTS::LOG, "Test error is out of range:\n"
"abs(%f/*curEr*/ - %f/*mean*/) > %f/*6*sigma*/",
current_error, mean[test_num], 6*sigma[test_num] );
return CvTS::FAIL_BAD_ACCURACY;
}
return CvTS::OK;
}
int CV_GBTreesTest::TestSaveLoad()
{
if (!gtb)
return CvTS::FAIL_GENERIC;
tmpnam(model_file_name1);
tmpnam(model_file_name2);
gtb->save(model_file_name1);
gtb->calc_error(data, CV_TEST_ERROR, &test_resps1);
gtb->load(model_file_name1);
gtb->calc_error(data, CV_TEST_ERROR, &test_resps2);
gtb->save(model_file_name2);
return checkLoadSave();
}
int CV_GBTreesTest::checkLoadSave()
{
int code = CvTS::OK;
// 1. compare files
ifstream f1( model_file_name1 ), f2( model_file_name2 );
string s1, s2;
int lineIdx = 0;
CV_Assert( f1.is_open() && f2.is_open() );
for( ; !f1.eof() && !f2.eof(); lineIdx++ )
{
getline( f1, s1 );
getline( f2, s2 );
if( s1.compare(s2) )
{
ts->printf( CvTS::LOG, "first and second saved files differ in %n-line; first %n line: %s; second %n-line: %s",
lineIdx, lineIdx, s1.c_str(), lineIdx, s2.c_str() );
code = CvTS::FAIL_INVALID_OUTPUT;
}
}
if( !f1.eof() || !f2.eof() )
{
ts->printf( CvTS::LOG, "First and second saved files differ in %n-line; first %n line: %s; second %n-line: %s",
lineIdx, lineIdx, s1.c_str(), lineIdx, s2.c_str() );
code = CvTS::FAIL_INVALID_OUTPUT;
}
f1.close();
f2.close();
// delete temporary files
remove( model_file_name1 );
remove( model_file_name2 );
// 2. compare responses
CV_Assert( test_resps1.size() == test_resps2.size() );
vector<float>::const_iterator it1 = test_resps1.begin(), it2 = test_resps2.begin();
for( ; it1 != test_resps1.end(); ++it1, ++it2 )
{
if( fabs(*it1 - *it2) > FLT_EPSILON )
{
ts->printf( CvTS::LOG, "Responses predicted before saving and after loading are different" );
code = CvTS::FAIL_INVALID_OUTPUT;
}
}
return code;
}
void CV_GBTreesTest::run(int)
{
string data_path = string(ts->get_data_path());
datasets = new string[2];
datasets[0] = data_path + string("spambase.data"); /*string("dataset_classification.csv");*/
datasets[1] = data_path + string("housing_.data"); /*string("dataset_regression.csv");*/
int code = CvTS::OK;
for (int i = 0; i < 4; i++)
{
int temp_code = TestTrainPredict(i);
if (temp_code != CvTS::OK)
{
code = temp_code;
break;
}
else if (i==0)
{
temp_code = TestSaveLoad();
if (temp_code != CvTS::OK)
code = temp_code;
delete data;
data = 0;
}
delete gtb;
gtb = 0;
}
delete data;
data = 0;
ts->set_failed_test_info( code );
}
/////////////////////////////////////////////////////////////////////////////
//////////////////// test registration /////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
CV_GBTreesTest gbtrees_test;