lr.cpp 18.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
///////////////////////////////////////////////////////////////////////////////////////
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.

//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.

// This is a implementation of the Logistic Regression algorithm in C++ in OpenCV.

// AUTHOR:
// Rahul Kavi rahulkavi[at]live[at]com

// # You are free to use, change, or redistribute the code in any way you wish for
// # non-commercial purposes, but please maintain the name of the original author.
// # This code comes with no warranty of any kind.

// #
// # You are free to use, change, or redistribute the code in any way you wish for
// # non-commercial purposes, but please maintain the name of the original author.
// # This code comes with no warranty of any kind.

// # Logistic Regression ALGORITHM


//                           License Agreement
//                For Open Source Computer Vision Library

// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.

// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:

//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.

//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.

//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.

// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.

#include "precomp.hpp"

using namespace std;

60 61 62
namespace cv {
namespace ml {

63
class LrParams
64
{
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
public:
    LrParams()
    {
        alpha = 0.001;
        num_iters = 1000;
        norm = LogisticRegression::REG_L2;
        train_method = LogisticRegression::BATCH;
        mini_batch_size = 1;
        term_crit = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, num_iters, alpha);
    }

    double alpha; //!< learning rate.
    int num_iters; //!< number of iterations.
    int norm;
    int train_method;
    int mini_batch_size;
    TermCriteria term_crit;
};
83

84
class LogisticRegressionImpl : public LogisticRegression
85
{
86
public:
87 88

    LogisticRegressionImpl() { }
89 90
    virtual ~LogisticRegressionImpl() {}

91 92 93 94 95 96 97
    CV_IMPL_PROPERTY(double, LearningRate, params.alpha)
    CV_IMPL_PROPERTY(int, Iterations, params.num_iters)
    CV_IMPL_PROPERTY(int, Regularization, params.norm)
    CV_IMPL_PROPERTY(int, TrainMethod, params.train_method)
    CV_IMPL_PROPERTY(int, MiniBatchSize, params.mini_batch_size)
    CV_IMPL_PROPERTY(TermCriteria, TermCriteria, params.term_crit)

98
    virtual bool train( const Ptr<TrainData>& trainData, int=0 );
99
    virtual float predict(InputArray samples, OutputArray results, int flags=0) const;
100 101 102
    virtual void clear();
    virtual void write(FileStorage& fs) const;
    virtual void read(const FileNode& fn);
103
    virtual Mat get_learnt_thetas() const { return learnt_thetas; }
104 105 106
    virtual int getVarCount() const { return learnt_thetas.cols; }
    virtual bool isTrained() const { return !learnt_thetas.empty(); }
    virtual bool isClassifier() const { return true; }
107
    virtual String getDefaultName() const { return "opencv_ml_lr"; }
108
protected:
109 110
    Mat calc_sigmoid(const Mat& data) const;
    double compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
111 112 113
    void compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient );
    Mat batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    Mat mini_batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
114 115
    bool set_label_map(const Mat& _labels_i);
    Mat remap_labels(const Mat& _labels_i, const map<int, int>& lmap) const;
116
protected:
117
    LrParams params;
118
    Mat learnt_thetas;
119 120
    map<int, int> forward_mapper;
    map<int, int> reverse_mapper;
121 122
    Mat labels_o;
    Mat labels_n;
123 124
};

125
Ptr<LogisticRegression> LogisticRegression::create()
126
{
127
    return makePtr<LogisticRegressionImpl>();
128 129
}

130
bool LogisticRegressionImpl::train(const Ptr<TrainData>& trainData, int)
131
{
132 133 134
    // return value
    bool ok = false;

135
    clear();
136 137
    Mat _data_i = trainData->getSamples();
    Mat _labels_i = trainData->getResponses();
138

139
    // check size and type of training data
140
    CV_Assert( !_labels_i.empty() && !_data_i.empty());
141 142
    if(_labels_i.cols != 1)
    {
143
        CV_Error( CV_StsBadArg, "labels should be a column matrix" );
144
    }
145
    if(_data_i.type() != CV_32FC1 || _labels_i.type() != CV_32FC1)
146
    {
147
        CV_Error( CV_StsBadArg, "data and labels must be a floating point matrix" );
148
    }
149 150 151 152
    if(_labels_i.rows != _data_i.rows)
    {
        CV_Error( CV_StsBadArg, "number of rows in data and labels should be equal" );
    }
153

154
    // class labels
155
    set_label_map(_labels_i);
156
    Mat labels_l = remap_labels(_labels_i, this->forward_mapper);
157
    int num_classes = (int) this->forward_mapper.size();
158 159
    if(num_classes < 2)
    {
160
        CV_Error( CV_StsBadArg, "data should have atleast 2 classes" );
161 162
    }

163 164 165
    // add a column of ones to the data (bias/intercept term)
    Mat data_t;
    hconcat( cv::Mat::ones( _data_i.rows, 1, CV_32F ), _data_i, data_t );
166

167 168
    // coefficient matrix (zero-initialized)
    Mat thetas;
169
    Mat init_theta = Mat::zeros(data_t.cols, 1, CV_32F);
170

171
    // fit the model (handles binary and multiclass cases)
172
    Mat new_theta;
173
    Mat labels;
174 175 176
    if(num_classes == 2)
    {
        labels_l.convertTo(labels, CV_32F);
177
        if(this->params.train_method == LogisticRegression::BATCH)
178
            new_theta = batch_gradient_descent(data_t, labels, init_theta);
179
        else
180
            new_theta = mini_batch_gradient_descent(data_t, labels, init_theta);
181 182 183 184 185 186
        thetas = new_theta.t();
    }
    else
    {
        /* take each class and rename classes you will get a theta per class
        as in multi class class scenario, we will have n thetas for n classes */
187 188 189
        thetas.create(num_classes, data_t.cols, CV_32F);
        Mat labels_binary;
        int ii = 0;
190 191
        for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
        {
192 193 194
            // one-vs-rest (OvR) scheme
            labels_binary = (labels_l == it->second)/255;
            labels_binary.convertTo(labels, CV_32F);
195
            if(this->params.train_method == LogisticRegression::BATCH)
196
                new_theta = batch_gradient_descent(data_t, labels, init_theta);
197
            else
198
                new_theta = mini_batch_gradient_descent(data_t, labels, init_theta);
199 200 201 202 203
            hconcat(new_theta.t(), thetas.row(ii));
            ii += 1;
        }
    }

204
    // check that the estimates are stable and finite
205
    this->learnt_thetas = thetas.clone();
206
    if( cvIsNaN( (double)sum(this->learnt_thetas)[0] ) )
207
    {
208
        CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
209
    }
210 211

    // success
212 213 214 215
    ok = true;
    return ok;
}

216
float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, int flags) const
217 218
{
    // check if learnt_mats array is populated
219
    if(!this->isTrained())
220
    {
221
        CV_Error( CV_StsBadArg, "classifier should be trained first" );
222
    }
223

224 225
    // coefficient matrix
    Mat thetas;
226
    if ( learnt_thetas.type() == CV_32F )
227
    {
228 229 230 231 232
        thetas = learnt_thetas;
    }
    else
    {
        this->learnt_thetas.convertTo( thetas, CV_32F );
233 234 235
    }
    CV_Assert(thetas.rows > 0);

236 237 238 239 240 241
    // data samples
    Mat data = samples.getMat();
    if(data.type() != CV_32F)
    {
        CV_Error( CV_StsBadArg, "data must be of floating type" );
    }
242

243 244 245 246
    // add a column of ones to the data (bias/intercept term)
    Mat data_t;
    hconcat( cv::Mat::ones( data.rows, 1, CV_32F ), data, data_t );
    CV_Assert(data_t.cols == thetas.cols);
247

248
    // predict class labels for samples (handles binary and multiclass cases)
249
    Mat labels_c;
250
    Mat pred_m;
251
    Mat temp_pred;
252 253
    if(thetas.rows == 1)
    {
254 255
        // apply sigmoid function
        temp_pred = calc_sigmoid(data_t * thetas.t());
256
        CV_Assert(temp_pred.cols==1);
257
        pred_m = temp_pred.clone();
258

259
        // if greater than 0.5, predict class 0 or predict class 1
260
        temp_pred = (temp_pred > 0.5f) / 255;
261 262 263 264
        temp_pred.convertTo(labels_c, CV_32S);
    }
    else
    {
265 266 267
        // apply sigmoid function
        pred_m.create(data_t.rows, thetas.rows, data.type());
        for(int i = 0; i < thetas.rows; i++)
268 269
        {
            temp_pred = calc_sigmoid(data_t * thetas.row(i).t());
270
            vconcat(temp_pred, pred_m.col(i));
271
        }
272 273 274 275 276

        // predict class with the maximum output
        Point max_loc;
        Mat labels;
        for(int i = 0; i < pred_m.rows; i++)
277 278
        {
            temp_pred = pred_m.row(i);
279
            minMaxLoc( temp_pred, NULL, NULL, NULL, &max_loc );
280 281 282 283
            labels.push_back(max_loc.x);
        }
        labels.convertTo(labels_c, CV_32S);
    }
284 285 286

    // return label of the predicted class. class names can be 1,2,3,...
    Mat pred_labs = remap_labels(labels_c, this->reverse_mapper);
287
    pred_labs.convertTo(pred_labs, CV_32S);
288 289 290 291

    // return either the labels or the raw output
    if ( results.needed() )
    {
292
        if ( flags & StatModel::RAW_OUTPUT )
293 294 295 296 297 298 299 300 301
        {
            pred_m.copyTo( results );
        }
        else
        {
            pred_labs.copyTo(results);
        }
    }

302
    return ( pred_labs.empty() ? 0.f : static_cast<float>(pred_labs.at<int>(0)) );
303 304
}

305
Mat LogisticRegressionImpl::calc_sigmoid(const Mat& data) const
306
{
307 308
    Mat dest;
    exp(-data, dest);
309 310 311
    return 1.0/(1.0+dest);
}

312
double LogisticRegressionImpl::compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
313 314 315 316 317 318
{
    int llambda = 0;
    int m;
    int n;
    double cost = 0;
    double rparameter = 0;
319 320 321 322
    Mat theta_b;
    Mat theta_c;
    Mat d_a;
    Mat d_b;
323 324 325 326 327 328

    m = _data.rows;
    n = _data.cols;

    theta_b = _init_theta(Range(1, n), Range::all());

ippei ito's avatar
ippei ito committed
329
    if (params.norm != REG_DISABLE)
330 331 332 333
    {
        llambda = 1;
    }

334
    if(this->params.norm == LogisticRegression::REG_L1)
335
    {
336
        rparameter = (llambda/(2*m)) * sum(theta_b)[0];
337 338 339 340
    }
    else
    {
        // assuming it to be L2 by default
341
        multiply(theta_b, theta_b, theta_c, 1);
342
        rparameter = (llambda/(2*m)) * sum(theta_c)[0];
343 344
    }

345
    d_a = calc_sigmoid(_data * _init_theta);
346 347
    log(d_a, d_a);
    multiply(d_a, _labels, d_a);
348

349 350
    // use the fact that: log(1 - sigmoid(x)) = log(sigmoid(-x))
    d_b = calc_sigmoid(- _data * _init_theta);
351 352
    log(d_b, d_b);
    multiply(d_b, 1-_labels, d_b);
353

354
    cost = (-1.0/m) * (sum(d_a)[0] + sum(d_b)[0]);
355 356
    cost = cost + rparameter;

357 358 359 360 361
    if(cvIsNaN( cost ) == 1)
    {
        CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
    }

362 363 364
    return cost;
}

365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392

void LogisticRegressionImpl::compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient )
{
    const int m = _data.rows;
    Mat pcal_a, pcal_b, pcal_ab;

    const Mat z = _data * _theta;

    CV_Assert( _gradient.rows == _theta.rows && _gradient.cols == _theta.cols );

    pcal_a = calc_sigmoid(z) - _labels;
    pcal_b = _data(Range::all(), Range(0,1));
    multiply(pcal_a, pcal_b, pcal_ab, 1);

    _gradient.row(0) = ((float)1/m) * sum(pcal_ab)[0];

    //cout<<"for each training data entry"<<endl;
    for(int ii = 1;ii<_gradient.rows;ii++)
    {
        pcal_b = _data(Range::all(), Range(ii,ii+1));
        multiply(pcal_a, pcal_b, pcal_ab, 1);

        _gradient.row(ii) = (1.0/m)*sum(pcal_ab)[0] + (_lambda/m) * _theta.row(ii);
    }
}


Mat LogisticRegressionImpl::batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
393 394 395 396
{
    // implements batch gradient descent
    if(this->params.alpha<=0)
    {
397
        CV_Error( CV_StsBadArg, "check training parameters (learning rate) for the classifier" );
398 399 400 401
    }

    if(this->params.num_iters <= 0)
    {
402
        CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
403 404 405
    }

    int llambda = 0;
406
    int m;
407
    Mat theta_p = _init_theta.clone();
408
    Mat gradient( theta_p.rows, theta_p.cols, theta_p.type() );
409 410
    m = _data.rows;

ippei ito's avatar
ippei ito committed
411
    if (params.norm != REG_DISABLE)
412 413 414 415 416 417
    {
        llambda = 1;
    }

    for(int i = 0;i<this->params.num_iters;i++)
    {
418 419
        // this seems to only be called to ensure that cost is not NaN
        compute_cost(_data, _labels, theta_p);
420

421
        compute_gradient( _data, _labels, theta_p, llambda, gradient );
422 423 424 425 426 427

        theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;
    }
    return theta_p;
}

428
Mat LogisticRegressionImpl::mini_batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
429 430 431
{
    // implements batch gradient descent
    int lambda_l = 0;
432
    int m;
433
    int j = 0;
434
    int size_b = this->params.mini_batch_size;
435

436
    if(this->params.mini_batch_size <= 0 || this->params.alpha == 0)
437
    {
438
        CV_Error( CV_StsBadArg, "check training parameters for the classifier" );
439 440 441 442
    }

    if(this->params.num_iters <= 0)
    {
443
        CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
444 445
    }

446
    Mat theta_p = _init_theta.clone();
447
    Mat gradient( theta_p.rows, theta_p.cols, theta_p.type() );
448 449
    Mat data_d;
    Mat labels_l;
450

ippei ito's avatar
ippei ito committed
451
    if (params.norm != REG_DISABLE)
452 453 454 455
    {
        lambda_l = 1;
    }

456
    for(int i = 0;i<this->params.term_crit.maxCount;i++)
457 458 459 460 461 462 463 464 465 466 467 468 469 470
    {
        if(j+size_b<=_data.rows)
        {
            data_d = _data(Range(j,j+size_b), Range::all());
            labels_l = _labels(Range(j,j+size_b),Range::all());
        }
        else
        {
            data_d = _data(Range(j, _data.rows), Range::all());
            labels_l = _labels(Range(j, _labels.rows),Range::all());
        }

        m = data_d.rows;

471 472
        // this seems to only be called to ensure that cost is not NaN
        compute_cost(data_d, labels_l, theta_p);
473

474
        compute_gradient(data_d, labels_l, theta_p, lambda_l, gradient);
475 476 477

        theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;

478
        j += this->params.mini_batch_size;
479

480 481 482
        // if parsed through all data variables
        if (j >= _data.rows) {
            j = 0;
483 484 485 486 487
        }
    }
    return theta_p;
}

488
bool LogisticRegressionImpl::set_label_map(const Mat &_labels_i)
489
{
490
    // this function creates two maps to map user defined labels to program friendly labels two ways.
491
    int ii = 0;
492
    Mat labels;
493

494 495
    this->labels_o = Mat(0,1, CV_8U);
    this->labels_n = Mat(0,1, CV_8U);
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516

    _labels_i.convertTo(labels, CV_32S);

    for(int i = 0;i<labels.rows;i++)
    {
        this->forward_mapper[labels.at<int>(i)] += 1;
    }

    for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
    {
        this->forward_mapper[it->first] = ii;
        this->labels_o.push_back(it->first);
        this->labels_n.push_back(ii);
        ii += 1;
    }

    for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
    {
        this->reverse_mapper[it->second] = it->first;
    }

517
    return true;
518 519
}

520
Mat LogisticRegressionImpl::remap_labels(const Mat& _labels_i, const map<int, int>& lmap) const
521
{
522
    Mat labels;
523 524
    _labels_i.convertTo(labels, CV_32S);

525
    Mat new_labels = Mat::zeros(labels.rows, labels.cols, labels.type());
526

527
    CV_Assert( !lmap.empty() );
528 529 530

    for(int i =0;i<labels.rows;i++)
    {
531
        new_labels.at<int>(i,0) = lmap.find(labels.at<int>(i,0))->second;
532 533 534 535
    }
    return new_labels;
}

536
void LogisticRegressionImpl::clear()
537 538 539 540 541 542
{
    this->learnt_thetas.release();
    this->labels_o.release();
    this->labels_n.release();
}

543
void LogisticRegressionImpl::write(FileStorage& fs) const
544
{
545 546 547 548 549
    // check if open
    if(fs.isOpened() == 0)
    {
        CV_Error(CV_StsBadArg,"file can't open. Check file path");
    }
550 551 552 553 554 555 556 557 558 559 560 561 562 563
    string desc = "Logisitic Regression Classifier";
    fs<<"classifier"<<desc.c_str();
    fs<<"alpha"<<this->params.alpha;
    fs<<"iterations"<<this->params.num_iters;
    fs<<"norm"<<this->params.norm;
    fs<<"train_method"<<this->params.train_method;
    if(this->params.train_method == LogisticRegression::MINI_BATCH)
    {
        fs<<"mini_batch_size"<<this->params.mini_batch_size;
    }
    fs<<"learnt_thetas"<<this->learnt_thetas;
    fs<<"n_labels"<<this->labels_n;
    fs<<"o_labels"<<this->labels_o;
}
564

565
void LogisticRegressionImpl::read(const FileNode& fn)
566 567 568
{
    // check if empty
    if(fn.empty())
569
    {
570
        CV_Error( CV_StsBadArg, "empty FileNode object" );
571 572
    }

573 574 575 576 577 578 579 580 581
    this->params.alpha = (double)fn["alpha"];
    this->params.num_iters = (int)fn["iterations"];
    this->params.norm = (int)fn["norm"];
    this->params.train_method = (int)fn["train_method"];

    if(this->params.train_method == LogisticRegression::MINI_BATCH)
    {
        this->params.mini_batch_size = (int)fn["mini_batch_size"];
    }
582

583 584 585
    fn["learnt_thetas"] >> this->learnt_thetas;
    fn["o_labels"] >> this->labels_o;
    fn["n_labels"] >> this->labels_n;
586 587 588 589 590 591 592 593

    for(int ii =0;ii<labels_o.rows;ii++)
    {
        this->forward_mapper[labels_o.at<int>(ii,0)] = labels_n.at<int>(ii,0);
        this->reverse_mapper[labels_n.at<int>(ii,0)] = labels_o.at<int>(ii,0);
    }
}

594 595 596
}
}

597
/* End of file. */