///////////////////////////////////////////////////////////////////////////////////////
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.

//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.

// This is an implementation of the Logistic Regression algorithm in C++ in OpenCV.

// AUTHOR:
// Rahul Kavi rahulkavi[at]live[at]com

// # You are free to use, change, or redistribute the code in any way you wish for
// # non-commercial purposes, but please maintain the name of the original author.
// # This code comes with no warranty of any kind.


// # Logistic Regression ALGORITHM


//                           License Agreement
//                For Open Source Computer Vision Library

// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.

// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:

//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.

//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.

//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.

// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.

#include "precomp.hpp"

using namespace std;

namespace cv {
namespace ml {

class LrParams
{
public:
    LrParams()
    {
        alpha = 0.001;
        num_iters = 1000;
        norm = LogisticRegression::REG_L2;
        train_method = LogisticRegression::BATCH;
        mini_batch_size = 1;
        term_crit = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, num_iters, alpha);
    }

    double alpha; //!< learning rate.
    int num_iters; //!< number of iterations.
    int norm; //!< regularization type (REG_L1, REG_L2 or REG_DISABLE).
    int train_method; //!< training method (BATCH or MINI_BATCH).
    int mini_batch_size; //!< number of samples per mini-batch when MINI_BATCH is used.
    TermCriteria term_crit; //!< termination criteria for the iterative solver.
};

class LogisticRegressionImpl : public LogisticRegression
{
public:

    LogisticRegressionImpl() { }
    virtual ~LogisticRegressionImpl() {}

    CV_IMPL_PROPERTY(double, LearningRate, params.alpha)
    CV_IMPL_PROPERTY(int, Iterations, params.num_iters)
    CV_IMPL_PROPERTY(int, Regularization, params.norm)
    CV_IMPL_PROPERTY(int, TrainMethod, params.train_method)
    CV_IMPL_PROPERTY(int, MiniBatchSize, params.mini_batch_size)
    CV_IMPL_PROPERTY(TermCriteria, TermCriteria, params.term_crit)

    virtual bool train( const Ptr<TrainData>& trainData, int=0 );
    virtual float predict(InputArray samples, OutputArray results, int flags=0) const;
    virtual void clear();
    virtual void write(FileStorage& fs) const;
    virtual void read(const FileNode& fn);

    virtual Mat get_learnt_thetas() const { return learnt_thetas; }
    virtual int getVarCount() const { return learnt_thetas.cols; }
    virtual bool isTrained() const { return !learnt_thetas.empty(); }
    virtual bool isClassifier() const { return true; }
    virtual String getDefaultName() const { return "opencv_ml_lr"; }

protected:
    Mat calc_sigmoid(const Mat& data) const;
    double compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    void compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient );
    Mat batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    Mat mini_batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    bool set_label_map(const Mat& _labels_i);
    Mat remap_labels(const Mat& _labels_i, const map<int, int>& lmap) const;

protected:
    LrParams params;
    Mat learnt_thetas;
    map<int, int> forward_mapper;
    map<int, int> reverse_mapper;
    Mat labels_o;
    Mat labels_n;
};

Ptr<LogisticRegression> LogisticRegression::create()
{
    return makePtr<LogisticRegressionImpl>();
}

Ptr<LogisticRegression> LogisticRegression::load(const String& filepath, const String& nodeName)
{
    return Algorithm::load<LogisticRegression>(filepath, nodeName);
}
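
/*
 * Usage sketch (illustrative only, not part of the implementation below): how the
 * classifier is typically driven through the public cv::ml API. The sample data and
 * variable names here are made up for illustration.
 *
 *     Ptr<LogisticRegression> lr = LogisticRegression::create();
 *     lr->setLearningRate(0.001);
 *     lr->setIterations(1000);
 *     lr->setRegularization(LogisticRegression::REG_L2);
 *     lr->setTrainMethod(LogisticRegression::BATCH);
 *
 *     // samples: CV_32FC1, one row per sample; responses: CV_32FC1 column of labels
 *     Mat samples = (Mat_<float>(4, 2) << 1.f, 2.f,  2.f, 3.f,  7.f, 8.f,  8.f, 9.f);
 *     Mat responses = (Mat_<float>(4, 1) << 0.f, 0.f, 1.f, 1.f);
 *     lr->train(TrainData::create(samples, ROW_SAMPLE, responses));
 *
 *     Mat predicted;
 *     lr->predict(samples, predicted);   // CV_32S labels remapped to the original values
 */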


bool LogisticRegressionImpl::train(const Ptr<TrainData>& trainData, int)
{
    CV_TRACE_FUNCTION_SKIP_NESTED();
    // return value
    bool ok = false;

    if (trainData.empty()) {
        return false;
    }
    clear();
    Mat _data_i = trainData->getSamples();
    Mat _labels_i = trainData->getResponses();

    // check size and type of training data
    CV_Assert( !_labels_i.empty() && !_data_i.empty());
    if(_labels_i.cols != 1)
    {
        CV_Error( CV_StsBadArg, "labels should be a column matrix" );
    }
    if(_data_i.type() != CV_32FC1 || _labels_i.type() != CV_32FC1)
    {
        CV_Error( CV_StsBadArg, "data and labels must be a floating point matrix" );
    }
    if(_labels_i.rows != _data_i.rows)
    {
        CV_Error( CV_StsBadArg, "number of rows in data and labels should be equal" );
    }

    // class labels
    set_label_map(_labels_i);
    Mat labels_l = remap_labels(_labels_i, this->forward_mapper);
    int num_classes = (int) this->forward_mapper.size();
    if(num_classes < 2)
    {
        CV_Error( CV_StsBadArg, "data should have at least 2 classes" );
    }
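
    /*
     * Multiclass handling below: for more than two classes a one-vs-rest (OvR) scheme
     * is used, training one binary logistic regression per class, so learnt_thetas
     * ends up with one row of coefficients per class. For two classes a single
     * coefficient row is learnt.
     */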

    // add a column of ones to the data (bias/intercept term)
    Mat data_t;
    hconcat( cv::Mat::ones( _data_i.rows, 1, CV_32F ), _data_i, data_t );

    // coefficient matrix (zero-initialized)
    Mat thetas;
    Mat init_theta = Mat::zeros(data_t.cols, 1, CV_32F);

    // fit the model (handles binary and multiclass cases)
    Mat new_theta;
    Mat labels;
    if(num_classes == 2)
    {
        labels_l.convertTo(labels, CV_32F);
        if(this->params.train_method == LogisticRegression::BATCH)
            new_theta = batch_gradient_descent(data_t, labels, init_theta);
        else
            new_theta = mini_batch_gradient_descent(data_t, labels, init_theta);
        thetas = new_theta.t();
    }
    else
    {
        /* one-vs-rest: relabel the data once per class, so that n classes
        yield n theta vectors, one per class */
        thetas.create(num_classes, data_t.cols, CV_32F);
        Mat labels_binary;
        int ii = 0;
        for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
        {
            // one-vs-rest (OvR) scheme: current class vs. all others
            labels_binary = (labels_l == it->second)/255;
            labels_binary.convertTo(labels, CV_32F);
            if(this->params.train_method == LogisticRegression::BATCH)
                new_theta = batch_gradient_descent(data_t, labels, init_theta);
            else
                new_theta = mini_batch_gradient_descent(data_t, labels, init_theta);
            hconcat(new_theta.t(), thetas.row(ii));
            ii += 1;
        }
    }

    // check that the estimates are stable and finite
    this->learnt_thetas = thetas.clone();
    if( cvIsNaN( (double)sum(this->learnt_thetas)[0] ) )
    {
        CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
    }

    // success
    ok = true;
    return ok;
}

float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, int flags) const
{
    // check if learnt_mats array is populated
    if(!this->isTrained())
    {
        CV_Error( CV_StsBadArg, "classifier should be trained first" );
    }

    // coefficient matrix
    Mat thetas;
    if ( learnt_thetas.type() == CV_32F )
    {
        thetas = learnt_thetas;
    }
    else
    {
        this->learnt_thetas.convertTo( thetas, CV_32F );
    }
    CV_Assert(thetas.rows > 0);

    // data samples
    Mat data = samples.getMat();
    if(data.type() != CV_32F)
    {
        CV_Error( CV_StsBadArg, "data must be of floating type" );
    }

    // add a column of ones to the data (bias/intercept term)
    Mat data_t;
    hconcat( cv::Mat::ones( data.rows, 1, CV_32F ), data, data_t );
    CV_Assert(data_t.cols == thetas.cols);

    // predict class labels for samples (handles binary and multiclass cases)
    Mat labels_c;
    Mat pred_m;
    Mat temp_pred;
    if(thetas.rows == 1)
    {
        // apply sigmoid function
        temp_pred = calc_sigmoid(data_t * thetas.t());
        CV_Assert(temp_pred.cols==1);
        pred_m = temp_pred.clone();

        // predict class 1 if the output is greater than 0.5, otherwise class 0
        temp_pred = (temp_pred > 0.5f) / 255;
        temp_pred.convertTo(labels_c, CV_32S);
    }
    else
    {
        // apply sigmoid function
        pred_m.create(data_t.rows, thetas.rows, data.type());
        for(int i = 0; i < thetas.rows; i++)
        {
            temp_pred = calc_sigmoid(data_t * thetas.row(i).t());
            vconcat(temp_pred, pred_m.col(i));
        }

        // predict the class with the maximum output
        Point max_loc;
        Mat labels;
        for(int i = 0; i < pred_m.rows; i++)
        {
            temp_pred = pred_m.row(i);
            minMaxLoc( temp_pred, NULL, NULL, NULL, &max_loc );
            labels.push_back(max_loc.x);
        }
        labels.convertTo(labels_c, CV_32S);
    }

    // return label of the predicted class. class names can be 1,2,3,...
    Mat pred_labs = remap_labels(labels_c, this->reverse_mapper);
    pred_labs.convertTo(pred_labs, CV_32S);

    // return either the labels or the raw output
    if ( results.needed() )
    {
        if ( flags & StatModel::RAW_OUTPUT )
        {
            pred_m.copyTo( results );
        }
        else
        {
            pred_labs.copyTo(results);
        }
    }

    return ( pred_labs.empty() ? 0.f : static_cast<float>(pred_labs.at<int>(0)) );
}
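
// Element-wise logistic (sigmoid) function used throughout: sigma(x) = 1 / (1 + exp(-x)).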

Mat LogisticRegressionImpl::calc_sigmoid(const Mat& data) const
{
    CV_TRACE_FUNCTION();
    Mat dest;
    exp(-data, dest);
    return 1.0/(1.0+dest);
}
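
/*
 * compute_cost below evaluates the regularized negative log-likelihood:
 *
 *     J(theta) = -(1/m) * sum_i [ y_i*log(h(x_i)) + (1 - y_i)*log(1 - h(x_i)) ] + R(theta)
 *
 * where h(x) = sigmoid(x * theta), m is the number of samples, and R(theta) is
 * (lambda/(2m)) * sum_j theta_j^2 for L2 (or the plain sum of theta_j for L1, as coded),
 * with the bias term theta_0 excluded from regularization.
 */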

double LogisticRegressionImpl::compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    CV_TRACE_FUNCTION();
    float llambda = 0;                   /*changed llambda from int to float to solve issue #7924*/
    int m;
    int n;
    double cost = 0;
    double rparameter = 0;
    Mat theta_b;
    Mat theta_c;
    Mat d_a;
    Mat d_b;

    m = _data.rows;
    n = _data.cols;

    theta_b = _init_theta(Range(1, n), Range::all());

    if (params.norm != REG_DISABLE)
    {
        llambda = 1;
    }

    if(this->params.norm == LogisticRegression::REG_L1)
    {
        rparameter = (llambda/(2*m)) * sum(theta_b)[0];
    }
    else
    {
        // assuming it to be L2 by default
        multiply(theta_b, theta_b, theta_c, 1);
        rparameter = (llambda/(2*m)) * sum(theta_c)[0];
    }

    d_a = calc_sigmoid(_data * _init_theta);
    log(d_a, d_a);
    multiply(d_a, _labels, d_a);

    // use the fact that: log(1 - sigmoid(x)) = log(sigmoid(-x))
    d_b = calc_sigmoid(- _data * _init_theta);
    log(d_b, d_b);
    multiply(d_b, 1-_labels, d_b);

    cost = (-1.0/m) * (sum(d_a)[0] + sum(d_b)[0]);
    cost = cost + rparameter;

    if(cvIsNaN( cost ) == 1)
    {
        CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
    }

    return cost;
}
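
/*
 * Gradient of the regularized cost, as evaluated by compute_gradient below:
 *
 *     grad_0 = (1/m) * sum_i (h(x_i) - y_i) * x_i0                        (bias, unregularized)
 *     grad_j = (1/m) * sum_i (h(x_i) - y_i) * x_ij + (lambda/m) * theta_j (j >= 1)
 *
 * The loop over the feature columns j >= 1 is parallelized with the helper struct below.
 */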

struct LogisticRegressionImpl_ComputeDradient_Impl : ParallelLoopBody
{
    const Mat* data;
    const Mat* theta;
    const Mat* pcal_a;
    Mat* gradient;
    double lambda;

    LogisticRegressionImpl_ComputeDradient_Impl(const Mat& _data, const Mat &_theta, const Mat& _pcal_a, const double _lambda, Mat & _gradient)
        : data(&_data)
        , theta(&_theta)
        , pcal_a(&_pcal_a)
        , gradient(&_gradient)
        , lambda(_lambda)
    {

    }

    void operator()(const cv::Range& r) const
    {
        const Mat& _data  = *data;
        const Mat &_theta = *theta;
        Mat & _gradient   = *gradient;
        const Mat & _pcal_a = *pcal_a;
        const int m = _data.rows;
        Mat pcal_ab;

        for (int ii = r.start; ii<r.end; ii++)
        {
            Mat pcal_b = _data(Range::all(), Range(ii,ii+1));
            multiply(_pcal_a, pcal_b, pcal_ab, 1);

            _gradient.row(ii) = (1.0/m)*sum(pcal_ab)[0] + (lambda/m) * _theta.row(ii);
        }
    }
};

void LogisticRegressionImpl::compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient )
{
    CV_TRACE_FUNCTION();
    const int m = _data.rows;
    Mat pcal_a, pcal_b, pcal_ab;

    const Mat z = _data * _theta;

    CV_Assert( _gradient.rows == _theta.rows && _gradient.cols == _theta.cols );

    pcal_a = calc_sigmoid(z) - _labels;
    pcal_b = _data(Range::all(), Range(0,1));
    multiply(pcal_a, pcal_b, pcal_ab, 1);

    // bias component of the gradient (no regularization on the intercept)
    _gradient.row(0) = ((float)1/m) * sum(pcal_ab)[0];

    // compute the remaining gradient components, one feature column per iteration, in parallel
    LogisticRegressionImpl_ComputeDradient_Impl invoker(_data, _theta, pcal_a, _lambda, _gradient);
    cv::parallel_for_(cv::Range(1, _gradient.rows), invoker);
}
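
// Gradient descent update used by both solvers below:
//     theta := theta - (alpha / m) * grad(theta)
// where alpha is the learning rate and m is the number of samples in the current (mini-)batch.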


Mat LogisticRegressionImpl::batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    CV_TRACE_FUNCTION();
    // implements batch gradient descent
    if(this->params.alpha<=0)
    {
        CV_Error( CV_StsBadArg, "check training parameters (learning rate) for the classifier" );
    }

    if(this->params.num_iters <= 0)
    {
        CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
    }

    int llambda = 0;
    int m;
    Mat theta_p = _init_theta.clone();
    Mat gradient( theta_p.rows, theta_p.cols, theta_p.type() );
    m = _data.rows;

    if (params.norm != REG_DISABLE)
    {
        llambda = 1;
    }

    for(int i = 0;i<this->params.num_iters;i++)
    {
        // this seems to only be called to ensure that cost is not NaN
        compute_cost(_data, _labels, theta_p);

        compute_gradient( _data, _labels, theta_p, llambda, gradient );

        theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;
    }
    return theta_p;
}

Mat LogisticRegressionImpl::mini_batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    // implements mini-batch gradient descent
    int lambda_l = 0;
    int m;
    int j = 0;
    int size_b = this->params.mini_batch_size;

    if(this->params.mini_batch_size <= 0 || this->params.alpha == 0)
    {
        CV_Error( CV_StsBadArg, "check training parameters for the classifier" );
    }

    if(this->params.num_iters <= 0)
    {
        CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
    }

    Mat theta_p = _init_theta.clone();
    Mat gradient( theta_p.rows, theta_p.cols, theta_p.type() );
    Mat data_d;
    Mat labels_l;

    if (params.norm != REG_DISABLE)
    {
        lambda_l = 1;
    }

    for(int i = 0;i<this->params.term_crit.maxCount;i++)
    {
        if(j+size_b<=_data.rows)
        {
            data_d = _data(Range(j,j+size_b), Range::all());
            labels_l = _labels(Range(j,j+size_b),Range::all());
        }
        else
        {
            data_d = _data(Range(j, _data.rows), Range::all());
            labels_l = _labels(Range(j, _labels.rows),Range::all());
        }

        m = data_d.rows;

        // this seems to only be called to ensure that cost is not NaN
        compute_cost(data_d, labels_l, theta_p);

        compute_gradient(data_d, labels_l, theta_p, lambda_l, gradient);

        theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;

        j += this->params.mini_batch_size;

        // start over from the beginning once all samples have been used
        if (j >= _data.rows) {
            j = 0;
        }
    }
    return theta_p;
}
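
// Example of the mapping built by set_label_map below (labels are hypothetical):
// user labels {4, 7, 9} become internal labels {0, 1, 2}; reverse_mapper restores the
// original values after prediction.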

bool LogisticRegressionImpl::set_label_map(const Mat &_labels_i)
{
    // builds forward and reverse maps between user-defined labels and 0-based internal labels
    int ii = 0;
    Mat labels;

    this->labels_o = Mat(0,1, CV_8U);
    this->labels_n = Mat(0,1, CV_8U);

    _labels_i.convertTo(labels, CV_32S);

    for(int i = 0;i<labels.rows;i++)
    {
        this->forward_mapper[labels.at<int>(i)] += 1;
    }

    for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
    {
        this->forward_mapper[it->first] = ii;
        this->labels_o.push_back(it->first);
        this->labels_n.push_back(ii);
        ii += 1;
    }

    for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
    {
        this->reverse_mapper[it->second] = it->first;
    }

    return true;
}

Mat LogisticRegressionImpl::remap_labels(const Mat& _labels_i, const map<int, int>& lmap) const
{
    Mat labels;
    _labels_i.convertTo(labels, CV_32S);

    Mat new_labels = Mat::zeros(labels.rows, labels.cols, labels.type());

    CV_Assert( !lmap.empty() );

    for(int i =0;i<labels.rows;i++)
    {
        new_labels.at<int>(i,0) = lmap.find(labels.at<int>(i,0))->second;
    }
    return new_labels;
}

void LogisticRegressionImpl::clear()
{
    this->learnt_thetas.release();
    this->labels_o.release();
    this->labels_n.release();
}

void LogisticRegressionImpl::write(FileStorage& fs) const
{
    // check if the FileStorage is open
    if(fs.isOpened() == 0)
    {
        CV_Error(CV_StsBadArg,"file can't be opened. Check the file path");
    }
    writeFormat(fs);
    string desc = "Logistic Regression Classifier";
    fs<<"classifier"<<desc.c_str();
    fs<<"alpha"<<this->params.alpha;
    fs<<"iterations"<<this->params.num_iters;
    fs<<"norm"<<this->params.norm;
    fs<<"train_method"<<this->params.train_method;
    if(this->params.train_method == LogisticRegression::MINI_BATCH)
    {
        fs<<"mini_batch_size"<<this->params.mini_batch_size;
    }
    fs<<"learnt_thetas"<<this->learnt_thetas;
    fs<<"n_labels"<<this->labels_n;
    fs<<"o_labels"<<this->labels_o;
}

void LogisticRegressionImpl::read(const FileNode& fn)
{
    // check if empty
    if(fn.empty())
    {
        CV_Error( CV_StsBadArg, "empty FileNode object" );
    }

    this->params.alpha = (double)fn["alpha"];
    this->params.num_iters = (int)fn["iterations"];
    this->params.norm = (int)fn["norm"];
    this->params.train_method = (int)fn["train_method"];

    if(this->params.train_method == LogisticRegression::MINI_BATCH)
    {
        this->params.mini_batch_size = (int)fn["mini_batch_size"];
    }

    fn["learnt_thetas"] >> this->learnt_thetas;
    fn["o_labels"] >> this->labels_o;
    fn["n_labels"] >> this->labels_n;

    for(int ii =0;ii<labels_o.rows;ii++)
    {
        this->forward_mapper[labels_o.at<int>(ii,0)] = labels_n.at<int>(ii,0);
        this->reverse_mapper[labels_n.at<int>(ii,0)] = labels_o.at<int>(ii,0);
    }
}

}
}

/* End of file. */