///////////////////////////////////////////////////////////////////////////////////////
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.

//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.

// This is an implementation of the Logistic Regression algorithm in C++ for OpenCV.

// AUTHOR:
// Rahul Kavi rahulkavi[at]live[at]com

// # You are free to use, change, or redistribute the code in any way you wish for
// # non-commercial purposes, but please maintain the name of the original author.
// # This code comes with no warranty of any kind.

// # Logistic Regression ALGORITHM


//                           License Agreement
//                For Open Source Computer Vision Library

// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.

// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:

//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.

//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.

//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.

// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.

#include "precomp.hpp"

using namespace std;

namespace cv {
namespace ml {

class LrParams
{
public:
    LrParams()
    {
        alpha = 0.001;
        num_iters = 1000;
        norm = LogisticRegression::REG_L2;
        train_method = LogisticRegression::BATCH;
        mini_batch_size = 1;
        term_crit = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, num_iters, alpha);
    }

    double alpha; //!< learning rate.
    int num_iters; //!< number of iterations.
    int norm; //!< regularization kind: REG_L1, REG_L2 or REG_DISABLE.
    int train_method; //!< optimizer: BATCH or MINI_BATCH.
    int mini_batch_size; //!< number of samples per mini-batch (MINI_BATCH only).
    TermCriteria term_crit; //!< termination criteria.
};
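
// Each LrParams field is exposed on the public LogisticRegression interface
// through the CV_IMPL_PROPERTY get/set pairs declared below (for example,
// alpha is reachable as setLearningRate()/getLearningRate()).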

class LogisticRegressionImpl : public LogisticRegression
{
public:

    LogisticRegressionImpl() { }
    virtual ~LogisticRegressionImpl() {}

    CV_IMPL_PROPERTY(double, LearningRate, params.alpha)
    CV_IMPL_PROPERTY(int, Iterations, params.num_iters)
    CV_IMPL_PROPERTY(int, Regularization, params.norm)
    CV_IMPL_PROPERTY(int, TrainMethod, params.train_method)
    CV_IMPL_PROPERTY(int, MiniBatchSize, params.mini_batch_size)
    CV_IMPL_PROPERTY(TermCriteria, TermCriteria, params.term_crit)

    virtual bool train( const Ptr<TrainData>& trainData, int=0 );
    virtual float predict(InputArray samples, OutputArray results, int flags=0) const;
    virtual void clear();
    virtual void write(FileStorage& fs) const;
    virtual void read(const FileNode& fn);

    virtual Mat get_learnt_thetas() const { return learnt_thetas; }
    virtual int getVarCount() const { return learnt_thetas.cols; }
    virtual bool isTrained() const { return !learnt_thetas.empty(); }
    virtual bool isClassifier() const { return true; }
    virtual String getDefaultName() const { return "opencv_ml_lr"; }

protected:
    Mat calc_sigmoid(const Mat& data) const;
    double compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    void compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient );
    Mat batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    Mat mini_batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    bool set_label_map(const Mat& _labels_i);
    Mat remap_labels(const Mat& _labels_i, const map<int, int>& lmap) const;

protected:
    LrParams params;
    Mat learnt_thetas; //!< learned coefficients, one row per trained (sub-)classifier
    map<int, int> forward_mapper; //!< user label -> contiguous internal label
    map<int, int> reverse_mapper; //!< contiguous internal label -> user label
    Mat labels_o; //!< original (user) labels
    Mat labels_n; //!< normalized (internal) labels
};

Ptr<LogisticRegression> LogisticRegression::create()
{
    return makePtr<LogisticRegressionImpl>();
}
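
// A minimal usage sketch of the public API (illustrative only: `data`,
// `labels` and `test_data` are assumed to be CV_32F matrices supplied by
// the caller, one sample per row, labels as a single column):
//
//   Ptr<LogisticRegression> lr = LogisticRegression::create();
//   lr->setLearningRate(0.001);
//   lr->setIterations(1000);
//   lr->setRegularization(LogisticRegression::REG_L2);
//   lr->setTrainMethod(LogisticRegression::BATCH);
//   lr->train(TrainData::create(data, ROW_SAMPLE, labels));
//
//   Mat responses;
//   lr->predict(test_data, responses); // one predicted label per input row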

bool LogisticRegressionImpl::train(const Ptr<TrainData>& trainData, int)
{
    // return value
    bool ok = false;

    clear();
    Mat _data_i = trainData->getSamples();
    Mat _labels_i = trainData->getResponses();

    // check size and type of training data
    CV_Assert( !_labels_i.empty() && !_data_i.empty());
    if(_labels_i.cols != 1)
    {
        CV_Error( CV_StsBadArg, "labels should be a column matrix" );
    }
    if(_data_i.type() != CV_32FC1 || _labels_i.type() != CV_32FC1)
    {
        CV_Error( CV_StsBadArg, "data and labels must be floating-point matrices" );
    }
    if(_labels_i.rows != _data_i.rows)
    {
        CV_Error( CV_StsBadArg, "number of rows in data and labels should be equal" );
    }

    // class labels
    set_label_map(_labels_i);
    Mat labels_l = remap_labels(_labels_i, this->forward_mapper);
    int num_classes = (int) this->forward_mapper.size();
    if(num_classes < 2)
    {
        CV_Error( CV_StsBadArg, "data should have at least 2 classes" );
    }

    // add a column of ones to the data (bias/intercept term)
    Mat data_t;
    hconcat( cv::Mat::ones( _data_i.rows, 1, CV_32F ), _data_i, data_t );

    // coefficient matrix (zero-initialized)
    Mat thetas;
    Mat init_theta = Mat::zeros(data_t.cols, 1, CV_32F);

    // fit the model (handles binary and multiclass cases)
    Mat new_theta;
    Mat labels;
    if(num_classes == 2)
    {
        labels_l.convertTo(labels, CV_32F);
        if(this->params.train_method == LogisticRegression::BATCH)
            new_theta = batch_gradient_descent(data_t, labels, init_theta);
        else
            new_theta = mini_batch_gradient_descent(data_t, labels, init_theta);
        thetas = new_theta.t();
    }
    else
    {
        /* in the multiclass case each class is trained against all the others
        (one-vs-rest), so we learn one theta row per class */
        thetas.create(num_classes, data_t.cols, CV_32F);
        Mat labels_binary;
        int ii = 0;
        for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
        {
            // one-vs-rest (OvR) scheme
            labels_binary = (labels_l == it->second)/255;
            labels_binary.convertTo(labels, CV_32F);
            if(this->params.train_method == LogisticRegression::BATCH)
                new_theta = batch_gradient_descent(data_t, labels, init_theta);
            else
                new_theta = mini_batch_gradient_descent(data_t, labels, init_theta);
            hconcat(new_theta.t(), thetas.row(ii));
            ii += 1;
        }
    }

    // check that the estimates are stable and finite
    this->learnt_thetas = thetas.clone();
    if( cvIsNaN( (double)sum(this->learnt_thetas)[0] ) )
    {
        CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
    }

    // success
    ok = true;
    return ok;
}
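
// Shape of the result: for k classes and d input features, learnt_thetas is
// 1 x (d+1) when k == 2 and k x (d+1) otherwise, the extra column being the
// bias/intercept coefficient.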

float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, int flags) const
{
    // check that the learnt_thetas matrix is populated
    if(!this->isTrained())
    {
        CV_Error( CV_StsBadArg, "classifier should be trained first" );
    }

    // coefficient matrix
    Mat thetas;
    if ( learnt_thetas.type() == CV_32F )
    {
        thetas = learnt_thetas;
    }
    else
    {
        this->learnt_thetas.convertTo( thetas, CV_32F );
    }
    CV_Assert(thetas.rows > 0);

    // data samples
    Mat data = samples.getMat();
    if(data.type() != CV_32F)
    {
        CV_Error( CV_StsBadArg, "data must be of floating-point type" );
    }

    // add a column of ones to the data (bias/intercept term)
    Mat data_t;
    hconcat( cv::Mat::ones( data.rows, 1, CV_32F ), data, data_t );
    CV_Assert(data_t.cols == thetas.cols);

    // predict class labels for samples (handles binary and multiclass cases)
    Mat labels_c;
    Mat pred_m;
    Mat temp_pred;
    if(thetas.rows == 1)
    {
        // apply sigmoid function
        temp_pred = calc_sigmoid(data_t * thetas.t());
        CV_Assert(temp_pred.cols==1);
        pred_m = temp_pred.clone();

        // predict class 1 if the output is greater than 0.5, otherwise class 0
        temp_pred = (temp_pred > 0.5f) / 255;
        temp_pred.convertTo(labels_c, CV_32S);
    }
    else
    {
        // apply sigmoid function to each one-vs-rest model
        pred_m.create(data_t.rows, thetas.rows, data.type());
        for(int i = 0; i < thetas.rows; i++)
        {
            temp_pred = calc_sigmoid(data_t * thetas.row(i).t());
            vconcat(temp_pred, pred_m.col(i));
        }

        // predict the class with the maximum output
        Point max_loc;
        Mat labels;
        for(int i = 0; i < pred_m.rows; i++)
        {
            temp_pred = pred_m.row(i);
            minMaxLoc( temp_pred, NULL, NULL, NULL, &max_loc );
            labels.push_back(max_loc.x);
        }
        labels.convertTo(labels_c, CV_32S);
    }

    // map predicted indices back to the user's original label values (1, 2, 3, ...)
    Mat pred_labs = remap_labels(labels_c, this->reverse_mapper);
    pred_labs.convertTo(pred_labs, CV_32S);

    // return either the labels or the raw output
    if ( results.needed() )
    {
        if ( flags & StatModel::RAW_OUTPUT )
        {
            pred_m.copyTo( results );
        }
        else
        {
            pred_labs.copyTo(results);
        }
    }

    return ( pred_labs.empty() ? 0.f : static_cast<float>(pred_labs.at<int>(0)) );
}

Mat LogisticRegressionImpl::calc_sigmoid(const Mat& data) const
{
    // element-wise logistic sigmoid: 1 / (1 + exp(-x))
    Mat dest;
    exp(-data, dest);
    return 1.0/(1.0+dest);
}
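
// compute_cost() below evaluates the regularized negative log-likelihood
//
//   J(theta) = -(1/m) * sum_i[ y_i*log(h_i) + (1 - y_i)*log(1 - h_i) ] + r
//
// where h_i = sigmoid(x_i . theta), m is the number of samples, and, as
// implemented, the regularization term r is (lambda/(2m)) * sum_j(theta_j)
// for REG_L1 or (lambda/(2m)) * sum_j(theta_j^2) for REG_L2, skipping the
// bias term theta_0.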

double LogisticRegressionImpl::compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    double llambda = 0; // regularization strength (double to avoid integer division below)
    int m;
    int n;
    double cost = 0;
    double rparameter = 0;
    Mat theta_b;
    Mat theta_c;
    Mat d_a;
    Mat d_b;

    m = _data.rows;
    n = _data.cols;

    theta_b = _init_theta(Range(1, n), Range::all());

    if (params.norm != REG_DISABLE)
    {
        llambda = 1;
    }

    if(this->params.norm == LogisticRegression::REG_L1)
    {
        rparameter = (llambda/(2*m)) * sum(theta_b)[0];
    }
    else
    {
        // assuming it to be L2 by default
        multiply(theta_b, theta_b, theta_c, 1);
        rparameter = (llambda/(2*m)) * sum(theta_c)[0];
    }

    d_a = calc_sigmoid(_data * _init_theta);
    log(d_a, d_a);
    multiply(d_a, _labels, d_a);

    // use the fact that: log(1 - sigmoid(x)) = log(sigmoid(-x))
    d_b = calc_sigmoid(- _data * _init_theta);
    log(d_b, d_b);
    multiply(d_b, 1-_labels, d_b);

    cost = (-1.0/m) * (sum(d_a)[0] + sum(d_b)[0]);
    cost = cost + rparameter;

    if(cvIsNaN( cost ) == 1)
    {
        CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
    }

    return cost;
}
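
// The gradient computed by the two helpers below is
//
//   dJ/dtheta_j = (1/m) * sum_i( (h_i - y_i) * x_ij ) + (lambda/m) * theta_j
//
// with no regularization term for the bias row j = 0.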

// parallel body that fills gradient rows 1..n-1; row 0 (the bias, which is
// not regularized) is handled directly in compute_gradient()
struct LogisticRegressionImpl_ComputeGradient_Impl : ParallelLoopBody
{
    const Mat* data;
    const Mat* theta;
    const Mat* pcal_a;
    Mat* gradient;
    double lambda;

    LogisticRegressionImpl_ComputeGradient_Impl(const Mat& _data, const Mat &_theta, const Mat& _pcal_a, const double _lambda, Mat & _gradient)
        : data(&_data)
        , theta(&_theta)
        , pcal_a(&_pcal_a)
        , gradient(&_gradient)
        , lambda(_lambda)
    {

    }

    void operator()(const cv::Range& r) const
    {
        const Mat& _data  = *data;
        const Mat &_theta = *theta;
        Mat & _gradient   = *gradient;
        const Mat & _pcal_a = *pcal_a;
        const int m = _data.rows;
        Mat pcal_ab;

        for (int ii = r.start; ii<r.end; ii++)
        {
            Mat pcal_b = _data(Range::all(), Range(ii,ii+1));
            multiply(_pcal_a, pcal_b, pcal_ab, 1);

            _gradient.row(ii) = (1.0/m)*sum(pcal_ab)[0] + (lambda/m) * _theta.row(ii);
        }
    }
};

void LogisticRegressionImpl::compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient )
{
    const int m = _data.rows;
    Mat pcal_a, pcal_b, pcal_ab;

    const Mat z = _data * _theta;

    CV_Assert( _gradient.rows == _theta.rows && _gradient.cols == _theta.cols );

    pcal_a = calc_sigmoid(z) - _labels;
    pcal_b = _data(Range::all(), Range(0,1));
    multiply(pcal_a, pcal_b, pcal_ab, 1);

    _gradient.row(0) = ((float)1/m) * sum(pcal_ab)[0];

    // compute the remaining gradient rows (1..n-1) in parallel
    LogisticRegressionImpl_ComputeGradient_Impl invoker(_data, _theta, pcal_a, _lambda, _gradient);
    cv::parallel_for_(cv::Range(1, _gradient.rows), invoker);
}


Mat LogisticRegressionImpl::batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    // implements batch gradient descent
    if(this->params.alpha<=0)
    {
        CV_Error( CV_StsBadArg, "check training parameters (learning rate) for the classifier" );
    }

    if(this->params.num_iters <= 0)
    {
        CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
    }

    int llambda = 0;
    int m;
    Mat theta_p = _init_theta.clone();
    Mat gradient( theta_p.rows, theta_p.cols, theta_p.type() );
    m = _data.rows;

    if (params.norm != REG_DISABLE)
    {
        llambda = 1;
    }

    for(int i = 0;i<this->params.num_iters;i++)
    {
        // this seems to only be called to ensure that cost is not NaN
        compute_cost(_data, _labels, theta_p);

        compute_gradient( _data, _labels, theta_p, llambda, gradient );

        theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;
    }
    return theta_p;
}
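
// Each iteration above applies the plain gradient-descent update
//
//   theta := theta - (alpha/m) * gradient
//
// for a fixed number of iterations; no convergence test is performed.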

Mat LogisticRegressionImpl::mini_batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    // implements mini-batch gradient descent
    int lambda_l = 0;
    int m;
    int j = 0;
    int size_b = this->params.mini_batch_size;

    if(this->params.mini_batch_size <= 0 || this->params.alpha == 0)
    {
        CV_Error( CV_StsBadArg, "check training parameters for the classifier" );
    }

    if(this->params.num_iters <= 0)
    {
        CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
    }

    Mat theta_p = _init_theta.clone();
    Mat gradient( theta_p.rows, theta_p.cols, theta_p.type() );
    Mat data_d;
    Mat labels_l;

    if (params.norm != REG_DISABLE)
    {
        lambda_l = 1;
    }

    for(int i = 0;i<this->params.term_crit.maxCount;i++)
    {
        // take the next mini-batch (the last batch may be smaller)
        if(j+size_b<=_data.rows)
        {
            data_d = _data(Range(j,j+size_b), Range::all());
            labels_l = _labels(Range(j,j+size_b),Range::all());
        }
        else
        {
            data_d = _data(Range(j, _data.rows), Range::all());
            labels_l = _labels(Range(j, _labels.rows),Range::all());
        }

        m = data_d.rows;

        // this seems to only be called to ensure that cost is not NaN
        compute_cost(data_d, labels_l, theta_p);

        compute_gradient(data_d, labels_l, theta_p, lambda_l, gradient);

        theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;

        j += this->params.mini_batch_size;

        // wrap around once all training samples have been used
        if (j >= _data.rows) {
            j = 0;
        }
    }
    return theta_p;
}
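
// A sketch of selecting this solver through the public API (illustrative;
// the values are arbitrary):
//
//   Ptr<LogisticRegression> lr = LogisticRegression::create();
//   lr->setTrainMethod(LogisticRegression::MINI_BATCH);
//   lr->setMiniBatchSize(10); // samples per gradient step
//   lr->setIterations(10000); // counts mini-batch steps, not passes over the data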

bool LogisticRegressionImpl::set_label_map(const Mat &_labels_i)
{
    // this function creates two maps to convert between user-defined labels
    // and contiguous, program-friendly labels (and back again)
    int ii = 0;
    Mat labels;

    this->labels_o = Mat(0,1, CV_8U);
    this->labels_n = Mat(0,1, CV_8U);

    _labels_i.convertTo(labels, CV_32S);

    for(int i = 0;i<labels.rows;i++)
    {
        this->forward_mapper[labels.at<int>(i)] += 1;
    }

    for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
    {
        this->forward_mapper[it->first] = ii;
        this->labels_o.push_back(it->first);
        this->labels_n.push_back(ii);
        ii += 1;
    }

    for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
    {
        this->reverse_mapper[it->second] = it->first;
    }

    return true;
}
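
// Example of the mapping with hypothetical labels: for responses
// {4, 4, -1, 9}, forward_mapper becomes {-1->0, 4->1, 9->2} and
// reverse_mapper its inverse, so training runs on {0, 1, 2} while
// predict() reports the original label values.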

Mat LogisticRegressionImpl::remap_labels(const Mat& _labels_i, const map<int, int>& lmap) const
{
    Mat labels;
    _labels_i.convertTo(labels, CV_32S);

    Mat new_labels = Mat::zeros(labels.rows, labels.cols, labels.type());

    CV_Assert( !lmap.empty() );

    for(int i = 0;i<labels.rows;i++)
    {
        new_labels.at<int>(i,0) = lmap.find(labels.at<int>(i,0))->second;
    }
    return new_labels;
}

void LogisticRegressionImpl::clear()
{
    this->learnt_thetas.release();
    this->labels_o.release();
    this->labels_n.release();
}

void LogisticRegressionImpl::write(FileStorage& fs) const
{
    // check if open
    if(fs.isOpened() == 0)
    {
        CV_Error(CV_StsBadArg,"file can't be opened; check the file path");
    }
    writeFormat(fs);
    string desc = "Logistic Regression Classifier";
    fs<<"classifier"<<desc.c_str();
    fs<<"alpha"<<this->params.alpha;
    fs<<"iterations"<<this->params.num_iters;
    fs<<"norm"<<this->params.norm;
    fs<<"train_method"<<this->params.train_method;
    if(this->params.train_method == LogisticRegression::MINI_BATCH)
    {
        fs<<"mini_batch_size"<<this->params.mini_batch_size;
    }
    fs<<"learnt_thetas"<<this->learnt_thetas;
    fs<<"n_labels"<<this->labels_n;
    fs<<"o_labels"<<this->labels_o;
}
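
// A trained model can be persisted and restored through the Algorithm
// interface, which drives the write()/read() methods here (illustrative;
// "lr.xml" is a made-up path):
//
//   lr->save("lr.xml");
//   Ptr<LogisticRegression> lr2 = Algorithm::load<LogisticRegression>("lr.xml");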

void LogisticRegressionImpl::read(const FileNode& fn)
{
    // check if empty
    if(fn.empty())
    {
        CV_Error( CV_StsBadArg, "empty FileNode object" );
    }

    this->params.alpha = (double)fn["alpha"];
    this->params.num_iters = (int)fn["iterations"];
    this->params.norm = (int)fn["norm"];
    this->params.train_method = (int)fn["train_method"];

    if(this->params.train_method == LogisticRegression::MINI_BATCH)
    {
        this->params.mini_batch_size = (int)fn["mini_batch_size"];
    }

    fn["learnt_thetas"] >> this->learnt_thetas;
    fn["o_labels"] >> this->labels_o;
    fn["n_labels"] >> this->labels_n;

    for(int ii =0;ii<labels_o.rows;ii++)
    {
        this->forward_mapper[labels_o.at<int>(ii,0)] = labels_n.at<int>(ii,0);
        this->reverse_mapper[labels_n.at<int>(ii,0)] = labels_o.at<int>(ii,0);
    }
}

}
}

/* End of file. */