///////////////////////////////////////////////////////////////////////////////////////
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.

//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.

// This is an implementation of the Logistic Regression algorithm in C++ for OpenCV.

// AUTHOR:
// Rahul Kavi rahulkavi[at]live[at]com

// # You are free to use, change, or redistribute the code in any way you wish for
// # non-commercial purposes, but please maintain the name of the original author.
// # This code comes with no warranty of any kind.

// # Logistic Regression ALGORITHM


//                           License Agreement
//                For Open Source Computer Vision Library

// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.

// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:

//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.

//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.

//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.

// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
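
// ---------------------------------------------------------------------------
// Minimal usage sketch (illustrative only, not compiled as part of this file).
// It assumes the LogisticRegression / TrainData interfaces declared in the
// OpenCV 3.x ml module headers; the variable names and values are hypothetical.
//
//     using namespace cv;
//     using namespace cv::ml;
//
//     Mat data;   // CV_32FC1, one training sample per row
//     Mat labels; // CV_32FC1, a single column of class labels
//     LogisticRegression::Params params(0.001, 100, LogisticRegression::BATCH,
//                                       LogisticRegression::REG_L2, 1, 1);
//     Ptr<LogisticRegression> lr = LogisticRegression::create(params);
//     lr->train(TrainData::create(data, ROW_SAMPLE, labels));
//
//     Mat responses;
//     lr->predict(data, responses); // responses receives the predicted labels
// ---------------------------------------------------------------------------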

#include "precomp.hpp"

using namespace std;

namespace cv {
namespace ml {

LogisticRegression::Params::Params(double learning_rate,
                                   int iters,
                                   int method,
                                   int normalization,
                                   int reg,
                                   int batch_size)
{
    alpha = learning_rate;
    num_iters = iters;
    norm = normalization;
    regularized = reg;
    train_method = method;
    mini_batch_size = batch_size;
    term_crit = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, num_iters, alpha);
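    // note: num_iters doubles as the COUNT criterion and alpha as the EPS threshold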
}

class LogisticRegressionImpl : public LogisticRegression
{
public:
    LogisticRegressionImpl(const Params& pms)
        : params(pms)
    {
    }
    virtual ~LogisticRegressionImpl() {}

    virtual bool train( const Ptr<TrainData>& trainData, int=0 );
    virtual float predict(InputArray samples, OutputArray results, int) const;
    virtual void clear();
    virtual void write(FileStorage& fs) const;
    virtual void read(const FileNode& fn);
    virtual Mat get_learnt_thetas() const;

    virtual int getVarCount() const { return learnt_thetas.cols; }
    virtual bool isTrained() const { return !learnt_thetas.empty(); }
    virtual bool isClassifier() const { return true; }
    virtual String getDefaultModelName() const { return "opencv_ml_lr"; }
protected:
    Mat calc_sigmoid(const Mat& data) const;
    double compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    Mat compute_batch_gradient(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    Mat compute_mini_batch_gradient(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    bool set_label_map(const Mat& _labels_i);
    Mat remap_labels(const Mat& _labels_i, const map<int, int>& lmap) const;
protected:
    Params params;
    Mat learnt_thetas;
    map<int, int> forward_mapper;
    map<int, int> reverse_mapper;
    Mat labels_o;
    Mat labels_n;
};

Ptr<LogisticRegression> LogisticRegression::create(const Params& params)
{
    return makePtr<LogisticRegressionImpl>(params);
}

bool LogisticRegressionImpl::train(const Ptr<TrainData>& trainData, int)
{
    clear();
    Mat _data_i = trainData->getSamples();
    Mat _labels_i = trainData->getResponses();

    CV_Assert( !_labels_i.empty() && !_data_i.empty());

    // check the number of columns
    if(_labels_i.cols != 1)
    {
        CV_Error( CV_StsBadArg, "_labels_i should be a column matrix" );
    }

    // check data type: both data and labels must be CV_32FC1
    if((_data_i.type() != CV_32FC1) || (_labels_i.type() != CV_32FC1))
    {
        CV_Error( CV_StsBadArg, "data and labels must be floating point matrices" );
    }

    bool ok = false;

    Mat labels;

    set_label_map(_labels_i);
    int num_classes = (int) this->forward_mapper.size();

    // add a leading column of ones (the bias/intercept term)
    Mat data_t = Mat::zeros(_data_i.rows, _data_i.cols+1, CV_32F);
    vconcat(Mat(_data_i.rows, 1, _data_i.type(), Scalar::all(1.0)), data_t.col(0));

    for (int i=1;i<data_t.cols;i++)
    {
        vconcat(_data_i.col(i-1), data_t.col(i));
    }

    if(num_classes < 2)
    {
        CV_Error( CV_StsBadArg, "data should have at least 2 classes" );
    }

    if(_labels_i.rows != _data_i.rows)
    {
        CV_Error( CV_StsBadArg, "number of rows in data and labels should be equal" );
    }


    Mat thetas = Mat::zeros(num_classes, data_t.cols, CV_32F);
    Mat init_theta = Mat::zeros(data_t.cols, 1, CV_32F);

    Mat labels_l = remap_labels(_labels_i, this->forward_mapper);
    Mat new_local_labels;

    int ii=0;
    Mat new_theta;

    if(num_classes == 2)
    {
        labels_l.convertTo(labels, CV_32F);
        if(this->params.train_method == LogisticRegression::BATCH)
            new_theta = compute_batch_gradient(data_t, labels, init_theta);
        else
            new_theta = compute_mini_batch_gradient(data_t, labels, init_theta);
        thetas = new_theta.t();
    }
    else
    {
        /* one-vs-rest: relabel the data once per class and learn one theta per class,
        so an n-class problem yields n theta vectors */
        ii = 0;

        for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
        {
            new_local_labels = (labels_l == it->second)/255;
            new_local_labels.convertTo(labels, CV_32F);
            if(this->params.train_method == LogisticRegression::BATCH)
                new_theta = compute_batch_gradient(data_t, labels, init_theta);
            else
                new_theta = compute_mini_batch_gradient(data_t, labels, init_theta);
            hconcat(new_theta.t(), thetas.row(ii));
            ii += 1;
        }
    }

    this->learnt_thetas = thetas.clone();
    if( cvIsNaN( (double)sum(this->learnt_thetas)[0] ) )
    {
        CV_Error( CV_StsBadArg, "check training parameters; training produced non-finite theta values" );
    }
    ok = true;
    return ok;
}

float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, int) const
{
    /* returns the predicted class labels;
    class names can be 1,2,3,4, .... etc */
    Mat thetas, data, pred_labs;
    data = samples.getMat();

    // check that the learnt_thetas matrix is populated
    if(this->learnt_thetas.total()<=0)
    {
        CV_Error( CV_StsBadArg, "classifier should be trained first" );
    }
    if(data.type() != CV_32F)
    {
        CV_Error( CV_StsBadArg, "data must be of floating point type" );
    }

    // add a leading column of ones (the bias/intercept term)
    Mat data_t = Mat::zeros(data.rows, data.cols+1, CV_32F);
    for (int i=0;i<data_t.cols;i++)
    {
        if(i==0)
        {
            vconcat(Mat(data.rows, 1, data.type(), Scalar::all(1.0)), data_t.col(i));
            continue;
        }
        vconcat(data.col(i-1), data_t.col(i));
    }

    this->learnt_thetas.convertTo(thetas, CV_32F);

    CV_Assert(thetas.rows > 0);

    double min_val;
    double max_val;

    Point min_loc;
    Point max_loc;

    Mat labels;
    Mat labels_c;
    Mat temp_pred;
    Mat pred_m = Mat::zeros(data_t.rows, thetas.rows, data.type());

    if(thetas.rows == 1)
    {
        temp_pred = calc_sigmoid(data_t*thetas.t());
        CV_Assert(temp_pred.cols==1);

        // predict class 1 when the sigmoid output exceeds 0.5, class 0 otherwise
        temp_pred = (temp_pred>0.5)/255;
        temp_pred.convertTo(labels_c, CV_32S);
    }
    else
    {
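        // one-vs-rest: score each class with its own sigmoid, then take the argmax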
        for(int i = 0;i<thetas.rows;i++)
        {
            temp_pred = calc_sigmoid(data_t * thetas.row(i).t());
            vconcat(temp_pred, pred_m.col(i));
        }
        for(int i = 0;i<pred_m.rows;i++)
        {
            temp_pred = pred_m.row(i);
            minMaxLoc( temp_pred, &min_val, &max_val, &min_loc, &max_loc, Mat() );
            labels.push_back(max_loc.x);
        }
        labels.convertTo(labels_c, CV_32S);
    }
    pred_labs = remap_labels(labels_c, this->reverse_mapper);
    // convert pred_labs to integer type
    pred_labs.convertTo(pred_labs, CV_32S);
    pred_labs.copyTo(results);
    // TODO: determine
    return 0;
}

Mat LogisticRegressionImpl::calc_sigmoid(const Mat& data) const
{
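    // element-wise logistic sigmoid: s(z) = 1 / (1 + exp(-z))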
    Mat dest;
    exp(-data, dest);
    return 1.0/(1.0+dest);
}

double LogisticRegressionImpl::compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
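    // computes the regularized cross-entropy cost:
    // J(theta) = -(1/m) * ( y^T log(h) + (1-y)^T log(1-h) ) + r,  where h = sigmoid(X*theta)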
    int llambda = 0;
    int m;
    int n;
    double cost = 0;
    double rparameter = 0;
    Mat theta_b;
    Mat theta_c;
    Mat d_a;
    Mat d_b;

    m = _data.rows;
    n = _data.cols;

    theta_b = _init_theta(Range(1, n), Range::all());
    multiply(theta_b, theta_b, theta_c, 1);

    if(this->params.regularized > 0)
    {
        llambda = 1;
    }

    if(this->params.norm == LogisticRegression::REG_L1)
    {
        // promote llambda to double so the ratio is not truncated by integer division
        rparameter = (static_cast<double>(llambda)/(2*m)) * sum(theta_b)[0];
    }
    else
    {
        // assuming it to be L2 by default
        rparameter = (static_cast<double>(llambda)/(2*m)) * sum(theta_c)[0];
    }

    d_a = calc_sigmoid(_data* _init_theta);

    log(d_a, d_a);
    multiply(d_a, _labels, d_a);

    d_b = 1 - calc_sigmoid(_data * _init_theta);
    log(d_b, d_b);
    multiply(d_b, 1-_labels, d_b);

    cost = (-1.0/m) * (sum(d_a)[0] + sum(d_b)[0]);
341 342 343 344 345
    cost = cost + rparameter;

    return cost;
}

Mat LogisticRegressionImpl::compute_batch_gradient(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    // implements batch gradient descent
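    // per-iteration update performed below:
    //   theta := theta - (alpha/m) * X^T * (sigmoid(X*theta) - y),
    // with an extra (lambda/m)*theta term on the non-bias rows when regularized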
    if(this->params.alpha<=0)
    {
        CV_Error( CV_StsBadArg, "check training parameters for the classifier" );
    }

    if(this->params.num_iters <= 0)
    {
        CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
    }

    int llambda = 0;
    double ccost;
    int m, n;
    Mat pcal_a;
    Mat pcal_b;
    Mat pcal_ab;
    Mat gradient;
    Mat theta_p = _init_theta.clone();
    m = _data.rows;
    n = _data.cols;

    if(this->params.regularized > 0)
    {
        llambda = 1;
    }

    for(int i = 0;i<this->params.num_iters;i++)
    {
        ccost = compute_cost(_data, _labels, theta_p);

        if( cvIsNaN( ccost ) )
        {
            CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
        }

        // error term: h - y, where h = sigmoid(X*theta)
        pcal_b = calc_sigmoid(_data*theta_p) - _labels;

        // use 1.0/m so the scale factor is not truncated by integer division
        pcal_a = (1.0/m) * _data.t();

        gradient = pcal_a * pcal_b;

        pcal_a = calc_sigmoid(_data*theta_p) - _labels;

        pcal_b = _data(Range::all(), Range(0,1));

        multiply(pcal_a, pcal_b, pcal_ab, 1);

        gradient.row(0) = ((float)1/m) * sum(pcal_ab)[0];

        pcal_b = _data(Range::all(), Range(1,n));

        //cout<<"for each training data entry"<<endl;
        for(int ii = 1;ii<gradient.rows;ii++)
        {
            pcal_b = _data(Range::all(), Range(ii,ii+1));

            multiply(pcal_a, pcal_b, pcal_ab, 1);

            gradient.row(ii) = (1.0/m)*sum(pcal_ab)[0] + (static_cast<double>(llambda)/m) * theta_p.row(ii);
        }

        theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;
    }
    return theta_p;
}

Mat LogisticRegressionImpl::compute_mini_batch_gradient(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    // implements mini-batch gradient descent
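    // walks the data in consecutive chunks of params.mini_batch_size rows,
    // applying one gradient-descent update per chunk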
    int lambda_l = 0;
    double ccost;
    int m, n;
    int j = 0;
    int size_b = this->params.mini_batch_size;

    if(this->params.mini_batch_size <= 0 || this->params.alpha <= 0)
    {
        CV_Error( CV_StsBadArg, "check training parameters for the classifier" );
    }

    if(this->params.num_iters <= 0)
    {
        CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
    }

    Mat pcal_a;
    Mat pcal_b;
    Mat pcal_ab;
    Mat gradient;
    Mat theta_p = _init_theta.clone();
    Mat data_d;
    Mat labels_l;

    if(this->params.regularized > 0)
    {
        lambda_l = 1;
    }

    for(int i = 0;i<this->params.term_crit.maxCount;i++)
    {
        if(j+size_b<=_data.rows)
        {
            data_d = _data(Range(j,j+size_b), Range::all());
            labels_l = _labels(Range(j,j+size_b),Range::all());
        }
        else
        {
            data_d = _data(Range(j, _data.rows), Range::all());
            labels_l = _labels(Range(j, _labels.rows),Range::all());
        }

        m = data_d.rows;
        n = data_d.cols;

        ccost = compute_cost(data_d, labels_l, theta_p);

        if( cvIsNaN( ccost ) == 1)
        {
            CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
        }

        // error term: h - y, where h = sigmoid(X*theta)
        pcal_b = calc_sigmoid(data_d*theta_p) - labels_l;

        // use 1.0/m so the scale factor is not truncated by integer division
        pcal_a = (1.0/m) * data_d.t();

        gradient = pcal_a * pcal_b;

        pcal_a = calc_sigmoid(data_d*theta_p) - labels_l;

        pcal_b = data_d(Range::all(), Range(0,1));

        multiply(pcal_a, pcal_b, pcal_ab, 1);

        gradient.row(0) = ((float)1/m) * sum(pcal_ab)[0];

        pcal_b = data_d(Range::all(), Range(1,n));

        for(int k = 1;k<gradient.rows;k++)
        {
            pcal_b = data_d(Range::all(), Range(k,k+1));
            multiply(pcal_a, pcal_b, pcal_ab, 1);
            gradient.row(k) = (1.0/m)*sum(pcal_ab)[0] + (static_cast<double>(lambda_l)/m) * theta_p.row(k);
        }

        theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;

        j+=this->params.mini_batch_size;

        if(j+size_b>_data.rows)
        {
            // all training samples have been visited
            break;
        }
    }
    return theta_p;
}

bool LogisticRegressionImpl::set_label_map(const Mat &_labels_i)
{
    // builds two maps that translate between user-defined labels and contiguous internal labels (0..n-1), in both directions
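    // e.g. (hypothetical) user labels {4, 7, 9} map forward to {0, 1, 2} and back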
    int ii = 0;
    Mat labels;

    this->labels_o = Mat(0,1, CV_8U);
    this->labels_n = Mat(0,1, CV_8U);

    _labels_i.convertTo(labels, CV_32S);

    for(int i = 0;i<labels.rows;i++)
    {
        this->forward_mapper[labels.at<int>(i)] += 1;
    }

    for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
    {
        this->forward_mapper[it->first] = ii;
        this->labels_o.push_back(it->first);
        this->labels_n.push_back(ii);
        ii += 1;
    }

    for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
    {
        this->reverse_mapper[it->second] = it->first;
    }

    return true;
}

Mat LogisticRegressionImpl::remap_labels(const Mat& _labels_i, const map<int, int>& lmap) const
{
    Mat labels;
    _labels_i.convertTo(labels, CV_32S);

    Mat new_labels = Mat::zeros(labels.rows, labels.cols, labels.type());

    CV_Assert( lmap.size() > 0 );

    for(int i =0;i<labels.rows;i++)
    {
        new_labels.at<int>(i,0) = lmap.find(labels.at<int>(i,0))->second;
    }
    return new_labels;
}

void LogisticRegressionImpl::clear()
{
    this->learnt_thetas.release();
    this->labels_o.release();
    this->labels_n.release();
}

void LogisticRegressionImpl::write(FileStorage& fs) const
{
    // check that the FileStorage is open
    if(fs.isOpened() == 0)
    {
        CV_Error(CV_StsBadArg,"file can't be opened. Check the file path");
    }
    string desc = "Logisitic Regression Classifier";
    fs<<"classifier"<<desc.c_str();
    fs<<"alpha"<<this->params.alpha;
    fs<<"iterations"<<this->params.num_iters;
    fs<<"norm"<<this->params.norm;
    fs<<"regularized"<<this->params.regularized;
    fs<<"train_method"<<this->params.train_method;
    if(this->params.train_method == LogisticRegression::MINI_BATCH)
    {
        fs<<"mini_batch_size"<<this->params.mini_batch_size;
    }
    fs<<"learnt_thetas"<<this->learnt_thetas;
    fs<<"n_labels"<<this->labels_n;
    fs<<"o_labels"<<this->labels_o;
}

void LogisticRegressionImpl::read(const FileNode& fn)
{
    // check if empty
    if(fn.empty())
    {
        CV_Error( CV_StsBadArg, "empty FileNode object" );
    }

    this->params.alpha = (double)fn["alpha"];
    this->params.num_iters = (int)fn["iterations"];
    this->params.norm = (int)fn["norm"];
    this->params.regularized = (int)fn["regularized"];
    this->params.train_method = (int)fn["train_method"];

    if(this->params.train_method == LogisticRegression::MINI_BATCH)
    {
        this->params.mini_batch_size = (int)fn["mini_batch_size"];
    }

    fn["learnt_thetas"] >> this->learnt_thetas;
    fn["o_labels"] >> this->labels_o;
    fn["n_labels"] >> this->labels_n;

    for(int ii =0;ii<labels_o.rows;ii++)
    {
        this->forward_mapper[labels_o.at<int>(ii,0)] = labels_n.at<int>(ii,0);
        this->reverse_mapper[labels_n.at<int>(ii,0)] = labels_o.at<int>(ii,0);
    }
}

Mat LogisticRegressionImpl::get_learnt_thetas() const
{
    return this->learnt_thetas;
}

} // namespace ml
} // namespace cv

/* End of file. */