Commit 7ad308ea authored by LaurentBerger's avatar LaurentBerger Committed by Vadim Pisarevsky

Simulated Annealing for ANN_MLP training method (#10213)

* Simulated Annealing for ANN_MLP training method

* EXPECT_LT

* just to test new data

* manage RNG

* Try again

* Just run buildbot with new data

* try to understand

* Test layer

* New data- new test

* Force RNG in backprop

* Use Impl to avoid virtual method

* reset all weights

* try to solve ABI

* retry

* ABI solved?

* till problem with dynamic_cast

* Something is wrong

* Solved?

* disable backprop test

* remove ANN_MLP_ANNEALImpl

* Disable weight in varmap

* Add example for SimulatedAnnealing
parent 6df8ac03
@@ -459,6 +459,17 @@
number = {3},
publisher = {Elsevier}
}
@ARTICLE{Kirkpatrick83,
author = {Kirkpatrick, S. and Gelatt, C. D., Jr. and Vecchi, M. P.},
title = {Optimization by Simulated Annealing},
year = {1983},
pages = {671--680},
journal = {Science},
volume = {220},
number = {4598},
publisher = {American Association for the Advancement of Science}
}
@INPROCEEDINGS{Kolmogorov03,
author = {Kim, Junhwan and Kolmogorov, Vladimir and Zabih, Ramin},
title = {Visual correspondence using energy minimization and mutual information},
...
@@ -1406,13 +1406,14 @@ public:
/** Available training methods */
enum TrainingMethods {
BACKPROP=0, //!< The back-propagation algorithm.
RPROP = 1, //!< The RPROP algorithm. See @cite RPROP93 for details.
ANNEAL = 2 //!< The simulated annealing algorithm. See @cite Kirkpatrick83 for details.
};
/** Sets training method and common parameters.
@param method Default value is ANN_MLP::RPROP. See ANN_MLP::TrainingMethods.
@param param1 passed to setRpropDW0 for ANN_MLP::RPROP and to setBackpropWeightScale for ANN_MLP::BACKPROP and to initialT for ANN_MLP::ANNEAL.
@param param2 passed to setRpropDWMin for ANN_MLP::RPROP and to setBackpropMomentumScale for ANN_MLP::BACKPROP and to finalT for ANN_MLP::ANNEAL.
*/
CV_WRAP virtual void setTrainMethod(int method, double param1 = 0, double param2 = 0) = 0;
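For ANNEAL, param1 and param2 thus map onto the initial and final temperatures. A minimal sketch, assuming an already constructed ANN_MLP instance named mlp (the name is illustrative):

    // Equivalent to calling setAnnealInitialT(10) and setAnnealFinalT(0.1).
    mlp->setTrainMethod(cv::ml::ANN_MLP::ANNEAL, 10, 0.1);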
@@ -1499,6 +1500,34 @@ public:
/** @copybrief getRpropDWMax @see getRpropDWMax */
CV_WRAP virtual void setRpropDWMax(double val) = 0;
/** ANNEAL: Update initial temperature.
It must be \>=0. Default value is 10.*/
/** @see setAnnealInitialT */
CV_WRAP double getAnnealInitialT() const;
/** @copybrief getAnnealInitialT @see getAnnealInitialT */
CV_WRAP void setAnnealInitialT(double val);
/** ANNEAL: Update final temperature.
It must be \>=0 and less than initialT. Default value is 0.1.*/
/** @see setAnnealFinalT */
CV_WRAP double getAnnealFinalT() const;
/** @copybrief getAnnealFinalT @see getAnnealFinalT */
CV_WRAP void setAnnealFinalT(double val);
/** ANNEAL: Update cooling ratio.
It must be \>0 and less than 1. Default value is 0.95.*/
/** @see setAnnealCoolingRatio */
CV_WRAP double getAnnealCoolingRatio() const;
/** @copybrief getAnnealCoolingRatio @see getAnnealCoolingRatio */
CV_WRAP void setAnnealCoolingRatio(double val);
/** ANNEAL: Update the number of iterations per temperature step.
It must be \>0. Default value is 10.*/
/** @see setAnnealItePerStep */
CV_WRAP int getAnnealItePerStep() const;
/** @copybrief getAnnealItePerStep @see getAnnealItePerStep */
CV_WRAP void setAnnealItePerStep(int val);
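Putting the four knobs together, a minimal configuration sketch (the values are illustrative, not tuned recommendations; assumes using namespace cv as in the sample further below):

    Ptr<ml::ANN_MLP> mlp = ml::ANN_MLP::create();
    mlp->setTrainMethod(ml::ANN_MLP::ANNEAL);
    mlp->setAnnealInitialT(10);       // >= 0, start of the cooling schedule
    mlp->setAnnealFinalT(0.1);        // >= 0 and < initialT
    mlp->setAnnealCoolingRatio(0.95); // in (0, 1): T(t) = 0.95 * T(t-1)
    mlp->setAnnealItePerStep(10);     // > 0, perturbations tried at each temperature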
/** possible activation functions */
enum ActivationFunctions {
/** Identity function: \f$f(x)=x\f$ */
@@ -1838,6 +1867,111 @@ CV_EXPORTS void randMVNormal( InputArray mean, InputArray cov, int nsamples, Out
CV_EXPORTS void createConcentricSpheresTestSet( int nsamples, int nfeatures, int nclasses,
OutputArray samples, OutputArray responses);
/** @brief Artificial Neural Networks - Multi-Layer Perceptrons trained with simulated annealing.
@sa @ref ml_intro_ann
*/
class CV_EXPORTS_W ANN_MLP_ANNEAL : public ANN_MLP
{
public:
/** ANNEAL: Update initial temperature.
It must be \>=0. Default value is 10.*/
/** @see setAnnealInitialT */
CV_WRAP virtual double getAnnealInitialT() const;
/** @copybrief getAnnealInitialT @see getAnnealInitialT */
CV_WRAP virtual void setAnnealInitialT(double val);
/** ANNEAL: Update final temperature.
It must be \>=0 and less than initialT. Default value is 0.1.*/
/** @see setAnnealFinalT */
CV_WRAP virtual double getAnnealFinalT() const;
/** @copybrief getAnnealFinalT @see getAnnealFinalT */
CV_WRAP virtual void setAnnealFinalT(double val);
/** ANNEAL: Update cooling ratio.
It must be \>0 and less than 1. Default value is 0.95.*/
/** @see setAnnealCoolingRatio */
CV_WRAP virtual double getAnnealCoolingRatio() const;
/** @copybrief getAnnealCoolingRatio @see getAnnealCoolingRatio */
CV_WRAP virtual void setAnnealCoolingRatio(double val);
/** ANNEAL: Update the number of iterations per temperature step.
It must be \>0. Default value is 10.*/
/** @see setAnnealItePerStep */
CV_WRAP virtual int getAnnealItePerStep() const;
/** @copybrief getAnnealItePerStep @see getAnnealItePerStep */
CV_WRAP virtual void setAnnealItePerStep(int val);
/** @brief Creates empty model
Use StatModel::train to train the model, Algorithm::load\<ANN_MLP\>(filename) to load the pre-trained model.
Note that the train method has optional flags: ANN_MLP::TrainFlags.
*/
// CV_WRAP static Ptr<ANN_MLP> create();
};
/****************************************************************************************\
* Simulated annealing solver *
\****************************************************************************************/
/** @brief The class implements simulated annealing for optimization.
See @cite Kirkpatrick83 for details.
*/
class CV_EXPORTS SimulatedAnnealingSolver : public Algorithm
{
public:
SimulatedAnnealingSolver() { init(); }
~SimulatedAnnealingSolver();
/** Gives the energy value for the current state of the system.*/
virtual double energy() = 0;
/** Changes the state of the system (random perturbation).*/
virtual void changedState() = 0;
/** Reverts the system to the previous state.*/
virtual void reverseChangedState() = 0;
/** Runs the simulated annealing procedure. */
int run();
/** Set the initial temperature of the simulated annealing procedure.
*@param x new initial temperature. x\>0
*/
void setInitialTemperature(double x);
/** Set the final temperature of the simulated annealing procedure.
*@param x new final temperature value. 0\<x\<initial temperature
*/
void setFinalTemperature(double x);
/** Returns the final temperature. */
double getFinalTemperature();
/** Set the cooling ratio of the simulated annealing procedure: T(t) = coolingRatio * T(t-1).
* @param x new cooling ratio value. 0\<x\<1
*/
void setCoolingRatio(double x);
/** Set the number of iterations per temperature step.
* @param ite number of iterations per temperature step, ite \> 0
*/
void setIterPerStep(int ite);
struct Impl;
protected:
void init();
Impl* impl;
};
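The body of run() is not shown in this hunk; conceptually, simulated annealing as in @cite Kirkpatrick83 follows the Metropolis acceptance rule. A minimal sketch of what a cooling loop of this shape does, under the assumption that run() behaves this way (variable names are illustrative):

    for (double T = initialT; T > finalT; T *= coolingRatio)
    {
        for (int i = 0; i < iterPerStep; i++)
        {
            double previousEnergy = energy();
            changedState();                       // random perturbation
            double delta = energy() - previousEnergy;
            // Always keep improvements; keep a worse state only with
            // probability exp(-delta / T), which shrinks as T cools.
            if (delta > 0 && rng.uniform(0., 1.) >= std::exp(-delta / T))
                reverseChangedState();
        }
    }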
struct SimulatedAnnealingSolver::Impl
{
RNG rEnergy;
double coolingRatio;
double initialT;
double finalT;
int iterPerStep;
Impl()
{
initialT = 2;
finalT = 0.1;
coolingRatio = 0.95;
iterPerStep = 100;
refcount = 1;
}
int refcount;
~Impl() { refcount--; CV_Assert(refcount == 0); }
};
//! @} ml
}
...
@@ -79,8 +79,10 @@ int str_to_ann_train_method( String& str )
{
if( !str.compare("BACKPROP") )
return ANN_MLP::BACKPROP;
if (!str.compare("RPROP"))
return ANN_MLP::RPROP;
if (!str.compare("ANNEAL"))
return ANN_MLP::ANNEAL;
CV_Error( CV_StsBadArg, "incorrect ann train method string" );
return -1;
}
@@ -241,13 +243,92 @@ TEST(ML_ANN, ActivationFunction)
Mat rx, ry, dst;
x->predict(testSamples, rx);
y->predict(testSamples, ry);
double n = cvtest::norm(rx, ry, NORM_INF);
EXPECT_LT(n, FLT_EPSILON) << "Predict are not equal for " << dataname + activationName[i] + ".yml and " << activationName[i];
#endif
}
}
//#define GENERATE_TESTDATA
TEST(ML_ANN, Method)
{
String folder = string(cvtest::TS::ptr()->get_data_path());
String original_path = folder + "waveform.data";
String dataname = folder + "waveform";
Ptr<TrainData> tdata2 = TrainData::loadFromCSV(original_path, 0);
ASSERT_FALSE(tdata2.empty()) << "Could not find test data file : " << original_path;
// Convert the scalar class labels into one-hot encoded responses for the MLP.
Mat responses(tdata2->getResponses().rows, 3, CV_32FC1, Scalar(0));
for (int i = 0; i < tdata2->getResponses().rows; i++)
responses.at<float>(i, static_cast<int>(tdata2->getResponses().at<float>(i, 0))) = 1;
Ptr<TrainData> tdata = TrainData::create(tdata2->getSamples(), ml::ROW_SAMPLE, responses);
ASSERT_FALSE(tdata.empty());
// Fix the RNG state so that training is reproducible across runs.
RNG& rng = theRNG();
rng.state = 0;
tdata->setTrainTestSplitRatio(0.8);
vector<int> methodType;
methodType.push_back(ml::ANN_MLP::RPROP);
methodType.push_back(ml::ANN_MLP::ANNEAL);
// methodType.push_back(ml::ANN_MLP::BACKPROP); -----> NO BACKPROP TEST
vector<String> methodName;
methodName.push_back("_rprop");
methodName.push_back("_anneal");
// methodName.push_back("_backprop"); -----> NO BACKPROP TEST
#ifdef GENERATE_TESTDATA
Ptr<ml::ANN_MLP> xx = ml::ANN_MLP_ANNEAL::create();
Mat_<int> layerSizesXX(1, 4);
layerSizesXX(0, 0) = tdata->getNVars();
layerSizesXX(0, 1) = 30;
layerSizesXX(0, 2) = 30;
layerSizesXX(0, 3) = tdata->getResponses().cols;
xx->setLayerSizes(layerSizesXX);
xx->setActivationFunction(ml::ANN_MLP::SIGMOID_SYM);
xx->setTrainMethod(ml::ANN_MLP::RPROP);
xx->setTermCriteria(TermCriteria(TermCriteria::COUNT, 1, 0.01));
xx->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE + ml::ANN_MLP::NO_INPUT_SCALE);
FileStorage fs;
fs.open(dataname + "_init_weight.yml.gz", FileStorage::WRITE + FileStorage::BASE64);
xx->write(fs);
fs.release();
#endif
for (size_t i = 0; i < methodType.size(); i++)
{
FileStorage fs;
fs.open(dataname + "_init_weight.yml.gz", FileStorage::READ + FileStorage::BASE64);
Ptr<ml::ANN_MLP> x = ml::ANN_MLP_ANNEAL::create();
x->read(fs.root());
x->setTrainMethod(methodType[i]);
if (methodType[i] == ml::ANN_MLP::ANNEAL)
{
x->setAnnealInitialT(12);
x->setAnnealFinalT(0.15);
x->setAnnealCoolingRatio(0.96);
x->setAnnealItePerStep(11);
}
x->setTermCriteria(TermCriteria(TermCriteria::COUNT, 100, 0.01));
x->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE + ml::ANN_MLP::NO_INPUT_SCALE + ml::ANN_MLP::UPDATE_WEIGHTS);
ASSERT_TRUE(x->isTrained()) << "Could not train networks with " << methodName[i];
#ifdef GENERATE_TESTDATA
x->save(dataname + methodName[i] + ".yml.gz");
#endif
Ptr<ml::ANN_MLP> y = Algorithm::load<ANN_MLP>(dataname + methodName[i] + ".yml.gz");
ASSERT_TRUE(y != NULL) << "Could not load " << dataname + methodName[i] + ".yml.gz";
Mat testSamples = tdata->getTestSamples();
Mat rx, ry, dst;
// The reloaded network must reproduce the trained weights exactly, layer by layer.
for (int j = 0; j < 4; j++)
{
rx = x->getWeights(j);
ry = y->getWeights(j);
double n = cvtest::norm(rx, ry, NORM_INF);
EXPECT_LT(n, FLT_EPSILON) << "Weights are not equal for " << dataname + methodName[i] + ".yml and " << methodName[i] << " layer : " << j;
}
x->predict(testSamples, rx);
y->predict(testSamples, ry);
double n = cvtest::norm(rx, ry, NORM_INF);
EXPECT_LT(n, FLT_EPSILON) << "Predict are not equal for " << dataname + methodName[i] + ".yml and " << methodName[i];
}
}
// 6. dtree
// 7. boost
...
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
void DrawTravelMap(Mat &img, vector<Point> &p, vector<int> &n);
class TravelSalesman : public ml::SimulatedAnnealingSolver
{
private:
vector<Point> &posCity;
vector<int> &next;
RNG rng;
int d0, d1, d2, d3;
public:
TravelSalesman(vector<Point> &p, vector<int> &n) : posCity(p), next(n)
{
rng = theRNG();
}
/** Gives the energy value for the current state of the system (total tour length).*/
virtual double energy();
/** Changes the state of the system (random perturbation of the tour).*/
virtual void changedState();
/** Reverts the system to the previous state.*/
virtual void reverseChangedState();
};
void TravelSalesman::changedState()
{
// Pick a random city d0 and the next three cities on the tour
// (d0 -> d1 -> d2 -> d3), then swap the visiting order of d1 and d2
// so the tour becomes d0 -> d2 -> d1 -> d3.
d0 = rng.uniform(0, static_cast<int>(posCity.size()));
d1 = next[d0];
d2 = next[d1];
d3 = next[d2];
next[d0] = d2;
next[d2] = d1;
next[d1] = d3;
}
void TravelSalesman::reverseChangedState()
{
// Restore the original order d0 -> d1 -> d2 -> d3.
next[d0] = d1;
next[d1] = d2;
next[d2] = d3;
}
double TravelSalesman::energy()
{
// Total tour length: sum of the distances from each city to its successor.
double e = 0;
for (size_t i = 0; i < next.size(); i++)
{
e += norm(posCity[i] - posCity[next[i]]);
}
return e;
}
void DrawTravelMap(Mat &img, vector<Point> &p, vector<int> &n)
{
for (size_t i = 0; i < n.size(); i++)
{
circle(img,p[i],5,Scalar(0,0,255),2);
line(img,p[i],p[n[i]],Scalar(0,255,0),2);
}
}
int main(void)
{
int nbCity = 40;
Mat img(500, 500, CV_8UC3, Scalar::all(0));
RNG &rng = theRNG();
int radius = static_cast<int>(img.cols*0.45);
Point center(img.cols/2, img.rows/2);
// Place the cities at random angles on a circle; the initial tour simply
// visits them in index order, which is generally far from optimal.
vector<Point> posCity(nbCity);
vector<int> next(nbCity);
for (size_t i = 0; i < posCity.size(); i++)
{
double theta = rng.uniform(0., 2 * CV_PI);
posCity[i].x = static_cast<int>(radius*cos(theta)) + center.x;
posCity[i].y = static_cast<int>(radius*sin(theta)) + center.y;
next[i] = (i + 1) % nbCity;
}
TravelSalesman ts(posCity, next);
ts.setCoolingRatio(0.99);
ts.setInitialTemperature(100);
ts.setIterPerStep(10000*nbCity);
// Each call to run() anneals over a narrow temperature band, initially 100 down to 97.
ts.setFinalTemperature(100*0.97);
DrawTravelMap(img,posCity,next);
imshow("Map",img);
waitKey(10);
for (int i = 0; i < 100; i++)
{
// Anneal over the current temperature band, redraw the tour, then slide
// the band 3% lower for the next pass.
ts.run();
img = Mat::zeros(img.size(), CV_8UC3);
DrawTravelMap(img, posCity, next);
imshow("Map", img);
waitKey(10);
double ti = ts.getFinalTemperature();
cout << ti << " -> " << ts.energy() << "\n";
ts.setInitialTemperature(ti);
ts.setFinalTemperature(ti*0.97);
}
return 0;
}