Commit 7ad308ea authored by LaurentBerger's avatar LaurentBerger Committed by Vadim Pisarevsky

Simulated Annealing for ANN_MLP training method (#10213)

* Simulated Annealing for ANN_MLP training method

* EXPECT_LT

* just to test new data

* manage RNG

* Try again

* Just run buildbot with new data

* try to understand

* Test layer

* New data- new test

* Force RNG in backprop

* Use Impl to avoid virtual method

* reset all weights

* try to solve ABI

* retry

* ABI solved?

* till problem with dynamic_cast

* Something is wrong

* Solved?

* disable backprop test

* remove ANN_MLP_ANNEALImpl

* Disable weight in varmap

* Add example for SimulatedAnnealing
parent 6df8ac03
@@ -459,6 +459,17 @@
number = {3},
publisher = {Elsevier}
}
@ARTICLE{Kirkpatrick83,
author = {Kirkpatrick, S. and Gelatt, C. D., Jr. and Vecchi, M. P.},
title = {Optimization by Simulated Annealing},
year = {1983},
pages = {671--680},
journal = {Science},
volume = {220},
number = {4598},
publisher = {American Association for the Advancement of Science}
}
@INPROCEEDINGS{Kolmogorov03,
author = {Kim, Junhwan and Kolmogorov, Vladimir and Zabih, Ramin},
title = {Visual correspondence using energy minimization and mutual information},
...
@@ -1406,13 +1406,14 @@ public:
/** Available training methods */
enum TrainingMethods {
BACKPROP=0, //!< The back-propagation algorithm.
RPROP = 1, //!< The RPROP algorithm. See @cite RPROP93 for details.
ANNEAL = 2 //!< The simulated annealing algorithm. See @cite Kirkpatrick83 for details.
};
/** Sets training method and common parameters.
@param method Default value is ANN_MLP::RPROP. See ANN_MLP::TrainingMethods.
@param param1 passed to setRpropDW0 for ANN_MLP::RPROP and to setBackpropWeightScale for ANN_MLP::BACKPROP and to initialT for ANN_MLP::ANNEAL.
@param param2 passed to setRpropDWMin for ANN_MLP::RPROP and to setBackpropMomentumScale for ANN_MLP::BACKPROP and to finalT for ANN_MLP::ANNEAL.
*/
CV_WRAP virtual void setTrainMethod(int method, double param1 = 0, double param2 = 0) = 0;
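For ANNEAL, param1 and param2 thus map onto the initial and final temperatures. A minimal sketch, assuming an already constructed ANN_MLP instance named mlp (the name is illustrative):

    // Equivalent to calling setAnnealInitialT(10) and setAnnealFinalT(0.1).
    mlp->setTrainMethod(cv::ml::ANN_MLP::ANNEAL, 10, 0.1);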
@@ -1499,6 +1500,34 @@ public:
/** @copybrief getRpropDWMax @see getRpropDWMax */
CV_WRAP virtual void setRpropDWMax(double val) = 0;
/** ANNEAL: Update initial temperature.
It must be \>=0. Default value is 10.*/
/** @see setAnnealInitialT */
CV_WRAP double getAnnealInitialT() const;
/** @copybrief getAnnealInitialT @see getAnnealInitialT */
CV_WRAP void setAnnealInitialT(double val);
/** ANNEAL: Update final temperature.
It must be \>=0 and less than initialT. Default value is 0.1.*/
/** @see setAnnealFinalT */
CV_WRAP double getAnnealFinalT() const;
/** @copybrief getAnnealFinalT @see getAnnealFinalT */
CV_WRAP void setAnnealFinalT(double val);
/** ANNEAL: Update cooling ratio.
It must be \>0 and less than 1. Default value is 0.95.*/
/** @see setAnnealCoolingRatio */
CV_WRAP double getAnnealCoolingRatio() const;
/** @copybrief getAnnealCoolingRatio @see getAnnealCoolingRatio */
CV_WRAP void setAnnealCoolingRatio(double val);
/** ANNEAL: Update the number of iterations per temperature step.
It must be \>0. Default value is 10.*/
/** @see setAnnealItePerStep */
CV_WRAP int getAnnealItePerStep() const;
/** @copybrief getAnnealItePerStep @see getAnnealItePerStep */
CV_WRAP void setAnnealItePerStep(int val);
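Putting the four knobs together, a minimal configuration sketch (the values are illustrative, not tuned recommendations; assumes using namespace cv as in the sample further below):

    Ptr<ml::ANN_MLP> mlp = ml::ANN_MLP::create();
    mlp->setTrainMethod(ml::ANN_MLP::ANNEAL);
    mlp->setAnnealInitialT(10);       // >= 0, start of the cooling schedule
    mlp->setAnnealFinalT(0.1);        // >= 0 and < initialT
    mlp->setAnnealCoolingRatio(0.95); // in (0, 1): T(t) = 0.95 * T(t-1)
    mlp->setAnnealItePerStep(10);     // > 0, perturbations tried at each temperature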
/** possible activation functions */
enum ActivationFunctions {
/** Identity function: \f$f(x)=x\f$ */
@@ -1838,6 +1867,111 @@ CV_EXPORTS void randMVNormal( InputArray mean, InputArray cov, int nsamples, Out
CV_EXPORTS void createConcentricSpheresTestSet( int nsamples, int nfeatures, int nclasses,
OutputArray samples, OutputArray responses);
/** @brief Artificial Neural Networks - Multi-Layer Perceptrons trained with simulated annealing.
@sa @ref ml_intro_ann
*/
class CV_EXPORTS_W ANN_MLP_ANNEAL : public ANN_MLP
{
public:
/** ANNEAL: Update initial temperature.
It must be \>=0. Default value is 10.*/
/** @see setAnnealInitialT */
CV_WRAP virtual double getAnnealInitialT() const;
/** @copybrief getAnnealInitialT @see getAnnealInitialT */
CV_WRAP virtual void setAnnealInitialT(double val);
/** ANNEAL: Update final temperature.
It must be \>=0 and less than initialT. Default value is 0.1.*/
/** @see setAnnealFinalT */
CV_WRAP virtual double getAnnealFinalT() const;
/** @copybrief getAnnealFinalT @see getAnnealFinalT */
CV_WRAP virtual void setAnnealFinalT(double val);
/** ANNEAL: Update cooling ratio.
It must be \>0 and less than 1. Default value is 0.95.*/
/** @see setAnnealCoolingRatio */
CV_WRAP virtual double getAnnealCoolingRatio() const;
/** @copybrief getAnnealCoolingRatio @see getAnnealCoolingRatio */
CV_WRAP virtual void setAnnealCoolingRatio(double val);
/** ANNEAL: Update the number of iterations per temperature step.
It must be \>0. Default value is 10.*/
/** @see setAnnealItePerStep */
CV_WRAP virtual int getAnnealItePerStep() const;
/** @copybrief getAnnealItePerStep @see getAnnealItePerStep */
CV_WRAP virtual void setAnnealItePerStep(int val);
/** @brief Creates empty model
Use StatModel::train to train the model, Algorithm::load\<ANN_MLP\>(filename) to load the pre-trained model.
Note that the train method has optional flags: ANN_MLP::TrainFlags.
*/
// CV_WRAP static Ptr<ANN_MLP> create();
};
/****************************************************************************************\
* Simulated annealing solver *
\****************************************************************************************/
/** @brief The class implements simulated annealing for optimization.
See @cite Kirkpatrick83 for details.
*/
class CV_EXPORTS SimulatedAnnealingSolver : public Algorithm
{
public:
SimulatedAnnealingSolver() { init(); }
~SimulatedAnnealingSolver();
/** Gives the energy value for the current state of the system.*/
virtual double energy() = 0;
/** Changes the state of the system (random perturbation).*/
virtual void changedState() = 0;
/** Reverts the system to the previous state.*/
virtual void reverseChangedState() = 0;
/** Runs the simulated annealing procedure. */
int run();
/** Set the initial temperature of the simulated annealing procedure.
*@param x new initial temperature. x\>0
*/
void setInitialTemperature(double x);
/** Set the final temperature of the simulated annealing procedure.
*@param x new final temperature value. 0\<x\<initial temperature
*/
void setFinalTemperature(double x);
/** Returns the final temperature. */
double getFinalTemperature();
/** Set the cooling ratio of the simulated annealing procedure: T(t) = coolingRatio * T(t-1).
* @param x new cooling ratio value. 0\<x\<1
*/
void setCoolingRatio(double x);
/** Set the number of iterations per temperature step.
* @param ite number of iterations per temperature step, ite \> 0
*/
void setIterPerStep(int ite);
struct Impl;
protected:
void init();
Impl* impl;
};
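The body of run() is not shown in this hunk; conceptually, simulated annealing as in @cite Kirkpatrick83 follows the Metropolis acceptance rule. A minimal sketch of what a cooling loop of this shape does, under the assumption that run() behaves this way (variable names are illustrative):

    for (double T = initialT; T > finalT; T *= coolingRatio)
    {
        for (int i = 0; i < iterPerStep; i++)
        {
            double previousEnergy = energy();
            changedState();                       // random perturbation
            double delta = energy() - previousEnergy;
            // Always keep improvements; keep a worse state only with
            // probability exp(-delta / T), which shrinks as T cools.
            if (delta > 0 && rng.uniform(0., 1.) >= std::exp(-delta / T))
                reverseChangedState();
        }
    }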
struct SimulatedAnnealingSolver::Impl
{
RNG rEnergy;
double coolingRatio;
double initialT;
double finalT;
int iterPerStep;
Impl()
{
initialT = 2;
finalT = 0.1;
coolingRatio = 0.95;
iterPerStep = 100;
refcount = 1;
}
int refcount;
~Impl() { refcount--; CV_Assert(refcount == 0); }
};
//! @} ml
}
...
@@ -79,8 +79,10 @@ int str_to_ann_train_method( String& str )
{
if( !str.compare("BACKPROP") )
return ANN_MLP::BACKPROP;
if (!str.compare("RPROP"))
return ANN_MLP::RPROP;
if (!str.compare("ANNEAL"))
return ANN_MLP::ANNEAL;
CV_Error( CV_StsBadArg, "incorrect ann train method string" );
return -1;
}
@@ -241,13 +243,92 @@ TEST(ML_ANN, ActivationFunction)
Mat rx, ry, dst;
x->predict(testSamples, rx);
y->predict(testSamples, ry);
double n = cvtest::norm(rx, ry, NORM_INF);
EXPECT_LT(n, FLT_EPSILON) << "Predict are not equal for " << dataname + activationName[i] + ".yml and " << activationName[i];
#endif
}
}
//#define GENERATE_TESTDATA
TEST(ML_ANN, Method)
{
String folder = string(cvtest::TS::ptr()->get_data_path());
String original_path = folder + "waveform.data";
String dataname = folder + "waveform";
Ptr<TrainData> tdata2 = TrainData::loadFromCSV(original_path, 0);
ASSERT_FALSE(tdata2.empty()) << "Could not find test data file : " << original_path;
// Convert the scalar class labels into one-hot encoded responses for the MLP.
Mat responses(tdata2->getResponses().rows, 3, CV_32FC1, Scalar(0));
for (int i = 0; i < tdata2->getResponses().rows; i++)
responses.at<float>(i, static_cast<int>(tdata2->getResponses().at<float>(i, 0))) = 1;
Ptr<TrainData> tdata = TrainData::create(tdata2->getSamples(), ml::ROW_SAMPLE, responses);
ASSERT_FALSE(tdata.empty());
// Fix the RNG state so that training is reproducible across runs.
RNG& rng = theRNG();
rng.state = 0;
tdata->setTrainTestSplitRatio(0.8);
vector<int> methodType;
methodType.push_back(ml::ANN_MLP::RPROP);
methodType.push_back(ml::ANN_MLP::ANNEAL);
// methodType.push_back(ml::ANN_MLP::BACKPROP); -----> NO BACKPROP TEST
vector<String> methodName;
methodName.push_back("_rprop");
methodName.push_back("_anneal");
// methodName.push_back("_backprop"); -----> NO BACKPROP TEST
#ifdef GENERATE_TESTDATA
Ptr<ml::ANN_MLP> xx = ml::ANN_MLP_ANNEAL::create();
Mat_<int> layerSizesXX(1, 4);
layerSizesXX(0, 0) = tdata->getNVars();
layerSizesXX(0, 1) = 30;
layerSizesXX(0, 2) = 30;
layerSizesXX(0, 3) = tdata->getResponses().cols;
xx->setLayerSizes(layerSizesXX);
xx->setActivationFunction(ml::ANN_MLP::SIGMOID_SYM);
xx->setTrainMethod(ml::ANN_MLP::RPROP);
xx->setTermCriteria(TermCriteria(TermCriteria::COUNT, 1, 0.01));
xx->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE + ml::ANN_MLP::NO_INPUT_SCALE);
FileStorage fs;
fs.open(dataname + "_init_weight.yml.gz", FileStorage::WRITE + FileStorage::BASE64);
xx->write(fs);
fs.release();
#endif
for (size_t i = 0; i < methodType.size(); i++)
{
FileStorage fs;
fs.open(dataname + "_init_weight.yml.gz", FileStorage::READ + FileStorage::BASE64);
Ptr<ml::ANN_MLP> x = ml::ANN_MLP_ANNEAL::create();
x->read(fs.root());
x->setTrainMethod(methodType[i]);
if (methodType[i] == ml::ANN_MLP::ANNEAL)
{
x->setAnnealInitialT(12);
x->setAnnealFinalT(0.15);
x->setAnnealCoolingRatio(0.96);
x->setAnnealItePerStep(11);
}
x->setTermCriteria(TermCriteria(TermCriteria::COUNT, 100, 0.01));
x->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE + ml::ANN_MLP::NO_INPUT_SCALE + ml::ANN_MLP::UPDATE_WEIGHTS);
ASSERT_TRUE(x->isTrained()) << "Could not train networks with " << methodName[i];
#ifdef GENERATE_TESTDATA
x->save(dataname + methodName[i] + ".yml.gz");
#endif
Ptr<ml::ANN_MLP> y = Algorithm::load<ANN_MLP>(dataname + methodName[i] + ".yml.gz");
ASSERT_TRUE(y != NULL) << "Could not load " << dataname + methodName[i] + ".yml.gz";
Mat testSamples = tdata->getTestSamples();
Mat rx, ry, dst;
// The reloaded network must reproduce the trained weights exactly, layer by layer.
for (int j = 0; j < 4; j++)
{
rx = x->getWeights(j);
ry = y->getWeights(j);
double n = cvtest::norm(rx, ry, NORM_INF);
EXPECT_LT(n, FLT_EPSILON) << "Weights are not equal for " << dataname + methodName[i] + ".yml and " << methodName[i] << " layer : " << j;
}
x->predict(testSamples, rx);
y->predict(testSamples, ry);
double n = cvtest::norm(rx, ry, NORM_INF);
EXPECT_LT(n, FLT_EPSILON) << "Predict are not equal for " << dataname + methodName[i] + ".yml and " << methodName[i];
}
}
// 6. dtree
// 7. boost
...
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
void DrawTravelMap(Mat &img, vector<Point> &p, vector<int> &n);
class TravelSalesman : public ml::SimulatedAnnealingSolver
{
private:
vector<Point> &posCity;
vector<int> &next;
RNG rng;
int d0, d1, d2, d3;
public:
TravelSalesman(vector<Point> &p, vector<int> &n) : posCity(p), next(n)
{
rng = theRNG();
}
/** Gives the energy value for the current state of the system (total tour length).*/
virtual double energy();
/** Changes the state of the system (random perturbation of the tour).*/
virtual void changedState();
/** Reverts the system to the previous state.*/
virtual void reverseChangedState();
};
void TravelSalesman::changedState()
{
// Pick a random city d0 and the next three cities on the tour
// (d0 -> d1 -> d2 -> d3), then swap the visiting order of d1 and d2
// so the tour becomes d0 -> d2 -> d1 -> d3.
d0 = rng.uniform(0, static_cast<int>(posCity.size()));
d1 = next[d0];
d2 = next[d1];
d3 = next[d2];
next[d0] = d2;
next[d2] = d1;
next[d1] = d3;
}
void TravelSalesman::reverseChangedState()
{
// Restore the original order d0 -> d1 -> d2 -> d3.
next[d0] = d1;
next[d1] = d2;
next[d2] = d3;
}
double TravelSalesman::energy()
{
// Total tour length: sum of the distances from each city to its successor.
double e = 0;
for (size_t i = 0; i < next.size(); i++)
{
e += norm(posCity[i] - posCity[next[i]]);
}
return e;
}
void DrawTravelMap(Mat &img, vector<Point> &p, vector<int> &n)
{
for (size_t i = 0; i < n.size(); i++)
{
circle(img,p[i],5,Scalar(0,0,255),2);
line(img,p[i],p[n[i]],Scalar(0,255,0),2);
}
}
int main(void)
{
int nbCity = 40;
Mat img(500, 500, CV_8UC3, Scalar::all(0));
RNG &rng = theRNG();
int radius = static_cast<int>(img.cols*0.45);
Point center(img.cols/2, img.rows/2);
// Place the cities at random angles on a circle; the initial tour simply
// visits them in index order, which is generally far from optimal.
vector<Point> posCity(nbCity);
vector<int> next(nbCity);
for (size_t i = 0; i < posCity.size(); i++)
{
double theta = rng.uniform(0., 2 * CV_PI);
posCity[i].x = static_cast<int>(radius*cos(theta)) + center.x;
posCity[i].y = static_cast<int>(radius*sin(theta)) + center.y;
next[i] = (i + 1) % nbCity;
}
TravelSalesman ts(posCity, next);
ts.setCoolingRatio(0.99);
ts.setInitialTemperature(100);
ts.setIterPerStep(10000*nbCity);
// Each call to run() anneals over a narrow temperature band, initially 100 down to 97.
ts.setFinalTemperature(100*0.97);
DrawTravelMap(img,posCity,next);
imshow("Map",img);
waitKey(10);
for (int i = 0; i < 100; i++)
{
// Anneal over the current temperature band, redraw the tour, then slide
// the band 3% lower for the next pass.
ts.run();
img = Mat::zeros(img.size(), CV_8UC3);
DrawTravelMap(img, posCity, next);
imshow("Map", img);
waitKey(10);
double ti = ts.getFinalTemperature();
cout << ti << " -> " << ts.energy() << "\n";
ts.setInitialTemperature(ti);
ts.setFinalTemperature(ti*0.97);
}
return 0;
}