Merge pull request #15959 from mshabunin:refactor-ml-tests

ml: refactored tests * use parametrized tests where appropriate * use stable theRNG in most tests * use modern style with EXPECT_/ASSERT_ checks

Merge pull request #15959 from mshabunin:refactor-ml-tests
ml: refactored tests * use parametrized tests where appropriate * use stable theRNG in most tests * use modern style with EXPECT_/ASSERT_ checks
5ff1faba · Maksim Shabunin · Alexander Alekhin · 9e906d9e · 5ff1faba · 5ff1faba
Commit 5ff1faba authored Nov 25, 2019 by Maksim Shabunin Committed by Alexander Alekhin Nov 25, 2019
16 changed files
--- a/modules/ml/test/test_ann.cpp
+++ b/modules/ml/test/test_ann.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "test_precomp.hpp"
+// #define GENERATE_TESTDATA
+namespace opencv_test { namespace {
+struct Activation // Pairs an ANN_MLP activation function id with a printable name.
+{
+    int id; // value passed to setActivationFunction() by the test below
+    const char * name; // written by PrintTo() and used to build the model file name
+};
+void PrintTo(const Activation &a, std::ostream *os) { *os << a.name; } // writes only the name to the stream
+Activation activation_list[] = // values handed to testing::ValuesIn() when ML_ANN_Params is instantiated
+{
+    { ml::ANN_MLP::IDENTITY, "identity" },
+    { ml::ANN_MLP::SIGMOID_SYM, "sigmoid_sym" },
+    { ml::ANN_MLP::GAUSSIAN, "gaussian" },
+    { ml::ANN_MLP::RELU, "relu" },
+    { ml::ANN_MLP::LEAKYRELU, "leakyrelu" },
+};
+typedef testing::TestWithParam< Activation > ML_ANN_Params; // fixture parametrized by one Activation entry
+TEST_P(ML_ANN_Params, ActivationFunction) // Trains an MLP with the given activation and compares its predictions with those of a stored model.
+{
+    const Activation &activation = GetParam();
+    const string dataname = "waveform";
+    const string data_path = findDataFile(dataname + ".data");
+    const string model_name = dataname + "_" + activation.name + ".yml"; // e.g. "waveform_identity.yml"
+    Ptr<TrainData> tdata = TrainData::loadFromCSV(data_path, 0);
+    ASSERT_FALSE(tdata.empty());
+    // hack? Pin the global RNG state only while the train/test split is set, then restore the previous state.
+    const uint64 old_state = theRNG().state;
+    theRNG().state = 1027401484159173092;
+    tdata->setTrainTestSplit(500);
+    theRNG().state = old_state;
+    Mat_<int> layerSizes(1, 4); // four layer sizes, filled in below
+    layerSizes(0, 0) = tdata->getNVars(); // first layer: one entry per variable
+    layerSizes(0, 1) = 100;
+    layerSizes(0, 2) = 100;
+    layerSizes(0, 3) = tdata->getResponses().cols; // last layer: one entry per response column
+    Mat testSamples = tdata->getTestSamples();
+    Mat rx, ry; // rx: predictions of the freshly trained model, ry: predictions of the loaded model
+    {
+        Ptr<ml::ANN_MLP> x = ml::ANN_MLP::create();
+        x->setActivationFunction(activation.id);
+        x->setLayerSizes(layerSizes);
+        x->setTrainMethod(ml::ANN_MLP::RPROP, 0.01, 0.1);
+        x->setTermCriteria(TermCriteria(TermCriteria::COUNT, 300, 0.01));
+        x->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE);
+        ASSERT_TRUE(x->isTrained());
+        x->predict(testSamples, rx);
+#ifdef GENERATE_TESTDATA
+        x->save(cvtest::TS::ptr()->get_data_path() + model_name); // only when regenerating the reference model file
+#endif
+    }
+    {
+        const string model_path = findDataFile(model_name);
+        Ptr<ml::ANN_MLP> y = Algorithm::load<ANN_MLP>(model_path);
+        ASSERT_TRUE(y);
+        y->predict(testSamples, ry);
+        EXPECT_MAT_NEAR(rx, ry, FLT_EPSILON); // trained and stored models must agree within FLT_EPSILON
+    }
+}
+INSTANTIATE_TEST_CASE_P(/**/, ML_ANN_Params, testing::ValuesIn(activation_list)); // one test instance per activation_list entry
+//==================================================================================================
+CV_ENUM(ANN_MLP_METHOD, ANN_MLP::RPROP, ANN_MLP::ANNEAL)
+// Test parameters: training method id, method name used in the data file names,
+// and the number of leading samples taken from the data set.
+typedef tuple<ANN_MLP_METHOD, string, int> ML_ANN_METHOD_Params;
+typedef TestWithParam<ML_ANN_METHOD_Params> ML_ANN_METHOD;
+// Continues training from stored initial weights with the requested method, then checks
+// the resulting weights and predictions against a stored model and stored "gold" responses.
+TEST_P(ML_ANN_METHOD, Test)
+{
+    int methodType = get<0>(GetParam());
+    string methodName = get<1>(GetParam());
+    int N = get<2>(GetParam());
+    String original_path = findDataFile("waveform.data");
+    string dataname = "waveform_" + methodName;
+    string weight_name = dataname + "_init_weight.yml.gz";
+    string model_name = dataname + ".yml.gz";
+    string response_name = dataname + "_response.yml.gz";
+    Ptr<TrainData> tdata2 = TrainData::loadFromCSV(original_path, 0);
+    ASSERT_FALSE(tdata2.empty());
+    // Use only the first N samples.
+    Mat samples = tdata2->getSamples()(Range(0, N), Range::all());
+    // Build an N x 3 response matrix: row i gets a 1 in the column given by the original response value.
+    Mat responses(N, 3, CV_32FC1, Scalar(0));
+    for (int i = 0; i < N; i++)
+        responses.at<float>(i, static_cast<int>(tdata2->getResponses().at<float>(i, 0))) = 1;
+    Ptr<TrainData> tdata = TrainData::create(samples, ml::ROW_SAMPLE, responses);
+    ASSERT_FALSE(tdata.empty());
+    // hack? Pin the global RNG state only while the train/test split is set, then restore the previous state.
+    const uint64 old_state = theRNG().state;
+    theRNG().state = 0;
+    tdata->setTrainTestSplitRatio(0.8);
+    theRNG().state = old_state;
+    Mat testSamples = tdata->getTestSamples();
+    // train 1st stage
+    Ptr<ml::ANN_MLP> xx = ml::ANN_MLP_ANNEAL::create();
+    Mat_<int> layerSizes(1, 4);
+    layerSizes(0, 0) = tdata->getNVars();
+    layerSizes(0, 1) = 30;
+    layerSizes(0, 2) = 30;
+    layerSizes(0, 3) = tdata->getResponses().cols;
+    xx->setLayerSizes(layerSizes);
+    xx->setActivationFunction(ml::ANN_MLP::SIGMOID_SYM);
+    xx->setTrainMethod(ml::ANN_MLP::RPROP);
+    xx->setTermCriteria(TermCriteria(TermCriteria::COUNT, 1, 0.01));
+    xx->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE + ml::ANN_MLP::NO_INPUT_SCALE);
+#ifdef GENERATE_TESTDATA
+    {
+        FileStorage fs;
+        fs.open(cvtest::TS::ptr()->get_data_path() + weight_name, FileStorage::WRITE + FileStorage::BASE64);
+        xx->write(fs);
+    }
+#endif
+    // train 2nd stage
+    Mat r_gold;
+    Ptr<ml::ANN_MLP> x = ml::ANN_MLP_ANNEAL::create();
+    {
+        const string weight_file = findDataFile(weight_name);
+        FileStorage fs;
+        fs.open(weight_file, FileStorage::READ);
+        // Fail with a clear message instead of reading from an unopened storage.
+        ASSERT_TRUE(fs.isOpened()) << "Can't open initial weights file: " << weight_file;
+        x->read(fs.root());
+    }
+    x->setTrainMethod(methodType);
+    if (methodType == ml::ANN_MLP::ANNEAL)
+    {
+        x->setAnnealEnergyRNG(RNG(CV_BIG_INT(0xffffffff)));
+        x->setAnnealInitialT(12);
+        x->setAnnealFinalT(0.15);
+        x->setAnnealCoolingRatio(0.96);
+        x->setAnnealItePerStep(11);
+    }
+    x->setTermCriteria(TermCriteria(TermCriteria::COUNT, 100, 0.01));
+    x->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE + ml::ANN_MLP::NO_INPUT_SCALE + ml::ANN_MLP::UPDATE_WEIGHTS);
+    ASSERT_TRUE(x->isTrained());
+#ifdef GENERATE_TESTDATA
+    x->save(cvtest::TS::ptr()->get_data_path() + model_name);
+    x->predict(testSamples, r_gold);
+    {
+        FileStorage fs_response(cvtest::TS::ptr()->get_data_path() + response_name, FileStorage::WRITE + FileStorage::BASE64);
+        fs_response << "response" << r_gold;
+    }
+#endif
+    {
+        const string response_file = findDataFile(response_name);
+        FileStorage fs_response(response_file, FileStorage::READ);
+        fs_response["response"] >> r_gold;
+    }
+    ASSERT_FALSE(r_gold.empty());
+    // verify
+    const string model_file = findDataFile(model_name);
+    Ptr<ml::ANN_MLP> y = Algorithm::load<ANN_MLP>(model_file);
+    ASSERT_TRUE(y);
+    Mat rx, ry;
+    // Compare the weights of the trained model (x) and the stored model (y) for indices 0..3.
+    for (int j = 0; j < 4; j++)
+    {
+        rx = x->getWeights(j);
+        ry = y->getWeights(j);
+        EXPECT_MAT_NEAR(rx, ry, FLT_EPSILON) << "Weights are not equal for layer: " << j;
+    }
+    x->predict(testSamples, rx);
+    y->predict(testSamples, ry);
+    EXPECT_MAT_NEAR(ry, rx, FLT_EPSILON) << "Predict are not equal to result of the saved model";
+    EXPECT_MAT_NEAR(r_gold, rx, FLT_EPSILON) << "Predict are not equal to 'gold' response";
+}
+INSTANTIATE_TEST_CASE_P(/*none*/, ML_ANN_METHOD,
+    testing::Values(
+        ML_ANN_METHOD_Params(ml::ANN_MLP::RPROP, "rprop", 5000),
+        ML_ANN_METHOD_Params(ml::ANN_MLP::ANNEAL, "anneal", 1000)
+        // ML_ANN_METHOD_Params(ml::ANN_MLP::BACKPROP, "backprop", 500) -----> NO BACKPROP TEST
+    )
+);
+}} // namespace
--- a/modules/ml/test/test_bayes.cpp
+++ b/modules/ml/test/test_bayes.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "test_precomp.hpp"
+namespace opencv_test { namespace {
+TEST(ML_NBAYES, regression_5911) // Checks that per-row, bulk and non-continuous-output predictProb() calls give identical results.
+{
+    int N=12;
+    Ptr<ml::NormalBayesClassifier> nb = cv::ml::NormalBayesClassifier::create();
+    // data: N rows of 4 values (three distinct rows, each repeated four times)
+    float X_data[] = {
+        1,2,3,4,  1,2,3,4,   1,2,3,4,    1,2,3,4,
+        5,5,5,5,  5,5,5,5,   5,5,5,5,    5,5,5,5,
+        4,3,2,1,  4,3,2,1,   4,3,2,1,    4,3,2,1
+    };
+    Mat_<float> X(N, 4, X_data);
+    // labels: one class id (0, 1 or 2) per row of X
+    int Y_data[] = { 0,0,0,0, 1,1,1,1, 2,2,2,2 };
+    Mat_<int> Y(N, 1, Y_data);
+    nb->train(X, ml::ROW_SAMPLE, Y);
+    // single prediction: one predictProb() call per row, results stacked into R1 / P1
+    Mat R1,P1;
+    for (int i=0; i<N; i++)
+    {
+        Mat r,p;
+        nb->predictProb(X.row(i), r, p);
+        R1.push_back(r);
+        P1.push_back(p);
+    }
+    // bulk prediction (continuous memory):
+    Mat R2,P2;
+    nb->predictProb(X, R2, P2);
+    EXPECT_EQ(255 * R2.total(), sum(R1 == R2)[0]); // passes only if the comparison mask sums to 255 per element
+    EXPECT_EQ(255 * P2.total(), sum(P1 == P2)[0]);
+    // bulk prediction, with non-continuous memory storage
+    Mat R3_(N, 1+1, CV_32S), // one column wider than the result written into col(0)
+        P3_(N, 3+1, CV_32F); // one column wider than the result written into colRange(0,3)
+    nb->predictProb(X, R3_.col(0), P3_.colRange(0,3));
+    Mat R3 = R3_.col(0).clone(), // cloned copies of the written sub-matrices, used for the comparison
+        P3 = P3_.colRange(0,3).clone();
+    EXPECT_EQ(255 * R3.total(), sum(R1 == R3)[0]);
+    EXPECT_EQ(255 * P3.total(), sum(P1 == P3)[0]);
+}
+}} // namespace
--- a/modules/ml/test/test_em.cpp
+++ b/modules/ml/test/test_em.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "test_precomp.hpp"
+namespace opencv_test { namespace {
+CV_ENUM(EM_START_STEP, EM::START_AUTO_STEP, EM::START_M_STEP, EM::START_E_STEP)
+CV_ENUM(EM_COV_MAT, EM::COV_MAT_GENERIC, EM::COV_MAT_DIAGONAL, EM::COV_MAT_SPHERICAL)
+typedef testing::TestWithParam< tuple<EM_START_STEP, EM_COV_MAT> > ML_EM_Params; // parameters: (start step, covariance matrix type)
+TEST_P(ML_EM_Params, accuracy) // Trains EM on generated 2-column data and bounds the labelling error on train and test sets.
+{
+    const int nclusters = 3;
+    const int sizesArr[] = { 500, 700, 800 }; // sample counts passed to generateData() / calcErr() via 'sizes'
+    const vector<int> sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) );
+    const int pointsCount = sizesArr[0] + sizesArr[1] + sizesArr[2];
+    Mat means;
+    vector<Mat> covs;
+    defaultDistribs( means, covs, CV_64FC1 );
+    Mat trainData(pointsCount, 2, CV_64FC1 );
+    Mat trainLabels;
+    generateData( trainData, trainLabels, sizes, means, covs, CV_64FC1, CV_32SC1 );
+    Mat testData( pointsCount, 2, CV_64FC1 );
+    Mat testLabels;
+    generateData( testData, testLabels, sizes, means, covs, CV_64FC1, CV_32SC1 );
+    Mat probs(trainData.rows, nclusters, CV_64FC1, cv::Scalar(1)); // all-ones matrix, used only by the trainM() branch
+    Mat weights(1, nclusters, CV_64FC1, cv::Scalar(1)); // all-ones row, used only by the trainE() branch
+    TermCriteria termCrit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 100, FLT_EPSILON);
+    int startStep = get<0>(GetParam());
+    int covMatType = get<1>(GetParam());
+    cv::Mat labels;
+    Ptr<EM> em = EM::create();
+    em->setClustersNumber(nclusters);
+    em->setCovarianceMatrixType(covMatType);
+    em->setTermCriteria(termCrit);
+    if( startStep == EM::START_AUTO_STEP ) // pick the training entry point that matches the start step parameter
+        em->trainEM( trainData, noArray(), labels, noArray() );
+    else if( startStep == EM::START_E_STEP )
+        em->trainE( trainData, means, covs, weights, noArray(), labels, noArray() );
+    else if( startStep == EM::START_M_STEP )
+        em->trainM( trainData, probs, noArray(), labels, noArray() );
+    {
+        SCOPED_TRACE("Train");
+        float err = 1000; // sentinel above the threshold, so an unset value fails the check below
+        EXPECT_TRUE(calcErr( labels, trainLabels, sizes, err , false, false ));
+        EXPECT_LE(err, 0.008f);
+    }
+    {
+        SCOPED_TRACE("Test");
+        float err = 1000;
+        labels.create( testData.rows, 1, CV_32SC1 );
+        for( int i = 0; i < testData.rows; i++ ) // label each test row individually with predict2()
+        {
+            Mat sample = testData.row(i);
+            Mat out_probs;
+            labels.at<int>(i) = static_cast<int>(em->predict2( sample, out_probs )[1]); // element [1] of the result is stored as the label
+        }
+        EXPECT_TRUE(calcErr( labels, testLabels, sizes, err, false, false ));
+        EXPECT_LE(err, 0.008f);
+    }
+}
+INSTANTIATE_TEST_CASE_P(/**/, ML_EM_Params, // every combination of start step and covariance matrix type
+    testing::Combine(
+        testing::Values(EM::START_AUTO_STEP, EM::START_M_STEP, EM::START_E_STEP),
+        testing::Values(EM::COV_MAT_GENERIC, EM::COV_MAT_DIAGONAL, EM::COV_MAT_SPHERICAL)
+    ));
+//==================================================================================================
+// Trains a small EM model, writes it to a temporary file, loads it back and checks that the
+// reloaded model returns the same predict2() value (element [1]) for every sample.
+TEST(ML_EM, save_load)
+{
+    const int nclusters = 2;
+    Mat_<double> samples(3, 1);
+    samples << 1., 2., 3.;
+    std::vector<double> firstResult; // predict2()[1] of the original model, one entry per sample
+    string filename = cv::tempfile(".xml");
+    {
+        Mat labels;
+        Ptr<EM> em = EM::create();
+        em->setClustersNumber(nclusters);
+        em->trainEM(samples, noArray(), labels, noArray());
+        for( int i = 0; i < samples.rows; i++)
+        {
+            Vec2d res = em->predict2(samples.row(i), noArray());
+            firstResult.push_back(res[1]);
+        }
+        {
+            FileStorage fs = FileStorage(filename, FileStorage::WRITE);
+            ASSERT_NO_THROW(fs << "em" << "{");
+            ASSERT_NO_THROW(em->write(fs));
+            ASSERT_NO_THROW(fs << "}");
+        }
+    }
+    {
+        Ptr<EM> em;
+        ASSERT_NO_THROW(em = Algorithm::load<EM>(filename));
+        // Stop here if nothing was loaded: the loop below dereferences em.
+        ASSERT_FALSE(em.empty());
+        for( int i = 0; i < samples.rows; i++)
+        {
+            SCOPED_TRACE(i);
+            Vec2d res = em->predict2(samples.row(i), noArray());
+            EXPECT_DOUBLE_EQ(firstResult[i], res[1]);
+        }
+    }
+    remove(filename.c_str());
+}
+//==================================================================================================
+// Classifies the spambase data with two EM models (one per class) and bounds the
+// misclassification rate on the training half and on the remaining half.
+TEST(ML_EM, classification)
+{
+    // This test classifies spam by the following way:
+    // 1. estimates distributions of "spam" / "not spam"
+    // 2. predict classID using Bayes classifier for estimated distributions.
+    string dataFilename = findDataFile("spambase.data");
+    Ptr<TrainData> data = TrainData::loadFromCSV(dataFilename, 0);
+    ASSERT_FALSE(data.empty());
+    Mat samples = data->getSamples();
+    ASSERT_EQ(samples.cols, 57);
+    Mat responses = data->getResponses();
+    // Mask with 1 for training samples: the first half is set, then entries are swapped at random.
+    vector<int> trainSamplesMask(samples.rows, 0);
+    const int trainSamplesCount = (int)(0.5f * samples.rows);
+    const int testSamplesCount = samples.rows - trainSamplesCount;
+    for(int i = 0; i < trainSamplesCount; i++)
+        trainSamplesMask[i] = 1;
+    RNG &rng = cv::theRNG();
+    for(size_t i = 0; i < trainSamplesMask.size(); i++)
+    {
+        int i1 = rng(static_cast<unsigned>(trainSamplesMask.size()));
+        int i2 = rng(static_cast<unsigned>(trainSamplesMask.size()));
+        std::swap(trainSamplesMask[i1], trainSamplesMask[i2]);
+    }
+    // Split the training samples by response: 0 goes to samples0, everything else to samples1.
+    Mat samples0, samples1;
+    for(int i = 0; i < samples.rows; i++)
+    {
+        if(trainSamplesMask[i])
+        {
+            Mat sample = samples.row(i);
+            int resp = (int)responses.at<float>(i);
+            if(resp == 0)
+                samples0.push_back(sample);
+            else
+                samples1.push_back(sample);
+        }
+    }
+    Ptr<EM> model0 = EM::create();
+    model0->setClustersNumber(3);
+    model0->trainEM(samples0, noArray(), noArray(), noArray());
+    Ptr<EM> model1 = EM::create();
+    model1->setClustersNumber(3);
+    model1->trainEM(samples1, noArray(), noArray(), noArray());
+    // confusion matrices, indexed as (actual response, predicted class)
+    Mat_<int> trainCM(2, 2, 0);
+    Mat_<int> testCM(2, 2, 0);
+    const double lambda = 1.;
+    for(int i = 0; i < samples.rows; i++)
+    {
+        Mat sample = samples.row(i);
+        double sampleLogLikelihoods0 = model0->predict2(sample, noArray())[0];
+        double sampleLogLikelihoods1 = model1->predict2(sample, noArray())[0];
+        int classID = (sampleLogLikelihoods0 >= lambda * sampleLogLikelihoods1) ? 0 : 1;
+        int resp = (int)responses.at<float>(i);
+        // Fatal check: resp is used as a row index into the 2x2 matrices right below,
+        // so any other value must stop the test instead of indexing out of range.
+        ASSERT_TRUE(resp == 0 || resp == 1);
+        if(trainSamplesMask[i])
+            trainCM(resp, classID)++;
+        else
+            testCM(resp, classID)++;
+    }
+    // Off-diagonal entries are the misclassified samples.
+    EXPECT_LE((double)(trainCM(1,0) + trainCM(0,1)) / trainSamplesCount, 0.23);
+    EXPECT_LE((double)(testCM(1,0) + testCM(0,1)) / testSamplesCount, 0.26);
+}
+}} // namespace
--- a/modules/ml/test/test_emknearestkmeans.cpp
+++ b/modules/ml/test/test_emknearestkmeans.cpp
--- a/modules/ml/test/test_gbttest.cpp
+++ b/modules/ml/test/test_gbttest.cpp
-#include "test_precomp.hpp"
-#if 0
-using namespace std;
-class CV_GBTreesTest : public cvtest::BaseTest
-{
-public:
-    CV_GBTreesTest();
-    ~CV_GBTreesTest();
-protected:
-    void run(int);
-    int TestTrainPredict(int test_num);
-    int TestSaveLoad();
-    int checkPredictError(int test_num);
-    int checkLoadSave();
-    string model_file_name1;
-    string model_file_name2;
-    string* datasets;
-    string data_path;
-    CvMLData* data;
-    CvGBTrees* gtb;
-    vector<float> test_resps1;
-    vector<float> test_resps2;
-    int64 initSeed;
-};
-int _get_len(const CvMat* mat)
-{
-    return (mat->cols > mat->rows) ? mat->cols : mat->rows;
-}
-CV_GBTreesTest::CV_GBTreesTest()
-{
-    int64 seeds[] = { CV_BIG_INT(0x00009fff4f9c8d52),
-                      CV_BIG_INT(0x0000a17166072c7c),
-                      CV_BIG_INT(0x0201b32115cd1f9a),
-                      CV_BIG_INT(0x0513cb37abcd1234),
-                      CV_BIG_INT(0x0001a2b3c4d5f678)
-                    };
-    int seedCount = sizeof(seeds)/sizeof(seeds[0]);
-    cv::RNG& rng = cv::theRNG();
-    initSeed = rng.state;
-    rng.state = seeds[rng(seedCount)];
-    datasets = 0;
-    data = 0;
-    gtb = 0;
-}
-CV_GBTreesTest::~CV_GBTreesTest()
-{
-    if (data)
-        delete data;
-    delete[] datasets;
-    cv::theRNG().state = initSeed;
-}
-int CV_GBTreesTest::TestTrainPredict(int test_num)
-{
-    int code = cvtest::TS::OK;
-    int weak_count = 200;
-    float shrinkage = 0.1f;
-    float subsample_portion = 0.5f;
-    int max_depth = 5;
-    bool use_surrogates = false;
-    int loss_function_type = 0;
-    switch (test_num)
-    {
-        case (1) : loss_function_type = CvGBTrees::SQUARED_LOSS; break;
-        case (2) : loss_function_type = CvGBTrees::ABSOLUTE_LOSS; break;
-        case (3) : loss_function_type = CvGBTrees::HUBER_LOSS; break;
-        case (0) : loss_function_type = CvGBTrees::DEVIANCE_LOSS; break;
-        default  :
-            {
-            ts->printf( cvtest::TS::LOG, "Bad test_num value in CV_GBTreesTest::TestTrainPredict(..) function." );
-            return cvtest::TS::FAIL_BAD_ARG_CHECK;
-            }
-    }
-    int dataset_num = test_num == 0 ? 0 : 1;
-    if (!data)
-    {
-        data = new CvMLData();
-        data->set_delimiter(',');
-        if (data->read_csv(datasets[dataset_num].c_str()))
-        {
-            ts->printf( cvtest::TS::LOG, "File reading error." );
-            return cvtest::TS::FAIL_INVALID_TEST_DATA;
-        }
-        if (test_num == 0)
-        {
-            data->set_response_idx(57);
-            data->set_var_types("ord[0-56],cat[57]");
-        }
-        else
-        {
-            data->set_response_idx(13);
-            data->set_var_types("ord[0-2,4-13],cat[3]");
-            subsample_portion = 0.7f;
-        }
-        int train_sample_count = cvFloor(_get_len(data->get_responses())*0.5f);
-        CvTrainTestSplit spl( train_sample_count );
-        data->set_train_test_split( &spl );
-    }
-    data->mix_train_and_test_idx();
-    if (gtb) delete gtb;
-    gtb = new CvGBTrees();
-    bool tmp_code = true;
-    tmp_code = gtb->train(data, CvGBTreesParams(loss_function_type, weak_count,
-                          shrinkage, subsample_portion,
-                          max_depth, use_surrogates));
-    if (!tmp_code)
-    {
-        ts->printf( cvtest::TS::LOG, "Model training was failed.");
-        return cvtest::TS::FAIL_INVALID_OUTPUT;
-    }
-    code = checkPredictError(test_num);
-    return code;
-}
-int CV_GBTreesTest::checkPredictError(int test_num)
-{
-    if (!gtb)
-        return cvtest::TS::FAIL_GENERIC;
-    //float mean[] = {5.430247f, 13.5654f, 12.6569f, 13.1661f};
-    //float sigma[] = {0.4162694f, 3.21161f, 3.43297f, 3.00624f};
-    float mean[] = {5.80226f, 12.68689f, 13.49095f, 13.19628f};
-    float sigma[] = {0.4764534f, 3.166919f, 3.022405f, 2.868722f};
-    float current_error = gtb->calc_error(data, CV_TEST_ERROR);
-    if ( abs( current_error - mean[test_num]) > 6*sigma[test_num] )
-    {
-        ts->printf( cvtest::TS::LOG, "Test error is out of range:\n"
-                    "abs(%f/*curEr*/ - %f/*mean*/ > %f/*6*sigma*/",
-                    current_error, mean[test_num], 6*sigma[test_num] );
-        return cvtest::TS::FAIL_BAD_ACCURACY;
-    }
-    return cvtest::TS::OK;
-}
-int CV_GBTreesTest::TestSaveLoad()
-{
-    if (!gtb)
-        return cvtest::TS::FAIL_GENERIC;
-    model_file_name1 = cv::tempfile();
-    model_file_name2 = cv::tempfile();
-    gtb->save(model_file_name1.c_str());
-    gtb->calc_error(data, CV_TEST_ERROR, &test_resps1);
-    gtb->load(model_file_name1.c_str());
-    gtb->calc_error(data, CV_TEST_ERROR, &test_resps2);
-    gtb->save(model_file_name2.c_str());
-    return checkLoadSave();
-}
-int CV_GBTreesTest::checkLoadSave()
-{
-    int code = cvtest::TS::OK;
-    // 1. compare files
-    ifstream f1( model_file_name1.c_str() ), f2( model_file_name2.c_str() );
-    string s1, s2;
-    int lineIdx = 0;
-    CV_Assert( f1.is_open() && f2.is_open() );
-    for( ; !f1.eof() && !f2.eof(); lineIdx++ )
-    {
-        getline( f1, s1 );
-        getline( f2, s2 );
-        if( s1.compare(s2) )
-        {
-            ts->printf( cvtest::TS::LOG, "first and second saved files differ in %n-line; first %n line: %s; second %n-line: %s",
-               lineIdx, lineIdx, s1.c_str(), lineIdx, s2.c_str() );
-            code = cvtest::TS::FAIL_INVALID_OUTPUT;
-        }
-    }
-    if( !f1.eof() || !f2.eof() )
-    {
-        ts->printf( cvtest::TS::LOG, "First and second saved files differ in %n-line; first %n line: %s; second %n-line: %s",
-            lineIdx, lineIdx, s1.c_str(), lineIdx, s2.c_str() );
-        code = cvtest::TS::FAIL_INVALID_OUTPUT;
-    }
-    f1.close();
-    f2.close();
-    // delete temporary files
-    remove( model_file_name1.c_str() );
-    remove( model_file_name2.c_str() );
-    // 2. compare responses
-    CV_Assert( test_resps1.size() == test_resps2.size() );
-    vector<float>::const_iterator it1 = test_resps1.begin(), it2 = test_resps2.begin();
-    for( ; it1 != test_resps1.end(); ++it1, ++it2 )
-    {
-        if( fabs(*it1 - *it2) > FLT_EPSILON )
-        {
-            ts->printf( cvtest::TS::LOG, "Responses predicted before saving and after loading are different" );
-            code = cvtest::TS::FAIL_INVALID_OUTPUT;
-        }
-    }
-    return code;
-}
-void CV_GBTreesTest::run(int)
-{
-    string dataPath = string(ts->get_data_path());
-    datasets = new string[2];
-    datasets[0] = dataPath + string("spambase.data"); /*string("dataset_classification.csv");*/
-    datasets[1] = dataPath + string("housing_.data");  /*string("dataset_regression.csv");*/
-    int code = cvtest::TS::OK;
-    for (int i = 0; i < 4; i++)
-    {
-        int temp_code = TestTrainPredict(i);
-        if (temp_code != cvtest::TS::OK)
-        {
-            code = temp_code;
-            break;
-        }
-        else if (i==0)
-        {
-            temp_code = TestSaveLoad();
-            if (temp_code != cvtest::TS::OK)
-                code = temp_code;
-            delete data;
-            data = 0;
-        }
-        delete gtb;
-        gtb = 0;
-    }
-    delete data;
-    data = 0;
-    ts->set_failed_test_info( code );
-}
-/////////////////////////////////////////////////////////////////////////////
-//////////////////// test registration  /////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////
-TEST(ML_GBTrees, regression) { CV_GBTreesTest test; test.safe_run(); }
-#endif
--- a/modules/ml/test/test_kmeans.cpp
+++ b/modules/ml/test/test_kmeans.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "test_precomp.hpp"
+namespace opencv_test { namespace {
+TEST(ML_KMeans, accuracy) // Runs kmeans() with three initialization flags on generated data and bounds the labelling error.
+{
+    const int iters = 100;
+    int sizesArr[] = { 5000, 7000, 8000 }; // sample counts passed to generateData() / calcErr() via 'sizes'
+    int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2];
+    Mat data( pointsCount, 2, CV_32FC1 ), labels;
+    vector<int> sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) );
+    Mat means;
+    vector<Mat> covs;
+    defaultDistribs( means, covs );
+    generateData( data, labels, sizes, means, covs, CV_32FC1, CV_32SC1 );
+    TermCriteria termCriteria( TermCriteria::COUNT, iters, 0.0);
+    {
+        SCOPED_TRACE("KMEANS_PP_CENTERS");
+        float err = 1000; // sentinel above the threshold, so an unset value fails the check below
+        Mat bestLabels;
+        kmeans( data, 3, bestLabels, termCriteria, 0, KMEANS_PP_CENTERS, noArray() );
+        EXPECT_TRUE(calcErr( bestLabels, labels, sizes, err , false ));
+        EXPECT_LE(err, 0.01f);
+    }
+    {
+        SCOPED_TRACE("KMEANS_RANDOM_CENTERS");
+        float err = 1000;
+        Mat bestLabels;
+        kmeans( data, 3, bestLabels, termCriteria, 0, KMEANS_RANDOM_CENTERS, noArray() );
+        EXPECT_TRUE(calcErr( bestLabels, labels, sizes, err, false ));
+        EXPECT_LE(err, 0.01f);
+    }
+    {
+        SCOPED_TRACE("KMEANS_USE_INITIAL_LABELS");
+        float err = 1000;
+        Mat bestLabels;
+        labels.copyTo( bestLabels ); // start from the generated labels
+        RNG &rng = cv::theRNG();
+        for( int i = 0; i < 0.5f * pointsCount; i++ ) // overwrite randomly chosen rows with a random label in 0..2
+        bestLabels.at<int>( rng.next() % pointsCount, 0 ) = rng.next() % 3;
+        kmeans( data, 3, bestLabels, termCriteria, 0, KMEANS_USE_INITIAL_LABELS, noArray() );
+        EXPECT_TRUE(calcErr( bestLabels, labels, sizes, err, false ));
+        EXPECT_LE(err, 0.01f);
+    }
+}
+}} // namespace
--- a/modules/ml/test/test_knearest.cpp
+++ b/modules/ml/test/test_knearest.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "test_precomp.hpp"
+namespace opencv_test { namespace {
+using cv::ml::TrainData;
+using cv::ml::EM;
+using cv::ml::KNearest;
+TEST(ML_KNearest, accuracy) // Trains KNearest on generated data and bounds the labelling error on a second generated set.
+{
+    int sizesArr[] = { 500, 700, 800 }; // sample counts passed to generateData() / calcErr() via 'sizes'
+    int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2];
+    Mat trainData( pointsCount, 2, CV_32FC1 ), trainLabels;
+    vector<int> sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) );
+    Mat means;
+    vector<Mat> covs;
+    defaultDistribs( means, covs );
+    generateData( trainData, trainLabels, sizes, means, covs, CV_32FC1, CV_32FC1 );
+    Mat testData( pointsCount, 2, CV_32FC1 );
+    Mat testLabels;
+    generateData( testData, testLabels, sizes, means, covs, CV_32FC1, CV_32FC1 );
+    {
+        SCOPED_TRACE("Default");
+        Mat bestLabels;
+        float err = 1000; // sentinel above the threshold, so an unset value fails the check below
+        Ptr<KNearest> knn = KNearest::create();
+        knn->train(trainData, ml::ROW_SAMPLE, trainLabels);
+        knn->findNearest(testData, 4, bestLabels); // k = 4
+        EXPECT_TRUE(calcErr( bestLabels, testLabels, sizes, err, true ));
+        EXPECT_LE(err, 0.01f);
+    }
+    {
+        // TODO: broken (the KDTREE variant below is compiled out with #if 0)
+#if 0
+        SCOPED_TRACE("KDTree");
+        Mat bestLabels;
+        float err = 1000;
+        Ptr<KNearest> knn = KNearest::create();
+        knn->setAlgorithmType(KNearest::KDTREE);
+        knn->train(trainData, ml::ROW_SAMPLE, trainLabels);
+        knn->findNearest(testData, 4, bestLabels);
+        EXPECT_TRUE(calcErr( bestLabels, testLabels, sizes, err, true ));
+        EXPECT_LE(err, 0.01f);
+#endif
+    }
+}
+TEST(ML_KNearest, regression_12347) // Checks findNearest() output shapes when K exceeds the number of training rows, then the labels for K = 2.
+{
+    Mat xTrainData = (Mat_<float>(5,2) << 1, 1.1, 1.1, 1, 2, 2, 2.1, 2, 2.1, 2.1); // 5 training rows of 2 values
+    Mat yTrainLabels = (Mat_<float>(5,1) << 1, 1, 2, 2, 2); // one label per training row
+    Ptr<KNearest> knn = KNearest::create();
+    knn->train(xTrainData, ml::ROW_SAMPLE, yTrainLabels);
+    Mat xTestData = (Mat_<float>(2,2) << 1.1, 1.1, 2, 2.2);
+    Mat zBestLabels, neighbours, dist;
+    // check output shapes: K is larger than the 5 training rows, so Kexp columns are expected
+    int K = 16, Kexp = std::min(K, xTrainData.rows);
+    knn->findNearest(xTestData, K, zBestLabels, neighbours, dist);
+    EXPECT_EQ(xTestData.rows, zBestLabels.rows);
+    EXPECT_EQ(neighbours.cols, Kexp);
+    EXPECT_EQ(dist.cols, Kexp);
+    // see if the result is still correct:
+    K = 2;
+    knn->findNearest(xTestData, K, zBestLabels, neighbours, dist);
+    EXPECT_EQ(1, zBestLabels.at<float>(0,0)); // first test row is expected to get label 1
+    EXPECT_EQ(2, zBestLabels.at<float>(1,0)); // second test row is expected to get label 2
+}
+}} // namespace
--- a/modules/ml/test/test_lr.cpp
+++ b/modules/ml/test/test_lr.cpp
-///////////////////////////////////////////////////////////////////////////////////////
+// This file is part of OpenCV project.
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
 // This is an implementation of the Logistic Regression algorithm in C++ in OpenCV.
@@ -11,92 +8,16 @@
 // Rahul Kavi rahulkavi[at]live[at]com
 //
-// contains a subset of data from the popular Iris Dataset (taken from "http://archive.ics.uci.edu/ml/datasets/Iris")
-// # You are free to use, change, or redistribute the code in any way you wish for
-// # non-commercial purposes, but please maintain the name of the original author.
-// # This code comes with no warranty of any kind.
-// #
-// # You are free to use, change, or redistribute the code in any way you wish for
-// # non-commercial purposes, but please maintain the name of the original author.
-// # This code comes with no warranty of any kind.
-// # Logistic Regression ALGORITHM
-//                           License Agreement
-//                For Open Source Computer Vision Library
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//   * Redistributions of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//   * Redistributions in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
 #include "test_precomp.hpp"
 namespace opencv_test { namespace {
-bool calculateError( const Mat& _p_labels, const Mat& _o_labels, float& error)
+TEST(ML_LR, accuracy)
-{
-    CV_TRACE_FUNCTION();
-    error = 0.0f;
-    float accuracy = 0.0f;
-    Mat _p_labels_temp;
-    Mat _o_labels_temp;
-    _p_labels.convertTo(_p_labels_temp, CV_32S);
-    _o_labels.convertTo(_o_labels_temp, CV_32S);
-    CV_Assert(_p_labels_temp.total() == _o_labels_temp.total());
-    CV_Assert(_p_labels_temp.rows == _o_labels_temp.rows);
-    accuracy = (float)countNonZero(_p_labels_temp == _o_labels_temp)/_p_labels_temp.rows;
-    error = 1 - accuracy;
-    return true;
-}
-//--------------------------------------------------------------------------------------------
-class CV_LRTest : public cvtest::BaseTest
-{
-public:
-    CV_LRTest() {}
-protected:
-    virtual void run( int start_from );
-};
-void CV_LRTest::run( int /*start_from*/ )
 {
-    CV_TRACE_FUNCTION();
+    std::string dataFileName = findDataFile("iris.data");
-    // initialize variables from the popular Iris Dataset
-    string dataFileName = ts->get_data_path() + "iris.data";
    Ptr<TrainData> tdata = TrainData::loadFromCSV(dataFileName, 0);
-    ASSERT_FALSE(tdata.empty()) << "Could not find test data file : " << dataFileName;
+    ASSERT_FALSE(tdata.empty());
-    // run LR classifier train classifier
    Ptr<LogisticRegression> p = LogisticRegression::create();
    p->setLearningRate(1.0);
    p->setIterations(10001);
@@ -105,121 +26,54 @@ void CV_LRTest::run( int /*start_from*/ )
    p->setMiniBatchSize(10);
    p->train(tdata);
-    // predict using the same data
    Mat responses;
    p->predict(tdata->getSamples(), responses);
-    // calculate error
+    float error = 1000;
-    int test_code = cvtest::TS::OK;
+    EXPECT_TRUE(calculateError(responses, tdata->getResponses(), error));
-    float error = 0.0f;
+    EXPECT_LE(error, 0.05f);
-    if(!calculateError(responses, tdata->getResponses(), error))
-    {
-        ts->printf(cvtest::TS::LOG, "Bad prediction labels\n" );
-        test_code = cvtest::TS::FAIL_INVALID_OUTPUT;
-    }
-    else if(error > 0.05f)
-    {
-        ts->printf(cvtest::TS::LOG, "Bad accuracy of (%f)\n", error);
-        test_code = cvtest::TS::FAIL_BAD_ACCURACY;
-    }
-    {
-        FileStorage s("debug.xml", FileStorage::WRITE);
-        s << "original" << tdata->getResponses();
-        s << "predicted1" << responses;
-        s << "learnt" << p->get_learnt_thetas();
-        s << "error" << error;
-        s.release();
-    }
-    ts->set_failed_test_info(test_code);
 }
-//--------------------------------------------------------------------------------------------
+//==================================================================================================
-class CV_LRTest_SaveLoad : public cvtest::BaseTest
-{
-public:
-    CV_LRTest_SaveLoad(){}
-protected:
-    virtual void run(int start_from);
-};
-void CV_LRTest_SaveLoad::run( int /*start_from*/ )
+TEST(ML_LR, save_load)
 {
-    CV_TRACE_FUNCTION();
+    string dataFileName = findDataFile("iris.data");
-    int code = cvtest::TS::OK;
-    // initialize variables from the popular Iris Dataset
-    string dataFileName = ts->get_data_path() + "iris.data";
    Ptr<TrainData> tdata = TrainData::loadFromCSV(dataFileName, 0);
-    ASSERT_FALSE(tdata.empty()) << "Could not find test data file : " << dataFileName;
+    ASSERT_FALSE(tdata.empty());
    Mat responses1, responses2;
    Mat learnt_mat1, learnt_mat2;
-    // train and save the classifier
    String filename = tempfile(".xml");
-    try
    {
-        // run LR classifier train classifier
        Ptr<LogisticRegression> lr1 = LogisticRegression::create();
        lr1->setLearningRate(1.0);
        lr1->setIterations(10001);
        lr1->setRegularization(LogisticRegression::REG_L2);
        lr1->setTrainMethod(LogisticRegression::BATCH);
        lr1->setMiniBatchSize(10);
-        lr1->train(tdata);
+        ASSERT_NO_THROW(lr1->train(tdata));
-        lr1->predict(tdata->getSamples(), responses1);
+        ASSERT_NO_THROW(lr1->predict(tdata->getSamples(), responses1));
+        ASSERT_NO_THROW(lr1->save(filename));
        learnt_mat1 = lr1->get_learnt_thetas();
-        lr1->save(filename);
    }
-    catch(...)
-    {
-        ts->printf(cvtest::TS::LOG, "Crash in write method.\n" );
-        ts->set_failed_test_info(cvtest::TS::FAIL_EXCEPTION);
-    }
-    // and load to another
-    try
    {
-        Ptr<LogisticRegression> lr2 = Algorithm::load<LogisticRegression>(filename);
+        Ptr<LogisticRegression> lr2;
-        lr2->predict(tdata->getSamples(), responses2);
+        ASSERT_NO_THROW(lr2 = Algorithm::load<LogisticRegression>(filename));
+        ASSERT_NO_THROW(lr2->predict(tdata->getSamples(), responses2));
        learnt_mat2 = lr2->get_learnt_thetas();
    }
-    catch(...)
+    // compare difference in prediction outputs and stored inputs
-    {
+    EXPECT_MAT_NEAR(responses1, responses2, 0.f);
-        ts->printf(cvtest::TS::LOG, "Crash in write method.\n" );
-        ts->set_failed_test_info(cvtest::TS::FAIL_EXCEPTION);
-    }
-    CV_Assert(responses1.rows == responses2.rows);
-    // compare difference in learnt matrices before and after loading from disk
    Mat comp_learnt_mats;
    comp_learnt_mats = (learnt_mat1 == learnt_mat2);
    comp_learnt_mats = comp_learnt_mats.reshape(1, comp_learnt_mats.rows*comp_learnt_mats.cols);
    comp_learnt_mats.convertTo(comp_learnt_mats, CV_32S);
    comp_learnt_mats = comp_learnt_mats/255;
-    // compare difference in prediction outputs and stored inputs
    // check if there is any difference between computed learnt mat and retrieved mat
+    EXPECT_EQ(comp_learnt_mats.rows, sum(comp_learnt_mats)[0]);
-    float errorCount = 0.0;
-    errorCount += 1 - (float)countNonZero(responses1 == responses2)/responses1.rows;
-    errorCount += 1 - (float)sum(comp_learnt_mats)[0]/comp_learnt_mats.rows;
-    if(errorCount>0)
-    {
-        ts->printf( cvtest::TS::LOG, "Different prediction results before writing and after reading (errorCount=%d).\n", errorCount );
-        code = cvtest::TS::FAIL_BAD_ACCURACY;
-    }
    remove( filename.c_str() );
-    ts->set_failed_test_info( code );
 }
-TEST(ML_LR, accuracy) { CV_LRTest test; test.safe_run(); }
-TEST(ML_LR, save_load) { CV_LRTest_SaveLoad test; test.safe_run(); }
 }} // namespace
--- a/modules/ml/test/test_mltests.cpp
+++ b/modules/ml/test/test_mltests.cpp
--- a/modules/ml/test/test_mltests2.cpp
+++ b/modules/ml/test/test_mltests2.cpp
--- a/modules/ml/test/test_precomp.hpp
+++ b/modules/ml/test/test_precomp.hpp
@@ -2,10 +2,15 @@
 #define __OPENCV_TEST_PRECOMP_HPP__
 #include "opencv2/ts.hpp"
+#include <opencv2/ts/cuda_test.hpp> // EXPECT_MAT_NEAR
 #include "opencv2/ml.hpp"
 #include "opencv2/core/core_c.h"
+#include <fstream>
+using std::ifstream;
 namespace opencv_test {
 using namespace cv::ml;
 #define CV_NBAYES   "nbayes"
@@ -19,8 +24,6 @@ using namespace cv::ml;
 #define CV_ERTREES  "ertrees"
 #define CV_SVMSGD   "svmsgd"
-enum { CV_TRAIN_ERROR=0, CV_TEST_ERROR=1 };
 using cv::Ptr;
 using cv::ml::StatModel;
 using cv::ml::TrainData;
@@ -34,58 +37,14 @@ using cv::ml::Boost;
 using cv::ml::RTrees;
 using cv::ml::SVMSGD;
-class CV_MLBaseTest : public cvtest::BaseTest
+void defaultDistribs( Mat& means, vector<Mat>& covs, int type=CV_32FC1 );
-{
+void generateData( Mat& data, Mat& labels, const vector<int>& sizes, const Mat& _means, const vector<Mat>& covs, int dataType, int labelType );
-public:
+int maxIdx( const vector<int>& count );
-    CV_MLBaseTest( const char* _modelName );
+bool getLabelsMap( const Mat& labels, const vector<int>& sizes, vector<int>& labelsMap, bool checkClusterUniq=true );
-    virtual ~CV_MLBaseTest();
+bool calcErr( const Mat& labels, const Mat& origLabels, const vector<int>& sizes, float& err, bool labelsEquivalent = true, bool checkClusterUniq=true );
-protected:
-    virtual int read_params( CvFileStorage* fs );
-    virtual void run( int startFrom );
-    virtual int prepare_test_case( int testCaseIdx );
-    virtual std::string& get_validation_filename();
-    virtual int run_test_case( int testCaseIdx ) = 0;
-    virtual int validate_test_results( int testCaseIdx ) = 0;
-    int train( int testCaseIdx );
-    float get_test_error( int testCaseIdx, std::vector<float> *resp = 0 );
-    void save( const char* filename );
-    void load( const char* filename );
-    Ptr<TrainData> data;
-    std::string modelName, validationFN;
-    std::vector<std::string> dataSetNames;
-    cv::FileStorage validationFS;
-    Ptr<StatModel> model;
-    std::map<int, int> cls_map;
-    int64 initSeed;
-};
-class CV_AMLTest : public CV_MLBaseTest
-{
-public:
-    CV_AMLTest( const char* _modelName );
-    virtual ~CV_AMLTest() {}
-protected:
-    virtual int run_test_case( int testCaseIdx );
-    virtual int validate_test_results( int testCaseIdx );
-};
-class CV_SLMLTest : public CV_MLBaseTest
-{
-public:
-    CV_SLMLTest( const char* _modelName );
-    virtual ~CV_SLMLTest() {}
-protected:
-    virtual int run_test_case( int testCaseIdx );
-    virtual int validate_test_results( int testCaseIdx );
-    std::vector<float> test_resps1, test_resps2; // predicted responses for test data
+// used in LR test
-    std::string fname1, fname2;
+bool calculateError( const Mat& _p_labels, const Mat& _o_labels, float& error);
-};
 } // namespace

--- a/modules/ml/test/test_rtrees.cpp
+++ b/modules/ml/test/test_rtrees.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "test_precomp.hpp"
+namespace opencv_test { namespace {
+// Trains a small 3-class random forest on random data and checks that
+// getVotes() is consistent with the forest size and with predict().
+TEST(ML_RTrees, getVotes)
+{
+    const int n = 12;
+    const int label_size = 3;
+    // RTrees for classification
+    Ptr<ml::RTrees> rt = cv::ml::RTrees::create();
+    // data
+    Mat data(n, 4, CV_32F);
+    randu(data, 0, 10);
+    // labels: three classes, four samples each
+    Mat labels = (Mat_<int>(n,1) << 0,0,0,0, 1,1,1,1, 2,2,2,2);
+    rt->train(data, ml::ROW_SAMPLE, labels);
+    // run function
+    Mat test(1, 4, CV_32F);
+    Mat result;
+    randu(test, 0, 10);
+    rt->getVotes(test, result, 0);
+    // Both rows are read as int below, so pin the layout before touching the data.
+    ASSERT_EQ(CV_32SC1, result.type());
+    ASSERT_GE(result.rows, 2);
+    ASSERT_GE(result.cols, label_size);
+    // count vote amount and find highest vote (row 1 holds the votes for the sample)
+    int count = 0;
+    int predicted_class = 0;
+    int max_votes = -1;
+    const int* result_row = result.ptr<int>(1);
+    for( int i = 0; i < label_size; i++ )
+    {
+        const int val = result_row[i];
+        if( max_votes < val )
+        {
+            max_votes = val;
+            predicted_class = i;
+        }
+        count += val;
+    }
+    // every tree casts exactly one vote
+    EXPECT_EQ(count, (int)rt->getRoots().size());
+    // Row 0 is treated as the class label of each column. Read it as int (the
+    // matrix element type) and convert the value, instead of reinterpreting the
+    // int bits as float, which only happened to match for label 0.
+    EXPECT_EQ((float)result.at<int>(0, predicted_class), rt->predict(test));
+}
+}} // namespace
--- a/modules/ml/test/test_save_load.cpp
+++ b/modules/ml/test/test_save_load.cpp
--- a/modules/ml/test/test_svmsgd.cpp
+++ b/modules/ml/test/test_svmsgd.cpp
--- a/modules/ml/test/test_svmtrainauto.cpp
+++ b/modules/ml/test/test_svmtrainauto.cpp
-/*M///////////////////////////////////////////////////////////////////////////////////////
+// This file is part of OpenCV project.
-//
+// It is subject to the license terms in the LICENSE file found in the top-level directory
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+// of this distribution and at http://opencv.org/license.html.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
 #include "test_precomp.hpp"
@@ -46,21 +9,11 @@ namespace opencv_test { namespace {
 using cv::ml::SVM;
 using cv::ml::TrainData;
-//--------------------------------------------------------------------------------------------
+static Ptr<TrainData> makeRandomData(int datasize)
-class CV_SVMTrainAutoTest : public cvtest::BaseTest {
-public:
-    CV_SVMTrainAutoTest() {}
-protected:
-    virtual void run( int start_from );
-};
-void CV_SVMTrainAutoTest::run( int /*start_from*/ )
 {
-    int datasize = 100;
    cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 );
    cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S );
+    RNG &rng = cv::theRNG();
-    RNG rng(0);
    for (int i = 0; i < datasize; ++i)
    {
        int response = rng.uniform(0, 2);  // Random from {0, 1}.
@@ -68,36 +21,14 @@ void CV_SVMTrainAutoTest::run( int /*start_from*/ )
        samples.at<float>( i, 1 ) = rng.uniform(0.f, 0.5f) + response * 0.5f;
        responses.at<int>( i, 0 ) = response;
    }
+    return TrainData::create( samples, cv::ml::ROW_SAMPLE, responses );
-    cv::Ptr<TrainData> data = TrainData::create( samples, cv::ml::ROW_SAMPLE, responses );
-    cv::Ptr<SVM> svm = SVM::create();
-    svm->trainAuto( data, 10 );  // 2-fold cross validation.
-    float test_data0[2] = {0.25f, 0.25f};
-    cv::Mat test_point0 = cv::Mat( 1, 2, CV_32FC1, test_data0 );
-    float result0 = svm->predict( test_point0 );
-    float test_data1[2] = {0.75f, 0.75f};
-    cv::Mat test_point1 = cv::Mat( 1, 2, CV_32FC1, test_data1 );
-    float result1 = svm->predict( test_point1 );
-    if ( fabs( result0 - 0 ) > 0.001 || fabs( result1 - 1 ) > 0.001 )
-    {
-        ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
-    }
 }
-TEST(ML_SVM, trainauto) { CV_SVMTrainAutoTest test; test.safe_run(); }
+static Ptr<TrainData> makeCircleData(int datasize, float scale_factor, float radius)
-TEST(ML_SVM, trainauto_sigmoid)
 {
-    const int datasize = 100;
+    // Populate samples with data that can be split into two concentric circles
    cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 );
    cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S );
-    const float scale_factor = 0.5;
-    const float radius = 2.0;
-    // Populate samples with data that can be split into two concentric circles
    for (int i = 0; i < datasize; i+=2)
    {
        const float pi = 3.14159f;
@@ -115,41 +46,74 @@ TEST(ML_SVM, trainauto_sigmoid)
        samples.at<float>( i + 1, 1 ) = y * scale_factor;
        responses.at<int>( i + 1, 0 ) = 1;
    }
+    return TrainData::create( samples, cv::ml::ROW_SAMPLE, responses );
+}
+static Ptr<TrainData> makeRandomData2(int datasize)
+{
+    cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 );
+    cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S );
+    RNG &rng = cv::theRNG();
+    for (int i = 0; i < datasize; ++i)
+    {
+        int response = rng.uniform(0, 2);  // Random from {0, 1}.
+        samples.at<float>( i, 0 ) = 0;
+        samples.at<float>( i, 1 ) = (0.5f - response) * rng.uniform(0.f, 1.2f) + response;
+        responses.at<int>( i, 0 ) = response;
+    }
+    return TrainData::create( samples, cv::ml::ROW_SAMPLE, responses );
+}
+//==================================================================================================
-    cv::Ptr<TrainData> data = TrainData::create( samples, cv::ml::ROW_SAMPLE, responses );
+TEST(ML_SVM, trainauto)
+{
+    const int datasize = 100;
+    cv::Ptr<TrainData> data = makeRandomData(datasize);
+    ASSERT_TRUE(data);
    cv::Ptr<SVM> svm = SVM::create();
-    svm->setKernel(SVM::SIGMOID);
+    ASSERT_TRUE(svm);
+    svm->trainAuto( data, 10 );  // 2-fold cross validation.
+    float test_data0[2] = {0.25f, 0.25f};
+    cv::Mat test_point0 = cv::Mat( 1, 2, CV_32FC1, test_data0 );
+    float result0 = svm->predict( test_point0 );
+    float test_data1[2] = {0.75f, 0.75f};
+    cv::Mat test_point1 = cv::Mat( 1, 2, CV_32FC1, test_data1 );
+    float result1 = svm->predict( test_point1 );
+    EXPECT_NEAR(result0, 0, 0.001);
+    EXPECT_NEAR(result1, 1, 0.001);
+}
+TEST(ML_SVM, trainauto_sigmoid)
+{
+    const int datasize = 100;
+    const float scale_factor = 0.5;
+    const float radius = 2.0;
+    cv::Ptr<TrainData> data = makeCircleData(datasize, scale_factor, radius);
+    ASSERT_TRUE(data);
+    cv::Ptr<SVM> svm = SVM::create();
+    ASSERT_TRUE(svm);
+    svm->setKernel(SVM::SIGMOID);
    svm->setGamma(10.0);
    svm->setCoef0(-10.0);
    svm->trainAuto( data, 10 );  // 2-fold cross validation.
    float test_data0[2] = {radius, radius};
    cv::Mat test_point0 = cv::Mat( 1, 2, CV_32FC1, test_data0 );
-    ASSERT_EQ(0, svm->predict( test_point0 ));
+    EXPECT_FLOAT_EQ(svm->predict( test_point0 ), 0);
    float test_data1[2] = {scale_factor * radius, scale_factor * radius};
    cv::Mat test_point1 = cv::Mat( 1, 2, CV_32FC1, test_data1 );
-    ASSERT_EQ(1, svm->predict( test_point1 ));
+    EXPECT_FLOAT_EQ(svm->predict( test_point1 ), 1);
 }
 TEST(ML_SVM, trainAuto_regression_5369)
 {
-    int datasize = 100;
+    const int datasize = 100;
-    cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 );
+    Ptr<TrainData> data = makeRandomData2(datasize);
-    cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S );
-    RNG rng(0); // fixed!
-    for (int i = 0; i < datasize; ++i)
-    {
-        int response = rng.uniform(0, 2);  // Random from {0, 1}.
-        samples.at<float>( i, 0 ) = 0;
-        samples.at<float>( i, 1 ) = (0.5f - response) * rng.uniform(0.f, 1.2f) + response;
-        responses.at<int>( i, 0 ) = response;
-    }
-    cv::Ptr<TrainData> data = TrainData::create( samples, cv::ml::ROW_SAMPLE, responses );
    cv::Ptr<SVM> svm = SVM::create();
    svm->trainAuto( data, 10 );  // 2-fold cross validation.
@@ -164,16 +128,8 @@ TEST(ML_SVM, trainAuto_regression_5369)
    EXPECT_EQ(1., result1);
 }
-class CV_SVMGetSupportVectorsTest : public cvtest::BaseTest {
+TEST(ML_SVM, getSupportVectors)
-public:
-    CV_SVMGetSupportVectorsTest() {}
-protected:
-    virtual void run( int startFrom );
-};
-void CV_SVMGetSupportVectorsTest::run(int /*startFrom*/ )
 {
-    int code = cvtest::TS::OK;
    // Set up training data
    int labels[4] = {1, -1, -1, -1};
    float trainingData[4][2] = { {501, 10}, {255, 10}, {501, 255}, {10, 501} };
@@ -181,19 +137,18 @@ void CV_SVMGetSupportVectorsTest::run(int /*startFrom*/ )
    Mat labelsMat(4, 1, CV_32SC1, labels);
    Ptr<SVM> svm = SVM::create();
+    ASSERT_TRUE(svm);
    svm->setType(SVM::C_SVC);
    svm->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, 100, 1e-6));
    // Test retrieval of SVs and compressed SVs on linear SVM
    svm->setKernel(SVM::LINEAR);
    svm->train(trainingDataMat, cv::ml::ROW_SAMPLE, labelsMat);
    Mat sv = svm->getSupportVectors();
-    CV_Assert(sv.rows == 1);    // by default compressed SV returned
+    EXPECT_EQ(1, sv.rows);    // by default compressed SV returned
    sv = svm->getUncompressedSupportVectors();
-    CV_Assert(sv.rows == 3);
+    EXPECT_EQ(3, sv.rows);
    // Test retrieval of SVs and compressed SVs on non-linear SVM
    svm->setKernel(SVM::POLY);
@@ -201,15 +156,9 @@ void CV_SVMGetSupportVectorsTest::run(int /*startFrom*/ )
    svm->train(trainingDataMat, cv::ml::ROW_SAMPLE, labelsMat);
    sv = svm->getSupportVectors();
-    CV_Assert(sv.rows == 3);
+    EXPECT_EQ(3, sv.rows);
    sv = svm->getUncompressedSupportVectors();
-    CV_Assert(sv.rows == 0);    // inapplicable for non-linear SVMs
+    EXPECT_EQ(0, sv.rows);    // inapplicable for non-linear SVMs
-    ts->set_failed_test_info(code);
 }
-TEST(ML_SVM, getSupportVectors) { CV_SVMGetSupportVectorsTest test; test.safe_run(); }
 }} // namespace
--- a/modules/ml/test/test_utils.cpp
+++ b/modules/ml/test/test_utils.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "test_precomp.hpp"
+namespace opencv_test {
+// Fills `means` (3x2) and `covs` (three 2x2 matrices) with the fixed default
+// cluster parameters, converted to the requested element `type`.
+void defaultDistribs( Mat& means, vector<Mat>& covs, int type)
+{
+    const int nclusters = 3;
+    // One row per cluster: mean (x, y) and row-major 2x2 covariance.
+    float meanVals[nclusters][2] = { {0.0f, 0.0f}, {5.0f, 0.0f}, {1.0f, 5.0f} };
+    float covVals[nclusters][4] = { {0.67f, 0.0f, 0.0f, 0.67f},
+                                    {1.0f, 0.0f, 0.0f, 1.0f},
+                                    {1.0f, 0.0f, 0.0f, 1.0f} };
+    means.create(nclusters, 2, type);
+    covs.resize(nclusters);
+    for( int k = 0; k < nclusters; k++ )
+    {
+        // The row header already has the target size and type, so convertTo
+        // writes straight into `means`.
+        Mat dstRow = means.row(k);
+        Mat( 1, 2, CV_32FC1, meanVals[k] ).convertTo(dstRow, type);
+        Mat( 2, 2, CV_32FC1, covVals[k] ).convertTo(covs[k], type);
+    }
+}
+// Generate point sets by normal distributions.
+//   data      - pre-allocated matrix of type `dataType` with sum(sizes) rows; overwritten in place
+//   labels    - (re)created as a data.rows x 1 column of `labelType`; row i gets its cluster index
+//   sizes     - number of points in each cluster, in row order
+//   _means    - one row per cluster, data.cols columns
+//   covs      - one data.cols x data.cols matrix per cluster
+//   labelType - must be CV_32FC1 or CV_32SC1
+void generateData( Mat& data, Mat& labels, const vector<int>& sizes, const Mat& _means, const vector<Mat>& covs, int dataType, int labelType )
+{
+    int total = 0;
+    for( size_t k = 0; k < sizes.size(); k++ )
+        total += sizes[k];
+    CV_Assert( _means.rows == (int)sizes.size() && covs.size() == sizes.size() );
+    CV_Assert( !data.empty() && data.rows == total );
+    CV_Assert( data.type() == dataType );
+    // Reject unsupported label types up front: a debug-only assert would leave
+    // `labels` allocated but uninitialized in release builds.
+    CV_Assert( labelType == CV_32FC1 || labelType == CV_32SC1 );
+    labels.create( data.rows, 1, labelType );
+    randn( data, Scalar::all(-1.0), Scalar::all(1.0) );
+    int ei = 0;
+    for( size_t l = 0; l < sizes.size(); l++ )
+    {
+        const Mat mean = _means.row((int)l);
+        const Mat& cov = covs[l];
+        // [bi, ei) is the row range of cluster l
+        const int bi = ei;
+        ei = bi + sizes[l];
+        CV_Assert( mean.rows == 1 && mean.cols == data.cols );
+        CV_Assert( cov.rows == data.cols && cov.cols == data.cols );
+        for( int i = bi; i < ei; i++ )
+        {
+            // transform the random row in place with the cluster parameters
+            Mat r = data.row(i);
+            r = r * cov + mean;
+            if( labelType == CV_32FC1 )
+                labels.at<float>(i, 0) = (float)l;
+            else
+                labels.at<int>(i, 0) = (int)l;
+        }
+    }
+}
+// Returns the index of the first largest element of `count`.
+// Asserts if no element is greater than -1 (e.g. when `count` is empty).
+int maxIdx( const vector<int>& count )
+{
+    int best = -1;
+    int bestVal = -1;
+    for( size_t k = 0; k < count.size(); k++ )
+    {
+        // strict comparison keeps the earliest index on ties
+        if( count[k] > bestVal )
+        {
+            bestVal = count[k];
+            best = (int)k;
+        }
+    }
+    CV_Assert( best >= 0 );
+    return best;
+}
+// For every cluster (a consecutive run of sizes[k] entries in `labels`) finds the
+// most frequent label and stores it in labelsMap[k].
+//   labels           - CV_32SC1 or CV_32FC1 vector (single row or single column) with sum(sizes) entries
+//   checkClusterUniq - when true, asserts that no label is chosen for two clusters
+// Returns false only when checkClusterUniq is set and some label in [0, nclusters)
+// ended up unassigned.
+bool getLabelsMap( const Mat& labels, const vector<int>& sizes, vector<int>& labelsMap, bool checkClusterUniq)
+{
+    size_t total = 0, nclusters = sizes.size();
+    for(size_t i = 0; i < sizes.size(); i++)
+        total += sizes[i];
+    CV_Assert( !labels.empty() );
+    CV_Assert( labels.total() == total && (labels.cols == 1 || labels.rows == 1));
+    CV_Assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 );
+    bool isFlt = labels.type() == CV_32FC1;
+    labelsMap.resize(nclusters);
+    vector<bool> buzy(nclusters, false);
+    int startIndex = 0;
+    for( size_t clusterIndex = 0; clusterIndex < sizes.size(); clusterIndex++ )
+    {
+        // histogram of the labels seen inside this cluster's range
+        vector<int> count( nclusters, 0 );
+        for( int i = startIndex; i < startIndex + sizes[clusterIndex]; i++)
+        {
+            int lbl = isFlt ? (int)labels.at<float>(i) : labels.at<int>(i);
+            // lbl indexes `count`, so it must be checked on both sides
+            CV_Assert(lbl >= 0 && lbl < (int)nclusters);
+            count[lbl]++;
+            CV_Assert(count[lbl] < (int)total);
+        }
+        startIndex += sizes[clusterIndex];
+        int cls = maxIdx( count );
+        CV_Assert( !checkClusterUniq || !buzy[cls] );
+        labelsMap[clusterIndex] = cls;
+        buzy[cls] = true;
+    }
+    if(checkClusterUniq)
+    {
+        for(size_t i = 0; i < buzy.size(); i++)
+            if(!buzy[i])
+                return false;
+    }
+    return true;
+}
+// Computes the fraction of mismatching labels and stores it in `err`.
+//   labels, origLabels - vectors (single row or single column) of the same length
+//                        and type, CV_32SC1 or CV_32FC1
+//   labelsEquivalent   - true: compare labels element-wise;
+//                        false: first map each original label through getLabelsMap()
+//   sizes, checkClusterUniq - forwarded to getLabelsMap() (used only when !labelsEquivalent)
+// Returns false when getLabelsMap() fails; `err` is 0 in that case.
+bool calcErr( const Mat& labels, const Mat& origLabels, const vector<int>& sizes, float& err, bool labelsEquivalent, bool checkClusterUniq)
+{
+    err = 0;
+    CV_Assert( !labels.empty() && !origLabels.empty() );
+    CV_Assert( labels.rows == 1 || labels.cols == 1 );
+    CV_Assert( origLabels.rows == 1 || origLabels.cols == 1 );
+    CV_Assert( labels.total() == origLabels.total() );
+    CV_Assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 );
+    CV_Assert( origLabels.type() == labels.type() );
+    vector<int> labelsMap;
+    bool isFlt = labels.type() == CV_32FC1;
+    // Row vectors are accepted above, so iterate over every element rather than
+    // over rows (which would visit only the first element of a 1xN input).
+    const int count = (int)labels.total();
+    if( !labelsEquivalent )
+    {
+        if( !getLabelsMap( labels, sizes, labelsMap, checkClusterUniq ) )
+            return false;
+        for( int i = 0; i < count; i++ )
+        {
+            const int orig = isFlt ? (int)origLabels.at<float>(i) : origLabels.at<int>(i);
+            // orig indexes labelsMap, so it must be a valid cluster index
+            CV_Assert( orig >= 0 && orig < (int)labelsMap.size() );
+            if( isFlt )
+                err += labels.at<float>(i) != labelsMap[orig] ? 1.f : 0.f;
+            else
+                err += labels.at<int>(i) != labelsMap[orig] ? 1.f : 0.f;
+        }
+    }
+    else
+    {
+        for( int i = 0; i < count; i++ )
+            if( isFlt )
+                err += labels.at<float>(i) != origLabels.at<float>(i) ? 1.f : 0.f;
+            else
+                err += labels.at<int>(i) != origLabels.at<int>(i) ? 1.f : 0.f;
+    }
+    err /= (float)count;
+    return true;
+}
+// Computes the misclassification rate between predicted and original labels.
+// Both inputs are converted to CV_32S and compared element-wise; `error`
+// receives 1 - (matching elements / number of rows). Always returns true.
+bool calculateError( const Mat& _p_labels, const Mat& _o_labels, float& error)
+{
+    error = 0.0f;
+    Mat predicted, expected;
+    _p_labels.convertTo(predicted, CV_32S);
+    _o_labels.convertTo(expected, CV_32S);
+    CV_Assert(predicted.total() == expected.total());
+    CV_Assert(predicted.rows == expected.rows);
+    const int matches = countNonZero(predicted == expected);
+    error = 1 - (float)matches/predicted.rows;
+    return true;
+}
+} // namespace