Commit cabd5d40 authored by Wangyida

add RGB as an option for data generation for triplet training

parent a0d56301
......@@ -53,7 +53,7 @@ $ make
=============
#Demos
##Demo1: training data generation
###Images generation from different poses. By default 4 models are used, producing 276 images in all, 69 per class; if you want to use additional .ply models, change the class number parameter to the new number of classes and give each new model its own class label. If you want to train the network and extract features from RGB images, set the parameter rgb_use to 1 (an RGB invocation is shown after the command below).
```
$ ./sphereview_test -plymodel=../3Dmodel/ape.ply -label_class=0
```
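With this patch the same tool can also emit three-channel (RGB) training data; for example, reusing the model above with the new flag:
```
$ ./sphereview_test -plymodel=../3Dmodel/ape.ply -label_class=0 -rgb_use=1
```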
......@@ -91,4 +91,8 @@ $ cd <opencv_contrib>/modules/cnn_3dobj/samples/build
```
$ ./classify_test
```
###If classification and pose estimation should use the mean computed from all training images, you can run this:
```
$ ./classify_test -mean_file=../data/images_mean/triplet_mean.binaryproto
```
==============================================
......@@ -128,7 +128,7 @@ The class create some sphere views of camera towards a 3D object meshed from .pl
CV_WRAP static void createHeader(int num_item, int rows, int cols, const char* headerPath);
/** @brief Create header in binary files collecting the image data and label.
*/
CV_WRAP static void writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z, int isrgb);
/** @brief Write binary files used for training in other open source project.
*/
};
......@@ -136,39 +136,37 @@ The class create some sphere views of camera towards a 3D object meshed from .pl
class CV_EXPORTS_W descriptorExtractor
{
private:
caffe::Net<float>* convnet;
cv::Size input_geometry;
int num_channels;
bool net_set;
int net_ready;
cv::Mat mean_;
std::vector<string> device_info;
void setMean(const string& mean_file);
/** @brief Load the mean file in binaryproto format if it is needed.
*/
void wrapInput(std::vector<cv::Mat>* input_channels);
/** @brief Wrap the input layer of the network in separate cv::Mat objects(one per channel). This way we save one memcpy operation and we don't need to rely on cudaMemcpy2D. The last preprocessing operation will write the separate channels directly to the input layer.
*/
void preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels);
/** @brief Convert the input image to the input image format of the network.
*/
public:
std::vector<string> labels_;
descriptorExtractor(const string& device_type, int device_id);
/** @brief Set the device for feature extraction.
*/
std::vector<string> getDevice();
/** @brief Get device information for feature extraction.
*/
void setDevice(const string& device_type, const string& device_id = "");
/** @brief Set device information for feature extraction.
*/
void loadNet(const string& model_file, const string& trained_file, string mean_file = "");
/** @brief Initiate a classification structure.
*/
void getLabellist(const std::vector<string>& name_gallery);
/** @brief Get the label of the gallery images for result displaying in prediction.
*/
void extract(InputArrayOfArrays inputimg, OutputArray feature, std::string feature_blob);
/** @brief Extract features from a set of images.
*/
};
//! @}
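Taken together, the new interface replaces the old setNet()/net_ready handshake with a constructor plus loadNet(). A minimal usage sketch, assuming a trained Caffe model is available (the file names deploy.prototxt, net.caffemodel and target.png are placeholders; "CPU" and "feat" are the sample's defaults):
```
#include <opencv2/cnn_3dobj.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    // The device is now chosen at construction time instead of via setNet().
    cv::cnn_3dobj::descriptorExtractor descriptor("CPU", 0);
    // The mean file argument is optional; omit it to skip mean subtraction.
    descriptor.loadNet("deploy.prototxt", "net.caffemodel");
    // Extract the "feat" blob for a single image; extract() also accepts a vector of images.
    cv::Mat img = cv::imread("target.png", -1), feature;
    descriptor.extract(img, feature, "feat");
    return 0;
}
```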
......
......@@ -34,43 +34,40 @@
*/
#define HAVE_CAFFE
#include <opencv2/cnn_3dobj.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <iomanip>
using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;
/* Get the file names from a root directory. */
void listDir(const char *path, std::vector<string>& files, bool r)
{
    DIR *pDir;
    struct dirent *ent;
    char childpath[512];
    pDir = opendir(path);
    memset(childpath, 0, sizeof(childpath));
    while ((ent = readdir(pDir)) != NULL)
    {
        if (ent->d_type & DT_DIR)
        {
            if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
            {
                continue;
            }
            if (r)
            {
                sprintf(childpath, "%s/%s", path, ent->d_name);
                listDir(childpath, files, false);
            }
        }
        else
        {
            files.push_back(ent->d_name);
        }
    }
    sort(files.begin(), files.end());
};
int main(int argc, char** argv)
......@@ -82,7 +79,7 @@ int main(int argc, char** argv)
"{mean_file | no | The mean file generated by Caffe from all gallery images, this could be used for mean value substraction from all images. If you want to use the mean file, you can set this as ../data/images_mean/triplet_mean.binaryproto.}"
"{target_img | ../data/images_all/3_13.png | Path of image waiting to be classified.}"
"{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}"
"{num_candidate | 6 | Number of candidates in gallery as the prediction result.}"
"{num_candidate | 15 | Number of candidates in gallery as the prediction result.}"
"{device | CPU | device}"
"{dev_id | 0 | dev_id}";
cv::CommandLineParser parser(argc, argv, keys);
......@@ -102,16 +99,15 @@ int main(int argc, char** argv)
string device = parser.get<string>("device");
int dev_id = parser.get<int>("dev_id");
cv::cnn_3dobj::descriptorExtractor descriptor(device, dev_id);
std::vector<string> device_info = descriptor.getDevice();
std::cout << "Using " << device_info[0] << std::endl;
if (strcmp(mean_file.c_str(), "no") == 0)
descriptor.loadNet(network_forIMG, caffemodel);
else
descriptor.loadNet(network_forIMG, caffemodel, mean_file);
std::vector<string> name_gallery;
listDir(src_dir.c_str(), name_gallery, false);
for (unsigned int i = 0; i < name_gallery.size(); i++) {
name_gallery[i] = src_dir + name_gallery[i];
}
......@@ -120,7 +116,7 @@ int main(int argc, char** argv)
for (unsigned int i = 0; i < name_gallery.size(); i++) {
img_gallery.push_back(cv::imread(name_gallery[i], -1));
}
descriptor.extract(img_gallery, feature_reference, feature_blob);
std::cout << std::endl << "---------- Prediction for " << target_img << " ----------" << std::endl;
......@@ -131,14 +127,15 @@ int main(int argc, char** argv)
for (unsigned int i = 0; i < feature_reference.rows; i++)
std::cout << feature_reference.row(i) << endl;
cv::Mat feature_test;
descriptor.extract(img, feature_test, feature_blob);
cv::BFMatcher matcher(NORM_L2);
std::vector<std::vector<cv::DMatch> > matches;
matcher.knnMatch(feature_test, feature_reference, matches, num_candidate);
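// For the single test descriptor, matches[0] holds its num_candidate nearest gallery features sorted by ascending distance.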
std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test << std::endl;
// Print the top N prediction.
std::cout << std::endl << "---------- Prediction result(Distance - File Name in Gallery) ----------" << std::endl;
for (size_t i = 0; i < matches[0].size(); ++i) {
std::cout << i << " - " << std::fixed << std::setprecision(2) << name_gallery[matches[0][i].trainIdx] << " - \"" << matches[0][i].distance << "\"" << std::endl;
}
return 0;
}
......@@ -48,7 +48,8 @@ int main(int argc, char *argv[])
"{imagedir | ../data/images_all/ | path of the generated images for one particular .ply model. }"
"{labeldir | ../data/label_all.txt | path of the generated images for one particular .ply model. }"
"{num_class | 4 | total number of classes of models}"
"{label_class | 0 | class label of current .ply model}";
"{label_class | 0 | class label of current .ply model}"
"{rgb_use | 0 | use RGB image or grayscale}";
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for Sphere View data generation");
if (parser.has("help"))
......@@ -62,6 +63,7 @@ int main(int argc, char *argv[])
string labeldir = parser.get<string>("labeldir");
int num_class = parser.get<int>("num_class");
int label_class = parser.get<int>("label_class");
int rgb_use = parser.get<int>("rgb_use");
cv::cnn_3dobj::icoSphere ViewSphere(10,ite_depth);
std::vector<cv::Point3d> campos = ViewSphere.CameraPos;
std::fstream imglabel;
......@@ -122,7 +124,7 @@ int main(int argc, char *argv[])
if (camera_pov)
myWindow.setViewerPose(cam_pose);
myWindow.saveScreenshot(filename);
ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100), rgb_use);
}
imglabel.close();
return 1;
......
......@@ -175,9 +175,8 @@ namespace cnn_3dobj
headerLabel.close();
};
void icoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z, int isrgb)
{
cv::Mat ImgforBin = cv::imread(filenameImg, isrgb);
char* A0 = (char*)malloc(1024);
strcpy(A0, binaryPath);
......@@ -208,9 +207,24 @@ namespace cnn_3dobj
createHeader(num_item, 64, 64, binaryPath);
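// Write a header recording the item count and the 64x64 image size before appending image data.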
img_file.open(binPathimg,ios::out|ios::binary|ios::app);
lab_file.open(binPathlab,ios::out|ios::binary|ios::app);
if (isrgb == 0)
{
for (int r = 0; r < ImgforBin.rows; r++)
{
img_file.write(reinterpret_cast<const char*>(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize());
}
}
else
{
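// Split into separate color planes and write each channel's rows in sequence: the binary blob stores channels in planar (channel-major) order.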
std::vector<cv::Mat> Img3forBin;
cv::split(ImgforBin,Img3forBin);
for (unsigned int i = 0; i < Img3forBin.size(); i++)
{
for (int r = 0; r < Img3forBin[i].rows; r++)
{
img_file.write(reinterpret_cast<const char*>(Img3forBin[i].ptr(r)), Img3forBin[i].cols*Img3forBin[i].elemSize());
}
}
}
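// Label record: the class id followed by the camera position, quantized to signed bytes (x, y, z were scaled by 100 by the caller).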
signed char templab = (signed char)label_class;
lab_file << templab << (signed char)x << (signed char)y << (signed char)z;
......@@ -222,9 +236,24 @@ namespace cnn_3dobj
img_file.open(binPathimg,ios::out|ios::binary|ios::app);
lab_file.open(binPathlab,ios::out|ios::binary|ios::app);
cout <<"Concatenating the training data at: " << binaryPath << ". " << endl;
if (isrgb == 0)
{
for (int r = 0; r < ImgforBin.rows; r++)
{
img_file.write(reinterpret_cast<const char*>(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize());
}
}
else
{
std::vector<cv::Mat> Img3forBin;
cv::split(ImgforBin,Img3forBin);
for (unsigned int i = 0; i < Img3forBin.size(); i++)
{
for (int r = 0; r < Img3forBin[i].rows; r++)
{
img_file.write(reinterpret_cast<const char*>(Img3forBin[i].ptr(r)), Img3forBin[i].cols*Img3forBin[i].elemSize());
}
}
}
signed char templab = (signed char)label_class;
lab_file << templab << (signed char)x << (signed char)y << (signed char)z;
......
......@@ -34,23 +34,11 @@ void CV_CNN_Feature_Test::run(int)
string device = "CPU";
int dev_id = 0;
cv::cnn_3dobj::descriptorExtractor descriptor(device, dev_id);
if (strcmp(mean_file.c_str(), "no") == 0)
descriptor.loadNet(network_forIMG, caffemodel);
else
descriptor.loadNet(network_forIMG, caffemodel, mean_file);
cv::Mat img = cv::imread(target_img, -1);
if (img.empty()) {
ts->printf(cvtest::TS::LOG, "could not read image %s\n", target_img.c_str());
......@@ -58,7 +46,7 @@ void CV_CNN_Feature_Test::run(int)
return;
}
cv::Mat feature_test;
descriptor.extract(img, feature_test, feature_blob);
if (feature_test.empty()) {
ts->printf(cvtest::TS::LOG, "could not extract feature from image %s\n", target_img.c_str());
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
......