Commit f427be6e authored by Alexander Alekhin's avatar Alexander Alekhin

Merge pull request #871 from paroj:cnn_3dobj

parents adff4073 a7ad78b9
......@@ -27,18 +27,21 @@ endif()
if(NOT HAVE_CAFFE)
ocv_module_disable(cnn_3dobj)
else()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cnn_3dobj_config.hpp.in
${CMAKE_CURRENT_SOURCE_DIR}/include/opencv2/cnn_3dobj_config.hpp @ONLY)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
if(${Caffe_FOUND})
include_directories(${Caffe_INCLUDE_DIR})
endif()
include_directories(${Caffe_INCLUDE_DIR})
set(the_description "CNN for 3D object recognition and pose estimation including a completed Sphere View on 3D objects")
ocv_define_module(cnn_3dobj opencv_core opencv_imgproc opencv_viz opencv_highgui OPTIONAL WRAP python)
ocv_define_module(cnn_3dobj opencv_core opencv_imgproc ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} OPTIONAL opencv_features2d opencv_viz opencv_calib3d WRAP python)
ocv_add_testdata(testdata/cv contrib/cnn_3dobj)
if(${Caffe_FOUND})
target_link_libraries(opencv_cnn_3dobj ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS})
if(TARGET opencv_test_cnn_3dobj)
target_link_libraries(opencv_test_cnn_3dobj boost_system)
endif()
foreach(exe_TGT classify video sphereview_data model_analysis)
if(TARGET example_cnn_3dobj_${exe_TGT})
target_link_libraries(example_cnn_3dobj_${exe_TGT} boost_system)
endif()
endforeach()
endif()
# Caffe package for CNN Triplet training
unset(Caffe_FOUND)
find_path(Caffe_INCLUDE_DIR NAMES caffe/caffe.hpp caffe/common.hpp caffe/net.hpp caffe/proto/caffe.pb.h caffe/util/io.hpp caffe/vision_layers.hpp
find_path(Caffe_INCLUDE_DIR NAMES caffe/caffe.hpp caffe/common.hpp caffe/net.hpp caffe/proto/caffe.pb.h caffe/util/io.hpp
HINTS
/usr/local/include)
......
......@@ -42,37 +42,16 @@ $ cd usr/local/include/opencv2/
$ sudo rm -rf cnn_3dobj.hpp
```
####And then redo the compiling steps above again.
===========================================================
#Building samples
```
$ cd <opencv_contrib>/modules/cnn_3dobj/samples
$ mkdir build
$ cd build
$ cmake ..
$ make
```
===========================================================
#Demos
##Demo1: Training set data generation
####Image generation for different poses: by default, there are 4 models used and there will be 276 images in all in which each class contains 69 iamges. If you want to use additional .ply models, it is necessary to change the class number parameter to the new class number and also give it a new class label. If you want to train the network and extract feature from RGB images, set the parameter rgb_use as 1.
```
$ ./sphereview_test -plymodel=../data/3Dmodel/ape.ply -label_class=0 -cam_head_x=0 -cam_head_y=0 -cam_head_z=1
```
####press 'Q' to start 2D image genaration
```
$ ./sphereview_test -plymodel=../data/3Dmodel/ant.ply -label_class=1 -cam_head_x=0 -cam_head_y=-1 -cam_head_z=0
```
```
$ ./sphereview_test -plymodel=../data/3Dmodel/cow.ply -label_class=2 -cam_head_x=0 -cam_head_y=-1 -cam_head_z=0
```
```
$ ./sphereview_test -plymodel=../data/3Dmodel/plane.ply -label_class=3 -cam_head_x=0 -cam_head_y=-1 -cam_head_z=0
```
```
$ ./sphereview_test -plymodel=../data/3Dmodel/bunny.ply -label_class=4 -cam_head_x=0 -cam_head_y=-1 -cam_head_z=0
```
```
$ ./sphereview_test -plymodel=../data/3Dmodel/horse.ply -label_class=5 -cam_head_x=0 -cam_head_y=0 -cam_head_z=-1
$ ./example_cnn_3dobj_sphereview_data -plymodel=../data/3Dmodel/ape.ply -label_class=0 -cam_head_x=0 -cam_head_y=0 -cam_head_z=1
$ ./example_cnn_3dobj_sphereview_data -plymodel=../data/3Dmodel/ant.ply -label_class=1 -cam_head_x=0 -cam_head_y=-1 -cam_head_z=0
$ ./example_cnn_3dobj_sphereview_data -plymodel=../data/3Dmodel/cow.ply -label_class=2 -cam_head_x=0 -cam_head_y=-1 -cam_head_z=0
$ ./example_cnn_3dobj_sphereview_data -plymodel=../data/3Dmodel/plane.ply -label_class=3 -cam_head_x=0 -cam_head_y=-1 -cam_head_z=0
$ ./example_cnn_3dobj_sphereview_data -plymodel=../data/3Dmodel/bunny.ply -label_class=4 -cam_head_x=0 -cam_head_y=-1 -cam_head_z=0
$ ./example_cnn_3dobj_sphereview_data -plymodel=../data/3Dmodel/horse.ply -label_class=5 -cam_head_x=0 -cam_head_y=0 -cam_head_z=-1
```
####When all images are created in images_all folder as a collection of training images for network tranining and as a gallery of reference images for classification, then proceed onward.
####After this demo, the binary files of images and labels will be stored as 'binary_image' and 'binary_label' in current path. You should copy them into the leveldb folder for Caffe triplet training. For example: copy these 2 files in <caffe_source_directory>/data/linemod and rename them as 'binary_image_train', 'binary_image_test' and 'binary_label_train', 'binary_label_train'. Here I use the same as trianing and testing data but you can use different data for training and testing. It's important to observe the error on the testing data to check whether the training data is suitable for the your aim. Error should be obseved to keep decreasing and remain much smaller than the initial error.
......@@ -92,17 +71,17 @@ $ cd <opencv_contrib>/modules/cnn_3dobj/samples/build
```
####Classification: This will extract features of a single image and compare it with features of a gallery of samples for prediction. This demo uses a set of images for feature extraction in a given path, these features will be a reference for prediction on the target image. The Caffe model and the network prototxt file are in <opencv_contrib>/modules/cnn_3dobj/testdata/cv. Just run:
```
$ ./classify_test
$ ./example_cnn_3dobj_classify
```
####if you want to extract mean classification and pose estimation performance from all the training images, you can run this:
```
$ ./classify_test -mean_file=../data/images_mean/triplet_mean.binaryproto
$ ./example_cnn_3dobj_classify -mean_file=../data/images_mean/triplet_mean.binaryproto
```
===========================================================
##Demo3: Model performance test
####This demo will run a performance test of a trained CNN model on several images. If the the model fails on telling different samples from seperate classes apart, or is confused on samples with similar pose but from different classes, this will give some information for model analysis.
```
$ ./model_test
$ ./example_cnn_3dobj_model_analysis
```
===========================================================
#Test
......
#ifndef __OPENCV_CNN_3DOBJ_CONFIG_HPP__
#define __OPENCV_CNN_3DOBJ_CONFIG_HPP__
// HAVE CAFFE
#cmakedefine HAVE_CAFFE
#endif
......@@ -58,24 +58,17 @@ the use of this software, even if advised of the possibility of such damage.
#include <dirent.h>
#define CPU_ONLY
#include <opencv2/cnn_3dobj_config.hpp>
#ifdef HAVE_CAFFE
#include <caffe/blob.hpp>
#include <caffe/common.hpp>
#include <caffe/net.hpp>
#include <caffe/proto/caffe.pb.h>
#include <caffe/util/io.hpp>
#include <caffe/vision_layers.hpp>
#endif
#include "opencv2/viz/vizcore.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/highgui/highgui_c.h"
#include "opencv2/imgproc.hpp"
using caffe::Blob;
using caffe::Caffe;
using caffe::Datum;
using caffe::Net;
/** @defgroup cnn_3dobj 3D object recognition and pose estimation API
As CNN based learning algorithm shows better performance on the classification issues,
......
cmake_minimum_required(VERSION 2.8)
SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb ")
SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")
project(cnn_3dobj)
find_package(OpenCV REQUIRED)
set(SOURCES_generator demo_sphereview_data.cpp)
include_directories(${OpenCV_INCLUDE_DIRS})
add_executable(sphereview_test ${SOURCES_generator})
target_link_libraries(sphereview_test opencv_core opencv_imgproc opencv_highgui opencv_cnn_3dobj opencv_xfeatures2d)
set(SOURCES_classifier demo_classify.cpp)
add_executable(classify_test ${SOURCES_classifier})
target_link_libraries(classify_test opencv_core opencv_imgproc opencv_highgui opencv_cnn_3dobj opencv_xfeatures2d)
set(SOURCES_modelanalysis demo_model_analysis.cpp)
add_executable(model_test ${SOURCES_modelanalysis})
target_link_libraries(model_test opencv_core opencv_imgproc opencv_highgui opencv_cnn_3dobj opencv_xfeatures2d)
set(SOURCES_video demo_video.cpp)
add_executable(video_test ${SOURCES_video})
target_link_libraries(video_test opencv_core opencv_imgproc opencv_highgui opencv_cnn_3dobj opencv_xfeatures2d)
......@@ -38,7 +38,7 @@
* @author Yida Wang
*/
#include <opencv2/cnn_3dobj.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <opencv2/features2d.hpp>
#include <iomanip>
using namespace cv;
using namespace std;
......@@ -48,7 +48,7 @@ using namespace cv::cnn_3dobj;
* @function listDir
* @brief Making all files names under a directory into a list
*/
void listDir(const char *path, std::vector<String>& files, bool r)
static void listDir(const char *path, std::vector<String>& files, bool r)
{
DIR *pDir;
struct dirent *ent;
......@@ -82,7 +82,7 @@ void listDir(const char *path, std::vector<String>& files, bool r)
* @function featureWrite
* @brief Writing features of gallery images into binary files
*/
int featureWrite(const Mat &features, const String &fname)
static int featureWrite(const Mat &features, const String &fname)
{
ofstream ouF;
ouF.open(fname.c_str(), std::ofstream::binary);
......@@ -131,7 +131,6 @@ int main(int argc, char** argv)
String feature_blob = parser.get<String>("feature_blob");
int num_candidate = parser.get<int>("num_candidate");
String device = parser.get<String>("device");
int dev_id = parser.get<int>("dev_id");
int gallery_out = parser.get<int>("gallery_out");
/* Initialize a net work with Device */
cv::cnn_3dobj::descriptorExtractor descriptor(device);
......@@ -167,7 +166,7 @@ int main(int argc, char** argv)
{
std::cout << std::endl << "---------- Features of gallery images ----------" << std::endl;
/* Print features of the reference images. */
for (unsigned int i = 0; i < feature_reference.rows; i++)
for (int i = 0; i < feature_reference.rows; i++)
std::cout << feature_reference.row(i) << endl;
std::cout << std::endl << "---------- Saving features of gallery images into feature.bin ----------" << std::endl;
featureWrite(feature_reference, "feature.bin");
......@@ -179,7 +178,7 @@ int main(int argc, char** argv)
std::cout << std::endl << "---------- Features of gallery images ----------" << std::endl;
std::vector<std::pair<String, float> > prediction;
/* Print features of the reference images. */
for (unsigned int i = 0; i < feature_reference.rows; i++)
for (int i = 0; i < feature_reference.rows; i++)
std::cout << feature_reference.row(i) << endl;
cv::Mat feature_test;
descriptor.extract(img, feature_test, feature_blob);
......@@ -198,4 +197,4 @@ int main(int argc, char** argv)
}
}
return 0;
}
\ No newline at end of file
}
......@@ -50,7 +50,7 @@ using namespace cv::cnn_3dobj;
* @function listDir
* @brief Making all files names under a directory into a list
*/
void listDir(const char *path, std::vector<String>& files, bool r)
static void listDir(const char *path, std::vector<String>& files, bool r)
{
DIR *pDir;
struct dirent *ent;
......@@ -112,8 +112,8 @@ int main(int argc, char *argv[])
int ite_depth = parser.get<int>("ite_depth");
String plymodel = parser.get<String>("plymodel");
String imagedir = parser.get<String>("imagedir");
string labeldir = parser.get<String>("labeldir");
String bakgrdir = parser.get<string>("bakgrdir");
String labeldir = parser.get<String>("labeldir");
String bakgrdir = parser.get<String>("bakgrdir");
int label_class = parser.get<int>("label_class");
int label_item = parser.get<int>("label_item");
float cam_head_x = parser.get<float>("cam_head_x");
......@@ -144,7 +144,7 @@ int main(int argc, char *argv[])
obj_dist = 370;
bg_dist = 400;
}
if (label_class == 5 | label_class == 10 | label_class == 11 | label_class == 12)
if (label_class == 5 || label_class == 10 || label_class == 11 || label_class == 12)
ite_depth = ite_depth + 1;
cv::cnn_3dobj::icoSphere ViewSphere(10,ite_depth);
std::vector<cv::Point3d> campos;
......@@ -218,8 +218,7 @@ int main(int argc, char *argv[])
}
}
std::fstream imglabel;
char* p=(char*)labeldir.data();
imglabel.open(p, fstream::app|fstream::out);
imglabel.open(labeldir.c_str(), fstream::app|fstream::out);
bool camera_pov = true;
/* Create a window using viz. */
viz::Viz3d myWindow("Coordinate Frame");
......@@ -227,7 +226,7 @@ int main(int argc, char *argv[])
myWindow.setWindowSize(Size(image_size,image_size));
/* Set background color. */
myWindow.setBackgroundColor(viz::Color::gray());
myWindow.spin();
myWindow.spinOnce();
/* Create a Mesh widget, loading .ply models. */
viz::Mesh objmesh = viz::Mesh::load(plymodel);
/* Get the center of the generated mesh widget, cause some .ply files, this could be ignored if you are using PASCAL database*/
......@@ -249,8 +248,7 @@ int main(int argc, char *argv[])
cam_y_dir.x = cam_head_x;
cam_y_dir.y = cam_head_y;
cam_y_dir.z = cam_head_z;
char* temp = new char;
char* bgname = new char;
char temp[1024];
std::vector<String> name_bkg;
if (bakgrdir.size() != 0)
{
......@@ -262,7 +260,7 @@ int main(int argc, char *argv[])
}
}
/* Images will be saved as .png files. */
int cnt_img;
size_t cnt_img;
srand((int)time(0));
do
{
......
#include <opencv2/viz/vizcore.hpp>
#include <opencv2/calib3d/calib3d.hpp>
#include <opencv2/calib3d.hpp>
#include <iostream>
#include <fstream>
#include <opencv2/cnn_3dobj.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <opencv2/features2d.hpp>
#include <iomanip>
using namespace cv;
using namespace std;
......@@ -12,7 +12,7 @@ using namespace cv::cnn_3dobj;
* @function listDir
* @brief Making all files names under a directory into a list
*/
void listDir(const char *path, std::vector<String>& files, bool r)
static void listDir(const char *path, std::vector<String>& files, bool r)
{
DIR *pDir;
struct dirent *ent;
......@@ -46,12 +46,12 @@ void listDir(const char *path, std::vector<String>& files, bool r)
* @function cvcloud_load
* @brief load bunny.ply
*/
Mat cvcloud_load(Mat feature_reference)
static Mat cvcloud_load(Mat feature_reference)
{
Mat cloud(1, feature_reference.rows, CV_32FC3);
Point3f* data = cloud.ptr<cv::Point3f>();
float dummy1, dummy2;
for(size_t i = 0; i < feature_reference.rows; ++i)
for(int i = 0; i < feature_reference.rows; ++i)
{
data[i].x = feature_reference.at<float>(i,0);
data[i].y = feature_reference.at<float>(i,1);
......@@ -102,7 +102,7 @@ int main(int argc, char **argv)
String feature_blob = parser.get<String>("feature_blob");
int num_candidate = parser.get<int>("num_candidate");
String device = parser.get<String>("device");
int dev_id = parser.get<int>("dev_id");
ifstream namelist_model(caffemodellist.c_str(), ios::in);
vector<String> caffemodel;
char *buf = new char[512];
......@@ -198,7 +198,6 @@ int main(int argc, char **argv)
}
vector<Mat> img_merge;
/* Part2: Start to have a show */
bool camera_pov = true;
viz::Viz3d myWindow0("Instruction");
viz::Viz3d myWindow1("Point Cloud");
viz::Viz3d myWindow2("Prediction sample");
......@@ -246,7 +245,7 @@ int main(int argc, char **argv)
myWindowS.setWindowSize(Size(1300,700));
myWindowS.setWindowPosition(Point(0,0));
myWindowS.setBackgroundColor(viz::Color::white());
for (int i = 0; i < slide.size(); ++i)
for (size_t i = 0; i < slide.size(); ++i)
{
/// Create a triangle widget
viz::WImageOverlay slide1(slide[i],Rect(0, 0, 1300, 700));
......@@ -388,4 +387,4 @@ int main(int argc, char **argv)
myWindow2.removeAllWidgets();
}
return 0;
}
\ No newline at end of file
}
......@@ -26,11 +26,11 @@ CV_CNN_Feature_Test::CV_CNN_Feature_Test()
*/
void CV_CNN_Feature_Test::run(int)
{
String caffemodel = String(ts->get_data_path()) + "3d_triplet_iter_30000.caffemodel";
String network_forIMG = cvtest::TS::ptr()->get_data_path() + "3d_triplet_testIMG.prototxt";
String caffemodel = cvtest::findDataFile("contrib/cnn_3dobj/3d_triplet_iter_30000.caffemodel");
String network_forIMG = cvtest::findDataFile("contrib/cnn_3dobj/3d_triplet_testIMG.prototxt");
String mean_file = "no";
std::vector<String> ref_img;
String target_img = String(ts->get_data_path()) + "1_8.png";
String target_img = cvtest::findDataFile("contrib/cnn_3dobj/4_78.png");
String feature_blob = "feat";
String device = "CPU";
int dev_id = 0;
......@@ -43,15 +43,16 @@ void CV_CNN_Feature_Test::run(int)
return;
}
cv::cnn_3dobj::descriptorExtractor descriptor(device, dev_id);
if (strcmp(mean_file.c_str(), "no") == 0)
if (mean_file == "no")
descriptor.loadNet(network_forIMG, caffemodel);
else
descriptor.loadNet(network_forIMG, caffemodel, mean_file);
cv::Mat feature_test;
descriptor.extract(img_base, feature_test, feature_blob);
Mat feature_reference = (Mat_<float>(1,16) << -134.03548, -203.48265, -105.96752, 55.343075, -211.36378, 487.85968, -182.15063, 62.229042, 297.19876, 206.07578, 291.74951, -19.906454, -464.09152, 135.79895, 420.43616, 2.2887282);
printf("Reference feature is computed by Caffe extract_features tool by \n To generate values for different images, use extract_features \n with the resetted image list in prototxt.");
// Reference feature is computed by Caffe extract_features tool.
// To generate values for different images, use extract_features with the resetted image list in prototxt.
Mat feature_reference = (Mat_<float>(1,3) << -312.4805, 8.4768486, -224.98953);
float dist = norm(feature_test - feature_reference);
if (dist > 5) {
ts->printf(cvtest::TS::LOG, "Extracted featrue is not the same from the one extracted from Caffe.");
......
......@@ -12,7 +12,6 @@
#include <iostream>
#include "opencv2/ts.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/cnn_3dobj_config.hpp"
#include "opencv2/cnn_3dobj.hpp"
#endif
......@@ -12,7 +12,7 @@ In this tutorial you will learn how to
Code
----
@include cnn_3dobj/samples/demo_sphereview_data.cpp
@include cnn_3dobj/samples/sphereview_data.cpp
Explanation
-----------
......
......@@ -13,7 +13,7 @@ In this tutorial you will learn how to
Code
----
@include cnn_3dobj/samples/demo_classify.cpp
@include cnn_3dobj/samples/classify.cpp
Explanation
-----------
......
......@@ -12,7 +12,7 @@ In this tutorial you will learn how to
Code
----
@include cnn_3dobj/samples/demo_model_analysis.cpp
@include cnn_3dobj/samples/model_analysis.cpp
Explanation
-----------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment