Commit cabd5d40 authored by Wangyida

add RGB as an option for data generation for triplet training

parent a0d56301
...@@ -53,7 +53,7 @@ $ make ...@@ -53,7 +53,7 @@ $ make
============= =============
#Demos #Demos
##Demo1: training data generation ##Demo1: training data generation
###Image generation from different poses: by default 4 models are used, producing 276 images in all, with each class containing 69 images. If you want to use additional .ply models, it is necessary to change the class number parameter to the new class number and also give each new model a new class label. ###Image generation from different poses: by default 4 models are used, producing 276 images in all, with each class containing 69 images. If you want to use additional .ply models, it is necessary to change the class number parameter to the new class number and also give each new model a new class label. If you will train the network and extract features from RGB images, set the parameter rgb_use to 1.
``` ```
$ ./sphereview_test -plymodel=../3Dmodel/ape.ply -label_class=0 $ ./sphereview_test -plymodel=../3Dmodel/ape.ply -label_class=0
``` ```
...@@ -91,4 +91,8 @@ $ cd <opencv_contrib>/modules/cnn_3dobj/samples/build ...@@ -91,4 +91,8 @@ $ cd <opencv_contrib>/modules/cnn_3dobj/samples/build
``` ```
$ ./classify_test $ ./classify_test
``` ```
###If classification and pose estimation need to subtract the mean computed from all training images, you can run this:
```
$ ./classify_test -mean_file=../data/images_mean/triplet_mean.binaryproto
```
============================================== ==============================================
...@@ -128,7 +128,7 @@ The class create some sphere views of camera towards a 3D object meshed from .pl ...@@ -128,7 +128,7 @@ The class create some sphere views of camera towards a 3D object meshed from .pl
CV_WRAP static void createHeader(int num_item, int rows, int cols, const char* headerPath); CV_WRAP static void createHeader(int num_item, int rows, int cols, const char* headerPath);
/** @brief Create header in binary files collecting the image data and label. /** @brief Create header in binary files collecting the image data and label.
*/ */
CV_WRAP static void writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z); CV_WRAP static void writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z, int isrgb);
/** @brief Write binary files used for training in other open source project. /** @brief Write binary files used for training in other open source project.
*/ */
}; };
...@@ -136,39 +136,37 @@ The class create some sphere views of camera towards a 3D object meshed from .pl ...@@ -136,39 +136,37 @@ The class create some sphere views of camera towards a 3D object meshed from .pl
class CV_EXPORTS_W descriptorExtractor class CV_EXPORTS_W descriptorExtractor
{ {
private: private:
caffe::Net<float>* net_; caffe::Net<float>* convnet;
cv::Size input_geometry_; cv::Size input_geometry;
int num_channels_; int num_channels;
bool net_set;
int net_ready;
cv::Mat mean_; cv::Mat mean_;
std::vector<string> device_info;
void setMean(const string& mean_file); void setMean(const string& mean_file);
/** @brief Load the mean file in binaryproto format. /** @brief Load the mean file in binaryproto format if it is needed.
*/ */
void wrapInputLayer(std::vector<cv::Mat>* input_channels); void wrapInput(std::vector<cv::Mat>* input_channels);
/** @brief Wrap the input layer of the network in separate cv::Mat objects(one per channel). This way we save one memcpy operation and we don't need to rely on cudaMemcpy2D. The last preprocessing operation will write the separate channels directly to the input layer. /** @brief Wrap the input layer of the network in separate cv::Mat objects(one per channel). This way we save one memcpy operation and we don't need to rely on cudaMemcpy2D. The last preprocessing operation will write the separate channels directly to the input layer.
*/ */
void preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels, int net_ready); void preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels);
/** @brief Convert the input image to the input image format of the network. /** @brief Convert the input image to the input image format of the network.
*/ */
public: public:
std::vector<string> labels_; descriptorExtractor(const string& device_type, int device_id);
descriptorExtractor(); /** @brief Set the device for feature extraction.
void listDir(const char *path,std::vector<string>& files,bool r);
/** @brief Get the file names from a root directory.
*/ */
bool setNet(const string& cpu_only, int device_id); std::vector<string> getDevice();
/** @brief Initiate a classification structure. /** @brief Get device information for feature extraction.
*/ */
int loadNet(bool netsetter, const string& model_file, const string& trained_file); void setDevice(const string& device_type, const string& device_id = "");
/** @brief Initiate a classification structure. /** @brief Set device information for feature extraction.
*/ */
int loadNet(bool netsetter, const string& model_file, const string& trained_file, const string& mean_file); void loadNet(const string& model_file, const string& trained_file, string mean_file = "");
/** @brief Initiate a classification structure. /** @brief Initiate a classification structure.
*/ */
void getLabellist(const std::vector<string>& name_gallery); void extract(InputArrayOfArrays inputimg, OutputArray feature, std::string feature_blob);
/** @brief Get the label of the gallery images for result displaying in prediction. /** @brief Extract features from a set of images.
*/
void extract(int net_ready, InputArray inputimg, OutputArray feature, std::string feature_blob);
/** @brief Extract a single feature of one image.
*/ */
}; };
//! @} //! @}
......
...@@ -34,43 +34,40 @@ ...@@ -34,43 +34,40 @@
*/ */
#define HAVE_CAFFE #define HAVE_CAFFE
#include <opencv2/cnn_3dobj.hpp> #include <opencv2/cnn_3dobj.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <iomanip> #include <iomanip>
using namespace cv; using namespace cv;
using namespace std; using namespace std;
using namespace cv::cnn_3dobj; using namespace cv::cnn_3dobj;
/* Return the indices of the top N values of vector v. */ /* Get the file names from a root directory. */
std::vector<int> argmax(const std::vector<float>& v, int N) void listDir(const char *path, std::vector<string>& files, bool r)
{ {
std::vector<std::pair<float, int> > pairs; DIR *pDir;
for (size_t i = 0; i < v.size(); ++i) struct dirent *ent;
pairs.push_back(std::make_pair(v[i], i)); char childpath[512];
std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end()); pDir = opendir(path);
std::vector<int> result; memset(childpath, 0, sizeof(childpath));
for (int i = 0; i < N; ++i) while ((ent = readdir(pDir)) != NULL)
result.push_back(pairs[i].second);
return result;
};
/* Return the indices of the top N values of vector v. */
std::vector<std::pair<string, float> > classify(const cv::Mat& reference, const cv::Mat& target, int N, std::vector<string> labels_)
{
std::vector<float> output;
for (int i = 0; i < reference.rows; i++)
{
cv::Mat f1 = reference.row(i);
cv::Mat f2 = target;
cv::Mat output_temp = f1-f2;
output.push_back(cv::norm(output_temp));
}
std::vector<int> maxN = argmax(output, N);
std::vector<std::pair<string, float> > predictions;
for (int i = 0; i < N; ++i)
{ {
int idx = maxN[i]; if (ent->d_type & DT_DIR)
predictions.push_back(std::make_pair(labels_[idx], output[idx])); {
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
{
continue;
}
if(r)
{
sprintf(childpath, "%s/%s", path, ent->d_name);
listDir(childpath,files,false);
}
}
else
{
files.push_back(ent->d_name);
}
} }
return predictions; sort(files.begin(),files.end());
}; };
int main(int argc, char** argv) int main(int argc, char** argv)
...@@ -82,7 +79,7 @@ int main(int argc, char** argv) ...@@ -82,7 +79,7 @@ int main(int argc, char** argv)
"{mean_file | no | The mean file generated by Caffe from all gallery images, this could be used for mean value substraction from all images. If you want to use the mean file, you can set this as ../data/images_mean/triplet_mean.binaryproto.}" "{mean_file | no | The mean file generated by Caffe from all gallery images, this could be used for mean value substraction from all images. If you want to use the mean file, you can set this as ../data/images_mean/triplet_mean.binaryproto.}"
"{target_img | ../data/images_all/3_13.png | Path of image waiting to be classified.}" "{target_img | ../data/images_all/3_13.png | Path of image waiting to be classified.}"
"{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}" "{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}"
"{num_candidate | 6 | Number of candidates in gallery as the prediction result.}" "{num_candidate | 15 | Number of candidates in gallery as the prediction result.}"
"{device | CPU | device}" "{device | CPU | device}"
"{dev_id | 0 | dev_id}"; "{dev_id | 0 | dev_id}";
cv::CommandLineParser parser(argc, argv, keys); cv::CommandLineParser parser(argc, argv, keys);
...@@ -102,16 +99,15 @@ int main(int argc, char** argv) ...@@ -102,16 +99,15 @@ int main(int argc, char** argv)
string device = parser.get<string>("device"); string device = parser.get<string>("device");
int dev_id = parser.get<int>("dev_id"); int dev_id = parser.get<int>("dev_id");
cv::cnn_3dobj::descriptorExtractor descriptor; cv::cnn_3dobj::descriptorExtractor descriptor(device, dev_id);
bool set_succeed = descriptor.setNet(device, dev_id); std::vector<string> device_info = descriptor.getter();
int net_ready; std::cout << "Using" << device_info[0] << std::endl;
if (strcmp(mean_file.c_str(), "no") == 0) if (strcmp(mean_file.c_str(), "no") == 0)
net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel); descriptor.loadNet(network_forIMG, caffemodel);
else else
net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel, mean_file); descriptor.loadNet(network_forIMG, caffemodel, mean_file);
std::vector<string> name_gallery; std::vector<string> name_gallery;
descriptor.listDir(src_dir.c_str(), name_gallery, false); listDir(src_dir.c_str(), name_gallery, false);
descriptor.getLabellist(name_gallery);
for (unsigned int i = 0; i < name_gallery.size(); i++) { for (unsigned int i = 0; i < name_gallery.size(); i++) {
name_gallery[i] = src_dir + name_gallery[i]; name_gallery[i] = src_dir + name_gallery[i];
} }
...@@ -120,7 +116,7 @@ int main(int argc, char** argv) ...@@ -120,7 +116,7 @@ int main(int argc, char** argv)
for (unsigned int i = 0; i < name_gallery.size(); i++) { for (unsigned int i = 0; i < name_gallery.size(); i++) {
img_gallery.push_back(cv::imread(name_gallery[i], -1)); img_gallery.push_back(cv::imread(name_gallery[i], -1));
} }
descriptor.extract(net_ready, img_gallery, feature_reference, feature_blob); descriptor.extract(img_gallery, feature_reference, feature_blob);
std::cout << std::endl << "---------- Prediction for " << target_img << " ----------" << std::endl; std::cout << std::endl << "---------- Prediction for " << target_img << " ----------" << std::endl;
...@@ -131,14 +127,15 @@ int main(int argc, char** argv) ...@@ -131,14 +127,15 @@ int main(int argc, char** argv)
for (unsigned int i = 0; i < feature_reference.rows; i++) for (unsigned int i = 0; i < feature_reference.rows; i++)
std::cout << feature_reference.row(i) << endl; std::cout << feature_reference.row(i) << endl;
cv::Mat feature_test; cv::Mat feature_test;
descriptor.extract(net_ready, img, feature_test, feature_blob); descriptor.extract(img, feature_test, feature_blob);
cv::BFMatcher matcher(NORM_L2);
std::vector<std::vector<cv::DMatch> > matches;
matcher.knnMatch(feature_test, feature_reference, matches, num_candidate);
std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test << std::endl; std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test << std::endl;
prediction = classify(feature_reference, feature_test, num_candidate, descriptor.labels_);
// Print the top N prediction. // Print the top N prediction.
std::cout << std::endl << "---------- Prediction result(Distance - File Name in Gallery) ----------" << std::endl; std::cout << std::endl << "---------- Prediction result(Distance - File Name in Gallery) ----------" << std::endl;
for (size_t i = 0; i < prediction.size(); ++i) { for (size_t i = 0; i < matches[0].size(); ++i) {
std::pair<string, float> p = prediction[i]; std::cout << i << " - " << std::fixed << std::setprecision(2) << name_gallery[matches[0][i].trainIdx] << " - \"" << matches[0][i].distance << "\"" << std::endl;
std::cout << std::fixed << std::setprecision(2) << p.second << " - \"" << p.first << "\"" << std::endl;
} }
return 0; return 0;
} }
...@@ -48,7 +48,8 @@ int main(int argc, char *argv[]) ...@@ -48,7 +48,8 @@ int main(int argc, char *argv[])
"{imagedir | ../data/images_all/ | path of the generated images for one particular .ply model. }" "{imagedir | ../data/images_all/ | path of the generated images for one particular .ply model. }"
"{labeldir | ../data/label_all.txt | path of the generated images for one particular .ply model. }" "{labeldir | ../data/label_all.txt | path of the generated images for one particular .ply model. }"
"{num_class | 4 | total number of classes of models}" "{num_class | 4 | total number of classes of models}"
"{label_class | 0 | class label of current .ply model}"; "{label_class | 0 | class label of current .ply model}"
"{rgb_use | 0 | use RGB image or grayscale}";
cv::CommandLineParser parser(argc, argv, keys); cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for Sphere View data generation"); parser.about("Demo for Sphere View data generation");
if (parser.has("help")) if (parser.has("help"))
...@@ -62,6 +63,7 @@ int main(int argc, char *argv[]) ...@@ -62,6 +63,7 @@ int main(int argc, char *argv[])
string labeldir = parser.get<string>("labeldir"); string labeldir = parser.get<string>("labeldir");
int num_class = parser.get<int>("num_class"); int num_class = parser.get<int>("num_class");
int label_class = parser.get<int>("label_class"); int label_class = parser.get<int>("label_class");
int rgb_use = parser.get<int>("rgb_use");
cv::cnn_3dobj::icoSphere ViewSphere(10,ite_depth); cv::cnn_3dobj::icoSphere ViewSphere(10,ite_depth);
std::vector<cv::Point3d> campos = ViewSphere.CameraPos; std::vector<cv::Point3d> campos = ViewSphere.CameraPos;
std::fstream imglabel; std::fstream imglabel;
...@@ -122,7 +124,7 @@ int main(int argc, char *argv[]) ...@@ -122,7 +124,7 @@ int main(int argc, char *argv[])
if (camera_pov) if (camera_pov)
myWindow.setViewerPose(cam_pose); myWindow.setViewerPose(cam_pose);
myWindow.saveScreenshot(filename); myWindow.saveScreenshot(filename);
ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100)); ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100), rgb_use);
} }
imglabel.close(); imglabel.close();
return 1; return 1;
......
...@@ -6,117 +6,100 @@ namespace cv ...@@ -6,117 +6,100 @@ namespace cv
{ {
namespace cnn_3dobj namespace cnn_3dobj
{ {
descriptorExtractor::descriptorExtractor(){}; descriptorExtractor::descriptorExtractor(const string& device_type, int device_id)
void descriptorExtractor::listDir(const char *path,vector<string>& files,bool r)
{ {
DIR *pDir; if (strcmp(device_type.c_str(), "CPU") == 0 || strcmp(device_type.c_str(), "GPU") == 0)
struct dirent *ent;
char childpath[512];
pDir = opendir(path);
memset(childpath, 0, sizeof(childpath));
while ((ent = readdir(pDir)) != NULL)
{ {
if (ent->d_type & DT_DIR) if (strcmp(device_type.c_str(), "CPU") == 0)
{ {
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) caffe::Caffe::set_mode(caffe::Caffe::CPU);
{ device_info.push_back("CPU");
continue; std::cout << "Using CPU" << std::endl;
}
if(r)
{
sprintf(childpath, "%s/%s", path, ent->d_name);
descriptorExtractor::listDir(childpath,files,false);
}
} }
else else
{ {
files.push_back(ent->d_name); caffe::Caffe::set_mode(caffe::Caffe::GPU);
caffe::Caffe::SetDevice(device_id);
device_info.push_back("GPU");
std::cout << "Using GPU" << std::endl;
std::cout << "Using Device_id=" << device_id << std::endl;
} }
net_set = true;
} }
sort(files.begin(),files.end()); else
{
std::cout << "Error: Device name must be 'GPU' together with an device number or 'CPU'." << std::endl;
net_set = false;
}
};
std::vector<string> descriptorExtractor::getDevice()
{
std::vector<string> device_info_out;
device_info_out = device_info;
return device_info_out;
}; };
bool descriptorExtractor::setNet(const string& cpu_only, int device_id) void descriptorExtractor::setDevice(const string& device_type, const string& device_id)
{ {
if (strcmp(cpu_only.c_str(), "CPU") == 0 || strcmp(cpu_only.c_str(), "GPU") == 0) if (strcmp(device_type.c_str(), "CPU") == 0 || strcmp(device_type.c_str(), "GPU") == 0)
{ {
if (strcmp(cpu_only.c_str(), "CPU") == 0) if (strcmp(device_type.c_str(), "CPU") == 0)
{ {
caffe::Caffe::set_mode(caffe::Caffe::CPU); caffe::Caffe::set_mode(caffe::Caffe::CPU);
device_info.push_back("CPU");
std::cout << "Using CPU" << std::endl;
} }
else else
{ {
int dev_id = atoi(device_id.c_str());
caffe::Caffe::set_mode(caffe::Caffe::GPU); caffe::Caffe::set_mode(caffe::Caffe::GPU);
caffe::Caffe::SetDevice(device_id); caffe::Caffe::SetDevice(dev_id);
std::cout << "Using Device_id=" << device_id << std::endl; device_info.push_back("GPU");
std::cout << "Using GPU" << std::endl;
std::cout << "Using Device_id=" << dev_id << std::endl;
} }
return true; net_set = true;
} }
else else
{ {
std::cout << "Error: Device name must be 'GPU' together with an device number or 'CPU'." << std::endl; std::cout << "Error: Device name must be 'GPU' together with an device number or 'CPU'." << std::endl;
return false; net_set = false;
} }
}; };
int descriptorExtractor::loadNet(bool netsetter, const string& model_file, const string& trained_file, const string& mean_file) void descriptorExtractor::loadNet(const string& model_file, const string& trained_file, string mean_file)
{ {
int net_ready = 0; net_ready = 0;
if (netsetter) if (net_set)
{ {
/* Load the network. */ /* Load the network. */
net_ = new Net<float>(model_file, TEST); convnet = new Net<float>(model_file, TEST);
net_->CopyTrainedLayersFrom(trained_file); convnet->CopyTrainedLayersFrom(trained_file);
if (net_->num_inputs() != 1) if (convnet->num_inputs() != 1)
std::cout << "Network should have exactly one input." << std::endl; std::cout << "Network should have exactly one input." << std::endl;
if (net_->num_outputs() != 1) if (convnet->num_outputs() != 1)
std::cout << "Network should have exactly one output." << std::endl; std::cout << "Network should have exactly one output." << std::endl;
Blob<float>* input_layer = net_->input_blobs()[0]; Blob<float>* input_layer = convnet->input_blobs()[0];
num_channels_ = input_layer->channels(); num_channels = input_layer->channels();
if (num_channels_ != 3 && num_channels_ != 1) if (num_channels != 3 && num_channels != 1)
std::cout << "Input layer should have 1 or 3 channels." << std::endl; std::cout << "Input layer should have 1 or 3 channels." << std::endl;
input_geometry_ = cv::Size(input_layer->width(), input_layer->height()); input_geometry = cv::Size(input_layer->width(), input_layer->height());
/* Load the binaryproto mean file. */ /* Load the binaryproto mean file. */
setMean(mean_file); if (!mean_file.empty())
net_ready = 2; {
} setMean(mean_file);
else net_ready = 2;
{ }
std::cout << "Error: Device must be set in advance using SetNet function" << std::endl; else
} {
return net_ready; net_ready = 1;
}; }
int descriptorExtractor::loadNet(bool netsetter, const string& model_file, const string& trained_file)
{
int net_ready = 0;
if (netsetter)
{
/* Load the network. */
net_ = new Net<float>(model_file, TEST);
net_->CopyTrainedLayersFrom(trained_file);
if (net_->num_inputs() != 1)
std::cout << "Network should have exactly one input." << std::endl;
if (net_->num_outputs() != 1)
std::cout << "Network should have exactly one output." << std::endl;
Blob<float>* input_layer = net_->input_blobs()[0];
num_channels_ = input_layer->channels();
if (num_channels_ != 3 && num_channels_ != 1)
std::cout << "Input layer should have 1 or 3 channels." << std::endl;
input_geometry_ = cv::Size(input_layer->width(), input_layer->height());
net_ready = 1;
} }
else else
{ {
std::cout << "Error: Device must be set in advance using SetNet function" << std::endl; std::cout << "Error: Device must be set in advance using SetNet function" << std::endl;
} }
return net_ready;
};
void descriptorExtractor::getLabellist(const std::vector<string>& name_gallery)
{
for (unsigned int i = 0; i < name_gallery.size(); ++i)
labels_.push_back(name_gallery[i]);
}; };
/* Load the mean file in binaryproto format. */ /* Load the mean file in binaryproto format. */
...@@ -127,12 +110,12 @@ namespace cnn_3dobj ...@@ -127,12 +110,12 @@ namespace cnn_3dobj
/* Convert from BlobProto to Blob<float> */ /* Convert from BlobProto to Blob<float> */
Blob<float> mean_blob; Blob<float> mean_blob;
mean_blob.FromProto(blob_proto); mean_blob.FromProto(blob_proto);
if (mean_blob.channels() != num_channels_) if (mean_blob.channels() != num_channels)
std::cout << "Number of channels of mean file doesn't match input layer." << std::endl; std::cout << "Number of channels of mean file doesn't match input layer." << std::endl;
/* The format of the mean file is planar 32-bit float BGR or grayscale. */ /* The format of the mean file is planar 32-bit float BGR or grayscale. */
std::vector<cv::Mat> channels; std::vector<cv::Mat> channels;
float* data = mean_blob.mutable_cpu_data(); float* data = mean_blob.mutable_cpu_data();
for (int i = 0; i < num_channels_; ++i) for (int i = 0; i < num_channels; ++i)
{ {
/* Extract an individual channel. */ /* Extract an individual channel. */
cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data); cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
...@@ -145,27 +128,27 @@ namespace cnn_3dobj ...@@ -145,27 +128,27 @@ namespace cnn_3dobj
/* Compute the global mean pixel value and create a mean image /* Compute the global mean pixel value and create a mean image
* filled with this value. */ * filled with this value. */
cv::Scalar channel_mean = cv::mean(mean); cv::Scalar channel_mean = cv::mean(mean);
mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean); mean_ = cv::Mat(input_geometry, mean.type(), channel_mean);
}; };
void descriptorExtractor::extract(int net_ready, InputArray inputimg, OutputArray feature, std::string featrue_blob) void descriptorExtractor::extract(InputArrayOfArrays inputimg, OutputArray feature, std::string feature_blob)
{ {
if (net_ready) if (net_ready)
{ {
Blob<float>* input_layer = net_->input_blobs()[0]; Blob<float>* input_layer = convnet->input_blobs()[0];
input_layer->Reshape(1, num_channels_, input_layer->Reshape(1, num_channels,
input_geometry_.height, input_geometry_.width); input_geometry.height, input_geometry.width);
/* Forward dimension change to all layers. */ /* Forward dimension change to all layers. */
net_->Reshape(); convnet->Reshape();
std::vector<cv::Mat> input_channels; std::vector<cv::Mat> input_channels;
wrapInputLayer(&input_channels); wrapInput(&input_channels);
if (inputimg.kind() == 65536) if (inputimg.kind() == 65536)
{/* this is a Mat */ {/* this is a Mat */
Mat img = inputimg.getMat(); Mat img = inputimg.getMat();
preprocess(img, &input_channels, net_ready); preprocess(img, &input_channels);
net_->ForwardPrefilled(); convnet->ForwardPrefilled();
/* Copy the output layer to a std::vector */ /* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->blob_by_name(featrue_blob).get(); Blob<float>* output_layer = convnet->blob_by_name(feature_blob).get();
const float* begin = output_layer->cpu_data(); const float* begin = output_layer->cpu_data();
const float* end = begin + output_layer->channels(); const float* end = begin + output_layer->channels();
std::vector<float> featureVec = std::vector<float>(begin, end); std::vector<float> featureVec = std::vector<float>(begin, end);
...@@ -179,10 +162,10 @@ namespace cnn_3dobj ...@@ -179,10 +162,10 @@ namespace cnn_3dobj
Mat feature_vector; Mat feature_vector;
for (unsigned int i = 0; i < img.size(); ++i) for (unsigned int i = 0; i < img.size(); ++i)
{ {
preprocess(img[i], &input_channels, net_ready); preprocess(img[i], &input_channels);
net_->ForwardPrefilled(); convnet->ForwardPrefilled();
/* Copy the output layer to a std::vector */ /* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->blob_by_name(featrue_blob).get(); Blob<float>* output_layer = convnet->blob_by_name(feature_blob).get();
const float* begin = output_layer->cpu_data(); const float* begin = output_layer->cpu_data();
const float* end = begin + output_layer->channels(); const float* end = begin + output_layer->channels();
std::vector<float> featureVec = std::vector<float>(begin, end); std::vector<float> featureVec = std::vector<float>(begin, end);
...@@ -206,9 +189,9 @@ namespace cnn_3dobj ...@@ -206,9 +189,9 @@ namespace cnn_3dobj
* don't need to rely on cudaMemcpy2D. The last preprocessing * don't need to rely on cudaMemcpy2D. The last preprocessing
* operation will write the separate channels directly to the input * operation will write the separate channels directly to the input
* layer. */ * layer. */
void descriptorExtractor::wrapInputLayer(std::vector<cv::Mat>* input_channels) void descriptorExtractor::wrapInput(std::vector<cv::Mat>* input_channels)
{ {
Blob<float>* input_layer = net_->input_blobs()[0]; Blob<float>* input_layer = convnet->input_blobs()[0];
int width = input_layer->width(); int width = input_layer->width();
int height = input_layer->height(); int height = input_layer->height();
float* input_data = input_layer->mutable_cpu_data(); float* input_data = input_layer->mutable_cpu_data();
...@@ -220,28 +203,27 @@ namespace cnn_3dobj ...@@ -220,28 +203,27 @@ namespace cnn_3dobj
} }
}; };
void descriptorExtractor::preprocess(const cv::Mat& img, void descriptorExtractor::preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels)
std::vector<cv::Mat>* input_channels, int net_ready)
{ {
/* Convert the input image to the input image format of the network. */ /* Convert the input image to the input image format of the network. */
cv::Mat sample; cv::Mat sample;
if (img.channels() == 3 && num_channels_ == 1) if (img.channels() == 3 && num_channels == 1)
cv::cvtColor(img, sample, CV_BGR2GRAY); cv::cvtColor(img, sample, CV_BGR2GRAY);
else if (img.channels() == 4 && num_channels_ == 1) else if (img.channels() == 4 && num_channels == 1)
cv::cvtColor(img, sample, CV_BGRA2GRAY); cv::cvtColor(img, sample, CV_BGRA2GRAY);
else if (img.channels() == 4 && num_channels_ == 3) else if (img.channels() == 4 && num_channels == 3)
cv::cvtColor(img, sample, CV_BGRA2BGR); cv::cvtColor(img, sample, CV_BGRA2BGR);
else if (img.channels() == 1 && num_channels_ == 3) else if (img.channels() == 1 && num_channels == 3)
cv::cvtColor(img, sample, CV_GRAY2BGR); cv::cvtColor(img, sample, CV_GRAY2BGR);
else else
sample = img; sample = img;
cv::Mat sample_resized; cv::Mat sample_resized;
if (sample.size() != input_geometry_) if (sample.size() != input_geometry)
cv::resize(sample, sample_resized, input_geometry_); cv::resize(sample, sample_resized, input_geometry);
else else
sample_resized = sample; sample_resized = sample;
cv::Mat sample_float; cv::Mat sample_float;
if (num_channels_ == 3) if (num_channels == 3)
sample_resized.convertTo(sample_float, CV_32FC3); sample_resized.convertTo(sample_float, CV_32FC3);
else else
sample_resized.convertTo(sample_float, CV_32FC1); sample_resized.convertTo(sample_float, CV_32FC1);
...@@ -255,7 +237,7 @@ std::vector<cv::Mat>* input_channels, int net_ready) ...@@ -255,7 +237,7 @@ std::vector<cv::Mat>* input_channels, int net_ready)
* objects in input_channels. */ * objects in input_channels. */
cv::split(sample_normalized, *input_channels); cv::split(sample_normalized, *input_channels);
if (reinterpret_cast<float*>(input_channels->at(0).data) if (reinterpret_cast<float*>(input_channels->at(0).data)
!= net_->input_blobs()[0]->cpu_data()) != convnet->input_blobs()[0]->cpu_data())
std::cout << "Input channels are not wrapping the input layer of the network." << std::endl; std::cout << "Input channels are not wrapping the input layer of the network." << std::endl;
}; };
} }
......
...@@ -175,9 +175,8 @@ namespace cnn_3dobj ...@@ -175,9 +175,8 @@ namespace cnn_3dobj
headerLabel.close(); headerLabel.close();
}; };
void icoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z) void icoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z, int isrgb)
{ {
int isrgb = 0;
cv::Mat ImgforBin = cv::imread(filenameImg, isrgb); cv::Mat ImgforBin = cv::imread(filenameImg, isrgb);
char* A0 = (char*)malloc(1024); char* A0 = (char*)malloc(1024);
strcpy(A0, binaryPath); strcpy(A0, binaryPath);
...@@ -208,9 +207,24 @@ namespace cnn_3dobj ...@@ -208,9 +207,24 @@ namespace cnn_3dobj
createHeader(num_item, 64, 64, binaryPath); createHeader(num_item, 64, 64, binaryPath);
img_file.open(binPathimg,ios::out|ios::binary|ios::app); img_file.open(binPathimg,ios::out|ios::binary|ios::app);
lab_file.open(binPathlab,ios::out|ios::binary|ios::app); lab_file.open(binPathlab,ios::out|ios::binary|ios::app);
for (int r = 0; r < ImgforBin.rows; r++) if (isrgb == 0)
{ {
img_file.write(reinterpret_cast<const char*>(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize()); for (int r = 0; r < ImgforBin.rows; r++)
{
img_file.write(reinterpret_cast<const char*>(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize());
}
}
else
{
std::vector<cv::Mat> Img3forBin;
cv::split(ImgforBin,Img3forBin);
for (unsigned int i = 0; i < Img3forBin.size(); i++)
{
for (int r = 0; r < Img3forBin[i].rows; r++)
{
img_file.write(reinterpret_cast<const char*>(Img3forBin[i].ptr(r)), Img3forBin[i].cols*Img3forBin[i].elemSize());
}
}
} }
signed char templab = (signed char)label_class; signed char templab = (signed char)label_class;
lab_file << templab << (signed char)x << (signed char)y << (signed char)z; lab_file << templab << (signed char)x << (signed char)y << (signed char)z;
...@@ -222,9 +236,24 @@ namespace cnn_3dobj ...@@ -222,9 +236,24 @@ namespace cnn_3dobj
img_file.open(binPathimg,ios::out|ios::binary|ios::app); img_file.open(binPathimg,ios::out|ios::binary|ios::app);
lab_file.open(binPathlab,ios::out|ios::binary|ios::app); lab_file.open(binPathlab,ios::out|ios::binary|ios::app);
cout <<"Concatenating the training data at: " << binaryPath << ". " << endl; cout <<"Concatenating the training data at: " << binaryPath << ". " << endl;
for (int r = 0; r < ImgforBin.rows; r++) if (isrgb == 0)
{
for (int r = 0; r < ImgforBin.rows; r++)
{
img_file.write(reinterpret_cast<const char*>(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize());
}
}
else
{ {
img_file.write(reinterpret_cast<const char*>(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize()); std::vector<cv::Mat> Img3forBin;
cv::split(ImgforBin,Img3forBin);
for (unsigned int i = 0; i < Img3forBin.size(); i++)
{
for (int r = 0; r < Img3forBin[i].rows; r++)
{
img_file.write(reinterpret_cast<const char*>(Img3forBin[i].ptr(r)), Img3forBin[i].cols*Img3forBin[i].elemSize());
}
}
} }
signed char templab = (signed char)label_class; signed char templab = (signed char)label_class;
lab_file << templab << (signed char)x << (signed char)y << (signed char)z; lab_file << templab << (signed char)x << (signed char)y << (signed char)z;
......
...@@ -34,23 +34,11 @@ void CV_CNN_Feature_Test::run(int) ...@@ -34,23 +34,11 @@ void CV_CNN_Feature_Test::run(int)
string device = "CPU"; string device = "CPU";
int dev_id = 0; int dev_id = 0;
cv::cnn_3dobj::descriptorExtractor descriptor; cv::cnn_3dobj::descriptorExtractor descriptor(device, dev_id);
bool set_succeed = descriptor.setNet(device, dev_id);
if (!set_succeed) {
ts->printf(cvtest::TS::LOG, "Net parameters which is GPU or CPU could not be set");
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
return;
}
int net_ready;
if (strcmp(mean_file.c_str(), "no") == 0) if (strcmp(mean_file.c_str(), "no") == 0)
net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel); descriptor.loadNet(network_forIMG, caffemodel);
else else
net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel, mean_file); descriptor.loadNet(network_forIMG, caffemodel, mean_file);
if (!net_ready) {
ts->printf(cvtest::TS::LOG, "No model loaded");
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
return;
}
cv::Mat img = cv::imread(target_img, -1); cv::Mat img = cv::imread(target_img, -1);
if (img.empty()) { if (img.empty()) {
ts->printf(cvtest::TS::LOG, "could not read image %s\n", target_img.c_str()); ts->printf(cvtest::TS::LOG, "could not read image %s\n", target_img.c_str());
...@@ -58,7 +46,7 @@ void CV_CNN_Feature_Test::run(int) ...@@ -58,7 +46,7 @@ void CV_CNN_Feature_Test::run(int)
return; return;
} }
cv::Mat feature_test; cv::Mat feature_test;
descriptor.extract(net_ready, img, feature_test, feature_blob); descriptor.extract(img, feature_test, feature_blob);
if (feature_test.empty()) { if (feature_test.empty()) {
ts->printf(cvtest::TS::LOG, "could not extract feature from image %s\n", target_img.c_str()); ts->printf(cvtest::TS::LOG, "could not extract feature from image %s\n", target_img.c_str());
ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment