cnn_feature.cpp 10.1 KB
Newer Older
1 2 3 4 5 6 7
#include "precomp.hpp"
using namespace caffe;

namespace cv
{
namespace cnn_3dobj
{
8
    /* Construct the extractor and select the Caffe compute device.
     *
     * device_type  Must compare equal to "CPU" or "GPU". Any other value
     *              prints an error and leaves net_set false, which makes
     *              loadNet() refuse to load a network.
     * device_id    Passed to caffe::Caffe::SetDevice() in GPU mode. It is
     *              recorded in deviceId in every case. */
    descriptorExtractor::descriptorExtractor(const String& device_type, int device_id)
    {
        net_ready = 0;
        /* Store the id up front so that getDeviceId() never reads an
         * unassigned member, whichever branch is taken below. */
        deviceId = device_id;

        const bool use_cpu = strcmp(device_type.c_str(), "CPU") == 0;
        const bool use_gpu = strcmp(device_type.c_str(), "GPU") == 0;

        if (use_cpu)
        {
            caffe::Caffe::set_mode(caffe::Caffe::CPU);
            deviceType = "CPU";
            std::cout << "Using CPU" << std::endl;
            net_set = true;
        }
        else if (use_gpu)
        {
            caffe::Caffe::set_mode(caffe::Caffe::GPU);
            caffe::Caffe::SetDevice(device_id);
            deviceType = "GPU";
            std::cout << "Using GPU" << std::endl;
            std::cout << "Using Device_id=" << device_id << std::endl;
            net_set = true;
        }
        else
        {
            std::cout << "Error: Device name must be 'GPU' together with an device number or 'CPU'." << std::endl;
            net_set = false;
        }
    }

36
    /* Return a copy of the stored device type string. */
    String descriptorExtractor::getDeviceType()
    {
        return deviceType;
    }

    int descriptorExtractor::getDeviceId()
44
    {
45 46
        int device_info_out;
        device_info_out = deviceId;
47
        return device_info_out;
48 49
    };

50
    void descriptorExtractor::setDeviceType(const String& device_type)
51
    {
52
        if (strcmp(device_type.c_str(), "CPU") == 0 || strcmp(device_type.c_str(), "GPU") == 0)
53
        {
54
            if (strcmp(device_type.c_str(), "CPU") == 0)
55 56
            {
                caffe::Caffe::set_mode(caffe::Caffe::CPU);
57
                deviceType = "CPU";
58
                std::cout << "Using CPU" << std::endl;
59 60 61 62
            }
            else
            {
                caffe::Caffe::set_mode(caffe::Caffe::GPU);
63
                deviceType = "GPU";
64
                std::cout << "Using GPU" << std::endl;
65 66 67
            }
        }
        else
68
        {
69
            std::cout << "Error: Device name must be 'GPU' or 'CPU'." << std::endl;
70
        }
71
    };
72

73 74 75 76 77 78 79 80 81 82 83 84 85 86
    /* Select the GPU to run on.
     *
     * device_id  Passed to caffe::Caffe::SetDevice() and stored in deviceId.
     *            Ignored, with an error message, unless the current
     *            deviceType is "GPU". */
    void descriptorExtractor::setDeviceId(const int& device_id)
    {
        const bool gpu_selected = strcmp(deviceType.c_str(), "GPU") == 0;
        if (!gpu_selected)
        {
            std::cout << "Error: Device ID only need to be set when GPU is used." << std::endl;
            return;
        }

        caffe::Caffe::SetDevice(device_id);
        deviceId = device_id;
        std::cout << "Using GPU with Device ID = " << device_id << std::endl;
    }

87
    void descriptorExtractor::loadNet(const String& model_file, const String& trained_file, const String& mean_file)
88
    {
89
        if (net_set)
90 91
        {
            /* Load the network. */
92 93 94
            convnet = new Net<float>(model_file, TEST);
            convnet->CopyTrainedLayersFrom(trained_file);
            if (convnet->num_inputs() != 1)
95
                std::cout << "Network should have exactly one input." << std::endl;
96
            if (convnet->num_outputs() != 1)
97
                std::cout << "Network should have exactly one output." << std::endl;
98 99 100
            Blob<float>* input_layer = convnet->input_blobs()[0];
            num_channels = input_layer->channels();
            if (num_channels != 3 && num_channels != 1)
101
                std::cout << "Input layer should have 1 or 3 channels." << std::endl;
102
            input_geometry = cv::Size(input_layer->width(), input_layer->height());
103
            /* Load the binaryproto mean file. */
104 105 106 107 108 109 110 111 112
            if (!mean_file.empty())
            {
                setMean(mean_file);
                net_ready = 2;
            }
            else
            {
                net_ready = 1;
            }
113
        }
114
        else
115
        {
116
            std::cout << "Error: Net is not set properly in advance using construtor." << std::endl;
117 118
        }
    };
119

120
    /* Load the mean file in binaryproto format. */
121
    void descriptorExtractor::setMean(const String& mean_file)
122
    {
123 124 125 126 127
        BlobProto blob_proto;
        ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
        /* Convert from BlobProto to Blob<float> */
        Blob<float> mean_blob;
        mean_blob.FromProto(blob_proto);
128
        if (mean_blob.channels() != num_channels)
129 130 131 132
            std::cout << "Number of channels of mean file doesn't match input layer." << std::endl;
        /* The format of the mean file is planar 32-bit float BGR or grayscale. */
        std::vector<cv::Mat> channels;
        float* data = mean_blob.mutable_cpu_data();
133
        for (int i = 0; i < num_channels; ++i)
134 135 136 137 138 139 140 141 142 143 144 145
        {
            /* Extract an individual channel. */
            cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
            channels.push_back(channel);
            data += mean_blob.height() * mean_blob.width();
        }
        /* Merge the separate channels into a single image. */
        cv::Mat mean;
        cv::merge(channels, mean);
        /* Compute the global mean pixel value and create a mean image
         * filled with this value. */
        cv::Scalar channel_mean = cv::mean(mean);
146
        mean_ = cv::Mat(input_geometry, mean.type(), channel_mean);
147
    };
148

149
    /* Run the network on one image or a list of images and return the
     * contents of a named blob as feature rows.
     *
     * inputimg      A single Mat, or anything getMatVector() accepts.
     * feature       Receives a CV_32FC1 matrix with one row per input image;
     *               each row holds channels() values read from the blob.
     * feature_blob  Name of the blob to read after the forward pass.
     *
     * If loadNet() has not completed (net_ready == 0), or the network has no
     * blob called feature_blob, an error is printed and feature is left
     * untouched. */
    void descriptorExtractor::extract(InputArrayOfArrays inputimg, OutputArray feature, String feature_blob)
    {
        if (!net_ready)
        {
            std::cout << "Device must be set properly using constructor and the net must be set in advance using loadNet." << std::endl;
            return;
        }

        Blob<float>* input_layer = convnet->input_blobs()[0];
        input_layer->Reshape(1, num_channels,
                             input_geometry.height, input_geometry.width);
        /* Forward dimension change to all layers. */
        convnet->Reshape();
        std::vector<cv::Mat> input_channels;
        wrapInput(&input_channels);

        /* Treat a single Mat as a one-element list so that both kinds of
         * input go through the same loop. */
        std::vector<cv::Mat> images;
        if (inputimg.kind() == _InputArray::MAT)
            images.push_back(inputimg.getMat());
        else
            inputimg.getMatVector(images);

        cv::Mat feature_rows;
        for (size_t i = 0; i < images.size(); ++i)
        {
            preprocess(images[i], &input_channels);
            convnet->ForwardPrefilled();

            /* blob_by_name() yields a null pointer for an unknown name;
             * dereferencing it would crash. */
            Blob<float>* output_layer = convnet->blob_by_name(feature_blob).get();
            if (output_layer == NULL)
            {
                std::cout << "Error: The network has no blob named '" << feature_blob.c_str() << "'." << std::endl;
                return;
            }

            const int dim_feature = output_layer->channels();
            if (i == 0)
                feature_rows.create(static_cast<int>(images.size()), dim_feature, CV_32FC1);

            /* Copy this image's descriptor into its own row. */
            const float* src = output_layer->cpu_data();
            float* dst = feature_rows.ptr<float>(static_cast<int>(i));
            for (int c = 0; c < dim_feature; ++c)
                dst[c] = src[c];
        }
        feature_rows.copyTo(feature);
    }
201

202
    /* Wrap the input layer of the network in separate cv::Mat objects
203 204 205 206
     * (one per channel). This way we save one memcpy operation and we
     * don't need to rely on cudaMemcpy2D. The last preprocessing
     * operation will write the separate channels directly to the input
     * layer. */
207
    void descriptorExtractor::wrapInput(std::vector<cv::Mat>* input_channels)
208
    {
209
        Blob<float>* input_layer = convnet->input_blobs()[0];
210 211 212 213 214 215 216 217 218 219
        int width = input_layer->width();
        int height = input_layer->height();
        float* input_data = input_layer->mutable_cpu_data();
        for (int i = 0; i < input_layer->channels(); ++i)
        {
            cv::Mat channel(height, width, CV_32FC1, input_data);
            input_channels->push_back(channel);
            input_data += width * height;
        }
    };
220

221
    /* Convert an image to the network's input format and write it into the
     * wrapped input channels.
     *
     * Steps: colour conversion to num_channels, resize to input_geometry,
     * conversion to 32-bit float, mean subtraction (only when net_ready is
     * 2), and finally a split into *input_channels. */
    void descriptorExtractor::preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels)
    {
        /* Pick the colour conversion, if any, that maps the image's channel
         * count onto the one the network expects. */
        const int src_channels = img.channels();
        int conversion = -1;
        if (num_channels == 1 && src_channels == 3)
            conversion = CV_BGR2GRAY;
        else if (num_channels == 1 && src_channels == 4)
            conversion = CV_BGRA2GRAY;
        else if (num_channels == 3 && src_channels == 4)
            conversion = CV_BGRA2BGR;
        else if (num_channels == 3 && src_channels == 1)
            conversion = CV_GRAY2BGR;

        cv::Mat converted;
        if (conversion >= 0)
            cv::cvtColor(img, converted, conversion);
        else
            converted = img;

        /* Bring the image to the spatial size of the input layer. */
        cv::Mat scaled;
        if (converted.size() == input_geometry)
            scaled = converted;
        else
            cv::resize(converted, scaled, input_geometry);

        cv::Mat as_float;
        scaled.convertTo(as_float, num_channels == 3 ? CV_32FC3 : CV_32FC1);

        /* A mean image is only available when loadNet() was given one. */
        cv::Mat normalized;
        if (net_ready == 2)
            cv::subtract(as_float, mean_, normalized);
        else
            normalized = as_float;

        /* The split writes the separate planes directly to the input layer
         * of the network, because the cv::Mat objects in input_channels
         * wrap that memory. */
        cv::split(normalized, *input_channels);

        const float* first_plane = reinterpret_cast<float*>(input_channels->at(0).data);
        if (first_plane != convnet->input_blobs()[0]->cpu_data())
            std::cout << "Input channels are not wrapping the input layer of the network." << std::endl;
    }
258 259
} /* namespace cnn_3dobj */
} /* namespace cv */