Commit 048c3fab authored by vbystricky's avatar vbystricky Committed by Anna Petrovicheva

Add Crop layer. Add sample of using FCN32s network for semantic segmentation

parent 99d1e44a
#
# This prototxt is based on voc-fcn32s/val.prototxt file from
# https://github.com/shelhamer/fcn.berkeleyvision.org, which is distributed under
# Caffe (BSD) license:
# http://caffe.berkeleyvision.org/model_zoo.html#bvlc-model-license
#
name: "voc-fcn32s"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 500
input_dim: 500
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 100
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "fc6"
type: "Convolution"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 7
stride: 1
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "fc7"
type: "Convolution"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 1
stride: 1
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "score_fr"
type: "Convolution"
bottom: "fc7"
top: "score_fr"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
pad: 0
kernel_size: 1
}
}
layer {
name: "upscore"
type: "Deconvolution"
bottom: "score_fr"
top: "upscore"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 64
stride: 32
}
}
layer {
name: "score"
type: "Crop"
bottom: "upscore"
bottom: "data"
top: "score"
crop_param {
axis: 2
offset: 19
}
}
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace std;
static std::vector<cv::Vec3b> readColors(const string &filename = "d:/dnn_opencv/pascal-classes.txt")
{
std::vector<cv::Vec3b> colors;
std::ifstream fp(filename);
if (!fp.is_open())
{
std::cerr << "File with colors not found: " << filename << std::endl;
exit(-1);
}
std::string line;
while (!fp.eof())
{
std::getline(fp, line);
if (line.length())
{
std::stringstream ss(line);
std::string name; ss >> name;
int temp;
cv::Vec3b color;
ss >> temp; color[0] = temp;
ss >> temp; color[1] = temp;
ss >> temp; color[2] = temp;
colors.push_back(color);
}
}
fp.close();
return colors;
}
/**
 * Converts per-class score maps into a color image of the winning classes.
 *
 * For every pixel of the first item in the blob (index 0) the channel with the
 * highest score is selected, and the pixel of the output image is set to the
 * color stored at that channel's index in @p colors.
 *
 * @param score  blob with one score plane per class.
 * @param colors color table indexed by channel (class) number; must contain
 *               at least as many entries as the blob has channels.
 * @param segm   output image; (re)created as rows x cols, CV_8UC3.
 */
static void colorizeSegmentation(dnn::Blob &score, const std::vector<cv::Vec3b> &colors, cv::Mat &segm)
{
    const int rows = score.rows();
    const int cols = score.cols();
    const int chns = score.channels();

    // The winning class index is stored in a uchar and used to index `colors`.
    CV_Assert(chns > 0 && chns <= 256);
    CV_Assert(colors.size() >= static_cast<size_t>(chns));

    cv::Mat maxCl = cv::Mat::zeros(rows, cols, CV_8UC1);
    cv::Mat maxVal(rows, cols, CV_32FC1);

    for (int ch = 0; ch < chns; ch++)
    {
        for (int row = 0; row < rows; row++)
        {
            const float *ptrScore = score.ptrf(0, ch, row);
            uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
            float *ptrMaxVal = maxVal.ptr<float>(row);
            for (int col = 0; col < cols; col++)
            {
                // Channel 0 seeds the running maximum: maxVal holds no
                // meaningful data before the first channel has been visited.
                if (ch == 0 || ptrScore[col] > ptrMaxVal[col])
                {
                    ptrMaxVal[col] = ptrScore[col];
                    ptrMaxCl[col] = static_cast<uchar>(ch);
                }
            }
        }
    }

    segm.create(rows, cols, CV_8UC3);
    for (int row = 0; row < rows; row++)
    {
        const uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
        cv::Vec3b *ptrSegm = segm.ptr<cv::Vec3b>(row);
        for (int col = 0; col < cols; col++)
        {
            ptrSegm[col] = colors[ptrMaxCl[col]];
        }
    }
}
int main(int argc, char **argv)
{
String modelTxt = "d:/dnn_opencv/fcn32s-heavy-pascal.prototxt";
String modelBin = "d:/dnn_opencv/fcn32s-heavy-pascal.caffemodel";
String imageFile = (argc > 1) ? argv[1] : "d:/dnn_opencv/rgb.jpg";
std::vector<cv::Vec3b> colors = readColors();
//! [Create the importer of Caffe model]
Ptr<dnn::Importer> importer;
try //Try to import Caffe GoogleNet model
{
importer = dnn::createCaffeImporter(modelTxt, modelBin);
}
catch (const cv::Exception &err) //Importer can throw errors, we will catch them
{
std::cerr << err.msg << std::endl;
}
//! [Create the importer of Caffe model]
if (!importer)
{
std::cerr << "Can't load network by using the following files: " << std::endl;
std::cerr << "prototxt: " << modelTxt << std::endl;
std::cerr << "caffemodel: " << modelBin << std::endl;
std::cerr << "fcn32s-heavy-pascal.caffemodel can be downloaded here:" << std::endl;
std::cerr << "http://dl.caffe.berkeleyvision.org/fcn32s-heavy-pascal.caffemodel" << std::endl;
exit(-1);
}
//! [Initialize network]
dnn::Net net;
importer->populateNet(net);
importer.release(); //We don't need importer anymore
//! [Initialize network]
//! [Prepare blob]
Mat img = imread(imageFile);
if (img.empty())
{
std::cerr << "Can't read image from the file: " << imageFile << std::endl;
exit(-1);
}
resize(img, img, Size(500, 500)); //FCN accepts 500x500 RGB-images
dnn::Blob inputBlob = dnn::Blob(img); //Convert Mat to dnn::Blob image batch
//! [Prepare blob]
//! [Set input blob]
net.setBlob(".data", inputBlob); //set the network input
//! [Set input blob]
//! [Make forward pass]
net.forward(); //compute output
//! [Make forward pass]
//! [Gather output]
dnn::Blob score = net.getBlob("score");
cv::Mat colorize;
colorizeSegmentation(score, colors, colorize);
cv::Mat show;
cv::addWeighted(img, 0.4, colorize, 0.6, 0.0, show);
cv::imshow("show", show);
cv::waitKey(0);
return 0;
} //main
background 0 0 0
aeroplane 128 0 0
bicycle 0 128 0
bird 128 128 0
boat 0 0 128
bottle 128 0 128
bus 0 128 128
car 128 128 128
cat 64 0 0
chair 192 0 0
cow 64 128 0
diningtable 192 128 0
dog 64 0 128
horse 192 0 128
motorbike 64 128 128
person 192 128 128
pottedplant 0 64 0
sheep 128 64 0
sofa 0 192 0
train 128 192 0
tvmonitor 0 64 128
...@@ -73,6 +73,24 @@ message BlobProtoVector { ...@@ -73,6 +73,24 @@ message BlobProtoVector {
repeated BlobProto blobs = 1; repeated BlobProto blobs = 1;
} }
message CropParameter {
// To crop, elements of the first bottom are selected to fit the dimensions
// of the second, reference bottom. The crop is configured by
// - the crop `axis` to pick the dimensions for cropping
// - the crop `offset` to set the shift for all/each dimension
// to align the cropped bottom with the reference bottom.
// All dimensions up to but excluding `axis` are preserved, while
// the dimensions including and trailing `axis` are cropped.
// If only one `offset` is set, then all dimensions are offset by this amount.
// Otherwise, the number of offsets must equal the number of cropped axes to
// shift the crop in each dimension accordingly.
// Note: standard dimensions are N,C,H,W so the default is a spatial crop,
// and `axis` may be negative to index from the end (e.g., -1 for the last
// axis).
// First cropped axis; defaults to 2 (H in N,C,H,W).
optional int32 axis = 1 [default = 2];
// Either a single shift shared by all cropped axes, or one shift per cropped
// axis listed in axis order starting at `axis`.
repeated uint32 offset = 2;
}
message Datum { message Datum {
optional int32 channels = 1; optional int32 channels = 1;
optional int32 height = 2; optional int32 height = 2;
...@@ -317,7 +335,7 @@ message ParamSpec { ...@@ -317,7 +335,7 @@ message ParamSpec {
// NOTE // NOTE
// Update the next available ID when you add a new LayerParameter field. // Update the next available ID when you add a new LayerParameter field.
// //
// LayerParameter next available layer-specific ID: 137 (last added: reduction_param) // LayerParameter next available layer-specific ID: 138 (last added: crop_param)
message LayerParameter { message LayerParameter {
optional string name = 1; // the layer name optional string name = 1; // the layer name
optional string type = 2; // the layer type optional string type = 2; // the layer type
...@@ -369,6 +387,7 @@ message LayerParameter { ...@@ -369,6 +387,7 @@ message LayerParameter {
optional ConcatParameter concat_param = 104; optional ConcatParameter concat_param = 104;
optional ContrastiveLossParameter contrastive_loss_param = 105; optional ContrastiveLossParameter contrastive_loss_param = 105;
optional ConvolutionParameter convolution_param = 106; optional ConvolutionParameter convolution_param = 106;
optional CropParameter crop_param = 137;
optional DataParameter data_param = 107; optional DataParameter data_param = 107;
optional DropoutParameter dropout_param = 108; optional DropoutParameter dropout_param = 108;
optional DummyDataParameter dummy_data_param = 109; optional DummyDataParameter dummy_data_param = 109;
......
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
#include "layers/slice_layer.hpp" #include "layers/slice_layer.hpp"
#include "layers/softmax_layer.hpp" #include "layers/softmax_layer.hpp"
#include "layers/split_layer.hpp" #include "layers/split_layer.hpp"
#include "layers/crop_layer.hpp"
namespace cv namespace cv
{ {
...@@ -98,6 +99,8 @@ void initModule() ...@@ -98,6 +99,8 @@ void initModule()
REG_RUNTIME_LAYER_CLASS(Deconvolution, DeConvolutionLayer) REG_RUNTIME_LAYER_CLASS(Deconvolution, DeConvolutionLayer)
REG_RUNTIME_LAYER_CLASS(Concat, ConcatLayer) REG_RUNTIME_LAYER_CLASS(Concat, ConcatLayer)
REG_RUNTIME_LAYER_CLASS(Crop, CropLayer)
init.status = true; init.status = true;
} }
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "crop_layer.hpp"
namespace cv
{
namespace dnn
{
// Reads the crop configuration from the layer parameters.
//
// "axis"   - first axis to crop; defaults to 2 when absent. A negative value
//            indexes from the end of the 4 blob dimensions (-1 is the last
//            axis).
// "offset" - optional; either a single shift applied to every cropped axis or
//            one shift per cropped axis, given in axis order starting at
//            "axis". When absent, all shifts are 0.
//
// After construction `offset` always holds 4 entries; entries for axes before
// `start_axis` are 0.
//
// Raises Error::StsBadArg when the axis is outside of the 4 blob dimensions or
// when the number of per-axis offsets does not match the number of cropped
// axes.
CropLayer::CropLayer(LayerParams &params) : Layer(params)
{
    const int num_axes = 4;

    start_axis = params.get<int>("axis", 2);
    if (num_axes <= start_axis)
        CV_Error(Error::StsBadArg, "crop axis bigger than input dim");

    // Map a negative axis to its non-negative equivalent before it is used
    // as an index.
    if (start_axis < 0)
        start_axis += num_axes;
    if (start_axis < 0)
        CV_Error(Error::StsBadArg, "negative crop axis is out of range for input dim");

    offset.resize(num_axes, 0);

    if (!params.has("offset"))
        return; // no offsets given: crop starts at 0 along every axis

    DictValue paramOffset = params.get("offset");
    const int num_offsets = paramOffset.size();

    if (1 < num_offsets)
    {
        if (num_axes - start_axis != num_offsets)
            CV_Error(Error::StsBadArg, "number of offset values specified must be equal to the number of dimensions following axis.");

        // The parameter list is indexed from 0, while the cropped axes start
        // at start_axis.
        for (int i = start_axis; i < num_axes; i++)
        {
            offset[i] = paramOffset.get<int>(i - start_axis);
        }
    }
    else if (1 == num_offsets)
    {
        const int offset_val = paramOffset.get<int>(0);
        for (int i = start_axis; i < num_axes; i++)
        {
            offset[i] = offset_val;
        }
    }
}
// Computes the shape of the cropped output and allocates it.
//
// inputs[0] is the blob to crop and inputs[1] is the reference blob. Axes
// before start_axis keep the size of inputs[0]; the remaining axes take the
// size of inputs[1]. Raises Error::StsBadArg when the shifted crop window
// does not fit into inputs[0] along some axis.
void CropLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(2 == inputs.size());

    const Blob &inpBlob = *inputs[0];
    CV_Assert(inpBlob.dims() == 4 && inpBlob.type() == CV_32F);

    const Blob &inpSzBlob = *inputs[1];

    outSizes.resize(4, 0);
    for (int axis = 0; axis < 4; axis++)
    {
        // Preserved axes come from the cropped blob, cropped axes from the
        // reference blob.
        const Blob &sizeSource = (axis < start_axis) ? inpBlob : inpSzBlob;
        outSizes[axis] = sizeSource.size(axis);

        const int cropEnd = offset[axis] + outSizes[axis];
        if (cropEnd > inpBlob.size(axis))
            CV_Error(Error::StsBadArg, "invalid crop parameters");
    }

    outputs.resize(1);
    outputs[0].create(BlobShape(outSizes));
}
// Copies the crop window of inputs[0] into outputs[0], one row at a time.
//
// The window starts at `offset` along each of the four axes and has the
// extent stored in `outSizes` by allocate().
void CropLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
    Blob &src = *inputs[0];
    Blob &dst = outputs[0];

    const int numShift = offset[0];
    const int chShift = offset[1];
    const int rowShift = offset[2];
    const int colShift = offset[3];
    const size_t rowBytes = sizeof(float) * outSizes[3];

    for (int n = 0; n < outSizes[0]; ++n)
    {
        for (int c = 0; c < outSizes[1]; ++c)
        {
            for (int r = 0; r < outSizes[2]; ++r)
            {
                // Rows are addressed individually; within a row the cropped
                // span is copied in one go.
                const float *srcRow = src.ptrf(n + numShift, c + chShift, r + rowShift);
                float *dstRow = dst.ptrf(n, c, r);
                memcpy(dstRow, srcRow + colShift, rowBytes);
            }
        }
    }
}
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_CROP_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_CROP_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
// Layer that crops its first input to the size of its second (reference)
// input, starting at a configurable axis and shifted by configurable offsets.
class CropLayer : public Layer
{
// First axis that is cropped; axes before it keep the size of the first input.
int start_axis;
// Crop shift per axis; sized to 4 entries by the constructor.
std::vector<int> offset;
// Output shape per axis; filled in by allocate() and read by forward().
std::vector<int> outSizes;
public:
// Reads the "axis" and "offset" parameters of the layer.
CropLayer(LayerParams& params);
// Validates the two inputs, computes the output shape and creates the single output blob.
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
// Copies the crop window of the first input into the output blob.
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}
}
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment