Commit 8e68d837 authored by Vitaliy Lyudvichenko's avatar Vitaliy Lyudvichenko

Merge branch 'object_detection_sample_ssd' into dnn*

# Conflicts:
#	modules/dnn/include/opencv2/dnn/blob.hpp
#	modules/dnn/src/init.cpp
#	modules/dnn/src/layers/concat_layer.cpp
#	modules/dnn/src/layers/convolution_layer.cpp
#	modules/dnn/src/layers/convolution_layer.hpp
#	modules/dnn/src/layers/layers_common.cpp
#	modules/dnn/src/layers/layers_common.hpp
#	modules/dnn/src/layers/op_im2col.hpp
#	modules/dnn/src/layers/pooling_layer.cpp
#	modules/dnn/src/layers/pooling_layer.hpp
parents 266692e1 e88b0618
......@@ -209,21 +209,21 @@ namespace dnn
{
public:
CV_PROP_RW Size kernel, stride, pad;
CV_PROP_RW Size kernel, stride, pad, dilation;
};
class CV_EXPORTS_W ConvolutionLayer : public BaseConvolutionLayer
{
public:
static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0));
static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1));
};
class CV_EXPORTS_W DeconvolutionLayer : public BaseConvolutionLayer
{
public:
static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0));
static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1));
};
class CV_EXPORTS_W LRNLayer : public Layer
......@@ -256,8 +256,10 @@ namespace dnn
CV_PROP_RW int type;
CV_PROP_RW Size kernel, stride, pad;
CV_PROP_RW bool globalPooling;
static CV_WRAP Ptr<PoolingLayer> create(int type = PoolingLayer::MAX, Size kernel = Size(2, 2), Size stride = Size(1, 1), Size pad = Size(0, 0));
static CV_WRAP Ptr<PoolingLayer> createGlobal(int type = PoolingLayer::MAX);
};
class CV_EXPORTS_W SoftmaxLayer : public Layer
......
......@@ -229,6 +229,18 @@ namespace dnn
/** @brief Checks equality of two blobs shapes. */
bool equalShape(const Blob &other) const;
/** @brief Returns slice of first two dimensions.
* @details The behaviour is similar to the following numpy code: blob[n, cn, ...]
*/
Mat getPlane(int n, int cn);
/** @brief Returns slice of first dimension.
* @details The behaviour is similar to getPlane(), but returns all
* channels * rows * cols values, corresponding to the n-th value
* of the first dimension.
*/
Mat getPlanes(int n);
/* Shape getters of 4-dimensional blobs. */
int cols() const; //!< Returns size of the fourth axis blob.
int rows() const; //!< Returns size of the thrid axis blob.
......@@ -262,12 +274,6 @@ namespace dnn
float *ptrf(int n = 0, int cn = 0, int row = 0, int col = 0);
//TODO: add const ptr methods
/** @brief Returns slice of first two dimensions.
* @details The behaviour is similar to the following numpy code: blob[n, cn, ...]
* @todo Method will be removed. Use slice() from shape_utils.hpp.
*/
Mat getPlane(int n, int cn);
/** @brief Shares data from other @p blob.
* @returns *this
*/
......
......@@ -456,6 +456,12 @@ inline Mat Blob::getPlane(int n, int cn)
return Mat(dims() - 2, sizes() + 2, type(), ptr(n, cn));
}
inline Mat Blob::getPlanes(int n)
{
CV_Assert(dims() > 3);
return Mat(dims() - 1, sizes() + 1, type(), ptr(n));
}
inline int Blob::cols() const
{
return xsize(3);
......
......@@ -112,7 +112,7 @@ class CV_EXPORTS Dict
public:
//! Checks a presence of the @p key in the dictionary.
bool has(const String &key);
bool has(const String &key) const;
//! If the @p key in the dictionary then returns pointer to its value, else returns NULL.
DictValue *ptr(const String &key);
......
......@@ -287,7 +287,7 @@ inline std::ostream &operator<<(std::ostream &stream, const DictValue &dictv)
/////////////////////////////////////////////////////////////////
inline bool Dict::has(const String &key)
inline bool Dict::has(const String &key) const
{
return dict.count(key) != 0;
}
......
#
# This prototxt is based on voc-fcn32s/val.prototxt file from
# https://github.com/shelhamer/fcn.berkeleyvision.org, which is distributed under
# Caffe (BSD) license:
# http://caffe.berkeleyvision.org/model_zoo.html#bvlc-model-license
#
name: "voc-fcn32s"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 500
input_dim: 500
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 100
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "fc6"
type: "Convolution"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 7
stride: 1
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "fc7"
type: "Convolution"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 1
stride: 1
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "score_fr"
type: "Convolution"
bottom: "fc7"
top: "score_fr"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
pad: 0
kernel_size: 1
}
}
layer {
name: "upscore"
type: "Deconvolution"
bottom: "score_fr"
top: "upscore"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 64
stride: 32
}
}
layer {
name: "score"
type: "Crop"
bottom: "upscore"
bottom: "data"
top: "score"
crop_param {
axis: 2
offset: 19
}
}
#
# This prototxt is based on voc-fcn8s/val.prototxt file from
# https://github.com/shelhamer/fcn.berkeleyvision.org, which is distributed under
# Caffe (BSD) license:
# http://caffe.berkeleyvision.org/model_zoo.html#bvlc-model-license
#
name: "voc-fcn8s"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 500
input_dim: 500
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 100
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "fc6"
type: "Convolution"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 7
stride: 1
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "fc7"
type: "Convolution"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 1
stride: 1
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "score_fr"
type: "Convolution"
bottom: "fc7"
top: "score_fr"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
pad: 0
kernel_size: 1
}
}
layer {
name: "upscore2"
type: "Deconvolution"
bottom: "score_fr"
top: "upscore2"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 4
stride: 2
}
}
layer {
name: "score_pool4"
type: "Convolution"
bottom: "pool4"
top: "score_pool4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
pad: 0
kernel_size: 1
}
}
layer {
name: "score_pool4c"
type: "Crop"
bottom: "score_pool4"
bottom: "upscore2"
top: "score_pool4c"
crop_param {
axis: 2
offset: 5
}
}
layer {
name: "fuse_pool4"
type: "Eltwise"
bottom: "upscore2"
bottom: "score_pool4c"
top: "fuse_pool4"
eltwise_param {
operation: SUM
}
}
layer {
name: "upscore_pool4"
type: "Deconvolution"
bottom: "fuse_pool4"
top: "upscore_pool4"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 4
stride: 2
}
}
layer {
name: "score_pool3"
type: "Convolution"
bottom: "pool3"
top: "score_pool3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
pad: 0
kernel_size: 1
}
}
layer {
name: "score_pool3c"
type: "Crop"
bottom: "score_pool3"
bottom: "upscore_pool4"
top: "score_pool3c"
crop_param {
axis: 2
offset: 9
}
}
layer {
name: "fuse_pool3"
type: "Eltwise"
bottom: "upscore_pool4"
bottom: "score_pool3c"
top: "fuse_pool3"
eltwise_param {
operation: SUM
}
}
layer {
name: "upscore8"
type: "Deconvolution"
bottom: "fuse_pool3"
top: "upscore8"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 16
stride: 8
}
}
layer {
name: "score"
type: "Crop"
bottom: "upscore8"
bottom: "data"
top: "score"
crop_param {
axis: 2
offset: 31
}
}
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/ocl.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace std;
static const string fcnType = "fcn8s";
static vector<cv::Vec3b> readColors(const string &filename = "pascal-classes.txt")
{
vector<cv::Vec3b> colors;
ifstream fp(filename.c_str());
if (!fp.is_open())
{
cerr << "File with colors not found: " << filename << endl;
exit(-1);
}
string line;
while (!fp.eof())
{
getline(fp, line);
if (line.length())
{
stringstream ss(line);
string name; ss >> name;
int temp;
cv::Vec3b color;
ss >> temp; color[0] = temp;
ss >> temp; color[1] = temp;
ss >> temp; color[2] = temp;
colors.push_back(color);
}
}
fp.close();
return colors;
}
static void colorizeSegmentation(dnn::Blob &score, const vector<cv::Vec3b> &colors, cv::Mat &segm)
{
const int rows = score.rows();
const int cols = score.cols();
const int chns = score.channels();
cv::Mat maxCl(rows, cols, CV_8UC1);
cv::Mat maxVal(rows, cols, CV_32FC1);
for (int ch = 0; ch < chns; ch++)
{
for (int row = 0; row < rows; row++)
{
const float *ptrScore = score.ptrf(0, ch, row);
uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
float *ptrMaxVal = maxVal.ptr<float>(row);
for (int col = 0; col < cols; col++)
{
if (ptrScore[col] > ptrMaxVal[col])
{
ptrMaxVal[col] = ptrScore[col];
ptrMaxCl[col] = ch;
}
}
}
}
segm.create(rows, cols, CV_8UC3);
for (int row = 0; row < rows; row++)
{
const uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
cv::Vec3b *ptrSegm = segm.ptr<cv::Vec3b>(row);
for (int col = 0; col < cols; col++)
{
ptrSegm[col] = colors[ptrMaxCl[col]];
}
}
}
int main(int argc, char **argv)
{
cv::ocl::setUseOpenCL(false);
String modelTxt = fcnType + "-heavy-pascal.prototxt";
String modelBin = fcnType + "-heavy-pascal.caffemodel";
String imageFile = (argc > 1) ? argv[1] : "rgb.jpg";
vector<cv::Vec3b> colors = readColors();
//! [Create the importer of Caffe model]
Ptr<dnn::Importer> importer;
try //Try to import Caffe GoogleNet model
{
importer = dnn::createCaffeImporter(modelTxt, modelBin);
}
catch (const cv::Exception &err) //Importer can throw errors, we will catch them
{
cerr << err.msg << endl;
}
//! [Create the importer of Caffe model]
if (!importer)
{
cerr << "Can't load network by using the following files: " << endl;
cerr << "prototxt: " << modelTxt << endl;
cerr << "caffemodel: " << modelBin << endl;
cerr << fcnType << "-heavy-pascal.caffemodel can be downloaded here:" << endl;
cerr << "http://dl.caffe.berkeleyvision.org/" << fcnType << "-heavy-pascal.caffemodel" << endl;
exit(-1);
}
//! [Initialize network]
dnn::Net net;
importer->populateNet(net);
importer.release(); //We don't need importer anymore
//! [Initialize network]
//! [Prepare blob]
Mat img = imread(imageFile);
if (img.empty())
{
cerr << "Can't read image from the file: " << imageFile << endl;
exit(-1);
}
resize(img, img, Size(500, 500)); //FCN accepts 500x500 RGB-images
dnn::Blob inputBlob = dnn::Blob::fromImages(img); //Convert Mat to dnn::Blob image batch
//! [Prepare blob]
//! [Set input blob]
net.setBlob(".data", inputBlob); //set the network input
//! [Set input blob]
//! [Make forward pass]
net.forward(); //compute output
//! [Make forward pass]
//! [Gather output]
dnn::Blob score = net.getBlob("score");
cv::Mat colorize;
colorizeSegmentation(score, colors, colorize);
cv::Mat show;
cv::addWeighted(img, 0.4, colorize, 0.6, 0.0, show);
cv::imshow("show", show);
cv::waitKey(0);
return 0;
} //main
background 0 0 0
aeroplane 128 0 0
bicycle 0 128 0
bird 128 128 0
boat 0 0 128
bottle 128 0 128
bus 0 128 128
car 128 128 128
cat 64 0 0
chair 192 0 0
cow 64 128 0
diningtable 192 128 0
dog 64 0 128
horse 192 0 128
motorbike 64 128 128
person 192 128 128
pottedplant 0 64 0
sheep 128 64 0
sofa 0 192 0
train 128 192 0
tvmonitor 0 64 128
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace std;
const size_t width = 300;
const size_t height = 300;
Mat getMean(const size_t& imageHeight, const size_t& imageWidth)
{
Mat mean;
const int meanValues[3] = {104, 117, 123};
vector<Mat> meanChannels;
for(size_t i = 0; i < 3; i++)
{
Mat channel(imageHeight, imageWidth, CV_32F, Scalar(meanValues[i]));
meanChannels.push_back(channel);
}
cv::merge(meanChannels, mean);
return mean;
}
Mat preprocess(const Mat& frame)
{
Mat preprocessed;
frame.convertTo(preprocessed, CV_32FC3);
resize(preprocessed, preprocessed, Size(width, height)); //SSD accepts 300x300 RGB-images
Mat mean = getMean(width, height);
cv::subtract(preprocessed, mean, preprocessed);
return preprocessed;
}
const char* about = "This sample uses Single-Shot Detector "
"(https://arxiv.org/abs/1512.02325)"
"to detect objects on image\n"; // TODO: link
const char* params
= "{ help | false | print usage }"
"{ proto | | model configuration }"
"{ model | | model weights }"
"{ image | | image for detection }"
"{ min_confidence | 0.5 | min confidence }";
int main(int argc, char** argv)
{
cv::CommandLineParser parser(argc, argv, params);
if (parser.get<bool>("help"))
{
std::cout << about << std::endl;
parser.printMessage();
return 0;
}
String modelConfiguration = parser.get<string>("proto");
String modelBinary = parser.get<string>("model");
//! [Create the importer of Caffe model]
Ptr<dnn::Importer> importer;
// Import Caffe SSD model
try
{
importer = dnn::createCaffeImporter(modelConfiguration, modelBinary);
}
catch (const cv::Exception &err) //Importer can throw errors, we will catch them
{
cerr << err.msg << endl;
}
//! [Create the importer of Caffe model]
if (!importer)
{
cerr << "Can't load network by using the following files: " << endl;
cerr << "prototxt: " << modelConfiguration << endl;
cerr << "caffemodel: " << modelBinary << endl;
cerr << "Models can be downloaded here:" << endl;
cerr << "https://github.com/weiliu89/caffe/tree/ssd#models" << endl;
exit(-1);
}
//! [Initialize network]
dnn::Net net;
importer->populateNet(net);
importer.release(); //We don't need importer anymore
//! [Initialize network]
cv::Mat frame = cv::imread(parser.get<string>("image"), -1);
//! [Prepare blob]
Mat preprocessedFrame = preprocess(frame);
dnn::Blob inputBlob = dnn::Blob::fromImages(preprocessedFrame); //Convert Mat to dnn::Blob image
//! [Prepare blob]
//! [Set input blob]
net.setBlob(".data", inputBlob); //set the network input
//! [Set input blob]
//! [Make forward pass]
net.forward(); //compute output
//! [Make forward pass]
//! [Gather output]
dnn::Blob detection = net.getBlob("detection_out");
Mat detectionMat(detection.rows(), detection.cols(), CV_32F, detection.ptrf());
float confidenceThreshold = parser.get<float>("min_confidence");
for(int i = 0; i < detectionMat.rows; i++)
{
float confidence = detectionMat.at<float>(i, 2);
if(confidence > confidenceThreshold)
{
size_t objectClass = detectionMat.at<float>(i, 1);
float xLeftBottom = detectionMat.at<float>(i, 3) * frame.cols;
float yLeftBottom = detectionMat.at<float>(i, 4) * frame.rows;
float xRightTop = detectionMat.at<float>(i, 5) * frame.cols;
float yRightTop = detectionMat.at<float>(i, 6) * frame.rows;
std::cout << "Class: " << objectClass << std::endl;
std::cout << "Confidence: " << confidence << std::endl;
std::cout << " " << xLeftBottom
<< " " << yLeftBottom
<< " " << xRightTop
<< " " << yRightTop << std::endl;
Rect object(xLeftBottom, yLeftBottom,
xRightTop - xLeftBottom,
yRightTop - yLeftBottom);
rectangle(frame, object, Scalar(0, 255, 0));
}
}
imshow("detections", frame);
waitKey();
return 0;
} // main
......@@ -73,6 +73,93 @@ message BlobProtoVector {
repeated BlobProto blobs = 1;
}
message CropParameter {
// To crop, elements of the first bottom are selected to fit the dimensions
// of the second, reference bottom. The crop is configured by
// - the crop `axis` to pick the dimensions for cropping
// - the crop `offset` to set the shift for all/each dimension
// to align the cropped bottom with the reference bottom.
// All dimensions up to but excluding `axis` are preserved, while
// the dimensions including and trailing `axis` are cropped.
// If only one `offset` is set, then all dimensions are offset by this amount.
// Otherwise, the number of offsets must equal the number of cropped axes to
// shift the crop in each dimension accordingly.
// Note: standard dimensions are N,C,H,W so the default is a spatial crop,
// and `axis` may be negative to index from the end (e.g., -1 for the last
// axis).
optional int32 axis = 1 [default = 2];
repeated uint32 offset = 2;
}
message PermuteParameter {
// The new orders of the axes of data. Notice it should be with
// in the same range as the input data, and it starts from 0.
// Do not provide repeated order.
repeated uint32 order = 1;
}
// Message that stores parameters used by NormalizeBBoxLayer
message NormalizeBBoxParameter {
optional bool across_spatial = 1 [default = true];
// Initial value of scale. Default is 1.0 for all
optional FillerParameter scale_filler = 2;
// Whether or not scale parameters are shared across channels.
optional bool channel_shared = 3 [default = true];
// Epsilon for not dividing by zero while normalizing variance
optional float eps = 4 [default = 1e-10];
}
// Message that store parameters used by PriorBoxLayer
message PriorBoxParameter {
// Encode/decode type.
enum CodeType {
CORNER = 1;
CENTER_SIZE = 2;
}
// Minimum box size (in pixels). Required!
optional float min_size = 1;
// Maximum box size (in pixels). Required!
optional float max_size = 2;
// Various of aspect ratios. Duplicate ratios will be ignored.
// If none is provided, we use default ratio 1.
repeated float aspect_ratio = 3;
// If true, will flip each aspect ratio.
// For example, if there is aspect ratio "r",
// we will generate aspect ratio "1.0/r" as well.
optional bool flip = 4 [default = true];
// If true, will clip the prior so that it is within [0, 1]
optional bool clip = 5 [default = true];
// Variance for adjusting the prior bboxes.
repeated float variance = 6;
}
// Message that store parameters used by DetectionOutputLayer
message DetectionOutputParameter {
// Number of classes to be predicted. Required!
optional uint32 num_classes = 1;
// If true, bounding box are shared among different classes.
optional bool share_location = 2 [default = true];
// Background label id. If there is no background class,
// set it as -1.
optional int32 background_label_id = 3 [default = 0];
// Type of coding method for bbox.
optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER];
// If true, variance is encoded in target; otherwise we need to adjust the
// predicted offset accordingly.
optional bool variance_encoded_in_target = 8 [default = false];
// Number of total bboxes to be kept per image after nms step.
// -1 means keeping all bboxes after nms step.
optional int32 keep_top_k = 7 [default = -1];
// Only consider detections whose confidences are larger than a threshold.
// If not provided, consider all boxes.
optional float confidence_threshold = 9;
// Parameters used for non maximum suppression.
// Threshold to be used in nms.
optional float nms_threshold = 10 [default = 0.3];
// Maximum number of results to be kept.
optional int32 top_k = 11;
}
message Datum {
optional int32 channels = 1;
optional int32 height = 2;
......@@ -317,7 +404,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 137 (last added: reduction_param)
// LayerParameter next available layer-specific ID: 142 (last added: detection_output_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
......@@ -369,7 +456,9 @@ message LayerParameter {
optional ConcatParameter concat_param = 104;
optional ContrastiveLossParameter contrastive_loss_param = 105;
optional ConvolutionParameter convolution_param = 106;
optional CropParameter crop_param = 137;
optional DataParameter data_param = 107;
optional DetectionOutputParameter detection_output_param = 141;
optional DropoutParameter dropout_param = 108;
optional DummyDataParameter dummy_data_param = 109;
optional EltwiseParameter eltwise_param = 110;
......@@ -385,17 +474,20 @@ message LayerParameter {
optional LRNParameter lrn_param = 118;
optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120;
optional NormalizeBBoxParameter normalize_bbox_param = 139;
optional PermuteParameter permute_param = 138;
optional PoolingParameter pooling_param = 121;
optional PowerParameter power_param = 122;
optional PReLUParameter prelu_param = 131;
optional PriorBoxParameter prior_box_param = 140;
optional PythonParameter python_param = 130;
optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133;
optional SigmoidParameter sigmoid_param = 124;
optional SliceParameter slice_param = 126;
optional SoftmaxParameter softmax_param = 125;
optional SPPParameter spp_param = 132;
optional SliceParameter slice_param = 126;
optional TanHParameter tanh_param = 127;
optional ThresholdParameter threshold_param = 128;
optional WindowDataParameter window_data_param = 129;
......@@ -505,6 +597,12 @@ message ConvolutionParameter {
CUDNN = 2;
}
optional Engine engine = 15 [default = DEFAULT];
// Factor used to dilate the kernel, (implicitly) zero-filling the resulting
// holes. (Kernel dilation is sometimes referred to by its use in the
// algorithme à trous from Holschneider et al. 1987.)
optional uint32 dilation_h = 18; // The dilation height
optional uint32 dilation_w = 19; // The dilation width
optional uint32 dilation = 20; // The dilation; defaults to 1
}
message DataParameter {
......@@ -1155,3 +1253,15 @@ message PReLUParameter {
// Whether or not slope paramters are shared across channels.
optional bool channel_shared = 2 [default = false];
}
// The normalized bounding box [0, 1] w.r.t. the input image size.
message NormalizedBBox {
optional float xmin = 1;
optional float ymin = 2;
optional float xmax = 3;
optional float ymax = 4;
optional int32 label = 5;
optional bool difficult = 6;
optional float score = 7;
optional float size = 8;
}
......@@ -2,6 +2,7 @@
#include "layer_loaders.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <climits>
#include "layers/layers_common.hpp"
namespace cv
{
......@@ -57,7 +58,8 @@ static void getCaffeConvParams(LayerParams &params, Size &kernel, Size &pad, Siz
static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, LayerParams &params)
{
l->setParamsFrom(params);
getCaffeConvParams(params, l->kernel, l->pad, l->stride);
//getCaffeConvParams(params, l->kernel, l->pad, l->stride);
getConvolutionKernelParams(params, l->kernel.height, l->kernel.width, l->pad.height, l->pad.width, l->stride.height, l->stride.width, l->dilation.height, l->dilation.width);
bool bias = params.get<bool>("bias_term", true);
int numOutput = params.get<int>("num_output");
......@@ -88,6 +90,7 @@ Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
{
int type;
Size kernel, stride, pad;
bool globalPooling;
if (params.has("pool"))
{
......@@ -106,9 +109,13 @@ Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
type = PoolingLayer::MAX;
}
getCaffeConvParams(params, kernel, pad, stride);
getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling, pad.height, pad.width, stride.height, stride.width);
//getCaffeConvParams(params, kernel, pad, stride);
return Ptr<Layer>(PoolingLayer::create(type, kernel, stride, pad));
if (!globalPooling)
return Ptr<Layer>(PoolingLayer::create(type, kernel, stride, pad));
else
return Ptr<Layer>(PoolingLayer::createGlobal(type));
}
template<>
......
......@@ -43,6 +43,14 @@
#include "caffe/layer_loaders.hpp"
#include "layers/blank_layer.hpp"
#include "layers/crop_layer.hpp"
#include "layers/eltwise_layer.hpp"
#include "layers/flatten_layer.hpp"
#include "layers/permute_layer.hpp"
#include "layers/prior_box_layer.hpp"
#include "layers/detection_output_layer.hpp"
#include "layers/normalize_bbox_layer.hpp"
namespace cv
{
namespace dnn
......@@ -87,6 +95,15 @@ void initModule()
REG_RUNTIME_LAYER_FUNC(Power, createLayerFromCaffe<PowerLayer>);
REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer)
REG_RUNTIME_LAYER_CLASS(Crop, CropLayer)
REG_RUNTIME_LAYER_CLASS(Eltwise, EltwiseLayer)
REG_RUNTIME_LAYER_CLASS(Permute, PermuteLayer)
//REG_RUNTIME_LAYER_CLASS(Flatten, FlattenLayer)
REG_RUNTIME_LAYER_CLASS(PriorBox, PriorBoxLayer)
REG_RUNTIME_LAYER_CLASS(DetectionOutput, DetectionOutputLayer)
REG_RUNTIME_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer)
init.status = true;
}
......
......@@ -77,7 +77,8 @@ void ConvolutionLayerImpl::init()
CV_Assert(blobs[0].dims() == 4 && blobs[0].cols() == kernel.width && blobs[0].rows() == kernel.height);
CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num());
useOpenCL = ocl::useOpenCL() && tryUseOpenCL;
//TODO: dilation in OCL mode
useOpenCL = ocl::useOpenCL() && tryUseOpenCL && dilation == Size(1, 1);
}
void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
......@@ -127,7 +128,8 @@ void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vecto
bool ConvolutionLayerImpl::is1x1() const
{
return (kernel.height == 1 && kernel.width == 1) &&
(stride.height == 1 && stride.width == 1);
(stride.height == 1 && stride.width == 1) &&
(dilation.height == 1 && dilation.width == 1);
}
template<typename XMat>
......@@ -182,7 +184,7 @@ void ConvolutionLayerImpl::im2col(const UMat &srcImg, UMat &dstCol)
return;
}
#ifdef HAVE_OPENCL
CV_Assert(im2col_ocl(srcImg, inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, this->colBlob.umatRef()));
CV_Assert(im2col_ocl(srcImg, inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dilation.height, dilation.width, this->colBlob.umatRef()));
dstCol = this->colBlob.umatRefConst();
#else
CV_Error(Error::StsInternal, "");
......@@ -200,9 +202,9 @@ void ConvolutionLayerImpl::im2col(const Mat &srcImg, Mat &dstCol)
Mat &colMat = colBlob.matRef();
if (srcImg.type() == CV_32F)
im2col_CpuPBody<float>::run(srcImg.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, colMat.ptr<float>());
im2col_CpuPBody<float>::run(srcImg.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dilation.height, dilation.width, colMat.ptr<float>());
if (srcImg.type() == CV_64F)
im2col_CpuPBody<double>::run(srcImg.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, colMat.ptr<double>());
im2col_CpuPBody<double>::run(srcImg.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dilation.height, dilation.width, colMat.ptr<double>());
dstCol = colMat;
}
......@@ -213,8 +215,8 @@ void ConvolutionLayerImpl::computeInpOutShape(const Blob &input)
inpW = input.cols();
inpCn = input.channels();
outH = (inpH + 2 * pad.height - kernel.height) / stride.height + 1;
outW = (inpW + 2 * pad.width - kernel.width) / stride.width + 1;
outH = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1;
outW = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1;
outCn = numOutput;
topH = outH; topW = outW; topCn = outCn;
......@@ -252,7 +254,7 @@ template<typename XMat>
void DeConvolutionLayerImpl::forward_(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
XMat weightsMat = reshaped(blobs[0].getRefConst<XMat>(), Shape(outCn, ksize));
XMat biasesMat = reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1));
XMat biasesMat = (bias) ? reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1)) : XMat();
for (size_t ii = 0; ii < outputs.size(); ii++)
{
......@@ -315,21 +317,23 @@ void DeConvolutionLayerImpl::col2im(const UMat &colMat, UMat &dstImg)
//Initializers
Ptr<BaseConvolutionLayer> ConvolutionLayer::create(Size kernel, Size stride, Size pad)
Ptr<BaseConvolutionLayer> ConvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation)
{
ConvolutionLayerImpl *l = new ConvolutionLayerImpl();
l->kernel = kernel;
l->pad = pad;
l->stride = stride;
l->dilation = dilation;
return Ptr<BaseConvolutionLayer>(l);
}
Ptr<BaseConvolutionLayer> DeconvolutionLayer::create(Size kernel, Size stride, Size pad)
Ptr<BaseConvolutionLayer> DeconvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation)
{
DeConvolutionLayerImpl *l = new DeConvolutionLayerImpl();
l->kernel = kernel;
l->pad = pad;
l->stride = stride;
l->dilation = dilation;
return Ptr<BaseConvolutionLayer>(l);
}
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "crop_layer.hpp"
namespace cv
{
namespace dnn
{
CropLayer::CropLayer(LayerParams &params) : Layer(params)
{
start_axis = params.get<int>("axis");
if (4 <= start_axis)
CV_Error(Error::StsBadArg, "crop axis bigger than input dim");
DictValue paramOffset = params.get("offset");
offset.resize(4, 0);
if (1 < paramOffset.size())
{
if (4 - start_axis != paramOffset.size())
CV_Error(Error::StsBadArg, "number of offset values specified must be equal to the number of dimensions following axis.");
for (size_t i = start_axis; i < offset.size(); i++)
{
offset[i] = paramOffset.get<int>(i);
}
}
else
{
const int offset_val = paramOffset.get<int>(0);
for (size_t i = start_axis; i < offset.size(); i++)
{
offset[i] = offset_val;
}
}
}
void CropLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(2 == inputs.size());
const Blob &inpBlob = *inputs[0];
CV_Assert(inpBlob.dims() == 4 && inpBlob.type() == CV_32F);
const Blob &inpSzBlob = *inputs[1];
outSizes.resize(4, 0);
for (int i = 0; i < 4; i++)
{
if (i < start_axis)
outSizes[i] = inpBlob.size(i);
else
outSizes[i] = inpSzBlob.size(i);
if (offset[i] + outSizes[i] > inpBlob.size(i))
CV_Error(Error::StsBadArg, "invalid crop parameters");
}
outputs.resize(1);
outputs[0].create(BlobShape(outSizes));
}
void CropLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
Blob input = *inputs[0];
Blob output = outputs[0];
for (int num = 0; num < outSizes[0]; ++num)
{
for (int ch = 0; ch < outSizes[1]; ++ch)
{
for (int row = 0; row < outSizes[2]; ++row)
{
float *srcData = input.ptrf(num + offset[0], ch + offset[1], row + offset[2]);
float *dstData = output.ptrf(num, ch, row);
memcpy(dstData, srcData + offset[3], sizeof(float) * outSizes[3]);
}
}
}
}
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_CROP_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_CROP_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
class CropLayer : public Layer
{
int start_axis;
std::vector<int> offset;
std::vector<int> outSizes;
public:
CropLayer(LayerParams& params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}
}
#endif
This diff is collapsed.
This diff is collapsed.
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "eltwise_layer.hpp"
namespace cv
{
namespace dnn
{
EltwiseLayer::EltwiseLayer(LayerParams &params) : Layer(params)
{
if (params.has("operation"))
{
String operation = params.get<String>("operation").toLowerCase();
if (operation == "prod")
op = PROD;
else if (operation == "sum")
op = SUM;
else if (operation == "max")
op = MAX;
else
CV_Error(cv::Error::StsBadArg, "Unknown operaticon type \"" + operation + "\"");
}
else
{
op = SUM;
}
if (params.has("coeff"))
{
DictValue paramCoeff = params.get("coeff");
coeffs.resize(paramCoeff.size(), 1);
for (int i = 0; i < paramCoeff.size(); i++)
{
coeffs[i] = paramCoeff.get<int>(i);
}
}
}
void EltwiseLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(2 <= inputs.size());
CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
CV_Assert(op == SUM || coeffs.size() == 0);
const BlobShape &shape0 = inputs[0]->shape();
for (size_t i = 1; i < inputs.size(); ++i)
{
CV_Assert(shape0 == inputs[i]->shape());
}
outputs.resize(1);
outputs[0].create(shape0);
}
void EltwiseLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
switch (op)
{
case SUM:
{
CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
Mat& output = outputs[0].matRef();
output.setTo(0.);
if (0 < coeffs.size())
{
for (size_t i = 0; i < inputs.size(); i++)
{
output += inputs[i]->matRefConst() * coeffs[i];
}
}
else
{
for (size_t i = 0; i < inputs.size(); i++)
{
output += inputs[i]->matRefConst();
}
}
}
break;
case PROD:
{
Mat& output = outputs[0].matRef();
output.setTo(1.);
for (size_t i = 0; i < inputs.size(); i++)
{
output = output.mul(inputs[i]->matRefConst());
}
}
break;
case MAX:
{
Mat& output = outputs[0].matRef();
cv::max(inputs[0]->matRefConst(), inputs[1]->matRefConst(), output);
for (size_t i = 2; i < inputs.size(); i++)
{
cv::max(output, inputs[i]->matRefConst(), output);
}
}
break;
default:
CV_Assert(0);
break;
};
}
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_ELTWISE_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_ELTWISE_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
class EltwiseLayer : public Layer
{
enum EltwiseOp
{
PROD = 0,
SUM = 1,
MAX = 2,
};
EltwiseOp op;
std::vector<int> coeffs;
public:
EltwiseLayer(LayerParams& params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}
}
#endif
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "flatten_layer.hpp"
#include <float.h>
#include <algorithm>
namespace cv
{
namespace dnn
{
const std::string FlattenLayer::_layerName = std::string("Flatten");
bool FlattenLayer::getParameterDict(const LayerParams &params,
const std::string &parameterName,
DictValue& result)
{
if (!params.has(parameterName))
{
return false;
}
result = params.get(parameterName);
return true;
}
template<typename T>
T FlattenLayer::getParameter(const LayerParams &params,
const std::string &parameterName,
const size_t &idx,
const bool required,
const T& defaultValue)
{
DictValue dictValue;
bool success = getParameterDict(params, parameterName, dictValue);
if(!success)
{
if(required)
{
std::string message = _layerName;
message += " layer parameter does not contain ";
message += parameterName;
message += " parameter.";
CV_Error(Error::StsBadArg, message);
}
else
{
return defaultValue;
}
}
return dictValue.get<T>(idx);
}
FlattenLayer::FlattenLayer(LayerParams &params) : Layer(params)
{
_startAxis = getParameter<int>(params, "axis");
_endAxis = getParameter<int>(params, "end_axis", 0, false, -1);
}
void FlattenLayer::checkInputs(const std::vector<Blob*> &inputs)
{
CV_Assert(inputs.size() > 0);
for (size_t i = 1; i < inputs.size(); i++)
{
for (size_t j = 0; j < _numAxes; j++)
{
CV_Assert(inputs[i]->shape()[j] == inputs[0]->shape()[j]);
}
}
}
void FlattenLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
checkInputs(inputs);
_numAxes = inputs[0]->shape().dims();
if(_endAxis <= 0)
{
_endAxis += _numAxes;
}
CV_Assert(_startAxis >= 0);
CV_Assert(_endAxis >= _startAxis && _endAxis < (int)_numAxes);
size_t flattenedDimensionSize = 1;
for (int i = _startAxis; i <= _endAxis; i++)
{
flattenedDimensionSize *= inputs[0]->shape()[i];
}
std::vector<int> outputShape;
for (int i = 0; i < _startAxis; i++)
{
outputShape.push_back(inputs[0]->shape()[i]);
}
outputShape.push_back(flattenedDimensionSize);
for (size_t i = _endAxis + 1; i < _numAxes; i++)
{
outputShape.push_back(inputs[0]->shape()[i]);
}
CV_Assert(outputShape.size() <= 4);
for (size_t i = 0; i < inputs.size(); i++)
{
outputs[i].create(BlobShape(outputShape));
}
}
void FlattenLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
for (size_t j = 0; j < inputs.size(); j++)
{
outputs[j].matRef() = inputs[j]->matRef();
}
}
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_FLATTEN_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_FLATTEN_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
class FlattenLayer : public Layer
{
int _startAxis;
int _endAxis;
size_t _numAxes;
static const std::string _layerName;
public:
FlattenLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void checkInputs(const std::vector<Blob*> &inputs);
template<typename T>
T getParameter(const LayerParams &params,
const std::string &parameterName,
const size_t &idx = 0,
const bool required = true,
const T& defaultValue = T());
bool getParameterDict(const LayerParams &params,
const std::string &parameterName, DictValue &result);
};
}
}
#endif
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment