Commit 3cdc0e48 authored by Alexander Alekhin's avatar Alexander Alekhin

Merge pull request #991 from arrybn:issue_912

parents 478baf93 0cef0503
...@@ -406,7 +406,7 @@ namespace dnn ...@@ -406,7 +406,7 @@ namespace dnn
class CV_EXPORTS_W BatchNormLayer : public Layer class CV_EXPORTS_W BatchNormLayer : public Layer
{ {
public: public:
static CV_WRAP Ptr<BatchNormLayer> create(float eps, bool has_weights, bool has_bias); static CV_WRAP Ptr<BatchNormLayer> create(bool hasWeights, bool hasBias, float epsilon);
}; };
class CV_EXPORTS_W MaxUnpoolLayer : public Layer class CV_EXPORTS_W MaxUnpoolLayer : public Layer
...@@ -415,6 +415,12 @@ namespace dnn ...@@ -415,6 +415,12 @@ namespace dnn
static CV_WRAP Ptr<MaxUnpoolLayer> create(Size unpoolSize); static CV_WRAP Ptr<MaxUnpoolLayer> create(Size unpoolSize);
}; };
//! Layer type derived from Layer; this declaration exposes only a static
//! factory and no other public members.
class CV_EXPORTS_W ScaleLayer : public Layer
{
public:
//! Creates a ScaleLayer instance.
//! @param hasBias presumably enables an additive bias term alongside the
//!        scaling -- TODO confirm against the implementation.
static CV_WRAP Ptr<ScaleLayer> create(bool hasBias);
};
//! @} //! @}
//! @} //! @}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -59,6 +59,8 @@ message BlobProto { ...@@ -59,6 +59,8 @@ message BlobProto {
optional BlobShape shape = 7; optional BlobShape shape = 7;
repeated float data = 5 [packed = true]; repeated float data = 5 [packed = true];
repeated float diff = 6 [packed = true]; repeated float diff = 6 [packed = true];
repeated double double_data = 8 [packed = true];
repeated double double_diff = 9 [packed = true];
// 4D dimensions -- deprecated. Use "shape" instead. // 4D dimensions -- deprecated. Use "shape" instead.
optional int32 num = 1 [default = 0]; optional int32 num = 1 [default = 0];
...@@ -73,24 +75,6 @@ message BlobProtoVector { ...@@ -73,24 +75,6 @@ message BlobProtoVector {
repeated BlobProto blobs = 1; repeated BlobProto blobs = 1;
} }
// Parameters for the crop operation; referenced from
// LayerParameter.crop_param.
message CropParameter {
// To crop, elements of the first bottom are selected to fit the dimensions
// of the second, reference bottom. The crop is configured by
// - the crop `axis` to pick the dimensions for cropping
// - the crop `offset` to set the shift for all/each dimension
// to align the cropped bottom with the reference bottom.
// All dimensions up to but excluding `axis` are preserved, while
// the dimensions including and trailing `axis` are cropped.
// If only one `offset` is set, then all dimensions are offset by this amount.
// Otherwise, the number of offsets must equal the number of cropped axes to
// shift the crop in each dimension accordingly.
// Note: standard dimensions are N,C,H,W so the default is a spatial crop,
// and `axis` may be negative to index from the end (e.g., -1 for the last
// axis).
// First cropped dimension; defaults to 2.
optional int32 axis = 1 [default = 2];
// Either a single shift applied to every cropped dimension, or one shift
// per cropped dimension.
repeated uint32 offset = 2;
}
message PermuteParameter { message PermuteParameter {
// The new orders of the axes of data. Notice it should be with // The new orders of the axes of data. Notice it should be with
// in the same range as the input data, and it starts from 0. // in the same range as the input data, and it starts from 0.
...@@ -196,12 +180,12 @@ message FillerParameter { ...@@ -196,12 +180,12 @@ message FillerParameter {
message NetParameter { message NetParameter {
optional string name = 1; // consider giving the network a name optional string name = 1; // consider giving the network a name
// The input blobs to the network. // DEPRECATED. See InputParameter. The input blobs to the network.
repeated string input = 3; repeated string input = 3;
// The shape of the input blobs. // DEPRECATED. See InputParameter. The shape of the input blobs.
repeated BlobShape input_shape = 8; repeated BlobShape input_shape = 8;
// 4D input dimensions -- deprecated. Use "shape" instead. // 4D input dimensions -- deprecated. Use "input_shape" instead.
// If specified, for each input blob there should be four // If specified, for each input blob there should be four
// values specifying the num, channels, height and width of the input blob. // values specifying the num, channels, height and width of the input blob.
// Thus, there should be a total of (4 * #input) numbers. // Thus, there should be a total of (4 * #input) numbers.
...@@ -231,7 +215,7 @@ message NetParameter { ...@@ -231,7 +215,7 @@ message NetParameter {
// NOTE // NOTE
// Update the next available ID when you add a new SolverParameter field. // Update the next available ID when you add a new SolverParameter field.
// //
// SolverParameter next available ID: 37 (last added: iter_size) // SolverParameter next available ID: 41 (last added: type)
message SolverParameter { message SolverParameter {
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// Specifying the train and test networks // Specifying the train and test networks
...@@ -286,7 +270,23 @@ message SolverParameter { ...@@ -286,7 +270,23 @@ message SolverParameter {
optional int32 max_iter = 7; // the maximum number of iterations optional int32 max_iter = 7; // the maximum number of iterations
// accumulate gradients over `iter_size` x `batch_size` instances // accumulate gradients over `iter_size` x `batch_size` instances
optional int32 iter_size = 36 [default = 1]; optional int32 iter_size = 36 [default = 1];
optional string lr_policy = 8; // The learning rate decay policy.
// The learning rate decay policy. The currently implemented learning rate
// policies are as follows:
// - fixed: always return base_lr.
// - step: return base_lr * gamma ^ (floor(iter / step))
// - exp: return base_lr * gamma ^ iter
// - inv: return base_lr * (1 + gamma * iter) ^ (- power)
// - multistep: similar to step but it allows non uniform steps defined by
// stepvalue
// - poly: the effective learning rate follows a polynomial decay, to be
// zero by the max_iter. return base_lr * (1 - iter/max_iter) ^ (power)
// - sigmoid: the effective learning rate follows a sigmoid decay
// return base_lr * (1 / (1 + exp(-gamma * (iter - stepsize))))
//
// where base_lr, max_iter, gamma, step, stepvalue and power are defined
// in the solver parameter protocol buffer, and iter is the current iteration.
optional string lr_policy = 8;
optional float gamma = 9; // The parameter to compute the learning rate. optional float gamma = 9; // The parameter to compute the learning rate.
optional float power = 10; // The parameter to compute the learning rate. optional float power = 10; // The parameter to compute the learning rate.
optional float momentum = 11; // The momentum value. optional float momentum = 11; // The momentum value.
...@@ -308,6 +308,11 @@ message SolverParameter { ...@@ -308,6 +308,11 @@ message SolverParameter {
// whether to snapshot diff in the results or not. Snapshotting diff will help // whether to snapshot diff in the results or not. Snapshotting diff will help
// debugging but the final protocol buffer size will be much larger. // debugging but the final protocol buffer size will be much larger.
optional bool snapshot_diff = 16 [default = false]; optional bool snapshot_diff = 16 [default = false];
// Formats selectable for solver snapshots.
enum SnapshotFormat {
HDF5 = 0;
BINARYPROTO = 1;
}
// Snapshot format to use; BINARYPROTO when left unset.
optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
// the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default.
enum SolverMode { enum SolverMode {
CPU = 0; CPU = 0;
...@@ -321,15 +326,17 @@ message SolverParameter { ...@@ -321,15 +326,17 @@ message SolverParameter {
// (and by default) initialize using a seed derived from the system clock. // (and by default) initialize using a seed derived from the system clock.
optional int64 random_seed = 20 [default = -1]; optional int64 random_seed = 20 [default = -1];
// Solver type // type of the solver
enum SolverType { optional string type = 40 [default = "SGD"];
SGD = 0;
NESTEROV = 1; // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam
ADAGRAD = 2;
}
optional SolverType solver_type = 30 [default = SGD];
// numerical stability for AdaGrad
optional float delta = 31 [default = 1e-8]; optional float delta = 31 [default = 1e-8];
// parameters for the Adam solver
optional float momentum2 = 39 [default = 0.999];
// RMSProp decay value
// MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
optional float rms_decay = 38 [default = 0.99];
// If true, print information about the state of the net that may help with // If true, print information about the state of the net that may help with
// debugging learning problems. // debugging learning problems.
...@@ -337,6 +344,18 @@ message SolverParameter { ...@@ -337,6 +344,18 @@ message SolverParameter {
// If false, don't save a snapshot after training finishes. // If false, don't save a snapshot after training finishes.
optional bool snapshot_after_train = 28 [default = true]; optional bool snapshot_after_train = 28 [default = true];
// DEPRECATED: old solver enum types, use string instead
// (the replacement is the string field `type`, field number 40, whose
// default is "SGD").
enum SolverType {
SGD = 0;
NESTEROV = 1;
ADAGRAD = 2;
RMSPROP = 3;
ADADELTA = 4;
ADAM = 5;
}
// DEPRECATED: use type instead of solver_type
// Field number 30 and the SGD default are kept as before, so existing
// messages that set solver_type still parse.
optional SolverType solver_type = 30 [default = SGD];
} }
// A message that stores the solver snapshots // A message that stores the solver snapshots
...@@ -404,7 +423,7 @@ message ParamSpec { ...@@ -404,7 +423,7 @@ message ParamSpec {
// NOTE // NOTE
// Update the next available ID when you add a new LayerParameter field. // Update the next available ID when you add a new LayerParameter field.
// //
// LayerParameter next available layer-specific ID: 142 (last added: detection_output_param) // LayerParameter next available layer-specific ID: 147 (last added: recurrent_param)
message LayerParameter { message LayerParameter {
optional string name = 1; // the layer name optional string name = 1; // the layer name
optional string type = 2; // the layer type optional string type = 2; // the layer type
...@@ -426,7 +445,12 @@ message LayerParameter { ...@@ -426,7 +445,12 @@ message LayerParameter {
// The blobs containing the numeric parameters of the layer. // The blobs containing the numeric parameters of the layer.
repeated BlobProto blobs = 7; repeated BlobProto blobs = 7;
// Specifies on which bottoms the backpropagation should be skipped. // Specifies whether to backpropagate to each bottom. If unspecified,
// Caffe will automatically infer whether each input needs backpropagation
// to compute parameter gradients. If set to true for some inputs,
// backpropagation to those inputs is forced; if set false for some inputs,
// backpropagation to those inputs is skipped.
//
// The size must be either 0 or equal to the number of bottoms. // The size must be either 0 or equal to the number of bottoms.
repeated bool propagate_down = 11; repeated bool propagate_down = 11;
...@@ -453,15 +477,19 @@ message LayerParameter { ...@@ -453,15 +477,19 @@ message LayerParameter {
// The default for the engine is set by the ENGINE switch at compile-time. // The default for the engine is set by the ENGINE switch at compile-time.
optional AccuracyParameter accuracy_param = 102; optional AccuracyParameter accuracy_param = 102;
optional ArgMaxParameter argmax_param = 103; optional ArgMaxParameter argmax_param = 103;
optional BatchNormParameter batch_norm_param = 139;
optional BiasParameter bias_param = 141;
optional ConcatParameter concat_param = 104; optional ConcatParameter concat_param = 104;
optional ContrastiveLossParameter contrastive_loss_param = 105; optional ContrastiveLossParameter contrastive_loss_param = 105;
optional ConvolutionParameter convolution_param = 106; optional ConvolutionParameter convolution_param = 106;
optional CropParameter crop_param = 137; optional CropParameter crop_param = 144;
optional DataParameter data_param = 107; optional DataParameter data_param = 107;
optional DetectionOutputParameter detection_output_param = 141; optional DetectionOutputParameter detection_output_param = 147;
optional DropoutParameter dropout_param = 108; optional DropoutParameter dropout_param = 108;
optional DummyDataParameter dummy_data_param = 109; optional DummyDataParameter dummy_data_param = 109;
optional EltwiseParameter eltwise_param = 110; optional EltwiseParameter eltwise_param = 110;
optional ELUParameter elu_param = 140;
optional EmbedParameter embed_param = 137;
optional ExpParameter exp_param = 111; optional ExpParameter exp_param = 111;
optional FlattenParameter flatten_param = 135; optional FlattenParameter flatten_param = 135;
optional HDF5DataParameter hdf5_data_param = 112; optional HDF5DataParameter hdf5_data_param = 112;
...@@ -470,26 +498,31 @@ message LayerParameter { ...@@ -470,26 +498,31 @@ message LayerParameter {
optional ImageDataParameter image_data_param = 115; optional ImageDataParameter image_data_param = 115;
optional InfogainLossParameter infogain_loss_param = 116; optional InfogainLossParameter infogain_loss_param = 116;
optional InnerProductParameter inner_product_param = 117; optional InnerProductParameter inner_product_param = 117;
optional InputParameter input_param = 143;
optional LogParameter log_param = 134; optional LogParameter log_param = 134;
optional LRNParameter lrn_param = 118; optional LRNParameter lrn_param = 118;
optional MemoryDataParameter memory_data_param = 119; optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120; optional MVNParameter mvn_param = 120;
optional NormalizeBBoxParameter normalize_bbox_param = 139; optional NormalizeBBoxParameter normalize_bbox_param = 149;
optional PermuteParameter permute_param = 138; optional PermuteParameter permute_param = 148;
optional ParameterParameter parameter_param = 145;
optional PoolingParameter pooling_param = 121; optional PoolingParameter pooling_param = 121;
optional PowerParameter power_param = 122; optional PowerParameter power_param = 122;
optional PReLUParameter prelu_param = 131; optional PReLUParameter prelu_param = 131;
optional PriorBoxParameter prior_box_param = 140; optional PriorBoxParameter prior_box_param = 150;
optional PythonParameter python_param = 130; optional PythonParameter python_param = 130;
optional RecurrentParameter recurrent_param = 146;
optional ReductionParameter reduction_param = 136; optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123; optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133; optional ReshapeParameter reshape_param = 133;
optional ScaleParameter scale_param = 142;
optional SigmoidParameter sigmoid_param = 124; optional SigmoidParameter sigmoid_param = 124;
optional SliceParameter slice_param = 126;
optional SoftmaxParameter softmax_param = 125; optional SoftmaxParameter softmax_param = 125;
optional SPPParameter spp_param = 132; optional SPPParameter spp_param = 132;
optional SliceParameter slice_param = 126;
optional TanHParameter tanh_param = 127; optional TanHParameter tanh_param = 127;
optional ThresholdParameter threshold_param = 128; optional ThresholdParameter threshold_param = 128;
optional TileParameter tile_param = 138;
optional WindowDataParameter window_data_param = 129; optional WindowDataParameter window_data_param = 129;
} }
...@@ -506,7 +539,7 @@ message TransformationParameter { ...@@ -506,7 +539,7 @@ message TransformationParameter {
optional uint32 crop_size = 3 [default = 0]; optional uint32 crop_size = 3 [default = 0];
// mean_file and mean_value cannot be specified at the same time // mean_file and mean_value cannot be specified at the same time
optional string mean_file = 4; optional string mean_file = 4;
// if specified can be repeated once (would substract it from all the channels) // if specified can be repeated once (would subtract it from all the channels)
// or can be repeated the same number of times as channels // or can be repeated the same number of times as channels
// (would subtract them from the corresponding channel) // (would subtract them from the corresponding channel)
repeated float mean_value = 5; repeated float mean_value = 5;
...@@ -520,9 +553,29 @@ message TransformationParameter { ...@@ -520,9 +553,29 @@ message TransformationParameter {
message LossParameter { message LossParameter {
// If specified, ignore instances with the given label. // If specified, ignore instances with the given label.
optional int32 ignore_label = 1; optional int32 ignore_label = 1;
// If true, normalize each batch across all instances (including spatial // How to normalize the loss for loss layers that aggregate across batches,
// dimesions, but not ignored instances); else, divide by batch size only. // spatial dimensions, or other dimensions. Currently only implemented in
optional bool normalize = 2 [default = true]; // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers.
enum NormalizationMode {
// Divide by the number of examples in the batch times spatial dimensions.
// Outputs that receive the ignore label will NOT be ignored in computing
// the normalization factor.
FULL = 0;
// Divide by the total number of output locations that do not take the
// ignore_label. If ignore_label is not set, this behaves like FULL.
VALID = 1;
// Divide by the batch size.
BATCH_SIZE = 2;
// Do not normalize the loss.
NONE = 3;
}
// For historical reasons, the default normalization for
// SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID.
optional NormalizationMode normalization = 3 [default = VALID];
// Deprecated. Ignored if normalization is specified. If normalization
// is not specified, then setting this to false will be equivalent to
// normalization = BATCH_SIZE to be consistent with previous behavior.
optional bool normalize = 2;
} }
// Messages that store parameters used by individual layer types follow, in // Messages that store parameters used by individual layer types follow, in
...@@ -549,6 +602,11 @@ message ArgMaxParameter { ...@@ -549,6 +602,11 @@ message ArgMaxParameter {
// If true produce pairs (argmax, maxval) // If true produce pairs (argmax, maxval)
optional bool out_max_val = 1 [default = false]; optional bool out_max_val = 1 [default = false];
optional uint32 top_k = 2 [default = 1]; optional uint32 top_k = 2 [default = 1];
// The axis along which to maximize -- may be negative to index from the
// end (e.g., -1 for the last axis).
// By default ArgMaxLayer maximizes over the flattened trailing dimensions
// for each index of the first / num dimension.
optional int32 axis = 3;
} }
message ConcatParameter { message ConcatParameter {
...@@ -562,6 +620,50 @@ message ConcatParameter { ...@@ -562,6 +620,50 @@ message ConcatParameter {
optional uint32 concat_dim = 1 [default = 1]; optional uint32 concat_dim = 1 [default = 1];
} }
// Parameters for batch normalization; referenced from
// LayerParameter.batch_norm_param.
message BatchNormParameter {
// If false, accumulate global mean/variance values via a moving average. If
// true, use those accumulated values instead of computing mean/variance
// across the batch.
// No default is declared here, so an unset value is distinguishable from
// an explicit true/false.
optional bool use_global_stats = 1;
// How much does the moving average decay each iteration?
optional float moving_average_fraction = 2 [default = .999];
// Small value to add to the variance estimate so that we don't divide by
// zero.
optional float eps = 3 [default = 1e-5];
}
// Parameters for adding a bias along an axis of the input; referenced from
// LayerParameter.bias_param.
message BiasParameter {
// The first axis of bottom[0] (the first input Blob) along which to apply
// bottom[1] (the second input Blob). May be negative to index from the end
// (e.g., -1 for the last axis).
//
// For example, if bottom[0] is 4D with shape 100x3x40x60, the output
// top[0] will have the same shape, and bottom[1] may have any of the
// following shapes (for the given value of axis):
// (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
// (axis == 1 == -3) 3; 3x40; 3x40x60
// (axis == 2 == -2) 40; 40x60
// (axis == 3 == -1) 60
// Furthermore, bottom[1] may have the empty shape (regardless of the value of
// "axis") -- a scalar bias.
optional int32 axis = 1 [default = 1];
// (num_axes is ignored unless just one bottom is given and the bias is
// a learned parameter of the layer. Otherwise, num_axes is determined by the
// number of axes of the second bottom.)
// The number of axes of the input (bottom[0]) covered by the bias
// parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
// Set num_axes := 0, to add a zero-axis Blob: a scalar.
optional int32 num_axes = 2 [default = 1];
// (filler is ignored unless just one bottom is given and the bias is
// a learned parameter of the layer.)
// The initialization for the learned bias parameter.
// Default is the zero (0) initialization, resulting in the BiasLayer
// initially performing the identity operation.
optional FillerParameter filler = 3;
}
message ContrastiveLossParameter { message ContrastiveLossParameter {
// margin for dissimilar pair // margin for dissimilar pair
optional float margin = 1 [default = 1.0]; optional float margin = 1 [default = 1.0];
...@@ -577,18 +679,28 @@ message ContrastiveLossParameter { ...@@ -577,18 +679,28 @@ message ContrastiveLossParameter {
message ConvolutionParameter { message ConvolutionParameter {
optional uint32 num_output = 1; // The number of outputs for the layer optional uint32 num_output = 1; // The number of outputs for the layer
optional bool bias_term = 2 [default = true]; // whether to have bias terms optional bool bias_term = 2 [default = true]; // whether to have bias terms
// Pad, kernel size, and stride are all given as a single value for equal // Pad, kernel size, and stride are all given as a single value for equal
// dimensions in height and width or as Y, X pairs. // dimensions in all spatial dimensions, or once per spatial dimension.
optional uint32 pad = 3 [default = 0]; // The padding size (equal in Y, X) repeated uint32 pad = 3; // The padding size; defaults to 0
optional uint32 pad_h = 9 [default = 0]; // The padding height repeated uint32 kernel_size = 4; // The kernel size
optional uint32 pad_w = 10 [default = 0]; // The padding width repeated uint32 stride = 6; // The stride; defaults to 1
optional uint32 kernel_size = 4; // The kernel size (square) // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
optional uint32 kernel_h = 11; // The kernel height // holes. (Kernel dilation is sometimes referred to by its use in the
optional uint32 kernel_w = 12; // The kernel width // algorithme à trous from Holschneider et al. 1987.)
repeated uint32 dilation = 18; // The dilation; defaults to 1
// For 2D convolution only, the *_h and *_w versions may also be used to
// specify both spatial dimensions.
optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
optional uint32 kernel_h = 11; // The kernel height (2D only)
optional uint32 kernel_w = 12; // The kernel width (2D only)
optional uint32 stride_h = 13; // The stride height (2D only)
optional uint32 stride_w = 14; // The stride width (2D only)
optional uint32 group = 5 [default = 1]; // The group size for group conv optional uint32 group = 5 [default = 1]; // The group size for group conv
optional uint32 stride = 6 [default = 1]; // The stride (equal in Y, X)
optional uint32 stride_h = 13; // The stride height
optional uint32 stride_w = 14; // The stride width
optional FillerParameter weight_filler = 7; // The filler for the weight optional FillerParameter weight_filler = 7; // The filler for the weight
optional FillerParameter bias_filler = 8; // The filler for the bias optional FillerParameter bias_filler = 8; // The filler for the bias
enum Engine { enum Engine {
...@@ -597,12 +709,42 @@ message ConvolutionParameter { ...@@ -597,12 +709,42 @@ message ConvolutionParameter {
CUDNN = 2; CUDNN = 2;
} }
optional Engine engine = 15 [default = DEFAULT]; optional Engine engine = 15 [default = DEFAULT];
// Factor used to dilate the kernel, (implicitly) zero-filling the resulting
// holes. (Kernel dilation is sometimes referred to by its use in the // The axis to interpret as "channels" when performing convolution.
// algorithme a trous from Holschneider et al. 1987.) // Preceding dimensions are treated as independent inputs;
optional uint32 dilation_h = 18; // The dilation height // succeeding dimensions are treated as "spatial".
optional uint32 dilation_w = 19; // The dilation width // With (N, C, H, W) inputs, and axis == 1 (the default), we perform
optional uint32 dilation = 20; // The dilation; defaults to 1 // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for
// groups g>1) filters across the spatial axes (H, W) of the input.
// With (N, C, D, H, W) inputs, and axis == 1, we perform
// N independent 3D convolutions, sliding (C/g)-channels
// filters across the spatial axes (D, H, W) of the input.
optional int32 axis = 16 [default = 1];
// Whether to force use of the general ND convolution, even if a specific
// implementation for blobs of the appropriate number of spatial dimensions
// is available. (Currently, there is only a 2D-specific convolution
// implementation; for input blobs with num_axes != 2, this option is
// ignored and the ND implementation will be used.)
optional bool force_nd_im2col = 17 [default = false];
}
// Parameters for the crop operation; referenced from
// LayerParameter.crop_param.
message CropParameter {
// To crop, elements of the first bottom are selected to fit the dimensions
// of the second, reference bottom. The crop is configured by
// - the crop `axis` to pick the dimensions for cropping
// - the crop `offset` to set the shift for all/each dimension
// to align the cropped bottom with the reference bottom.
// All dimensions up to but excluding `axis` are preserved, while
// the dimensions including and trailing `axis` are cropped.
// If only one `offset` is set, then all dimensions are offset by this amount.
// Otherwise, the number of offsets must equal the number of cropped axes to
// shift the crop in each dimension accordingly.
// Note: standard dimensions are N,C,H,W so the default is a spatial crop,
// and `axis` may be negative to index from the end (e.g., -1 for the last
// axis).
// First cropped dimension; defaults to 2.
optional int32 axis = 1 [default = 2];
// Either a single shift applied to every cropped dimension, or one shift
// per cropped dimension.
repeated uint32 offset = 2;
}
message DataParameter { message DataParameter {
...@@ -618,6 +760,7 @@ message DataParameter { ...@@ -618,6 +760,7 @@ message DataParameter {
// to avoid all asynchronous sgd clients to start at the same point. The skip // to avoid all asynchronous sgd clients to start at the same point. The skip
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
// be larger than the number of keys in the database. // be larger than the number of keys in the database.
// DEPRECATED. Each solver accesses a different subset of the database.
optional uint32 rand_skip = 7 [default = 0]; optional uint32 rand_skip = 7 [default = 0];
optional DB backend = 8 [default = LEVELDB]; optional DB backend = 8 [default = LEVELDB];
// DEPRECATED. See TransformationParameter. For data pre-processing, we can do // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
...@@ -633,6 +776,9 @@ message DataParameter { ...@@ -633,6 +776,9 @@ message DataParameter {
optional bool mirror = 6 [default = false]; optional bool mirror = 6 [default = false];
// Force the encoded image to have 3 color channels // Force the encoded image to have 3 color channels
optional bool force_encoded_color = 9 [default = false]; optional bool force_encoded_color = 9 [default = false];
// Prefetch queue (Number of batches to prefetch to host memory, increase if
// data access bandwidth varies).
optional uint32 prefetch = 10 [default = 4];
} }
message DropoutParameter { message DropoutParameter {
...@@ -672,6 +818,29 @@ message EltwiseParameter { ...@@ -672,6 +818,29 @@ message EltwiseParameter {
optional bool stable_prod_grad = 3 [default = true]; optional bool stable_prod_grad = 3 [default = true];
} }
// Message that stores parameters used by ELULayer
message ELUParameter {
// Described in:
// Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate
// Deep Network Learning by Exponential Linear Units (ELUs). arXiv
// NOTE(review): alpha is presumably the coefficient of the negative branch
// in the cited ELU definition -- confirm against ELULayer. Defaults to 1.
optional float alpha = 1 [default = 1];
}
// Message that stores parameters used by EmbedLayer
message EmbedParameter {
optional uint32 num_output = 1; // The number of outputs for the layer
// The input is given as integers to be interpreted as one-hot
// vector indices with dimension input_dim. Hence input_dim should be
// 1 greater than the maximum possible input value.
optional uint32 input_dim = 2;
optional bool bias_term = 3 [default = true]; // Whether to use a bias term
optional FillerParameter weight_filler = 4; // The filler for the weight
optional FillerParameter bias_filler = 5; // The filler for the bias
}
// Message that stores parameters used by ExpLayer
message ExpParameter { message ExpParameter {
// ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0.
// Or if base is set to the default (-1), base is set to e, // Or if base is set to the default (-1), base is set to e,
...@@ -725,7 +894,7 @@ message ImageDataParameter { ...@@ -725,7 +894,7 @@ message ImageDataParameter {
// Specify the data source. // Specify the data source.
optional string source = 1; optional string source = 1;
// Specify the batch size. // Specify the batch size.
optional uint32 batch_size = 4; optional uint32 batch_size = 4 [default = 1];
// The rand_skip variable is for the data layer to skip a few data points // The rand_skip variable is for the data layer to skip a few data points
// to avoid all asynchronous sgd clients to start at the same point. The skip // to avoid all asynchronous sgd clients to start at the same point. The skip
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
...@@ -767,6 +936,19 @@ message InnerProductParameter { ...@@ -767,6 +936,19 @@ message InnerProductParameter {
// all preceding axes are retained in the output. // all preceding axes are retained in the output.
// May be negative to index from the end (e.g., -1 for the last axis). // May be negative to index from the end (e.g., -1 for the last axis).
optional int32 axis = 5 [default = 1]; optional int32 axis = 5 [default = 1];
// Specify whether to transpose the weight matrix or not.
// If transpose == true, any operations will be performed on the transpose
// of the weight matrix. The weight matrix itself is not going to be transposed
// but rather the transfer flag of operations will be toggled accordingly.
optional bool transpose = 6 [default = false];
}
// Parameters declaring the shapes of manually assigned top blobs;
// referenced from LayerParameter.input_param. NetParameter's deprecated
// `input` / `input_shape` fields point here as their replacement.
message InputParameter {
// This layer produces N >= 1 top blob(s) to be assigned manually.
// Define N shapes to set a shape for each top.
// Define 1 shape to set the same shape for every top.
// Define no shape to defer to reshaping manually.
repeated BlobShape shape = 1;
}
// Message that stores parameters used by LogLayer // Message that stores parameters used by LogLayer
...@@ -790,6 +972,12 @@ message LRNParameter { ...@@ -790,6 +972,12 @@ message LRNParameter {
} }
optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
optional float k = 5 [default = 1.]; optional float k = 5 [default = 1.];
// Implementation selector for this layer.
// NOTE(review): value names match the Engine enum visible on
// ConvolutionParameter; presumably DEFAULT defers the choice to the
// build-time setting -- confirm.
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
// Engine to use; DEFAULT when left unset.
optional Engine engine = 6 [default = DEFAULT];
} }
message MemoryDataParameter { message MemoryDataParameter {
...@@ -810,6 +998,10 @@ message MVNParameter { ...@@ -810,6 +998,10 @@ message MVNParameter {
optional float eps = 3 [default = 1e-9]; optional float eps = 3 [default = 1e-9];
} }
// Parameters referenced from LayerParameter.parameter_param.
message ParameterParameter {
// A single optional blob shape.
// NOTE(review): presumably the shape of the parameter blob the layer
// holds -- confirm against the layer implementation.
optional BlobShape shape = 1;
}
message PoolingParameter { message PoolingParameter {
enum PoolMethod { enum PoolMethod {
MAX = 0; MAX = 0;
...@@ -849,6 +1041,34 @@ message PowerParameter { ...@@ -849,6 +1041,34 @@ message PowerParameter {
message PythonParameter { message PythonParameter {
optional string module = 1; optional string module = 1;
optional string layer = 2; optional string layer = 2;
// This value is set to the attribute `param_str` of the `PythonLayer` object
// in Python before calling the `setup()` method. This could be a number,
// string, dictionary in Python dict format, JSON, etc. You may parse this
// string in `setup` method and use it in `forward` and `backward`.
optional string param_str = 3 [default = ''];
// Whether this PythonLayer is shared among worker solvers during data parallelism.
// If true, each worker solver sequentially runs forward from this layer.
// This value should be set true if you are using it as a data layer.
optional bool share_in_parallel = 4 [default = false];
}
// Message that stores parameters used by RecurrentLayer
message RecurrentParameter {
// The dimension of the output (and usually hidden state) representation --
// must be explicitly set to non-zero.
optional uint32 num_output = 1 [default = 0];
optional FillerParameter weight_filler = 2; // The filler for the weight
optional FillerParameter bias_filler = 3; // The filler for the bias
// Whether to enable displaying debug_info in the unrolled recurrent net.
optional bool debug_info = 4 [default = false];
// Whether to add as additional inputs (bottoms) the initial hidden state
// blobs, and add as additional outputs (tops) the final timestep hidden state
// blobs. The number of additional bottom/top blobs required depends on the
// recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs.
optional bool expose_hidden = 5 [default = false];
} }
// Message that stores parameters used by ReductionLayer // Message that stores parameters used by ReductionLayer
...@@ -915,7 +1135,7 @@ message ReshapeParameter { ...@@ -915,7 +1135,7 @@ message ReshapeParameter {
// reshape_param { shape { dim: 2 dim: 2 dim: 4 } } // reshape_param { shape { dim: 2 dim: 2 dim: 4 } }
// reshape_param { shape { dim: 0 dim: 2 dim: 4 } } // reshape_param { shape { dim: 0 dim: 2 dim: 4 } }
// reshape_param { shape { dim: 0 dim: 2 dim: -1 } } // reshape_param { shape { dim: 0 dim: 2 dim: -1 } }
// reshape_param { shape { dim: -1 dim: 0 dim: 2 } } // reshape_param { shape { dim: 0 dim:-1 dim: 4 } }
// //
optional BlobShape shape = 1; optional BlobShape shape = 1;
...@@ -960,6 +1180,43 @@ message ReshapeParameter { ...@@ -960,6 +1180,43 @@ message ReshapeParameter {
optional int32 num_axes = 3 [default = -1]; optional int32 num_axes = 3 [default = -1];
} }
// Message that stores parameters used by ScaleLayer
message ScaleParameter {
// The first axis of bottom[0] (the first input Blob) along which to apply
// bottom[1] (the second input Blob). May be negative to index from the end
// (e.g., -1 for the last axis).
//
// For example, if bottom[0] is 4D with shape 100x3x40x60, the output
// top[0] will have the same shape, and bottom[1] may have any of the
// following shapes (for the given value of axis):
// (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
// (axis == 1 == -3) 3; 3x40; 3x40x60
// (axis == 2 == -2) 40; 40x60
// (axis == 3 == -1) 60
// Furthermore, bottom[1] may have the empty shape (regardless of the value of
// "axis") -- a scalar multiplier.
optional int32 axis = 1 [default = 1];
// (num_axes is ignored unless just one bottom is given and the scale is
// a learned parameter of the layer. Otherwise, num_axes is determined by the
// number of axes of the second bottom.)
// The number of axes of the input (bottom[0]) covered by the scale
// parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
// Set num_axes := 0, to multiply with a zero-axis Blob: a scalar.
optional int32 num_axes = 2 [default = 1];
// (filler is ignored unless just one bottom is given and the scale is
// a learned parameter of the layer.)
// The initialization for the learned scale parameter.
// Default is the unit (1) initialization, resulting in the ScaleLayer
// initially performing the identity operation.
optional FillerParameter filler = 3;
// Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
// may be more efficient). Initialized with bias_filler (defaults to 0).
optional bool bias_term = 4 [default = false];
// The initialization for the learned bias; only relevant when bias_term is
// true (see the comment on bias_term above).
optional FillerParameter bias_filler = 5;
}
message SigmoidParameter { message SigmoidParameter {
enum Engine { enum Engine {
DEFAULT = 0; DEFAULT = 0;
...@@ -1004,6 +1261,16 @@ message TanHParameter { ...@@ -1004,6 +1261,16 @@ message TanHParameter {
optional Engine engine = 1 [default = DEFAULT]; optional Engine engine = 1 [default = DEFAULT];
} }
// Message that stores parameters used by TileLayer
message TileParameter {
// The index of the axis to tile. Defaults to 1.
optional int32 axis = 1 [default = 1];
// The number of copies (tiles) of the blob to output.
// No explicit default is declared for this field.
optional int32 tiles = 2;
}
// Message that stores parameters used by ThresholdLayer
message ThresholdParameter { message ThresholdParameter {
optional float threshold = 1 [default = 0]; // Strictly positive values optional float threshold = 1 [default = 0]; // Strictly positive values
} }
...@@ -1250,7 +1517,7 @@ message PReLUParameter { ...@@ -1250,7 +1517,7 @@ message PReLUParameter {
// Initial value of a_i. Default is a_i=0.25 for all i. // Initial value of a_i. Default is a_i=0.25 for all i.
optional FillerParameter filler = 1; optional FillerParameter filler = 1;
// Whether or not slope paramters are shared across channels. // Whether or not slope parameters are shared across channels.
optional bool channel_shared = 2 [default = false]; optional bool channel_shared = 2 [default = false];
} }
......
...@@ -155,12 +155,17 @@ bool UpgradeV1LayerParameter(const V1LayerParameter& v1_layer_param, ...@@ -155,12 +155,17 @@ bool UpgradeV1LayerParameter(const V1LayerParameter& v1_layer_param,
const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type); const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type);
bool NetNeedsBatchNormUpgrade(const NetParameter& net_param);
void UpgradeNetBatchNorm(NetParameter* net_param);
// Check for deprecations and upgrade the NetParameter as needed. // Check for deprecations and upgrade the NetParameter as needed.
bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param); bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param);
bool NetNeedsUpgrade(const NetParameter& net_param) { bool NetNeedsUpgrade(const NetParameter& net_param) {
return NetNeedsV0ToV1Upgrade(net_param) || NetNeedsV1ToV2Upgrade(net_param); return NetNeedsV0ToV1Upgrade(net_param) || NetNeedsV1ToV2Upgrade(net_param) ||
NetNeedsBatchNormUpgrade(net_param);
} }
bool NetNeedsV0ToV1Upgrade(const NetParameter& net_param) { bool NetNeedsV0ToV1Upgrade(const NetParameter& net_param) {
...@@ -340,7 +345,7 @@ bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection, ...@@ -340,7 +345,7 @@ bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection,
} }
if (v0_layer_param.has_pad()) { if (v0_layer_param.has_pad()) {
if (type == "conv") { if (type == "conv") {
layer_param->mutable_convolution_param()->set_pad(v0_layer_param.pad()); layer_param->mutable_convolution_param()->add_pad(v0_layer_param.pad());
} else if (type == "pool") { } else if (type == "pool") {
layer_param->mutable_pooling_param()->set_pad(v0_layer_param.pad()); layer_param->mutable_pooling_param()->set_pad(v0_layer_param.pad());
} else { } else {
...@@ -350,7 +355,7 @@ bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection, ...@@ -350,7 +355,7 @@ bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection,
} }
if (v0_layer_param.has_kernelsize()) { if (v0_layer_param.has_kernelsize()) {
if (type == "conv") { if (type == "conv") {
layer_param->mutable_convolution_param()->set_kernel_size( layer_param->mutable_convolution_param()->add_kernel_size(
v0_layer_param.kernelsize()); v0_layer_param.kernelsize());
} else if (type == "pool") { } else if (type == "pool") {
layer_param->mutable_pooling_param()->set_kernel_size( layer_param->mutable_pooling_param()->set_kernel_size(
...@@ -371,7 +376,7 @@ bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection, ...@@ -371,7 +376,7 @@ bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection,
} }
if (v0_layer_param.has_stride()) { if (v0_layer_param.has_stride()) {
if (type == "conv") { if (type == "conv") {
layer_param->mutable_convolution_param()->set_stride( layer_param->mutable_convolution_param()->add_stride(
v0_layer_param.stride()); v0_layer_param.stride());
} else if (type == "pool") { } else if (type == "pool") {
layer_param->mutable_pooling_param()->set_stride( layer_param->mutable_pooling_param()->set_stride(
...@@ -774,6 +779,14 @@ bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param) { ...@@ -774,6 +779,14 @@ bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param) {
<< "V1LayerParameter"; << "V1LayerParameter";
} }
} }
// NetParameter uses old style batch norm layers; try to upgrade it.
if (NetNeedsBatchNormUpgrade(*param)) {
LOG(INFO) << "Attempting to upgrade batch norm layers using deprecated "
<< "params: " << param_file;
UpgradeNetBatchNorm(param);
LOG(INFO) << "Successfully upgraded batch norm layers using deprecated "
<< "params.";
}
return success; return success;
} }
...@@ -797,6 +810,29 @@ bool UpgradeV1Net(const NetParameter& v1_net_param, NetParameter* net_param) { ...@@ -797,6 +810,29 @@ bool UpgradeV1Net(const NetParameter& v1_net_param, NetParameter* net_param) {
return is_fully_compatible; return is_fully_compatible;
} }
// Returns true when at least one "BatchNorm" layer of the net still declares
// exactly three `param` entries, i.e. follows the previous BatchNorm layer
// definition and therefore needs upgrading.
bool NetNeedsBatchNormUpgrade(const NetParameter& net_param) {
  const int layer_count = net_param.layer_size();
  for (int idx = 0; idx < layer_count; ++idx) {
    // Layers of any other type are irrelevant for this check.
    if (net_param.layer(idx).type() != "BatchNorm") {
      continue;
    }
    // Three declared parameters mark the old-style definition.
    if (net_param.layer(idx).param_size() == 3) {
      return true;
    }
  }
  return false;
}
// Upgrades old-style BatchNorm layers in place: every "BatchNorm" layer that
// declares exactly three `param` entries (as required by the previous
// BatchNorm layer definition) has those entries cleared.
void UpgradeNetBatchNorm(NetParameter* net_param) {
  const int layer_count = net_param->layer_size();
  for (int idx = 0; idx < layer_count; ++idx) {
    const bool is_batch_norm = net_param->layer(idx).type() == "BatchNorm";
    if (!is_batch_norm || net_param->layer(idx).param_size() != 3) {
      continue;  // Not an old-style BatchNorm layer; leave it untouched.
    }
    net_param->mutable_layer(idx)->clear_param();
  }
}
bool UpgradeV1LayerParameter(const V1LayerParameter& v1_layer_param, bool UpgradeV1LayerParameter(const V1LayerParameter& v1_layer_param,
LayerParameter* layer_param) { LayerParameter* layer_param) {
layer_param->Clear(); layer_param->Clear();
......
...@@ -286,13 +286,12 @@ template<> //BatchNormLayer specialization ...@@ -286,13 +286,12 @@ template<> //BatchNormLayer specialization
Ptr<Layer> createLayerFromCaffe<BatchNormLayer>(LayerParams& params) Ptr<Layer> createLayerFromCaffe<BatchNormLayer>(LayerParams& params)
{ {
const std::vector<Blob> &blobs = params.blobs; const std::vector<Blob> &blobs = params.blobs;
CV_Assert(blobs.size() == 4); CV_Assert(blobs.size() >= 3);
float eps = params.get<float>("eps");
bool hasWeights = params.get<bool>("has_weight", false); bool hasWeights = params.get<bool>("has_weight", false);
bool hasBias = params.get<bool>("has_bias", false); bool hasBias = params.get<bool>("has_bias", false);
float epsilon = params.get<float>("eps", 1E-5);
Ptr<BatchNormLayer> l = BatchNormLayer::create(eps, hasWeights, hasBias); Ptr<BatchNormLayer> l = BatchNormLayer::create(hasWeights, hasBias, epsilon);
l->setParamsFrom(params); l->setParamsFrom(params);
return Ptr<Layer>(l); return Ptr<Layer>(l);
...@@ -318,6 +317,15 @@ Ptr<Layer> createLayerFromCaffe<MaxUnpoolLayer>(LayerParams& params) ...@@ -318,6 +317,15 @@ Ptr<Layer> createLayerFromCaffe<MaxUnpoolLayer>(LayerParams& params)
return Ptr<Layer>(l); return Ptr<Layer>(l);
} }
template<> //ScaleLayer specialization
Ptr<Layer> createLayerFromCaffe<ScaleLayer>(LayerParams& params)
{
    // "bias_term" is optional in the layer parameters; a missing value means no bias.
    const bool hasBias = params.get<bool>("bias_term", false);

    Ptr<ScaleLayer> layer = ScaleLayer::create(hasBias);
    layer->setParamsFrom(params);

    return Ptr<Layer>(layer);
}
//Explicit instantiation //Explicit instantiation
template Ptr<Layer> createLayerFromCaffe<ConvolutionLayer>(LayerParams&); template Ptr<Layer> createLayerFromCaffe<ConvolutionLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams&); template Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams&);
...@@ -342,6 +350,6 @@ template Ptr<Layer> createLayerFromCaffe<EltwiseLayer>(LayerParams&); ...@@ -342,6 +350,6 @@ template Ptr<Layer> createLayerFromCaffe<EltwiseLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<BatchNormLayer>(LayerParams&); template Ptr<Layer> createLayerFromCaffe<BatchNormLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<ChannelsPReLULayer>(LayerParams&); template Ptr<Layer> createLayerFromCaffe<ChannelsPReLULayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<MaxUnpoolLayer>(LayerParams&); template Ptr<Layer> createLayerFromCaffe<MaxUnpoolLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<ScaleLayer>(LayerParams&);
} }
} }
...@@ -52,6 +52,7 @@ ...@@ -52,6 +52,7 @@
#include "layers/normalize_bbox_layer.hpp" #include "layers/normalize_bbox_layer.hpp"
#include "layers/shift_layer.hpp" #include "layers/shift_layer.hpp"
#include "layers/padding_layer.hpp" #include "layers/padding_layer.hpp"
#include "layers/scale_layer.hpp"
namespace cv namespace cv
{ {
...@@ -109,6 +110,7 @@ void initModule() ...@@ -109,6 +110,7 @@ void initModule()
REG_RUNTIME_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer); REG_RUNTIME_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer);
REG_RUNTIME_LAYER_CLASS(Shift, ShiftLayer); REG_RUNTIME_LAYER_CLASS(Shift, ShiftLayer);
REG_RUNTIME_LAYER_CLASS(Padding, PaddingLayer); REG_RUNTIME_LAYER_CLASS(Padding, PaddingLayer);
REG_RUNTIME_LAYER_FUNC(Scale, createLayerFromCaffe<ScaleLayer>);
init.status = true; init.status = true;
} }
......
...@@ -16,19 +16,21 @@ namespace cv ...@@ -16,19 +16,21 @@ namespace cv
namespace dnn namespace dnn
{ {
BatchNormLayerImpl::BatchNormLayerImpl(float eps_, bool hasWeights_, bool hasBias_): BatchNormLayerImpl::BatchNormLayerImpl(bool hasWeights_, bool hasBias_, float epsilon_):
eps(eps_),
hasWeights(hasWeights_), hasWeights(hasWeights_),
hasBias(hasBias_) hasBias(hasBias_),
epsilon(epsilon_)
{} {}
void BatchNormLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs) void BatchNormLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{ {
CV_Assert(blobs.size() == 4); CV_Assert(blobs.size() >= 2);
outputs.resize(inputs.size()); outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
{ {
CV_Assert(blobs[0].total() == inputs[i]->channels());
CV_Assert(blobs[1].total() == inputs[i]->channels());
outputs[i].create(inputs[i]->shape()); outputs[i].create(inputs[i]->shape());
} }
} }
...@@ -39,30 +41,46 @@ void BatchNormLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> & ...@@ -39,30 +41,46 @@ void BatchNormLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &
Blob &inpBlob = *inputs[0]; Blob &inpBlob = *inputs[0];
int weightsBlobIndex = 2;
int biasBlobIndex = weightsBlobIndex + hasWeights;
float varMeanScale = 1;
if (!hasWeights && !hasBias) {
varMeanScale = *blobs[2].ptrf();
if (varMeanScale != 0)
varMeanScale = 1/varMeanScale;
}
Mat invStdMat;
cv::pow(blobs[1].matRefConst()*varMeanScale + epsilon, -0.5, invStdMat);
for (size_t ii = 0; ii < outputs.size(); ii++) for (size_t ii = 0; ii < outputs.size(); ii++)
{ {
Blob &outBlob = outputs[ii]; Blob &outBlob = outputs[ii];
if (hasWeights) if (hasWeights)
CV_Assert(inpBlob.channels() == blobs[2].total()); CV_Assert(inpBlob.channels() == blobs[weightsBlobIndex].total());
if (hasBias) if (hasBias)
CV_Assert(inpBlob.channels() == blobs[3].total()); CV_Assert(inpBlob.channels() == blobs[biasBlobIndex].total());
for (int n = 0; n < inpBlob.channels(); n++) for(int num = 0; num < outBlob.num(); num++)
{ {
float mean = blobs[0].matRefConst().at<float>(n); for (int n = 0; n < outBlob.channels(); n++)
float invstd = 1 / sqrt(blobs[1].matRefConst().at<float>(n) + eps); {
float w = hasWeights ? blobs[2].matRefConst().at<float>(n) : 1; float mean = blobs[0].matRefConst().at<float>(n)*varMeanScale;
float b = hasBias ? blobs[3].matRefConst().at<float>(n) : 0; double invstd = invStdMat.at<float>(n);
outBlob.getPlane(0, n) = (inpBlob.getPlane(0, n) - mean)*(w*invstd) + b; float w = hasWeights ? blobs[weightsBlobIndex].matRefConst().at<float>(n) : 1;
float b = hasBias ? blobs[biasBlobIndex].matRefConst().at<float>(n) : 0;
outBlob.getPlane(num, n) = (inpBlob.getPlane(num, n) - mean)*w*invstd + b;
}
} }
} }
} }
Ptr<BatchNormLayer> BatchNormLayer::create(float eps, bool has_weights, bool has_bias) Ptr<BatchNormLayer> BatchNormLayer::create(bool hasWeights, bool hasBias, float epsilon)
{ {
return Ptr<BatchNormLayer>(new BatchNormLayerImpl(eps, has_weights, has_bias)); return Ptr<BatchNormLayer>(new BatchNormLayerImpl(hasWeights, hasBias, epsilon));
} }
} // namespace dnn } // namespace dnn
......
...@@ -21,17 +21,17 @@ namespace dnn ...@@ -21,17 +21,17 @@ namespace dnn
class BatchNormLayerImpl : public BatchNormLayer class BatchNormLayerImpl : public BatchNormLayer
{ {
public: public:
BatchNormLayerImpl(float eps_, bool hasWeights_, bool hasBias_); BatchNormLayerImpl(bool hasWeights_, bool hasBias_, float epsilon_);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs); void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs); void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
private: private:
float eps;
bool hasWeights, hasBias; bool hasWeights, hasBias;
float epsilon;
}; };
} }
} }
#endif // BATCH_NORM_LAYER_HPP #endif // __OPENCV_DNN_LAYERS_BATCH_NORM_LAYER_HPP__
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Implementation of Scale layer.
*/
#include "scale_layer.hpp"
namespace cv
{
namespace dnn
{
void ScaleLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(blobs.size() == 1 + hasBias);
outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
{
outputs[i].create(inputs[i]->shape());
}
}
// Applies the per-channel transform out = w[c] * in + b[c] to the single
// input blob, where w is blobs[0] and b is blobs[1] (b is 0 when the layer
// was created without a bias).
//
// inputs  - must contain exactly one blob.
// outputs - blobs created by allocate(); each one receives the transformed
//           input.
void ScaleLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(inputs.size() == 1);

    Blob &inpBlob = *inputs[0];

    for (size_t ii = 0; ii < outputs.size(); ii++)
    {
        Blob &outBlob = outputs[ii];

        // One scale (and, if present, one bias) value is expected per channel.
        CV_Assert(inpBlob.channels() == blobs[0].total());

        if (hasBias)
            CV_Assert(inpBlob.channels() == blobs[1].total());

        // Visit every sample of the batch. Previously only sample 0 was
        // transformed, so the remaining samples of the output were never written.
        for (int num = 0; num < inpBlob.num(); num++)
        {
            for (int n = 0; n < inpBlob.channels(); n++)
            {
                float w = blobs[0].matRefConst().at<float>(n);
                float b = hasBias ? blobs[1].matRefConst().at<float>(n) : 0;
                outBlob.getPlane(num, n) = w*inpBlob.getPlane(num, n) + b;
            }
        }
    }
}
// Factory: builds the concrete ScaleLayerImpl and hands it back through the
// public ScaleLayer interface.
Ptr<ScaleLayer> ScaleLayer::create(bool hasBias)
{
    Ptr<ScaleLayer> layer(new ScaleLayerImpl(hasBias));
    return layer;
}
} // namespace dnn
} // namespace cv
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Declaration of scale layer, which multiplies and shifts channels in input blob.
*/
#ifndef __OPENCV_DNN_LAYERS_SCALE_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_SCALE_LAYER_HPP__
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
// Concrete implementation behind the public ScaleLayer interface.
class ScaleLayerImpl : public ScaleLayer
{
public:
// hasBias_ - whether the layer carries a bias in addition to the scale.
ScaleLayerImpl(bool hasBias_): hasBias(hasBias_) {}
// Prepares the output blobs for the given inputs (defined in the .cpp file).
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
// Computes the outputs from the inputs (defined in the .cpp file).
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
private:
// Set once at construction from the constructor argument.
bool hasBias;
};
}
}
#endif // __OPENCV_DNN_LAYERS_SCALE_LAYER_HPP__
...@@ -575,21 +575,19 @@ struct TorchImporter : public ::cv::dnn::Importer ...@@ -575,21 +575,19 @@ struct TorchImporter : public ::cv::dnn::Importer
layerParams.blobs.push_back(tensorParams["running_var"].second); layerParams.blobs.push_back(tensorParams["running_var"].second);
CV_Assert(scalarParams.has("eps")); CV_Assert(scalarParams.has("eps"));
layerParams.set("eps", float(scalarParams.get<double>("eps"))); float eps = float(scalarParams.get<double>("eps"));
layerParams.set("eps", eps);
layerParams.blobs.push_back(Blob());
layerParams.blobs.push_back(Blob());
if (tensorParams.count("weight")) if (tensorParams.count("weight"))
{ {
layerParams.set("has_weight", true); layerParams.set("has_weight", true);
layerParams.blobs[2] = tensorParams["weight"].second; layerParams.blobs.push_back(tensorParams["weight"].second);
} }
if (tensorParams.count("bias")) if (tensorParams.count("bias"))
{ {
layerParams.set("has_bias", true); layerParams.set("has_bias", true);
layerParams.blobs[3] = tensorParams["bias"].second; layerParams.blobs.push_back(tensorParams["bias"].second);
} }
curModule->modules.push_back(newModule); curModule->modules.push_back(newModule);
......
...@@ -215,6 +215,11 @@ TEST(Layer_Test_Reshape, squeeze) ...@@ -215,6 +215,11 @@ TEST(Layer_Test_Reshape, squeeze)
EXPECT_EQ(outVec[0].shape(), BlobShape(4, 3, 2)); EXPECT_EQ(outVec[0].shape(), BlobShape(4, 3, 2));
} }
// Runs the shared testLayerUsingCaffeModels helper on the "layer_batch_norm"
// test data with its second argument set to true; the call is wrapped in
// OCL_OFF.
TEST(Layer_Test_BatchNorm, Accuracy)
{
OCL_OFF(testLayerUsingCaffeModels("layer_batch_norm", true));
}
//template<typename XMat> //template<typename XMat>
//static void test_Layer_Concat() //static void test_Layer_Concat()
//{ //{
......
...@@ -135,6 +135,11 @@ TEST(Torch_Importer, run_deconv) ...@@ -135,6 +135,11 @@ TEST(Torch_Importer, run_deconv)
runTorchNet("net_deconv", "", false); runTorchNet("net_deconv", "", false);
} }
// Runs the shared runTorchNet helper on the "net_batch_norm" test data, with
// an empty second argument and false as the third, like the run_deconv test.
TEST(Torch_Importer, run_batch_norm)
{
runTorchNet("net_batch_norm", "", false);
}
#if defined(ENABLE_TORCH_ENET_TESTS) #if defined(ENABLE_TORCH_ENET_TESTS)
TEST(Torch_Importer, ENet_accuracy) TEST(Torch_Importer, ENet_accuracy)
......
...@@ -12,6 +12,9 @@ function fill_net(net) ...@@ -12,6 +12,9 @@ function fill_net(net)
if net.bias then if net.bias then
net.bias = torch.rand(net.bias:size()) net.bias = torch.rand(net.bias:size())
end end
if net.train then
net.train = 0
end
end end
function save(net, input, label) function save(net, input, label)
...@@ -68,4 +71,8 @@ save(net_concat, torch.rand(2, 6, 4, 3) - 0.5, 'net_concat') ...@@ -68,4 +71,8 @@ save(net_concat, torch.rand(2, 6, 4, 3) - 0.5, 'net_concat')
local net_deconv = nn.Sequential() local net_deconv = nn.Sequential()
net_deconv:add(nn.SpatialFullConvolution(3, 9, 4, 5, 1, 2, 0, 1, 0, 1)) net_deconv:add(nn.SpatialFullConvolution(3, 9, 4, 5, 1, 2, 0, 1, 0, 1))
save(net_deconv, torch.rand(2, 3, 4, 3) - 0.5, 'net_deconv') save(net_deconv, torch.rand(2, 3, 4, 3) - 0.5, 'net_deconv')
\ No newline at end of file
-- Test data for the batch-norm importer test: a Sequential net holding one
-- nn.SpatialBatchNormalization(3) module, saved as 'net_batch_norm' together
-- with a random 1x3x4x3 input shifted by -0.5.
local net_batch_norm = nn.Sequential()
net_batch_norm:add(nn.SpatialBatchNormalization(3))
save(net_batch_norm, torch.rand(1, 3, 4, 3) - 0.5, 'net_batch_norm')
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment