Commit d9910542 authored by Aleksandr Rybnikov's avatar Aleksandr Rybnikov

Added files for face detector sample

parent 91ef0b95
......@@ -8,6 +8,27 @@ ocv_check_dependencies(${OPENCV_DNN_SAMPLES_REQUIRED_DEPS})
if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
project(dnn_samples)
# Pre-trained face detector model used by the DNN samples.
# Model branch name: dnn_samples_face_detector_20170830
#
# DNN_FACE_DETECTOR_MODEL_COMMIT        - opencv_3rdparty commit the model is fetched from
# DNN_FACE_DETECTOR_MODEL_HASH          - hash handed to ocv_download for the model file
#                                         (32 hex digits; presumably MD5 - confirm against ocv_download)
# DNN_FACE_DETECTOR_MODEL_NAME          - file name of the model
# DNN_FACE_DETECTOR_MODEL_DOWNLOAD_DIR  - destination directory, located next to this list file
#
# NOTE(review): the destination is inside the source tree (CMAKE_CURRENT_LIST_DIR);
# kept unchanged here because the location is part of this block's observable behavior.
set(DNN_FACE_DETECTOR_MODEL_COMMIT "b2bfc75f6aea5b1f834ff0f0b865a7c18ff1459f")
set(DNN_FACE_DETECTOR_MODEL_HASH "afbb6037fd180e8d2acb3b58ca737b9e")
set(DNN_FACE_DETECTOR_MODEL_NAME "res10_300x300_ssd_iter_140000.caffemodel")
set(DNN_FACE_DETECTOR_MODEL_DOWNLOAD_DIR "${CMAKE_CURRENT_LIST_DIR}/face_detector")

# ocv_download is only available when this file is processed as part of the
# OpenCV build, so the download is skipped when the command does not exist.
if(COMMAND ocv_download)
  # Candidate base URLs are passed in this order: environment override, CMake
  # variable override, then the pinned opencv_3rdparty commit on GitHub.
  # Unset overrides expand to empty strings.
  ocv_download(FILENAME "${DNN_FACE_DETECTOR_MODEL_NAME}"
               HASH "${DNN_FACE_DETECTOR_MODEL_HASH}"
               URL
                 "$ENV{OPENCV_DNN_MODELS_URL}"
                 "${OPENCV_DNN_MODELS_URL}"
                 "https://raw.githubusercontent.com/opencv/opencv_3rdparty/${DNN_FACE_DETECTOR_MODEL_COMMIT}/"
               DESTINATION_DIR "${DNN_FACE_DETECTOR_MODEL_DOWNLOAD_DIR}"
               ID DNN_FACE_DETECTOR
               RELATIVE_URL
               STATUS res)
  # The status used to be ignored, so a failed download went unnoticed until
  # the sample was run. Report it, but keep configuring: the samples can still
  # be built and the model can be fetched manually.
  if(NOT res)
    message(WARNING "DNN samples: failed to download ${DNN_FACE_DETECTOR_MODEL_NAME} "
                    "to ${DNN_FACE_DETECTOR_MODEL_DOWNLOAD_DIR}; "
                    "the face detector sample will need the model to be provided manually.")
  endif()
endif()
ocv_include_directories("${OpenCV_SOURCE_DIR}/include")
ocv_include_modules_recurse(${OPENCV_DNN_SAMPLES_REQUIRED_DEPS})
......
res10_300x300_ssd_iter_140000.caffemodel
# Network input: a single blob named "data".
# Declared shape is 1 x 3 x 300 x 300 - presumably batch x channels x height x width
# (Caffe's usual blob layout; confirm), in line with the "300x300" in the model file name.
input: "data"
input_shape {
dim: 1
dim: 3
dim: 300
dim: 300
}
layer {
name: "data_bn"
type: "BatchNorm"
bottom: "data"
top: "data_bn"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "data_scale"
type: "Scale"
bottom: "data_bn"
top: "data_bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "conv1_h"
type: "Convolution"
bottom: "data_bn"
top: "conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
convolution_param {
num_output: 32
pad: 3
kernel_size: 7
stride: 2
weight_filler {
type: "msra"
variance_norm: FAN_OUT
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv1_bn_h"
type: "BatchNorm"
bottom: "conv1_h"
top: "conv1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "conv1_scale_h"
type: "Scale"
bottom: "conv1_h"
top: "conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "conv1_relu"
type: "ReLU"
bottom: "conv1_h"
top: "conv1_h"
}
layer {
name: "conv1_pool"
type: "Pooling"
bottom: "conv1_h"
top: "conv1_pool"
pooling_param {
kernel_size: 3
stride: 2
}
}
layer {
name: "layer_64_1_conv1_h"
type: "Convolution"
bottom: "conv1_pool"
top: "layer_64_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 32
bias_term: false
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_64_1_bn2_h"
type: "BatchNorm"
bottom: "layer_64_1_conv1_h"
top: "layer_64_1_conv1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_64_1_scale2_h"
type: "Scale"
bottom: "layer_64_1_conv1_h"
top: "layer_64_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_64_1_relu2"
type: "ReLU"
bottom: "layer_64_1_conv1_h"
top: "layer_64_1_conv1_h"
}
layer {
name: "layer_64_1_conv2_h"
type: "Convolution"
bottom: "layer_64_1_conv1_h"
top: "layer_64_1_conv2_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 32
bias_term: false
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_64_1_sum"
type: "Eltwise"
bottom: "layer_64_1_conv2_h"
bottom: "conv1_pool"
top: "layer_64_1_sum"
}
layer {
name: "layer_128_1_bn1_h"
type: "BatchNorm"
bottom: "layer_64_1_sum"
top: "layer_128_1_bn1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_128_1_scale1_h"
type: "Scale"
bottom: "layer_128_1_bn1_h"
top: "layer_128_1_bn1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_128_1_relu1"
type: "ReLU"
bottom: "layer_128_1_bn1_h"
top: "layer_128_1_bn1_h"
}
layer {
name: "layer_128_1_conv1_h"
type: "Convolution"
bottom: "layer_128_1_bn1_h"
top: "layer_128_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 128
bias_term: false
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_128_1_bn2"
type: "BatchNorm"
bottom: "layer_128_1_conv1_h"
top: "layer_128_1_conv1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_128_1_scale2"
type: "Scale"
bottom: "layer_128_1_conv1_h"
top: "layer_128_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_128_1_relu2"
type: "ReLU"
bottom: "layer_128_1_conv1_h"
top: "layer_128_1_conv1_h"
}
layer {
name: "layer_128_1_conv2"
type: "Convolution"
bottom: "layer_128_1_conv1_h"
top: "layer_128_1_conv2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 128
bias_term: false
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_128_1_conv_expand_h"
type: "Convolution"
bottom: "layer_128_1_bn1_h"
top: "layer_128_1_conv_expand_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 128
bias_term: false
pad: 0
kernel_size: 1
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_128_1_sum"
type: "Eltwise"
bottom: "layer_128_1_conv2"
bottom: "layer_128_1_conv_expand_h"
top: "layer_128_1_sum"
}
layer {
name: "layer_256_1_bn1"
type: "BatchNorm"
bottom: "layer_128_1_sum"
top: "layer_256_1_bn1"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_256_1_scale1"
type: "Scale"
bottom: "layer_256_1_bn1"
top: "layer_256_1_bn1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_256_1_relu1"
type: "ReLU"
bottom: "layer_256_1_bn1"
top: "layer_256_1_bn1"
}
layer {
name: "layer_256_1_conv1"
type: "Convolution"
bottom: "layer_256_1_bn1"
top: "layer_256_1_conv1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_256_1_bn2"
type: "BatchNorm"
bottom: "layer_256_1_conv1"
top: "layer_256_1_conv1"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_256_1_scale2"
type: "Scale"
bottom: "layer_256_1_conv1"
top: "layer_256_1_conv1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_256_1_relu2"
type: "ReLU"
bottom: "layer_256_1_conv1"
top: "layer_256_1_conv1"
}
layer {
name: "layer_256_1_conv2"
type: "Convolution"
bottom: "layer_256_1_conv1"
top: "layer_256_1_conv2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_256_1_conv_expand"
type: "Convolution"
bottom: "layer_256_1_bn1"
top: "layer_256_1_conv_expand"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_256_1_sum"
type: "Eltwise"
bottom: "layer_256_1_conv2"
bottom: "layer_256_1_conv_expand"
top: "layer_256_1_sum"
}
layer {
name: "layer_512_1_bn1"
type: "BatchNorm"
bottom: "layer_256_1_sum"
top: "layer_512_1_bn1"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_512_1_scale1"
type: "Scale"
bottom: "layer_512_1_bn1"
top: "layer_512_1_bn1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_512_1_relu1"
type: "ReLU"
bottom: "layer_512_1_bn1"
top: "layer_512_1_bn1"
}
# First 3x3 convolution of the "layer_512_1" unit: reads "layer_512_1_bn1" and
# produces 128 channels without a bias term.
# NOTE(review): the trailing "# 2" on stride looks like the value the field had before it
# was set to 1 - confirm. With stride 1 and pad 1 a 3x3 kernel keeps the spatial size.
# NOTE(review): bias_filler presumably has no effect while bias_term is false - confirm.
layer {
name: "layer_512_1_conv1_h"
type: "Convolution"
bottom: "layer_512_1_bn1"
top: "layer_512_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 128
bias_term: false
pad: 1
kernel_size: 3
stride: 1 # 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_512_1_bn2_h"
type: "BatchNorm"
bottom: "layer_512_1_conv1_h"
top: "layer_512_1_conv1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_512_1_scale2_h"
type: "Scale"
bottom: "layer_512_1_conv1_h"
top: "layer_512_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_512_1_relu2"
type: "ReLU"
bottom: "layer_512_1_conv1_h"
top: "layer_512_1_conv1_h"
}
# Second convolution of the "layer_512_1" unit: 256 channels, no bias term.
# Its output is summed with "layer_512_1_conv_expand_h" in "layer_512_1_sum".
# A 3x3 kernel with dilation 2 spans 5 input positions per axis, so pad 2 keeps
# the spatial size unchanged at stride 1.
# NOTE(review): the trailing "# 1" on pad presumably records the padding used
# before the dilation was introduced - confirm.
layer {
name: "layer_512_1_conv2_h"
type: "Convolution"
bottom: "layer_512_1_conv1_h"
top: "layer_512_1_conv2_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 2 # 1
kernel_size: 3
stride: 1
dilation: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
# Shortcut branch of the "layer_512_1" unit: a 1x1 convolution that maps
# "layer_512_1_bn1" to 256 channels, the same channel count as "layer_512_1_conv2_h",
# with which it is combined in the Eltwise layer "layer_512_1_sum".
# NOTE(review): the trailing "# 2" on stride looks like the previous value - confirm.
# Stride 1 here is the same stride used by both convolutions on the other branch.
layer {
name: "layer_512_1_conv_expand_h"
type: "Convolution"
bottom: "layer_512_1_bn1"
top: "layer_512_1_conv_expand_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 1 # 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_512_1_sum"
type: "Eltwise"
bottom: "layer_512_1_conv2_h"
bottom: "layer_512_1_conv_expand_h"
top: "layer_512_1_sum"
}
layer {
name: "last_bn_h"
type: "BatchNorm"
bottom: "layer_512_1_sum"
top: "layer_512_1_sum"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "last_scale_h"
type: "Scale"
bottom: "layer_512_1_sum"
top: "layer_512_1_sum"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "last_relu"
type: "ReLU"
bottom: "layer_512_1_sum"
top: "fc7"
}
layer {
name: "conv6_1_h"
type: "Convolution"
bottom: "fc7"
top: "conv6_1_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_1_relu"
type: "ReLU"
bottom: "conv6_1_h"
top: "conv6_1_h"
}
layer {
name: "conv6_2_h"
type: "Convolution"
bottom: "conv6_1_h"
top: "conv6_2_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_relu"
type: "ReLU"
bottom: "conv6_2_h"
top: "conv6_2_h"
}
layer {
name: "conv7_1_h"
type: "Convolution"
bottom: "conv6_2_h"
top: "conv7_1_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_1_relu"
type: "ReLU"
bottom: "conv7_1_h"
top: "conv7_1_h"
}
layer {
name: "conv7_2_h"
type: "Convolution"
bottom: "conv7_1_h"
top: "conv7_2_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_relu"
type: "ReLU"
bottom: "conv7_2_h"
top: "conv7_2_h"
}
layer {
name: "conv8_1_h"
type: "Convolution"
bottom: "conv7_2_h"
top: "conv8_1_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_1_relu"
type: "ReLU"
bottom: "conv8_1_h"
top: "conv8_1_h"
}
layer {
name: "conv8_2_h"
type: "Convolution"
bottom: "conv8_1_h"
top: "conv8_2_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_relu"
type: "ReLU"
bottom: "conv8_2_h"
top: "conv8_2_h"
}
layer {
name: "conv9_1_h"
type: "Convolution"
bottom: "conv8_2_h"
top: "conv9_1_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_1_relu"
type: "ReLU"
bottom: "conv9_1_h"
top: "conv9_1_h"
}
layer {
name: "conv9_2_h"
type: "Convolution"
bottom: "conv9_1_h"
top: "conv9_2_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_2_relu"
type: "ReLU"
bottom: "conv9_2_h"
top: "conv9_2_h"
}
# Normalize layer applied to "layer_256_1_bn1"; its output "conv4_3_norm" feeds the
# conv4_3_norm_mbox_loc, conv4_3_norm_mbox_conf and conv4_3_norm_mbox_priorbox layers.
# NOTE(review): across_spatial: false / channel_shared: false presumably mean that
# normalization is done separately at each spatial position and that every channel has
# its own scale - confirm against the SSD Caffe fork's NormalizeParameter.
# scale_filler sets a constant initial scale of 20; presumably only relevant when the
# scale is not loaded from a .caffemodel - confirm.
layer {
name: "conv4_3_norm"
type: "Normalize"
bottom: "layer_256_1_bn1"
top: "conv4_3_norm"
norm_param {
across_spatial: false
scale_filler {
type: "constant"
value: 20
}
channel_shared: false
}
}
layer {
name: "conv4_3_norm_mbox_loc"
type: "Convolution"
bottom: "conv4_3_norm"
top: "conv4_3_norm_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_3_norm_mbox_loc_perm"
type: "Permute"
bottom: "conv4_3_norm_mbox_loc"
top: "conv4_3_norm_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv4_3_norm_mbox_loc_flat"
type: "Flatten"
bottom: "conv4_3_norm_mbox_loc_perm"
top: "conv4_3_norm_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv4_3_norm_mbox_conf"
type: "Convolution"
bottom: "conv4_3_norm"
top: "conv4_3_norm_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 8 # 84
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_3_norm_mbox_conf_perm"
type: "Permute"
bottom: "conv4_3_norm_mbox_conf"
top: "conv4_3_norm_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv4_3_norm_mbox_conf_flat"
type: "Flatten"
bottom: "conv4_3_norm_mbox_conf_perm"
top: "conv4_3_norm_mbox_conf_flat"
flatten_param {
axis: 1
}
}
# Prior (default) boxes for the "conv4_3_norm" feature map; "data" is the second input.
# NOTE(review): with one min_size, one max_size and aspect_ratio 2 with flip enabled this
# presumably yields 4 priors per feature-map cell (ratios 1, 2 and 1/2 at min_size plus one
# box derived from min_size and max_size) - confirm. That count agrees with the sibling
# heads: conv4_3_norm_mbox_loc has num_output 16 (4 x 4) and conv4_3_norm_mbox_conf has
# num_output 8 (4 x 2).
# NOTE(review): step presumably is the distance between neighbouring prior centres in
# input-image pixels, and offset: 0.5 places centres in the middle of each cell - confirm.
layer {
name: "conv4_3_norm_mbox_priorbox"
type: "PriorBox"
bottom: "conv4_3_norm"
bottom: "data"
top: "conv4_3_norm_mbox_priorbox"
prior_box_param {
min_size: 30.0
max_size: 60.0
aspect_ratio: 2
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 8
offset: 0.5
}
}
layer {
name: "fc7_mbox_loc"
type: "Convolution"
bottom: "fc7"
top: "fc7_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc7_mbox_loc_perm"
type: "Permute"
bottom: "fc7_mbox_loc"
top: "fc7_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "fc7_mbox_loc_flat"
type: "Flatten"
bottom: "fc7_mbox_loc_perm"
top: "fc7_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "fc7_mbox_conf"
type: "Convolution"
bottom: "fc7"
top: "fc7_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 12 # 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc7_mbox_conf_perm"
type: "Permute"
bottom: "fc7_mbox_conf"
top: "fc7_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "fc7_mbox_conf_flat"
type: "Flatten"
bottom: "fc7_mbox_conf_perm"
top: "fc7_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "fc7_mbox_priorbox"
type: "PriorBox"
bottom: "fc7"
bottom: "data"
top: "fc7_mbox_priorbox"
prior_box_param {
min_size: 60.0
max_size: 111.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 16
offset: 0.5
}
}
layer {
name: "conv6_2_mbox_loc"
type: "Convolution"
bottom: "conv6_2_h"
top: "conv6_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_mbox_loc_perm"
type: "Permute"
bottom: "conv6_2_mbox_loc"
top: "conv6_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv6_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv6_2_mbox_loc_perm"
top: "conv6_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv6_2_mbox_conf"
type: "Convolution"
bottom: "conv6_2_h"
top: "conv6_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 12 # 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_mbox_conf_perm"
type: "Permute"
bottom: "conv6_2_mbox_conf"
top: "conv6_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv6_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv6_2_mbox_conf_perm"
top: "conv6_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv6_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv6_2_h"
bottom: "data"
top: "conv6_2_mbox_priorbox"
prior_box_param {
min_size: 111.0
max_size: 162.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 32
offset: 0.5
}
}
layer {
name: "conv7_2_mbox_loc"
type: "Convolution"
bottom: "conv7_2_h"
top: "conv7_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_mbox_loc_perm"
type: "Permute"
bottom: "conv7_2_mbox_loc"
top: "conv7_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv7_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv7_2_mbox_loc_perm"
top: "conv7_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv7_2_mbox_conf"
type: "Convolution"
bottom: "conv7_2_h"
top: "conv7_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 12 # 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_mbox_conf_perm"
type: "Permute"
bottom: "conv7_2_mbox_conf"
top: "conv7_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv7_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv7_2_mbox_conf_perm"
top: "conv7_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv7_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv7_2_h"
bottom: "data"
top: "conv7_2_mbox_priorbox"
prior_box_param {
min_size: 162.0
max_size: 213.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 64
offset: 0.5
}
}
layer {
name: "conv8_2_mbox_loc"
type: "Convolution"
bottom: "conv8_2_h"
top: "conv8_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_mbox_loc_perm"
type: "Permute"
bottom: "conv8_2_mbox_loc"
top: "conv8_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv8_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv8_2_mbox_loc_perm"
top: "conv8_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv8_2_mbox_conf"
type: "Convolution"
bottom: "conv8_2_h"
top: "conv8_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 8 # 84
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_mbox_conf_perm"
type: "Permute"
bottom: "conv8_2_mbox_conf"
top: "conv8_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv8_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv8_2_mbox_conf_perm"
top: "conv8_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv8_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv8_2_h"
bottom: "data"
top: "conv8_2_mbox_priorbox"
prior_box_param {
min_size: 213.0
max_size: 264.0
aspect_ratio: 2
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 100
offset: 0.5
}
}
layer {
name: "conv9_2_mbox_loc"
type: "Convolution"
bottom: "conv9_2_h"
top: "conv9_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_2_mbox_loc_perm"
type: "Permute"
bottom: "conv9_2_mbox_loc"
top: "conv9_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv9_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv9_2_mbox_loc_perm"
top: "conv9_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv9_2_mbox_conf"
type: "Convolution"
bottom: "conv9_2_h"
top: "conv9_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 8 # 84
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_2_mbox_conf_perm"
type: "Permute"
bottom: "conv9_2_mbox_conf"
top: "conv9_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv9_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv9_2_mbox_conf_perm"
top: "conv9_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv9_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv9_2_h"
bottom: "data"
top: "conv9_2_mbox_priorbox"
prior_box_param {
min_size: 264.0
max_size: 315.0
aspect_ratio: 2
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 300
offset: 0.5
}
}
layer {
name: "mbox_loc"
type: "Concat"
bottom: "conv4_3_norm_mbox_loc_flat"
bottom: "fc7_mbox_loc_flat"
bottom: "conv6_2_mbox_loc_flat"
bottom: "conv7_2_mbox_loc_flat"
bottom: "conv8_2_mbox_loc_flat"
bottom: "conv9_2_mbox_loc_flat"
top: "mbox_loc"
concat_param {
axis: 1
}
}
layer {
name: "mbox_conf"
type: "Concat"
bottom: "conv4_3_norm_mbox_conf_flat"
bottom: "fc7_mbox_conf_flat"
bottom: "conv6_2_mbox_conf_flat"
bottom: "conv7_2_mbox_conf_flat"
bottom: "conv8_2_mbox_conf_flat"
bottom: "conv9_2_mbox_conf_flat"
top: "mbox_conf"
concat_param {
axis: 1
}
}
layer {
name: "mbox_priorbox"
type: "Concat"
bottom: "conv4_3_norm_mbox_priorbox"
bottom: "fc7_mbox_priorbox"
bottom: "conv6_2_mbox_priorbox"
bottom: "conv7_2_mbox_priorbox"
bottom: "conv8_2_mbox_priorbox"
bottom: "conv9_2_mbox_priorbox"
top: "mbox_priorbox"
concat_param {
axis: 2
}
}
# Reshapes the concatenated confidence blob "mbox_conf" to three dimensions whose last
# dimension is 2, so that the following Softmax layer (axis: 2) runs over that dimension.
# The value 2 equals num_classes in the "detection_out" layer.
# In Caffe's Reshape layer, dim: 0 copies the corresponding input dimension and
# dim: -1 is inferred from the remaining element count.
layer {
name: "mbox_conf_reshape"
type: "Reshape"
bottom: "mbox_conf"
top: "mbox_conf_reshape"
reshape_param {
shape {
dim: 0
dim: -1
dim: 2
}
}
}
layer {
name: "mbox_conf_softmax"
type: "Softmax"
bottom: "mbox_conf_reshape"
top: "mbox_conf_softmax"
softmax_param {
axis: 2
}
}
layer {
name: "mbox_conf_flatten"
type: "Flatten"
bottom: "mbox_conf_softmax"
top: "mbox_conf_flatten"
flatten_param {
axis: 1
}
}
# Final layer of the network: takes the box regressions ("mbox_loc"), the flattened
# softmax scores ("mbox_conf_flatten") and the prior boxes ("mbox_priorbox") and
# produces the "detection_out" blob. Included only in the TEST phase.
# num_classes: 2 with background_label_id: 0 - this lines up with the two-entry
# labelmap.prototxt described in the training readme (label 0 background, label 1 face).
# NOTE(review): nms_threshold / top_k presumably control non-maximum suppression,
# keep_top_k the maximum number of detections kept per image, and confidence_threshold
# the minimum score considered - confirm against the SSD Caffe fork's
# DetectionOutputParameter. The low threshold (0.01) suggests callers are expected to
# apply their own, stricter score cut-off.
layer {
name: "detection_out"
type: "DetectionOutput"
bottom: "mbox_loc"
bottom: "mbox_conf_flatten"
bottom: "mbox_priorbox"
top: "detection_out"
include {
phase: TEST
}
detection_output_param {
num_classes: 2
share_location: true
background_label_id: 0
nms_param {
nms_threshold: 0.45
top_k: 400
}
code_type: CENTER_SIZE
keep_top_k: 200
confidence_threshold: 0.01
}
}
\ No newline at end of file
This is a brief description of the training process that was used to obtain res10_300x300_ssd_iter_140000.caffemodel.
The model was created with the SSD framework using a ResNet-10-like architecture as a backbone. The number of channels in the ResNet-10 convolution layers was significantly reduced (2x or 4x fewer channels).
The model was trained in the Caffe framework on a large dataset that is available online.
1. Prepare training tools
You need to use the "ssd" branch from this repository: https://github.com/weiliu89/caffe/tree/ssd . Check out this branch and build it (see the instructions in the repo's README).
2. Prepare training data.
The data preparation pipeline can be represented as:
(a)Download original face detection dataset -> (b)Convert annotation to the PASCAL VOC format -> (c)Create LMDB database with images + annotations for training
a) Find some datasets with face bounding box annotations. For some reasons I can't provide links here, but you can easily find them on your own. Also study the data. It may contain small or low-quality faces which can spoil the training process. Often there are special flags about object quality in the annotation. Remove such faces from the annotation (smaller than 16 pixels along at least one side, or blurred, or highly occluded, or something else).
b) The downloaded dataset will have some format of annotation. It may be one single file for all images, or separate file for each image or something else. But to train SSD in Caffe you need to convert annotation to PASCAL VOC format.
A PASCAL VOC annotation consists of an .xml file for each image. In this xml file all face bounding boxes should be listed as:
<annotation>
<size>
<width>300</width>
<height>300</height>
</size>
<object>
<name>face</name>
<difficult>0</difficult>
<bndbox>
<xmin>100</xmin>
<ymin>100</ymin>
<xmax>200</xmax>
<ymax>200</ymax>
</bndbox>
</object>
<object>
<name>face</name>
<difficult>0</difficult>
<bndbox>
<xmin>0</xmin>
<ymin>0</ymin>
<xmax>100</xmax>
<ymax>100</ymax>
</bndbox>
</object>
</annotation>
So, convert your dataset's annotation to the format above.
Also, you should create labelmap.prototxt file with the following content:
item {
name: "none_of_the_above"
label: 0
display_name: "background"
}
item {
name: "face"
label: 1
display_name: "face"
}
You need this file to establish the correspondence between the name of a class and its numeric label.
For the next step we also need a file where all our image-annotation file name pairs are listed. This file should contain lines similar to:
images_val/0.jpg annotations_val/0.jpg.xml
c) To create LMDB you need to use create_data.sh tool from caffe/data/VOC0712 Caffe's source code directory.
This script calls create_annoset.py internally, so check which arguments you need to pass to the script.
You need to prepare 2 LMDB databases: one for training images, one for validation images.
3. Train your detector
For training you need to have 3 files: train.prototxt, test.prototxt and solver.prototxt. You can find these files in the same directory as this readme.
You also need to edit train.prototxt and test.prototxt to replace the paths to the LMDB databases with the actual databases you've created in step 2.
Now everything is ready to launch the training process.
Execute the following lines in a terminal:
mkdir -p snapshot
mkdir -p log
/path_for_caffe_build_dir/tools/caffe train -solver="solver.prototxt" -gpu 0 2>&1 | tee -a log/log.log
And wait. It will take about 8 hours to finish the process.
After that you can use your .caffemodel from the snapshot/ subdirectory in the resnet_face_ssd_python.py sample.
\ No newline at end of file
# Caffe solver configuration used to train the face detector.
# Network definitions for the training and test phases.
train_net: "train.prototxt"
test_net: "test.prototxt"
# Evaluation: 2312 test iterations every 5000 training iterations. With batch_size: 1
# in test.prototxt's data layer that corresponds to 2312 validation samples per pass.
# test_initialization: true presumably also runs one evaluation before training starts - confirm.
test_iter: 2312
test_interval: 5000
test_initialization: true
# Learning-rate schedule: starts at 0.01; with the "multistep" policy the rate is
# presumably multiplied by gamma (0.1) at each stepvalue (80000 and 120000) - confirm
# against the Caffe solver documentation. Training ends at iteration 140000, which
# matches the "iter_140000" suffix of the published model file.
base_lr: 0.01
display: 10
lr_policy: "multistep"
max_iter: 140000
stepvalue: 80000
stepvalue: 120000
gamma: 0.1
# SGD with momentum and weight decay.
momentum: 0.9
weight_decay: 0.0005
# NOTE(review): average_loss presumably is the number of iterations the displayed loss
# is averaged over, and iter_size: 1 means no gradient accumulation - confirm.
average_loss: 500
iter_size: 1
type: "SGD"
solver_mode: GPU
random_seed: 0
debug_info: false
# A snapshot is written every 1000 iterations under snapshot/ (the readme creates this
# directory with "mkdir -p snapshot" before training).
snapshot: 1000
snapshot_prefix: "snapshot/res10_300x300_ssd"
# NOTE(review): eval_type / ap_version are not part of upstream Caffe's solver; presumably
# provided by the SSD fork to select detection evaluation with 11-point AP - confirm.
eval_type: "detection"
ap_version: "11point"
\ No newline at end of file
# Input layer of the test network: an AnnotatedData layer that produces the "data"
# and "label" blobs and is included only in the TEST phase.
layer {
name: "data"
type: "AnnotatedData"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
# NOTE(review): presumably per-channel mean values subtracted from the input image;
# the channel order is not visible here - confirm.
mean_value: 104
mean_value: 117
mean_value: 123
# Every image (prob: 1) is resized to 300 x 300 in WARP mode with linear interpolation.
resize_param {
prob: 1
resize_mode: WARP
height: 300
width: 300
interp_mode: LINEAR
}
emit_constraint {
emit_type: CENTER
}
}
data_param {
# Placeholder path: the readme asks you to replace it with the validation LMDB
# created during data preparation.
source: "val_lmdb/"
batch_size: 1
backend: LMDB
}
annotated_data_param {
# Class-name to label mapping file; its expected content is shown in the readme.
label_map_file: "labelmap.prototxt"
}
}
layer {
name: "data_bn"
type: "BatchNorm"
bottom: "data"
top: "data_bn"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "data_scale"
type: "Scale"
bottom: "data_bn"
top: "data_bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "conv1_h"
type: "Convolution"
bottom: "data_bn"
top: "conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
convolution_param {
num_output: 32
pad: 3
kernel_size: 7
stride: 2
weight_filler {
type: "msra"
variance_norm: FAN_OUT
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv1_bn_h"
type: "BatchNorm"
bottom: "conv1_h"
top: "conv1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "conv1_scale_h"
type: "Scale"
bottom: "conv1_h"
top: "conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "conv1_relu"
type: "ReLU"
bottom: "conv1_h"
top: "conv1_h"
}
layer {
name: "conv1_pool"
type: "Pooling"
bottom: "conv1_h"
top: "conv1_pool"
pooling_param {
kernel_size: 3
stride: 2
}
}
layer {
name: "layer_64_1_conv1_h"
type: "Convolution"
bottom: "conv1_pool"
top: "layer_64_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 32
bias_term: false
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_64_1_bn2_h"
type: "BatchNorm"
bottom: "layer_64_1_conv1_h"
top: "layer_64_1_conv1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_64_1_scale2_h"
type: "Scale"
bottom: "layer_64_1_conv1_h"
top: "layer_64_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_64_1_relu2"
type: "ReLU"
bottom: "layer_64_1_conv1_h"
top: "layer_64_1_conv1_h"
}
layer {
name: "layer_64_1_conv2_h"
type: "Convolution"
bottom: "layer_64_1_conv1_h"
top: "layer_64_1_conv2_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 32
bias_term: false
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_64_1_sum"
type: "Eltwise"
bottom: "layer_64_1_conv2_h"
bottom: "conv1_pool"
top: "layer_64_1_sum"
}
layer {
name: "layer_128_1_bn1_h"
type: "BatchNorm"
bottom: "layer_64_1_sum"
top: "layer_128_1_bn1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_128_1_scale1_h"
type: "Scale"
bottom: "layer_128_1_bn1_h"
top: "layer_128_1_bn1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_128_1_relu1"
type: "ReLU"
bottom: "layer_128_1_bn1_h"
top: "layer_128_1_bn1_h"
}
layer {
name: "layer_128_1_conv1_h"
type: "Convolution"
bottom: "layer_128_1_bn1_h"
top: "layer_128_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 128
bias_term: false
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_128_1_bn2"
type: "BatchNorm"
bottom: "layer_128_1_conv1_h"
top: "layer_128_1_conv1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_128_1_scale2"
type: "Scale"
bottom: "layer_128_1_conv1_h"
top: "layer_128_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_128_1_relu2"
type: "ReLU"
bottom: "layer_128_1_conv1_h"
top: "layer_128_1_conv1_h"
}
layer {
name: "layer_128_1_conv2"
type: "Convolution"
bottom: "layer_128_1_conv1_h"
top: "layer_128_1_conv2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 128
bias_term: false
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_128_1_conv_expand_h"
type: "Convolution"
bottom: "layer_128_1_bn1_h"
top: "layer_128_1_conv_expand_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 128
bias_term: false
pad: 0
kernel_size: 1
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_128_1_sum"
type: "Eltwise"
bottom: "layer_128_1_conv2"
bottom: "layer_128_1_conv_expand_h"
top: "layer_128_1_sum"
}
layer {
name: "layer_256_1_bn1"
type: "BatchNorm"
bottom: "layer_128_1_sum"
top: "layer_256_1_bn1"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_256_1_scale1"
type: "Scale"
bottom: "layer_256_1_bn1"
top: "layer_256_1_bn1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_256_1_relu1"
type: "ReLU"
bottom: "layer_256_1_bn1"
top: "layer_256_1_bn1"
}
layer {
name: "layer_256_1_conv1"
type: "Convolution"
bottom: "layer_256_1_bn1"
top: "layer_256_1_conv1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_256_1_bn2"
type: "BatchNorm"
bottom: "layer_256_1_conv1"
top: "layer_256_1_conv1"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_256_1_scale2"
type: "Scale"
bottom: "layer_256_1_conv1"
top: "layer_256_1_conv1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_256_1_relu2"
type: "ReLU"
bottom: "layer_256_1_conv1"
top: "layer_256_1_conv1"
}
layer {
name: "layer_256_1_conv2"
type: "Convolution"
bottom: "layer_256_1_conv1"
top: "layer_256_1_conv2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_256_1_conv_expand"
type: "Convolution"
bottom: "layer_256_1_bn1"
top: "layer_256_1_conv_expand"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_256_1_sum"
type: "Eltwise"
bottom: "layer_256_1_conv2"
bottom: "layer_256_1_conv_expand"
top: "layer_256_1_sum"
}
layer {
name: "layer_512_1_bn1"
type: "BatchNorm"
bottom: "layer_256_1_sum"
top: "layer_512_1_bn1"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_512_1_scale1"
type: "Scale"
bottom: "layer_512_1_bn1"
top: "layer_512_1_bn1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_512_1_relu1"
type: "ReLU"
bottom: "layer_512_1_bn1"
top: "layer_512_1_bn1"
}
# First 3x3 convolution of the layer_512_1 residual branch; reads the
# BatchNorm/Scale/ReLU output layer_512_1_bn1 and produces 128 channels.
# The trailing "# 2" on `stride` is an author note; presumably it records the
# stride of the reference architecture (2), lowered to 1 here so this stage
# does not downsample -- NOTE(review): confirm with the model author.
# bias_term is false, so the bias_filler below should have no effect
# (no bias blob is created) -- NOTE(review): confirm; kept as in the original.
layer {
name: "layer_512_1_conv1_h"
type: "Convolution"
bottom: "layer_512_1_bn1"
top: "layer_512_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 128
bias_term: false
pad: 1
kernel_size: 3
stride: 1 # 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_512_1_bn2_h"
type: "BatchNorm"
bottom: "layer_512_1_conv1_h"
top: "layer_512_1_conv1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_512_1_scale2_h"
type: "Scale"
bottom: "layer_512_1_conv1_h"
top: "layer_512_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_512_1_relu2"
type: "ReLU"
bottom: "layer_512_1_conv1_h"
top: "layer_512_1_conv1_h"
}
# Second convolution of the layer_512_1 residual branch: 3x3 kernel with
# dilation 2, 256 output channels.
# With dilation 2 the kernel spans 2*(3-1)+1 = 5 input pixels, so pad 2 at
# stride 1 keeps the spatial size unchanged; this lets the output be added
# element-wise to layer_512_1_conv_expand_h in layer_512_1_sum.
# The trailing "# 1" on `pad` is an author note; presumably the padding used
# before dilation was introduced -- NOTE(review): confirm.
# bias_term is false, so the bias_filler below should have no effect
# -- NOTE(review): confirm; kept as in the original.
layer {
name: "layer_512_1_conv2_h"
type: "Convolution"
bottom: "layer_512_1_conv1_h"
top: "layer_512_1_conv2_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 2 # 1
kernel_size: 3
stride: 1
dilation: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
# Projection shortcut of the layer_512_1 residual block: a 1x1 convolution
# that maps layer_512_1_bn1 to 256 channels, the same channel count as the
# main branch output layer_512_1_conv2_h, so both can be merged by
# layer_512_1_sum.
# Stride is 1, matching the stride-1 main branch. The trailing "# 2" is an
# author note; presumably the reference architecture's stride
# -- NOTE(review): confirm.
# bias_term is false, so the bias_filler below should have no effect
# -- NOTE(review): confirm; kept as in the original.
layer {
name: "layer_512_1_conv_expand_h"
type: "Convolution"
bottom: "layer_512_1_bn1"
top: "layer_512_1_conv_expand_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 1 # 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_512_1_sum"
type: "Eltwise"
bottom: "layer_512_1_conv2_h"
bottom: "layer_512_1_conv_expand_h"
top: "layer_512_1_sum"
}
layer {
name: "last_bn_h"
type: "BatchNorm"
bottom: "layer_512_1_sum"
top: "layer_512_1_sum"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "last_scale_h"
type: "Scale"
bottom: "layer_512_1_sum"
top: "layer_512_1_sum"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
# Final activation of the backbone. Unlike the other ReLU layers in this file
# it is NOT in-place: it reads layer_512_1_sum (after last_bn_h/last_scale_h)
# and writes a new blob named "fc7".
# "fc7" is the feature map consumed by conv6_1_h and by the fc7_mbox_loc,
# fc7_mbox_conf and fc7_mbox_priorbox detection-head layers.
layer {
name: "last_relu"
type: "ReLU"
bottom: "layer_512_1_sum"
top: "fc7"
}
layer {
name: "conv6_1_h"
type: "Convolution"
bottom: "fc7"
top: "conv6_1_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_1_relu"
type: "ReLU"
bottom: "conv6_1_h"
top: "conv6_1_h"
}
layer {
name: "conv6_2_h"
type: "Convolution"
bottom: "conv6_1_h"
top: "conv6_2_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_relu"
type: "ReLU"
bottom: "conv6_2_h"
top: "conv6_2_h"
}
layer {
name: "conv7_1_h"
type: "Convolution"
bottom: "conv6_2_h"
top: "conv7_1_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_1_relu"
type: "ReLU"
bottom: "conv7_1_h"
top: "conv7_1_h"
}
layer {
name: "conv7_2_h"
type: "Convolution"
bottom: "conv7_1_h"
top: "conv7_2_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_relu"
type: "ReLU"
bottom: "conv7_2_h"
top: "conv7_2_h"
}
layer {
name: "conv8_1_h"
type: "Convolution"
bottom: "conv7_2_h"
top: "conv8_1_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_1_relu"
type: "ReLU"
bottom: "conv8_1_h"
top: "conv8_1_h"
}
layer {
name: "conv8_2_h"
type: "Convolution"
bottom: "conv8_1_h"
top: "conv8_2_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_relu"
type: "ReLU"
bottom: "conv8_2_h"
top: "conv8_2_h"
}
layer {
name: "conv9_1_h"
type: "Convolution"
bottom: "conv8_2_h"
top: "conv9_1_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_1_relu"
type: "ReLU"
bottom: "conv9_1_h"
top: "conv9_1_h"
}
layer {
name: "conv9_2_h"
type: "Convolution"
bottom: "conv9_1_h"
top: "conv9_2_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_2_relu"
type: "ReLU"
bottom: "conv9_2_h"
top: "conv9_2_h"
}
# Normalization of the first detection feature map. Despite the "conv4_3"
# name, the input is layer_256_1_bn1, i.e. the blob written in place by
# layer_256_1_bn1 / layer_256_1_scale1 / layer_256_1_relu1.
# The output feeds conv4_3_norm_mbox_loc, conv4_3_norm_mbox_conf and
# conv4_3_norm_mbox_priorbox.
# The learnable scale is initialised to the constant 20.
# NOTE(review): in the SSD Caffe fork, across_spatial: false presumably means
# the norm is computed per spatial position across channels, and
# channel_shared: false presumably means one scale value per channel
# -- confirm against the Normalize layer implementation in use.
layer {
name: "conv4_3_norm"
type: "Normalize"
bottom: "layer_256_1_bn1"
top: "conv4_3_norm"
norm_param {
across_spatial: false
scale_filler {
type: "constant"
value: 20
}
channel_shared: false
}
}
layer {
name: "conv4_3_norm_mbox_loc"
type: "Convolution"
bottom: "conv4_3_norm"
top: "conv4_3_norm_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_3_norm_mbox_loc_perm"
type: "Permute"
bottom: "conv4_3_norm_mbox_loc"
top: "conv4_3_norm_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv4_3_norm_mbox_loc_flat"
type: "Flatten"
bottom: "conv4_3_norm_mbox_loc_perm"
top: "conv4_3_norm_mbox_loc_flat"
flatten_param {
axis: 1
}
}
# Class-confidence head for the conv4_3_norm feature map: a 3x3, pad 1,
# stride 1 convolution (spatial size preserved) with 8 output channels.
# The sibling localisation head conv4_3_norm_mbox_loc emits 16 channels.
# After Permute/Flatten/Concat these scores are reshaped to (..., 2) by
# mbox_conf_reshape, and detection_out declares num_classes: 2, so 8 is
# presumably 4 prior boxes per position x 2 classes -- NOTE(review): confirm
# the prior count against conv4_3_norm_mbox_priorbox.
# The trailing "# 84" is an author note; presumably the value used by a
# 21-class configuration (4 x 21) -- NOTE(review): confirm.
layer {
name: "conv4_3_norm_mbox_conf"
type: "Convolution"
bottom: "conv4_3_norm"
top: "conv4_3_norm_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 8 # 84
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_3_norm_mbox_conf_perm"
type: "Permute"
bottom: "conv4_3_norm_mbox_conf"
top: "conv4_3_norm_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv4_3_norm_mbox_conf_flat"
type: "Flatten"
bottom: "conv4_3_norm_mbox_conf_perm"
top: "conv4_3_norm_mbox_conf_flat"
flatten_param {
axis: 1
}
}
# Prior (anchor) box generator for the conv4_3_norm feature map.
# Takes two bottoms: the feature map (conv4_3_norm) and the network input
# (data); presumably only their spatial sizes are used -- NOTE(review): confirm.
# Box sizes are given by min_size 30 / max_size 60 with aspect ratio 2 and
# flip: true; boxes are not clipped to the image (clip: false).
# The four variance values (0.1, 0.1, 0.2, 0.2) are concatenated with the
# priors into mbox_priorbox and consumed by detection_out.
# step: 8 and offset: 0.5 set the prior-centre grid; presumably 8 input pixels
# per feature-map cell with centres at the cell middle -- NOTE(review): confirm.
# NOTE(review): the number of priors per position must agree with the channel
# counts of conv4_3_norm_mbox_loc (16) and conv4_3_norm_mbox_conf (8).
layer {
name: "conv4_3_norm_mbox_priorbox"
type: "PriorBox"
bottom: "conv4_3_norm"
bottom: "data"
top: "conv4_3_norm_mbox_priorbox"
prior_box_param {
min_size: 30.0
max_size: 60.0
aspect_ratio: 2
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 8
offset: 0.5
}
}
layer {
name: "fc7_mbox_loc"
type: "Convolution"
bottom: "fc7"
top: "fc7_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc7_mbox_loc_perm"
type: "Permute"
bottom: "fc7_mbox_loc"
top: "fc7_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "fc7_mbox_loc_flat"
type: "Flatten"
bottom: "fc7_mbox_loc_perm"
top: "fc7_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "fc7_mbox_conf"
type: "Convolution"
bottom: "fc7"
top: "fc7_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 12 # 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc7_mbox_conf_perm"
type: "Permute"
bottom: "fc7_mbox_conf"
top: "fc7_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "fc7_mbox_conf_flat"
type: "Flatten"
bottom: "fc7_mbox_conf_perm"
top: "fc7_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "fc7_mbox_priorbox"
type: "PriorBox"
bottom: "fc7"
bottom: "data"
top: "fc7_mbox_priorbox"
prior_box_param {
min_size: 60.0
max_size: 111.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 16
offset: 0.5
}
}
layer {
name: "conv6_2_mbox_loc"
type: "Convolution"
bottom: "conv6_2_h"
top: "conv6_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_mbox_loc_perm"
type: "Permute"
bottom: "conv6_2_mbox_loc"
top: "conv6_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv6_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv6_2_mbox_loc_perm"
top: "conv6_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv6_2_mbox_conf"
type: "Convolution"
bottom: "conv6_2_h"
top: "conv6_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 12 # 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_mbox_conf_perm"
type: "Permute"
bottom: "conv6_2_mbox_conf"
top: "conv6_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv6_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv6_2_mbox_conf_perm"
top: "conv6_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv6_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv6_2_h"
bottom: "data"
top: "conv6_2_mbox_priorbox"
prior_box_param {
min_size: 111.0
max_size: 162.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 32
offset: 0.5
}
}
layer {
name: "conv7_2_mbox_loc"
type: "Convolution"
bottom: "conv7_2_h"
top: "conv7_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_mbox_loc_perm"
type: "Permute"
bottom: "conv7_2_mbox_loc"
top: "conv7_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv7_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv7_2_mbox_loc_perm"
top: "conv7_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv7_2_mbox_conf"
type: "Convolution"
bottom: "conv7_2_h"
top: "conv7_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 12 # 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_mbox_conf_perm"
type: "Permute"
bottom: "conv7_2_mbox_conf"
top: "conv7_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv7_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv7_2_mbox_conf_perm"
top: "conv7_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv7_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv7_2_h"
bottom: "data"
top: "conv7_2_mbox_priorbox"
prior_box_param {
min_size: 162.0
max_size: 213.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 64
offset: 0.5
}
}
layer {
name: "conv8_2_mbox_loc"
type: "Convolution"
bottom: "conv8_2_h"
top: "conv8_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_mbox_loc_perm"
type: "Permute"
bottom: "conv8_2_mbox_loc"
top: "conv8_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv8_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv8_2_mbox_loc_perm"
top: "conv8_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv8_2_mbox_conf"
type: "Convolution"
bottom: "conv8_2_h"
top: "conv8_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 8 # 84
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_mbox_conf_perm"
type: "Permute"
bottom: "conv8_2_mbox_conf"
top: "conv8_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv8_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv8_2_mbox_conf_perm"
top: "conv8_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv8_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv8_2_h"
bottom: "data"
top: "conv8_2_mbox_priorbox"
prior_box_param {
min_size: 213.0
max_size: 264.0
aspect_ratio: 2
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 100
offset: 0.5
}
}
layer {
name: "conv9_2_mbox_loc"
type: "Convolution"
bottom: "conv9_2_h"
top: "conv9_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_2_mbox_loc_perm"
type: "Permute"
bottom: "conv9_2_mbox_loc"
top: "conv9_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv9_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv9_2_mbox_loc_perm"
top: "conv9_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv9_2_mbox_conf"
type: "Convolution"
bottom: "conv9_2_h"
top: "conv9_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 8 # 84
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_2_mbox_conf_perm"
type: "Permute"
bottom: "conv9_2_mbox_conf"
top: "conv9_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv9_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv9_2_mbox_conf_perm"
top: "conv9_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv9_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv9_2_h"
bottom: "data"
top: "conv9_2_mbox_priorbox"
prior_box_param {
min_size: 264.0
max_size: 315.0
aspect_ratio: 2
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 300
offset: 0.5
}
}
layer {
name: "mbox_loc"
type: "Concat"
bottom: "conv4_3_norm_mbox_loc_flat"
bottom: "fc7_mbox_loc_flat"
bottom: "conv6_2_mbox_loc_flat"
bottom: "conv7_2_mbox_loc_flat"
bottom: "conv8_2_mbox_loc_flat"
bottom: "conv9_2_mbox_loc_flat"
top: "mbox_loc"
concat_param {
axis: 1
}
}
layer {
name: "mbox_conf"
type: "Concat"
bottom: "conv4_3_norm_mbox_conf_flat"
bottom: "fc7_mbox_conf_flat"
bottom: "conv6_2_mbox_conf_flat"
bottom: "conv7_2_mbox_conf_flat"
bottom: "conv8_2_mbox_conf_flat"
bottom: "conv9_2_mbox_conf_flat"
top: "mbox_conf"
concat_param {
axis: 1
}
}
layer {
name: "mbox_priorbox"
type: "Concat"
bottom: "conv4_3_norm_mbox_priorbox"
bottom: "fc7_mbox_priorbox"
bottom: "conv6_2_mbox_priorbox"
bottom: "conv7_2_mbox_priorbox"
bottom: "conv8_2_mbox_priorbox"
bottom: "conv9_2_mbox_priorbox"
top: "mbox_priorbox"
concat_param {
axis: 2
}
}
# Reshapes the concatenated confidence vector mbox_conf into a 3-D blob whose
# last dimension is 2, so that mbox_conf_softmax (axis: 2) normalises over it.
# The value 2 matches num_classes: 2 in detection_out.
# In Caffe's Reshape layer, dim 0 copies the corresponding bottom dimension
# and dim -1 is inferred from the remaining element count
# -- NOTE(review): confirm against the ReshapeParameter documentation.
layer {
name: "mbox_conf_reshape"
type: "Reshape"
bottom: "mbox_conf"
top: "mbox_conf_reshape"
reshape_param {
shape {
dim: 0
dim: -1
dim: 2
}
}
}
layer {
name: "mbox_conf_softmax"
type: "Softmax"
bottom: "mbox_conf_reshape"
top: "mbox_conf_softmax"
softmax_param {
axis: 2
}
}
layer {
name: "mbox_conf_flatten"
type: "Flatten"
bottom: "mbox_conf_softmax"
top: "mbox_conf_flatten"
flatten_param {
axis: 1
}
}
# Final detection layer, included only in the TEST phase.
# Inputs, in order: box regressions (mbox_loc), flattened softmax scores
# (mbox_conf_flatten) and prior boxes with variances (mbox_priorbox).
# Configuration: 2 classes with label 0 as background, box regressions shared
# across classes (share_location: true), boxes encoded as CENTER_SIZE.
# Post-processing values: confidence_threshold 0.01, nms_threshold 0.45,
# nms top_k 400, keep_top_k 200.
# NOTE(review): presumably scores below the confidence threshold are dropped,
# then NMS keeps at most top_k candidates, then at most keep_top_k detections
# are emitted per image -- confirm the exact order in the DetectionOutput
# implementation in use.
layer {
name: "detection_out"
type: "DetectionOutput"
bottom: "mbox_loc"
bottom: "mbox_conf_flatten"
bottom: "mbox_priorbox"
top: "detection_out"
include {
phase: TEST
}
detection_output_param {
num_classes: 2
share_location: true
background_label_id: 0
nms_param {
nms_threshold: 0.45
top_k: 400
}
code_type: CENTER_SIZE
keep_top_k: 200
confidence_threshold: 0.01
}
}
# Evaluation layer, included only in the TEST phase: compares the detections
# from detection_out with the ground-truth "label" blob.
# "label" is not produced by any layer in this part of the file; it must come
# from a data layer defined earlier in the same network.
# num_classes and background_label_id match the values in detection_out.
# NOTE(review): overlap_threshold: 0.5 is presumably the minimum overlap for a
# detection to count as a match, and evaluate_difficult_gt: false presumably
# excludes ground truth marked "difficult" -- confirm against the
# DetectionEvaluate implementation in use.
layer {
name: "detection_eval"
type: "DetectionEvaluate"
bottom: "detection_out"
bottom: "label"
top: "detection_eval"
include {
phase: TEST
}
detection_evaluate_param {
num_classes: 2
background_label_id: 0
overlap_threshold: 0.5
evaluate_difficult_gt: false
}
}
\ No newline at end of file
# Training input layer (TRAIN phase only). Produces two blobs: "data" (images)
# and "label" (annotations), read from the LMDB at "train_lmdb/" in batches
# of 16.
# NOTE(review): "train_lmdb/" is a relative path; presumably resolved against
# the working directory of the training process -- confirm before running.
layer {
name: "data"
type: "AnnotatedData"
top: "data"
top: "label"
include {
phase: TRAIN
}
# Image preprocessing and augmentation applied to each sample.
transform_param {
mirror: true
# Per-channel mean values; presumably subtracted from the input in B, G, R
# order -- NOTE(review): confirm channel order.
mean_value: 104
mean_value: 117
mean_value: 123
# Every sample (prob: 1) is warped to the 300x300 network input size.
# Several interp_mode entries are listed; presumably one is picked at random
# per sample -- NOTE(review): confirm.
resize_param {
prob: 1
resize_mode: WARP
height: 300
width: 300
interp_mode: LINEAR
interp_mode: AREA
interp_mode: NEAREST
interp_mode: CUBIC
interp_mode: LANCZOS4
}
# NOTE(review): presumably an annotation is kept after cropping only if its
# centre lies inside the crop -- confirm.
emit_constraint {
emit_type: CENTER
}
# Photometric distortions: brightness, contrast, hue and saturation are each
# enabled with probability 0.5; random_order_prob is 0.0.
distort_param {
brightness_prob: 0.5
brightness_delta: 32
contrast_prob: 0.5
contrast_lower: 0.5
contrast_upper: 1.5
hue_prob: 0.5
hue_delta: 18
saturation_prob: 0.5
saturation_lower: 0.5
saturation_upper: 1.5
random_order_prob: 0.0
}
# Expansion augmentation: applied with probability 0.5, ratio up to 4.0.
expand_param {
prob: 0.5
max_expand_ratio: 4.0
}
}
data_param {
source: "train_lmdb/"
batch_size: 16
backend: LMDB
}
# Seven crop samplers. The first has no sampler or constraint block; the next
# five share the same scale range (0.3-1.0) and aspect-ratio range (0.5-2.0)
# and differ only in min_jaccard_overlap (0.1, 0.3, 0.5, 0.7, 0.9); the last
# uses max_jaccard_overlap: 1.0 instead.
annotated_data_param {
batch_sampler {
max_sample: 1
max_trials: 1
}
batch_sampler {
sampler {
min_scale: 0.3
max_scale: 1.0
min_aspect_ratio: 0.5
max_aspect_ratio: 2.0
}
sample_constraint {
min_jaccard_overlap: 0.1
}
max_sample: 1
max_trials: 50
}
batch_sampler {
sampler {
min_scale: 0.3
max_scale: 1.0
min_aspect_ratio: 0.5
max_aspect_ratio: 2.0
}
sample_constraint {
min_jaccard_overlap: 0.3
}
max_sample: 1
max_trials: 50
}
batch_sampler {
sampler {
min_scale: 0.3
max_scale: 1.0
min_aspect_ratio: 0.5
max_aspect_ratio: 2.0
}
sample_constraint {
min_jaccard_overlap: 0.5
}
max_sample: 1
max_trials: 50
}
batch_sampler {
sampler {
min_scale: 0.3
max_scale: 1.0
min_aspect_ratio: 0.5
max_aspect_ratio: 2.0
}
sample_constraint {
min_jaccard_overlap: 0.7
}
max_sample: 1
max_trials: 50
}
batch_sampler {
sampler {
min_scale: 0.3
max_scale: 1.0
min_aspect_ratio: 0.5
max_aspect_ratio: 2.0
}
sample_constraint {
min_jaccard_overlap: 0.9
}
max_sample: 1
max_trials: 50
}
batch_sampler {
sampler {
min_scale: 0.3
max_scale: 1.0
min_aspect_ratio: 0.5
max_aspect_ratio: 2.0
}
sample_constraint {
max_jaccard_overlap: 1.0
}
max_sample: 1
max_trials: 50
}
}
}
layer {
name: "data_bn"
type: "BatchNorm"
bottom: "data"
top: "data_bn"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "data_scale"
type: "Scale"
bottom: "data_bn"
top: "data_bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "conv1_h"
type: "Convolution"
bottom: "data_bn"
top: "conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
convolution_param {
num_output: 32
pad: 3
kernel_size: 7
stride: 2
weight_filler {
type: "msra"
variance_norm: FAN_OUT
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv1_bn_h"
type: "BatchNorm"
bottom: "conv1_h"
top: "conv1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "conv1_scale_h"
type: "Scale"
bottom: "conv1_h"
top: "conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "conv1_relu"
type: "ReLU"
bottom: "conv1_h"
top: "conv1_h"
}
layer {
name: "conv1_pool"
type: "Pooling"
bottom: "conv1_h"
top: "conv1_pool"
pooling_param {
kernel_size: 3
stride: 2
}
}
layer {
name: "layer_64_1_conv1_h"
type: "Convolution"
bottom: "conv1_pool"
top: "layer_64_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 32
bias_term: false
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_64_1_bn2_h"
type: "BatchNorm"
bottom: "layer_64_1_conv1_h"
top: "layer_64_1_conv1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_64_1_scale2_h"
type: "Scale"
bottom: "layer_64_1_conv1_h"
top: "layer_64_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_64_1_relu2"
type: "ReLU"
bottom: "layer_64_1_conv1_h"
top: "layer_64_1_conv1_h"
}
layer {
name: "layer_64_1_conv2_h"
type: "Convolution"
bottom: "layer_64_1_conv1_h"
top: "layer_64_1_conv2_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 32
bias_term: false
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_64_1_sum"
type: "Eltwise"
bottom: "layer_64_1_conv2_h"
bottom: "conv1_pool"
top: "layer_64_1_sum"
}
layer {
name: "layer_128_1_bn1_h"
type: "BatchNorm"
bottom: "layer_64_1_sum"
top: "layer_128_1_bn1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_128_1_scale1_h"
type: "Scale"
bottom: "layer_128_1_bn1_h"
top: "layer_128_1_bn1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_128_1_relu1"
type: "ReLU"
bottom: "layer_128_1_bn1_h"
top: "layer_128_1_bn1_h"
}
layer {
name: "layer_128_1_conv1_h"
type: "Convolution"
bottom: "layer_128_1_bn1_h"
top: "layer_128_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 128
bias_term: false
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_128_1_bn2"
type: "BatchNorm"
bottom: "layer_128_1_conv1_h"
top: "layer_128_1_conv1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_128_1_scale2"
type: "Scale"
bottom: "layer_128_1_conv1_h"
top: "layer_128_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_128_1_relu2"
type: "ReLU"
bottom: "layer_128_1_conv1_h"
top: "layer_128_1_conv1_h"
}
layer {
name: "layer_128_1_conv2"
type: "Convolution"
bottom: "layer_128_1_conv1_h"
top: "layer_128_1_conv2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 128
bias_term: false
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_128_1_conv_expand_h"
type: "Convolution"
bottom: "layer_128_1_bn1_h"
top: "layer_128_1_conv_expand_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 128
bias_term: false
pad: 0
kernel_size: 1
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_128_1_sum"
type: "Eltwise"
bottom: "layer_128_1_conv2"
bottom: "layer_128_1_conv_expand_h"
top: "layer_128_1_sum"
}
layer {
name: "layer_256_1_bn1"
type: "BatchNorm"
bottom: "layer_128_1_sum"
top: "layer_256_1_bn1"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_256_1_scale1"
type: "Scale"
bottom: "layer_256_1_bn1"
top: "layer_256_1_bn1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_256_1_relu1"
type: "ReLU"
bottom: "layer_256_1_bn1"
top: "layer_256_1_bn1"
}
layer {
name: "layer_256_1_conv1"
type: "Convolution"
bottom: "layer_256_1_bn1"
top: "layer_256_1_conv1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_256_1_bn2"
type: "BatchNorm"
bottom: "layer_256_1_conv1"
top: "layer_256_1_conv1"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_256_1_scale2"
type: "Scale"
bottom: "layer_256_1_conv1"
top: "layer_256_1_conv1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_256_1_relu2"
type: "ReLU"
bottom: "layer_256_1_conv1"
top: "layer_256_1_conv1"
}
layer {
name: "layer_256_1_conv2"
type: "Convolution"
bottom: "layer_256_1_conv1"
top: "layer_256_1_conv2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_256_1_conv_expand"
type: "Convolution"
bottom: "layer_256_1_bn1"
top: "layer_256_1_conv_expand"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_256_1_sum"
type: "Eltwise"
bottom: "layer_256_1_conv2"
bottom: "layer_256_1_conv_expand"
top: "layer_256_1_sum"
}
layer {
name: "layer_512_1_bn1"
type: "BatchNorm"
bottom: "layer_256_1_sum"
top: "layer_512_1_bn1"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_512_1_scale1"
type: "Scale"
bottom: "layer_512_1_bn1"
top: "layer_512_1_bn1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_512_1_relu1"
type: "ReLU"
bottom: "layer_512_1_bn1"
top: "layer_512_1_bn1"
}
# First 3x3 convolution of the layer_512_1 residual branch; reads the
# BatchNorm/Scale/ReLU output layer_512_1_bn1 and produces 128 channels.
# The trailing "# 2" on `stride` is an author note; presumably it records the
# stride of the reference architecture (2), lowered to 1 here so this stage
# does not downsample -- NOTE(review): confirm with the model author.
# bias_term is false, so the bias_filler below should have no effect
# (no bias blob is created) -- NOTE(review): confirm; kept as in the original.
layer {
name: "layer_512_1_conv1_h"
type: "Convolution"
bottom: "layer_512_1_bn1"
top: "layer_512_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 128
bias_term: false
pad: 1
kernel_size: 3
stride: 1 # 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_512_1_bn2_h"
type: "BatchNorm"
bottom: "layer_512_1_conv1_h"
top: "layer_512_1_conv1_h"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "layer_512_1_scale2_h"
type: "Scale"
bottom: "layer_512_1_conv1_h"
top: "layer_512_1_conv1_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "layer_512_1_relu2"
type: "ReLU"
bottom: "layer_512_1_conv1_h"
top: "layer_512_1_conv1_h"
}
# Second convolution of the layer_512_1 residual branch: 3x3 kernel with
# dilation 2, 256 output channels.
# With dilation 2 the kernel spans 2*(3-1)+1 = 5 input pixels, so pad 2 at
# stride 1 keeps the spatial size unchanged; this lets the output be added
# element-wise to layer_512_1_conv_expand_h in layer_512_1_sum.
# The trailing "# 1" on `pad` is an author note; presumably the padding used
# before dilation was introduced -- NOTE(review): confirm.
# bias_term is false, so the bias_filler below should have no effect
# -- NOTE(review): confirm; kept as in the original.
layer {
name: "layer_512_1_conv2_h"
type: "Convolution"
bottom: "layer_512_1_conv1_h"
top: "layer_512_1_conv2_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 2 # 1
kernel_size: 3
stride: 1
dilation: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
# Projection shortcut of the layer_512_1 residual block: a 1x1 convolution
# that maps layer_512_1_bn1 to 256 channels, the same channel count as the
# main branch output layer_512_1_conv2_h, so both can be merged by
# layer_512_1_sum.
# Stride is 1, matching the stride-1 main branch. The trailing "# 2" is an
# author note; presumably the reference architecture's stride
# -- NOTE(review): confirm.
# bias_term is false, so the bias_filler below should have no effect
# -- NOTE(review): confirm; kept as in the original.
layer {
name: "layer_512_1_conv_expand_h"
type: "Convolution"
bottom: "layer_512_1_bn1"
top: "layer_512_1_conv_expand_h"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 1 # 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "layer_512_1_sum"
type: "Eltwise"
bottom: "layer_512_1_conv2_h"
bottom: "layer_512_1_conv_expand_h"
top: "layer_512_1_sum"
}
layer {
name: "last_bn_h"
type: "BatchNorm"
bottom: "layer_512_1_sum"
top: "layer_512_1_sum"
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
param {
lr_mult: 0.0
}
}
layer {
name: "last_scale_h"
type: "Scale"
bottom: "layer_512_1_sum"
top: "layer_512_1_sum"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 1.0
}
scale_param {
bias_term: true
}
}
layer {
name: "last_relu"
type: "ReLU"
bottom: "layer_512_1_sum"
top: "fc7"
}
layer {
name: "conv6_1_h"
type: "Convolution"
bottom: "fc7"
top: "conv6_1_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_1_relu"
type: "ReLU"
bottom: "conv6_1_h"
top: "conv6_1_h"
}
layer {
name: "conv6_2_h"
type: "Convolution"
bottom: "conv6_1_h"
top: "conv6_2_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_relu"
type: "ReLU"
bottom: "conv6_2_h"
top: "conv6_2_h"
}
layer {
name: "conv7_1_h"
type: "Convolution"
bottom: "conv6_2_h"
top: "conv7_1_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_1_relu"
type: "ReLU"
bottom: "conv7_1_h"
top: "conv7_1_h"
}
layer {
name: "conv7_2_h"
type: "Convolution"
bottom: "conv7_1_h"
top: "conv7_2_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_relu"
type: "ReLU"
bottom: "conv7_2_h"
top: "conv7_2_h"
}
layer {
name: "conv8_1_h"
type: "Convolution"
bottom: "conv7_2_h"
top: "conv8_1_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_1_relu"
type: "ReLU"
bottom: "conv8_1_h"
top: "conv8_1_h"
}
layer {
name: "conv8_2_h"
type: "Convolution"
bottom: "conv8_1_h"
top: "conv8_2_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_relu"
type: "ReLU"
bottom: "conv8_2_h"
top: "conv8_2_h"
}
layer {
name: "conv9_1_h"
type: "Convolution"
bottom: "conv8_2_h"
top: "conv9_1_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_1_relu"
type: "ReLU"
bottom: "conv9_1_h"
top: "conv9_1_h"
}
layer {
name: "conv9_2_h"
type: "Convolution"
bottom: "conv9_1_h"
top: "conv9_2_h"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_2_relu"
type: "ReLU"
bottom: "conv9_2_h"
top: "conv9_2_h"
}
# Normalize layer applied to layer_256_1_bn1. Its output, conv4_3_norm, feeds
# the first detection head (conv4_3_norm_mbox_loc / _conf / _priorbox).
layer {
name: "conv4_3_norm"
type: "Normalize"
bottom: "layer_256_1_bn1"
top: "conv4_3_norm"
norm_param {
# NOTE(review): across_spatial false presumably means the norm is computed
# per spatial position rather than over the whole map - confirm.
across_spatial: false
# The scale is initialised to the constant 20.
scale_filler {
type: "constant"
value: 20
}
# NOTE(review): channel_shared false presumably gives every channel its own
# scale value - confirm.
channel_shared: false
}
}
layer {
name: "conv4_3_norm_mbox_loc"
type: "Convolution"
bottom: "conv4_3_norm"
top: "conv4_3_norm_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_3_norm_mbox_loc_perm"
type: "Permute"
bottom: "conv4_3_norm_mbox_loc"
top: "conv4_3_norm_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv4_3_norm_mbox_loc_flat"
type: "Flatten"
bottom: "conv4_3_norm_mbox_loc_perm"
top: "conv4_3_norm_mbox_loc_flat"
flatten_param {
axis: 1
}
}
# Confidence (classification) head for the conv4_3_norm feature map: a 3x3
# convolution with pad 1 and stride 1, so the spatial size is preserved.
layer {
name: "conv4_3_norm_mbox_conf"
type: "Convolution"
bottom: "conv4_3_norm"
top: "conv4_3_norm_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
# NOTE(review): 8 is presumably 4 priors x 2 classes: the matching loc head
# (conv4_3_norm_mbox_loc) has 16 outputs and mbox_loss sets num_classes: 2.
# The trailing "# 84" would then be the 21-class value (4 x 21) - confirm.
num_output: 8 # 84
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_3_norm_mbox_conf_perm"
type: "Permute"
bottom: "conv4_3_norm_mbox_conf"
top: "conv4_3_norm_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv4_3_norm_mbox_conf_flat"
type: "Flatten"
bottom: "conv4_3_norm_mbox_conf_perm"
top: "conv4_3_norm_mbox_conf_flat"
flatten_param {
axis: 1
}
}
# Prior (anchor) box generator for the conv4_3_norm feature map. It takes the
# feature map and the network input blob "data" as bottoms.
# NOTE(review): "data" is presumably used only for its image dimensions -
# confirm against the PriorBox layer implementation.
layer {
name: "conv4_3_norm_mbox_priorbox"
type: "PriorBox"
bottom: "conv4_3_norm"
bottom: "data"
top: "conv4_3_norm_mbox_priorbox"
prior_box_param {
# Box sizes for this head; the next head (fc7_mbox_priorbox) starts at
# min_size 60, i.e. where this one's max_size ends.
min_size: 30.0
max_size: 60.0
aspect_ratio: 2
flip: true
clip: false
# Four variance values.
# NOTE(review): presumably ordered as two centre terms then two size terms,
# matching code_type: CENTER_SIZE in mbox_loss - confirm.
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
# NOTE(review): step is presumably the feature-map stride in input pixels
# (8 here, doubling to 16/32/64 for the following heads) - confirm.
step: 8
offset: 0.5
}
}
layer {
name: "fc7_mbox_loc"
type: "Convolution"
bottom: "fc7"
top: "fc7_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc7_mbox_loc_perm"
type: "Permute"
bottom: "fc7_mbox_loc"
top: "fc7_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "fc7_mbox_loc_flat"
type: "Flatten"
bottom: "fc7_mbox_loc_perm"
top: "fc7_mbox_loc_flat"
flatten_param {
axis: 1
}
}
# Confidence (classification) head for the fc7 feature map: a 3x3 convolution
# with pad 1 and stride 1, so the spatial size is preserved.
layer {
name: "fc7_mbox_conf"
type: "Convolution"
bottom: "fc7"
top: "fc7_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
# NOTE(review): 12 is presumably 6 priors x 2 classes: the matching loc head
# (fc7_mbox_loc) has 24 outputs and mbox_loss sets num_classes: 2. The
# trailing "# 126" would then be the 21-class value (6 x 21) - confirm.
num_output: 12 # 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc7_mbox_conf_perm"
type: "Permute"
bottom: "fc7_mbox_conf"
top: "fc7_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "fc7_mbox_conf_flat"
type: "Flatten"
bottom: "fc7_mbox_conf_perm"
top: "fc7_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "fc7_mbox_priorbox"
type: "PriorBox"
bottom: "fc7"
bottom: "data"
top: "fc7_mbox_priorbox"
prior_box_param {
min_size: 60.0
max_size: 111.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 16
offset: 0.5
}
}
layer {
name: "conv6_2_mbox_loc"
type: "Convolution"
bottom: "conv6_2_h"
top: "conv6_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_mbox_loc_perm"
type: "Permute"
bottom: "conv6_2_mbox_loc"
top: "conv6_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv6_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv6_2_mbox_loc_perm"
top: "conv6_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
# Confidence (classification) head for the conv6_2_h feature map: a 3x3
# convolution with pad 1 and stride 1, so the spatial size is preserved.
layer {
name: "conv6_2_mbox_conf"
type: "Convolution"
bottom: "conv6_2_h"
top: "conv6_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
# NOTE(review): 12 is presumably 6 priors x 2 classes: the matching loc head
# (conv6_2_mbox_loc) has 24 outputs and mbox_loss sets num_classes: 2. The
# trailing "# 126" would then be the 21-class value (6 x 21) - confirm.
num_output: 12 # 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_mbox_conf_perm"
type: "Permute"
bottom: "conv6_2_mbox_conf"
top: "conv6_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv6_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv6_2_mbox_conf_perm"
top: "conv6_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv6_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv6_2_h"
bottom: "data"
top: "conv6_2_mbox_priorbox"
prior_box_param {
min_size: 111.0
max_size: 162.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 32
offset: 0.5
}
}
layer {
name: "conv7_2_mbox_loc"
type: "Convolution"
bottom: "conv7_2_h"
top: "conv7_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_mbox_loc_perm"
type: "Permute"
bottom: "conv7_2_mbox_loc"
top: "conv7_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv7_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv7_2_mbox_loc_perm"
top: "conv7_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
# Confidence (classification) head for the conv7_2_h feature map: a 3x3
# convolution with pad 1 and stride 1, so the spatial size is preserved.
layer {
name: "conv7_2_mbox_conf"
type: "Convolution"
bottom: "conv7_2_h"
top: "conv7_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
# NOTE(review): 12 is presumably 6 priors x 2 classes: the matching loc head
# (conv7_2_mbox_loc) has 24 outputs and mbox_loss sets num_classes: 2. The
# trailing "# 126" would then be the 21-class value (6 x 21) - confirm.
num_output: 12 # 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_mbox_conf_perm"
type: "Permute"
bottom: "conv7_2_mbox_conf"
top: "conv7_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv7_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv7_2_mbox_conf_perm"
top: "conv7_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv7_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv7_2_h"
bottom: "data"
top: "conv7_2_mbox_priorbox"
prior_box_param {
min_size: 162.0
max_size: 213.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 64
offset: 0.5
}
}
layer {
name: "conv8_2_mbox_loc"
type: "Convolution"
bottom: "conv8_2_h"
top: "conv8_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_mbox_loc_perm"
type: "Permute"
bottom: "conv8_2_mbox_loc"
top: "conv8_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv8_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv8_2_mbox_loc_perm"
top: "conv8_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
# Confidence (classification) head for the conv8_2_h feature map: a 3x3
# convolution with pad 1 and stride 1, so the spatial size is preserved.
layer {
name: "conv8_2_mbox_conf"
type: "Convolution"
bottom: "conv8_2_h"
top: "conv8_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
# NOTE(review): 8 is presumably 4 priors x 2 classes: the matching loc head
# (conv8_2_mbox_loc) has 16 outputs and mbox_loss sets num_classes: 2. The
# trailing "# 84" would then be the 21-class value (4 x 21) - confirm.
num_output: 8 # 84
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_mbox_conf_perm"
type: "Permute"
bottom: "conv8_2_mbox_conf"
top: "conv8_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv8_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv8_2_mbox_conf_perm"
top: "conv8_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv8_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv8_2_h"
bottom: "data"
top: "conv8_2_mbox_priorbox"
prior_box_param {
min_size: 213.0
max_size: 264.0
aspect_ratio: 2
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 100
offset: 0.5
}
}
layer {
name: "conv9_2_mbox_loc"
type: "Convolution"
bottom: "conv9_2_h"
top: "conv9_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_2_mbox_loc_perm"
type: "Permute"
bottom: "conv9_2_mbox_loc"
top: "conv9_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv9_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv9_2_mbox_loc_perm"
top: "conv9_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
# Confidence (classification) head for the conv9_2_h feature map: a 3x3
# convolution with pad 1 and stride 1, so the spatial size is preserved.
layer {
name: "conv9_2_mbox_conf"
type: "Convolution"
bottom: "conv9_2_h"
top: "conv9_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
# NOTE(review): 8 is presumably 4 priors x 2 classes: the matching loc head
# (conv9_2_mbox_loc) has 16 outputs and mbox_loss sets num_classes: 2. The
# trailing "# 84" would then be the 21-class value (4 x 21) - confirm.
num_output: 8 # 84
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_2_mbox_conf_perm"
type: "Permute"
bottom: "conv9_2_mbox_conf"
top: "conv9_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv9_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv9_2_mbox_conf_perm"
top: "conv9_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv9_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv9_2_h"
bottom: "data"
top: "conv9_2_mbox_priorbox"
prior_box_param {
min_size: 264.0
max_size: 315.0
aspect_ratio: 2
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 300
offset: 0.5
}
}
# Joins the flattened box-regression outputs of all six detection heads into a
# single blob, "mbox_loc", by concatenating along axis 1. The bottom order here
# matches the order used by the mbox_conf and mbox_priorbox Concat layers.
layer {
name: "mbox_loc"
type: "Concat"
bottom: "conv4_3_norm_mbox_loc_flat"
bottom: "fc7_mbox_loc_flat"
bottom: "conv6_2_mbox_loc_flat"
bottom: "conv7_2_mbox_loc_flat"
bottom: "conv8_2_mbox_loc_flat"
bottom: "conv9_2_mbox_loc_flat"
top: "mbox_loc"
concat_param {
axis: 1
}
}
# Joins the flattened class-confidence outputs of all six detection heads into
# a single blob, "mbox_conf", by concatenating along axis 1. The bottom order
# here matches the order used by the mbox_loc and mbox_priorbox Concat layers.
layer {
name: "mbox_conf"
type: "Concat"
bottom: "conv4_3_norm_mbox_conf_flat"
bottom: "fc7_mbox_conf_flat"
bottom: "conv6_2_mbox_conf_flat"
bottom: "conv7_2_mbox_conf_flat"
bottom: "conv8_2_mbox_conf_flat"
bottom: "conv9_2_mbox_conf_flat"
top: "mbox_conf"
concat_param {
axis: 1
}
}
# Joins the prior boxes generated for all six detection heads into a single
# blob, "mbox_priorbox". The bottom order matches the mbox_loc and mbox_conf
# Concat layers.
layer {
name: "mbox_priorbox"
type: "Concat"
bottom: "conv4_3_norm_mbox_priorbox"
bottom: "fc7_mbox_priorbox"
bottom: "conv6_2_mbox_priorbox"
bottom: "conv7_2_mbox_priorbox"
bottom: "conv8_2_mbox_priorbox"
bottom: "conv9_2_mbox_priorbox"
top: "mbox_priorbox"
concat_param {
# Unlike mbox_loc and mbox_conf (axis 1), the prior boxes are concatenated
# along axis 2.
axis: 2
}
}
# Training loss for the detector. It combines the concatenated box regressions
# (mbox_loc), class confidences (mbox_conf) and prior boxes (mbox_priorbox)
# with the ground-truth "label" blob.
# NOTE(review): no layer in the visible part of this file produces "label";
# presumably it comes from a training data layer - confirm.
layer {
name: "mbox_loss"
type: "MultiBoxLoss"
bottom: "mbox_loc"
bottom: "mbox_conf"
bottom: "mbox_priorbox"
bottom: "label"
top: "mbox_loss"
# This layer is only instantiated in the TRAIN phase.
include {
phase: TRAIN
}
# One flag per bottom, in the order listed above: gradients flow back to
# mbox_loc and mbox_conf, but not to mbox_priorbox or label.
propagate_down: true
propagate_down: true
propagate_down: false
propagate_down: false
loss_param {
normalization: VALID
}
multibox_loss_param {
loc_loss_type: SMOOTH_L1
conf_loss_type: SOFTMAX
loc_weight: 1.0
# Two classes, with class id 0 reserved for background (see
# background_label_id below).
# NOTE(review): the trailing "# 21" is presumably the class count of the
# configuration this file was derived from - confirm.
num_classes: 2 # 21
share_location: true
match_type: PER_PREDICTION
overlap_threshold: 0.5
use_prior_for_matching: true
background_label_id: 0
use_difficult_gt: true
neg_pos_ratio: 3.0
neg_overlap: 0.5
code_type: CENTER_SIZE
ignore_cross_boundary_bbox: false
mining_type: MAX_NEGATIVE
}
}
\ No newline at end of file
# Webcam face-detection demo built on OpenCV's dnn module.
#
# Loads a Caffe SSD face detector, reads frames from camera 0, draws a labelled
# box around every detection whose confidence exceeds the threshold, and prints
# the inference time of each frame. Press any key in the window to quit.
import numpy as np
import argparse
import os
import sys
try:
    import cv2 as cv
except ImportError:
    raise ImportError('Can\'t find OpenCV Python module. If you\'ve built it from sources without installation, '
                      'configure environment variable PYTHONPATH to "opencv_build_dir/lib" directory (with "python3" subdirectory if required)')
from cv2 import dnn

# Size the frame is resized to before it is passed to the network.
inWidth = 300
inHeight = 300
# Default minimum confidence for a detection to be drawn.
confThreshold = 0.5

# Default model files, relative to the current working directory.
prototxt = 'face_detector/deploy.prototxt'
caffemodel = 'face_detector/res10_300x300_ssd_iter_140000.caffemodel'

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Detect faces in the stream of camera 0 with a Caffe SSD network.')
    parser.add_argument('--proto', default=prototxt,
                        help='path to the network definition (.prototxt)')
    parser.add_argument('--model', default=caffemodel,
                        help='path to the trained weights (.caffemodel)')
    parser.add_argument('--thr', type=float, default=confThreshold,
                        help='minimum confidence of a detection to be shown')
    args = parser.parse_args()

    # Fail early with a readable message instead of an error raised from
    # inside the model loader.
    for path in (args.proto, args.model):
        if not os.path.isfile(path):
            sys.exit('Model file not found: %s' % path)

    net = dnn.readNetFromCaffe(args.proto, args.model)

    cap = cv.VideoCapture(0)
    if not cap.isOpened():
        sys.exit('Can\'t open camera 0')

    try:
        while True:
            ret, frame = cap.read()
            # A failed grab (camera unplugged, stream ended) yields no frame;
            # stop instead of crashing on frame.shape.
            if not ret or frame is None:
                break

            cols = frame.shape[1]
            rows = frame.shape[0]

            # The frame is resized up front so the blob is built from an image
            # that already has the network size; (104, 177, 123) is passed as
            # the per-channel mean argument of blobFromImage.
            net.setInput(dnn.blobFromImage(cv.resize(frame, (inWidth, inHeight)),
                                           1.0, (inWidth, inHeight), (104., 177., 123.)))
            detections = net.forward()

            perf_stats = net.getPerfProfile()
            print('Inference time, ms: %.2f' % (perf_stats[0] / cv.getTickFrequency() * 1000))

            # Each detections[0, 0, i] row holds the confidence at index 2 and
            # the box corners at indices 3..6; the corners are scaled by the
            # frame width/height to get pixel coordinates.
            for i in range(detections.shape[2]):
                confidence = detections[0, 0, i, 2]
                if confidence <= args.thr:
                    continue

                left = int(detections[0, 0, i, 3] * cols)
                top = int(detections[0, 0, i, 4] * rows)
                right = int(detections[0, 0, i, 5] * cols)
                bottom = int(detections[0, 0, i, 6] * rows)

                cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0))

                # Black text on a filled white box anchored at the first corner.
                label = "face: %.4f" % confidence
                labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                cv.rectangle(frame, (left, top - labelSize[1]),
                             (left + labelSize[0], top + baseLine),
                             (255, 255, 255), cv.FILLED)
                cv.putText(frame, label, (left, top),
                           cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

            cv.imshow("detections", frame)
            # waitKey returns -1 when no key was pressed within the timeout.
            if cv.waitKey(1) != -1:
                break
    finally:
        # Always free the camera and close the preview window.
        cap.release()
        cv.destroyAllWindows()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment