Commit 43f889ae authored by Lubov Batanina, committed by Alexander Alekhin

Merge pull request #12519 from l-bat:l-bat/onnx_parser

Support asymmetric padding in pooling layer (#12519)

* Add Inception_V1 support in ONNX

* Add asymmetric padding in OpenCL and Inference Engine

* Refactoring
parent 76d4aa0c
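
Reviewer note (not part of the commit): the new per-side pads travel as ordinary LayerParams entries (`pad_t`, `pad_l`, `pad_b`, `pad_r`). A minimal sketch of driving them directly, with invented kernel/stride/pad values:

```cpp
// Sketch only: exercises the per-side padding parameters added by this PR.
#include <opencv2/dnn.hpp>
#include <iostream>

int main()
{
    cv::dnn::LayerParams lp;
    lp.name = "pool1";
    lp.type = "Pooling";
    lp.set("pool", "max");
    lp.set("kernel_h", 3);  lp.set("kernel_w", 3);
    lp.set("stride_h", 2);  lp.set("stride_w", 2);
    // All four per-side values must be present, otherwise the parser
    // falls back to the legacy symmetric "pad"/"pad_h"/"pad_w".
    lp.set("pad_t", 0);  lp.set("pad_l", 0);
    lp.set("pad_b", 1);  lp.set("pad_r", 1);

    cv::Ptr<cv::dnn::PoolingLayer> pool = cv::dnn::PoolingLayer::create(lp);
    std::cout << pool->pad_t << " " << pool->pad_l << " "
              << pool->pad_b << " " << pool->pad_r << std::endl;  // 0 0 1 1
    return 0;
}
```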
@@ -234,7 +234,9 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     {
     public:
         int type;
-        Size kernel, stride, pad;
+        Size kernel, stride;
+        int pad_l, pad_t, pad_r, pad_b;
+        CV_DEPRECATED Size pad;
         bool globalPooling;
         bool computeMaxIdx;
         String padMode;
......
@@ -64,10 +64,17 @@ public:
     BaseConvolutionLayerImpl(const LayerParams &params)
     {
         setParamsFrom(params);
-        getConvolutionKernelParams(params, kernel.height, kernel.width, pad.height,
-                                   pad.width, stride.height, stride.width, dilation.height,
+        int pad_t = 0, pad_l = 0, pad_r = 0, pad_b = 0;
+        getConvolutionKernelParams(params, kernel.height, kernel.width, pad_t,
+                                   pad_l, pad_b, pad_r, stride.height, stride.width, dilation.height,
                                    dilation.width, padMode);
+        if (pad_t != pad_b || pad_l != pad_r)
+            CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");
+        pad.width = pad_l;
+        pad.height = pad_t;
+
         numOutput = params.get<int>("num_output");
         int ngroups = params.get<int>("group", 1);
@@ -100,8 +107,18 @@ public:
         }
         Size outSize = Size(outputs[0].size[3], outputs[0].size[2]);
+        int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width;
         getConvPoolPaddings(Size(input.size[3], input.size[2]), outSize,
-                            kernel, stride, padMode, dilation, pad);
+                            kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r);
+        if (pad_t != pad_b || pad_l != pad_r)
+            CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");
+        pad.width = pad_l;
+        pad.height = pad_t;
     }
bool hasBias() const
@@ -1156,9 +1173,17 @@ public:
         std::vector<Mat> inputs, outputs;
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
+        int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width;
         getConvPoolPaddings(Size(outputs[0].size[3], outputs[0].size[2]),
                             Size(inputs[0].size[3], inputs[0].size[2]),
-                            kernel, stride, padMode, dilation, pad);
+                            kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r);
+        if (pad_t != pad_b || pad_l != pad_r)
+            CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");
+        pad.width = pad_l;
+        pad.height = pad_t;
     }
class MatMulInvoker : public ParallelLoopBody
......
@@ -118,9 +118,19 @@ void getKernelSize(const LayerParams &params, int &kernelH, int &kernelW)
     CV_Assert(kernelH > 0 && kernelW > 0);
 }
 
-void getStrideAndPadding(const LayerParams &params, int &padH, int &padW, int &strideH, int &strideW, cv::String& padMode)
+void getStrideAndPadding(const LayerParams &params, int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode)
 {
-    util::getParameter(params, "pad", "pad", padH, padW, true, 0);
+    if (params.has("pad_l") && params.has("pad_t") && params.has("pad_r") && params.has("pad_b")) {
+        padT = params.get<int>("pad_t");
+        padL = params.get<int>("pad_l");
+        padB = params.get<int>("pad_b");
+        padR = params.get<int>("pad_r");
+    }
+    else {
+        util::getParameter(params, "pad", "pad", padT, padL, true, 0);
+        padB = padT;
+        padR = padL;
+    }
     util::getParameter(params, "stride", "stride", strideH, strideW, true, 1);
 
     padMode = "";
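
Reviewer note: the explicit per-side parameters win only when all four are present; otherwise the legacy symmetric values are mirrored onto both sides. A hypothetical stand-alone illustration of the two branches (`resolvePads` is invented, not OpenCV API):

```cpp
#include <iostream>

struct Pads { int t, l, b, r; };

// Invented helper mirroring the branch above: explicit per-side pads,
// else the symmetric fallback duplicated onto bottom/right.
Pads resolvePads(bool hasPerSide, int t, int l, int b, int r, int padH, int padW)
{
    if (hasPerSide)
        return { t, l, b, r };
    return { padH, padW, padH, padW };  // padB = padT, padR = padL
}

int main()
{
    Pads a = resolvePads(true, 0, 0, 1, 1, 0, 0);   // explicit -> 0 0 1 1
    Pads b = resolvePads(false, 0, 0, 0, 0, 2, 2);  // legacy pad=2 -> 2 2 2 2
    std::cout << a.t << a.l << a.b << a.r << " "
              << b.t << b.l << b.b << b.r << std::endl;  // 0011 2222
    return 0;
}
```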
@@ -129,15 +139,15 @@ void getStrideAndPadding(const LayerParams &params, int &padH, int &padW, int &strideH, int &strideW, cv::String& padMode)
         padMode = params.get<String>("pad_mode");
     }
 
-    CV_Assert(padH >= 0 && padW >= 0 && strideH > 0 && strideW > 0);
+    CV_Assert(padT >= 0 && padL >= 0 && padB >= 0 && padR >= 0 && strideH > 0 && strideW > 0);
 }
 }
 
 void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling,
-                            int &padH, int &padW, int &strideH, int &strideW, cv::String &padMode)
+                            int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String &padMode)
 {
-    util::getStrideAndPadding(params, padH, padW, strideH, strideW, padMode);
+    util::getStrideAndPadding(params, padT, padL, padB, padR, strideH, strideW, padMode);
 
     globalPooling = params.has("global_pooling") &&
                     params.get<bool>("global_pooling");
@@ -148,9 +158,9 @@ void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling,
         {
             CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified");
         }
-        if (padH != 0 || padW != 0 || strideH != 1 || strideW != 1)
+        if (padT != 0 || padL != 0 || padB != 0 || padR != 0 || strideH != 1 || strideW != 1)
         {
-            CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pad_h and pad_w must be = 0, and stride_h and stride_w must be = 1");
+            CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0, and stride_h and stride_w must be = 1");
         }
     }
     else
@@ -159,12 +169,11 @@ void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling,
     }
 }
 
-void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW,
+void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR,
                                 int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String &padMode)
 {
     util::getKernelSize(params, kernelH, kernelW);
-    util::getStrideAndPadding(params, padH, padW, strideH, strideW, padMode);
+    util::getStrideAndPadding(params, padT, padL, padB, padR, strideH, strideW, padMode);
     util::getParameter(params, "dilation", "dilation", dilationH, dilationW, true, 1);
 
     CV_Assert(dilationH > 0 && dilationW > 0);
@@ -201,11 +210,11 @@ void getConvPoolOutParams(const Size& inp, const Size &kernel,
 void getConvPoolPaddings(const Size& inp, const Size& out,
                          const Size &kernel, const Size &stride,
-                         const String &padMode, const Size &dilation, Size &pad)
+                         const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR)
 {
     if (padMode == "VALID")
     {
-        pad = cv::Size(0,0);
+        padT = padL = padB = padR = 0;
     }
     else if (padMode == "SAME")
     {
@@ -213,7 +222,8 @@ void getConvPoolPaddings(const Size& inp, const Size& out,
         int Pw = std::max(0, (out.width - 1) * stride.width + (dilation.width * (kernel.width - 1) + 1) - inp.width);
         // For odd values of total padding, add more padding at the 'right'
         // side of the given dimension.
-        pad = cv::Size(Pw / 2, Ph / 2);
+        padT = padB = Ph / 2;
+        padL = padR = Pw / 2;
     }
 }
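
Reviewer note, a worked example of the SAME branch for one dimension (numbers invented): input height 5, kernel 3, stride 2, dilation 1 gives out = ceil(5/2) = 3, so Ph = max(0, (3-1)*2 + (1*(3-1)+1) - 5) = 2 and padT = padB = 1. A self-contained check:

```cpp
#include <algorithm>
#include <iostream>

// Same formula as the hunk above, for a single dimension.
int totalSamePad(int inp, int out, int kernel, int stride, int dilation)
{
    return std::max(0, (out - 1) * stride + (dilation * (kernel - 1) + 1) - inp);
}

int main()
{
    int Ph = totalSamePad(5, 3, 3, 2, 1);
    std::cout << "total=" << Ph
              << " top=" << Ph / 2 << " bottom=" << Ph / 2 << std::endl;
    // prints: total=2 top=1 bottom=1
    // Note: as written, an odd total is floor-divided on both sides.
    return 0;
}
```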
......
@@ -60,19 +60,20 @@ namespace cv
 namespace dnn
 {
-void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW,
+void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR,
                                 int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String& padMode);
 
 void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling,
-                            int &padH, int &padW, int &strideH, int &strideW, cv::String& padMode);
+                            int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode);
 
 void getConvPoolOutParams(const Size& inp, const Size &kernel,
                           const Size &stride, const String &padMode,
                           const Size &dilation, Size& out);
 
 void getConvPoolPaddings(const Size& inp, const Size& out,
                          const Size &kernel, const Size &stride,
-                         const String &padMode, const Size &dilation, Size &pad);
+                         const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR);
 }
 }
......
This diff is collapsed.
@@ -345,7 +345,7 @@ struct OCL4DNNPoolConfig
 {
     OCL4DNNPoolConfig() :
         kernel(1, 1),
-        pad(0, 0),
+        pad_l(0), pad_t(0), pad_r(0), pad_b(0),
         stride(1, 1),
         dilation(1, 1),
         channels(0),
@@ -358,7 +358,7 @@ struct OCL4DNNPoolConfig
     MatShape in_shape;
     MatShape out_shape;
     Size kernel;
-    Size pad;
+    int pad_l, pad_t, pad_r, pad_b;
     Size stride;
     Size dilation;
@@ -381,7 +381,6 @@ class OCL4DNNPool
                  UMat& top_mask);
 private:
     // Pooling parameters
-    std::vector<int32_t> pad_;
     std::vector<int32_t> stride_;
     std::vector<int32_t> kernel_shape_;
     std::vector<int32_t> im_in_shape_;
@@ -394,8 +393,10 @@ class OCL4DNNPool
     int32_t kernel_w_;
     int32_t stride_h_;
     int32_t stride_w_;
-    int32_t pad_h_;
-    int32_t pad_w_;
+    int32_t pad_t_;
+    int32_t pad_l_;
+    int32_t pad_b_;
+    int32_t pad_r_;
     int32_t height_;
     int32_t width_;
     int32_t pooled_height_;
......
@@ -62,7 +62,6 @@ OCL4DNNPool<Dtype>::OCL4DNNPool(OCL4DNNPoolConfig config)
     for (int i = 0; i < spatial_dims; ++i)
     {
         kernel_shape_.push_back(i == 0 ? config.kernel.height : config.kernel.width);
-        pad_.push_back(i == 0 ? config.pad.height : config.pad.width);
         stride_.push_back(i == 0 ? config.stride.height : config.stride.width);
         im_in_shape_.push_back(config.in_shape[dims - spatial_dims + i]);
         im_out_shape_.push_back(config.out_shape[dims - spatial_dims + i]);
@@ -72,8 +71,10 @@ OCL4DNNPool<Dtype>::OCL4DNNPool(OCL4DNNPoolConfig config)
     kernel_w_ = kernel_shape_[1];
     stride_h_ = stride_[0];
     stride_w_ = stride_[1];
-    pad_h_ = pad_[0];
-    pad_w_ = pad_[1];
+    pad_t_ = config.pad_t;
+    pad_l_ = config.pad_l;
+    pad_r_ = config.pad_r;
+    pad_b_ = config.pad_b;
     height_ = im_in_shape_[0];
     width_ = im_in_shape_[1];
     pooled_height_ = im_out_shape_[0];
@@ -113,14 +114,13 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
                 ocl::dnn::ocl4dnn_pooling_oclsrc,
                 format(" -D Dtype=%s -D KERNEL_MAX_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
                        " -D STRIDE_W=%d -D STRIDE_H=%d"
-                       " -D PAD_W=%d -D PAD_H=%d%s",
+                       " -D PAD_L=%d -D PAD_T=%d -D PAD_R=%d -D PAD_B=%d%s",
                        (use_half) ? "half" : "float",
                        kernel_w_, kernel_h_,
                        stride_w_, stride_h_,
-                       pad_w_, pad_h_,
+                       pad_l_, pad_t_, pad_r_, pad_b_,
                        computeMaxIdx ? " -D HAVE_MASK=1" : ""
                 ));
 
             if (oclk_max_pool_forward.empty())
                 return false;
@@ -150,11 +150,11 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
                 ocl::dnn::ocl4dnn_pooling_oclsrc,
                 format(" -D Dtype=%s -D KERNEL_AVE_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
                        " -D STRIDE_W=%d -D STRIDE_H=%d"
-                       " -D PAD_W=%d -D PAD_H=%d%s",
+                       " -D PAD_L=%d -D PAD_T=%d -D PAD_R=%d -D PAD_B=%d%s",
                        (use_half) ? "half" : "float",
                        kernel_w_, kernel_h_,
                        stride_w_, stride_h_,
-                       pad_w_, pad_h_,
+                       pad_l_, pad_t_, pad_r_, pad_b_,
                        avePoolPaddedArea ? " -D AVE_POOL_PADDING_AREA" : ""
                 ));
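
Reviewer note: the per-side pads reach the OpenCL kernels only through these `-D` build defines. A sketch (invented values) of the options string the `format()` call above would produce:

```cpp
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    // Assumed: 3x3 max pooling, stride 2, pads t=0 l=0 b=1 r=1, float data.
    cv::String opts = cv::format(
        " -D Dtype=%s -D KERNEL_MAX_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
        " -D STRIDE_W=%d -D STRIDE_H=%d"
        " -D PAD_L=%d -D PAD_T=%d -D PAD_R=%d -D PAD_B=%d",
        "float", 3, 3, 2, 2, 0, 0, 1, 1);
    std::cout << opts << std::endl;
    // " -D Dtype=float -D KERNEL_MAX_POOL=1 -D KERNEL_W=3 -D KERNEL_H=3
    //   -D STRIDE_W=2 -D STRIDE_H=2 -D PAD_L=0 -D PAD_T=0 -D PAD_R=1 -D PAD_B=1"
    return 0;
}
```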
......
@@ -174,9 +174,8 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto)
     else if(attribute_name == "pads")
     {
         CV_Assert(attribute_proto.ints_size() == 4);
-        lp.set("pad_h", saturate_cast<int32_t>(attribute_proto.ints(0)));
-        lp.set("pad_w", saturate_cast<int32_t>(attribute_proto.ints(1)));
-        // push pad_b and pad_r for compute ceil_mode
+        lp.set("pad_t", saturate_cast<int32_t>(attribute_proto.ints(0)));
+        lp.set("pad_l", saturate_cast<int32_t>(attribute_proto.ints(1)));
         lp.set("pad_b", saturate_cast<int32_t>(attribute_proto.ints(2)));
         lp.set("pad_r", saturate_cast<int32_t>(attribute_proto.ints(3)));
     }
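
Reviewer note: per the ONNX operator spec, a 2-D `pads` attribute is ordered [x1_begin, x2_begin, x1_end, x2_end], i.e. [top, left, bottom, right] on NCHW data, which is exactly the `ints(0)`..`ints(3)` indexing above. A hypothetical helper (not importer code) making the mapping explicit:

```cpp
#include <array>
#include <iostream>
#include <map>
#include <string>

// Invented for illustration: maps an ONNX 2-D "pads" attribute onto the
// LayerParams keys the importer now sets.
std::map<std::string, int> padsToLayerParams(const std::array<int, 4>& pads)
{
    return { { "pad_t", pads[0] }, { "pad_l", pads[1] },
             { "pad_b", pads[2] }, { "pad_r", pads[3] } };
}

int main()
{
    for (const auto& kv : padsToLayerParams({0, 0, 1, 1}))
        std::cout << kv.first << "=" << kv.second << " ";
    std::cout << std::endl;  // pad_b=1 pad_l=0 pad_r=1 pad_t=0 (map sorts keys)
    return 0;
}
```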
@@ -306,6 +305,7 @@ void ONNXImporter::populateNet(Net dstNet)
         std::string layer_type = node_proto.op_type();
         layerParams.type = layer_type;
+
         if (layer_type == "MaxPool")
         {
             layerParams.type = "Pooling";
@@ -551,7 +551,6 @@ void ONNXImporter::populateNet(Net dstNet)
         for (int j = 0; j < node_proto.input_size(); j++) {
             layerId = layer_id.find(node_proto.input(j));
             if (layerId != layer_id.end()) {
-
                 dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, j);
             }
......
@@ -73,8 +73,8 @@ __kernel void
     const int xx = index / pooled_width;
     const int ph = xx % pooled_height;
     const int ch = xx / pooled_height;
-    int hstart = ph * STRIDE_H - PAD_H;
-    int wstart = pw * STRIDE_W - PAD_W;
+    int hstart = ph * STRIDE_H - PAD_T;
+    int wstart = pw * STRIDE_W - PAD_L;
     Dtype maxval = -FLT_MAX;
     int maxidx = -1;
     int in_offset = ch * height * width;
@@ -117,10 +117,10 @@ __kernel void TEMPLATE(ave_pool_forward, Dtype)(
     const int xx = index / pooled_width;
     const int ph = xx % pooled_height;
     const int ch = xx / pooled_height;
-    int hstart = ph * STRIDE_H - PAD_H;
-    int wstart = pw * STRIDE_W - PAD_W;
-    int hend = min(hstart + KERNEL_H, height + PAD_H);
-    int wend = min(wstart + KERNEL_W, width + PAD_W);
+    int hstart = ph * STRIDE_H - PAD_T;
+    int wstart = pw * STRIDE_W - PAD_L;
+    int hend = min(hstart + KERNEL_H, height + PAD_B);
+    int wend = min(wstart + KERNEL_W, width + PAD_R);
     int pool_size;
 #ifdef AVE_POOL_PADDING_AREA
     pool_size = (hend - hstart) * (wend - wstart);
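
Reviewer note on why the average-pool bounds now clip with PAD_B/PAD_R: with asymmetric padding the window can run past the bottom/right edge by a different amount than the top/left. A small check under assumed numbers (height 5, kernel 3, stride 2, PAD_T=0, PAD_B=1, three output rows):

```cpp
#include <algorithm>
#include <iostream>

int main()
{
    // Assumed values, not taken from a real kernel launch.
    const int height = 5, KERNEL_H = 3, STRIDE_H = 2, PAD_T = 0, PAD_B = 1;
    for (int ph = 0; ph < 3; ++ph)
    {
        int hstart = ph * STRIDE_H - PAD_T;
        int hend   = std::min(hstart + KERNEL_H, height + PAD_B);
        // With a single symmetric PAD_H this bottom-only margin of 1
        // was not expressible.
        std::cout << ph << ": [" << hstart << ", " << hend << ")" << std::endl;
    }
    // 0: [0, 3)  1: [2, 5)  2: [4, 6)
    return 0;
}
```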
......
@@ -27,7 +27,7 @@
 __kernel void MaxPoolForward(const int nthreads,
     __global T* bottom_data, const int num, const int channels, const int height, const int width,
     const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w,
-    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w, const int pad_t, const int pad_l, const int pad_b, const int pad_r,
     __global T* top_data
 #ifdef MASK
     , __global float* mask
@@ -41,8 +41,8 @@ __kernel void MaxPoolForward(const int nthreads,
     int ph = (index / pooled_width) % pooled_height;
     int c = (index / pooled_width / pooled_height) % channels;
     int n = index / pooled_width / pooled_height / channels;
-    int hstart = ph * stride_h - pad_h;
-    int wstart = pw * stride_w - pad_w;
+    int hstart = ph * stride_h - pad_t;
+    int wstart = pw * stride_w - pad_l;
     const int hend = min(hstart + kernel_h, height);
     const int wend = min(wstart + kernel_w, width);
     hstart = max(hstart, 0);
@@ -71,7 +71,7 @@ __kernel void MaxPoolForward(const int nthreads,
 __kernel void AvePoolForward(const int nthreads,
     __global T* bottom_data, const int num, const int channels, const int height, const int width,
     const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w,
-    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w, const int pad_t, const int pad_l, const int pad_b, const int pad_r,
     __global T* top_data
 #ifdef MASK
     , __global float* mask // NOT USED
@@ -84,9 +84,9 @@ __kernel void AvePoolForward(const int nthreads,
     int pw = index % pooled_width;
     int ph = (index / pooled_width) % pooled_height;
     int c = (index / pooled_width / pooled_height) % channels;
-    int n = index / pooled_width / pooled_height / channels; int hstart = ph * stride_h - pad_h; int wstart = pw * stride_w - pad_w;
-    int hend = min(hstart + kernel_h, height + pad_h);
-    int wend = min(wstart + kernel_w, width + pad_w);
+    int n = index / pooled_width / pooled_height / channels; int hstart = ph * stride_h - pad_t; int wstart = pw * stride_w - pad_l;
+    int hend = min(hstart + kernel_h, height + pad_b);
+    int wend = min(wstart + kernel_w, width + pad_r);
     const int pool_size = (hend - hstart) * (wend - wstart);
     hstart = max(hstart, 0);
     wstart = max(wstart, 0);
......
@@ -346,6 +346,10 @@ TEST_P(Test_ONNX_nets, DenseNet121)
     testONNXModels("densenet121", pb, l1, lInf);
 }
 
+TEST_P(Test_ONNX_nets, Inception_v1)
+{
+    testONNXModels("inception_v1", pb);
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets());
......