Commit 77fa59c3 authored by Lubov Batanina, committed by Alexander Alekhin

Merge pull request #14301 from l-bat:conv3d

Support Convolution3D layer on IE backend (#14301)

* Add Convolution3D layer

* Disable CXX11

* Fixed tests

* Add Pooling3D layer

* Merge Conv2d with Conv3d and Pool2d with Pool3d layers

* Split pads

* Add Deconvolution layer

* Refactoring

* Deduplication

* Refactoring

* Add utils for Convolution and Pooling layers
parent 3bcbd2a0
@@ -210,7 +210,10 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
 class CV_EXPORTS BaseConvolutionLayer : public Layer
 {
 public:
-    Size kernel, stride, pad, dilation, adjustPad;
+    CV_DEPRECATED_EXTERNAL Size kernel, stride, pad, dilation, adjustPad;
+    std::vector<size_t> adjust_pads;
+    std::vector<size_t> kernel_size, strides, dilations;
+    std::vector<size_t> pads_begin, pads_end;
     String padMode;
     int numOutput;
 };
@@ -243,9 +246,10 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
 {
 public:
     int type;
-    Size kernel, stride;
-    int pad_l, pad_t, pad_r, pad_b;
-    CV_DEPRECATED_EXTERNAL Size pad;
+    std::vector<size_t> kernel_size, strides;
+    std::vector<size_t> pads_begin, pads_end;
+    CV_DEPRECATED_EXTERNAL Size kernel, stride, pad;
+    CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b;
     bool globalPooling;
     bool computeMaxIdx;
     String padMode;
...
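Note on the API change above: the scalar Size/int geometry fields become deprecated external views, and the layers now describe their geometry with per-dimension vectors ordered outermost-first (D, H, W for 3D). A minimal sketch of how the two views line up for a 2D layer, with invented values; the Size view is width-first, exactly as the patch fills it in pooling_layer.cpp:

    // Illustration only, assuming a 2D layer; a 3D layer would use {kD, kH, kW}.
    std::vector<size_t> kernel_size = {3, 3};  // {kH, kW}
    std::vector<size_t> strides     = {2, 2};  // {sH, sW}
    std::vector<size_t> pads_begin  = {1, 1};  // {pad_t, pad_l}
    std::vector<size_t> pads_end    = {1, 1};  // {pad_b, pad_r}
    cv::Size kernel(kernel_size[1], kernel_size[0]);  // deprecated view: (width, height)
    cv::Size stride(strides[1], strides[0]);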
@@ -2263,6 +2263,7 @@ struct Net::Impl
         if (isAsync)
             CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode");

+        CV_Assert(layer->supportBackend(DNN_BACKEND_OPENCV));
         if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
         {
             std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
...
@@ -59,22 +59,20 @@ namespace cv
 {
 namespace dnn
 {
-void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR,
-                                int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String& padMode);
+void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
+                                std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations, cv::String &padMode);

-void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling,
-                            int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode);
+void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, bool &globalPooling,
+                            std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);

-void getConvPoolOutParams(const Size& inp, const Size &kernel,
-                          const Size &stride, const String &padMode,
-                          const Size &dilation, Size& out);
+void getConvPoolOutParams(const std::vector<int>& inp, const std::vector<size_t>& kernel,
+                          const std::vector<size_t>& stride, const String &padMode,
+                          const std::vector<size_t>& dilation, std::vector<int>& out);

-void getConvPoolPaddings(const Size& inp, const Size& out,
-                         const Size &kernel, const Size &stride,
-                         const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR);
+void getConvPoolPaddings(const std::vector<int>& inp, const std::vector<int>& out,
+                         const std::vector<size_t>& kernel, const std::vector<size_t>& strides,
+                         const String &padMode, const std::vector<size_t>& dilation,
+                         std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end);
}
}
...
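To make the new vector-based signatures concrete, here is a hypothetical call computing the output spatial shape of a 3D pooling with SAME padding. These helpers live in a private header, so this is illustrative rather than user-facing code, and the input sizes are made up:

    std::vector<int>    inp    = {8, 32, 32};       // input D, H, W
    std::vector<size_t> kernel = {2, 2, 2};
    std::vector<size_t> stride = {2, 2, 2};
    std::vector<size_t> dilation(kernel.size(), 1); // pooling uses unit dilation
    std::vector<int> out;
    cv::dnn::getConvPoolOutParams(inp, kernel, stride, "SAME", dilation, out);
    // For SAME padding each output dim is ceil(inp[i] / stride[i]),
    // so out should be {4, 16, 16}.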
@@ -72,6 +72,7 @@ public:
         computeMaxIdx = true;
         globalPooling = false;
         stride = Size(1, 1);
+        pad_t = pad_l = pad_b = pad_r = 0;

         if (params.has("pool") || params.has("kernel_size") ||
             params.has("kernel_w") || params.has("kernel_h"))
@@ -86,11 +87,17 @@ public:
             else
                 CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");

-            getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
-                                   pad_t, pad_l, pad_b, pad_r, stride.height, stride.width, padMode);
-            pad.width = pad_l;
-            pad.height = pad_t;
+            getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode);
+            if (kernel_size.size() == 2) {
+                kernel = Size(kernel_size[1], kernel_size[0]);
+                stride = Size(strides[1], strides[0]);
+                pad = Size(pads_begin[1], pads_begin[0]);
+
+                pad_t = pads_begin[0];
+                pad_l = pads_begin[1];
+                pad_b = pads_end[0];
+                pad_r = pads_end[1];
+            }
         }
         else if (params.has("pooled_w") || params.has("pooled_h"))
         {
@@ -125,17 +132,24 @@ public:
         CV_Assert(!inputs.empty());

-        cv::Size inp(inputs[0].size[3], inputs[0].size[2]),
-                 out(outputs[0].size[3], outputs[0].size[2]);
-
-        if(globalPooling)
-        {
-            kernel = inp;
+        std::vector<int> inp;
+        std::vector<int> out;
+        for (int i = 2; i < inputs[0].dims; i++) {
+            inp.push_back(inputs[0].size[i]);
+            out.push_back(outputs[0].size[i]);
+        }
+        if (globalPooling) {
+            kernel = Size(inp[1], inp[0]);
+            kernel_size = std::vector<size_t>(inp.begin(), inp.end());
         }

-        getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad_t, pad_l, pad_b, pad_r);
-        pad.width = pad_l;
-        pad.height = pad_t;
+        getConvPoolPaddings(inp, out, kernel_size, strides, padMode, std::vector<size_t>(kernel_size.size(), 1), pads_begin, pads_end);
+        if (pads_begin.size() == 2) {
+            pad_t = pads_begin[0];
+            pad_l = pads_begin[1];
+            pad_b = pads_end[0];
+            pad_r = pads_end[1];
+        }

 #ifdef HAVE_OPENCL
         poolOp.release();
@@ -148,6 +162,8 @@ public:
         if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
         {
 #ifdef HAVE_INF_ENGINE
+            if (kernel_size.size() == 3)
+                return preferableTarget == DNN_TARGET_CPU;
             if (preferableTarget == DNN_TARGET_MYRIAD) {
                 if (type == MAX && (pad_l == 1 && pad_t == 1) && stride == Size(2, 2) ) {
                     return !isMyriadX();
@@ -161,9 +177,9 @@ public:
 #endif
         }
         else
-            return backendId == DNN_BACKEND_OPENCV ||
-                   (backendId == DNN_BACKEND_HALIDE && haveHalide() &&
-                    (type == MAX || (type == AVE && !pad_t && !pad_l && !pad_b && !pad_r)));
+            return (kernel_size.empty() || kernel_size.size() == 2) && (backendId == DNN_BACKEND_OPENCV ||
+                   (backendId == DNN_BACKEND_HALIDE && haveHalide() &&
+                    (type == MAX || (type == AVE && !pad_t && !pad_l && !pad_b && !pad_r))));
     }

 #ifdef HAVE_OPENCL
@@ -269,10 +285,12 @@ public:
         if (type == MAX || type == AVE)
         {
             InferenceEngine::Builder::PoolingLayer ieLayer(name);
-            ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width});
-            ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width});
-            ieLayer.setPaddingsBegin({(size_t)pad_t, (size_t)pad_l});
-            ieLayer.setPaddingsEnd({(size_t)pad_b, (size_t)pad_r});
+
+            ieLayer.setKernel(kernel_size);
+            ieLayer.setStrides(strides);
+            ieLayer.setPaddingsBegin(pads_begin);
+            ieLayer.setPaddingsEnd(pads_end);
             ieLayer.setPoolingType(type == MAX ?
                        InferenceEngine::Builder::PoolingLayer::PoolingType::MAX :
                        InferenceEngine::Builder::PoolingLayer::PoolingType::AVG);
@@ -916,59 +934,56 @@ public:
                          std::vector<MatShape> &internals) const CV_OVERRIDE
     {
         CV_Assert(inputs.size() != 0);
-        Size in(inputs[0][3], inputs[0][2]), out;
+
+        std::vector<int> inpShape(inputs[0].begin() + 2, inputs[0].end());
+        std::vector<int> outShape(inputs[0].begin(), inputs[0].begin() + 2);

         if (globalPooling)
         {
-            out.height = 1;
-            out.width = 1;
+            outShape.push_back(1);
+            outShape.push_back(1);
         }
         else if (type == ROI || type == PSROI)
         {
-            out.height = pooledSize.height;
-            out.width = pooledSize.width;
+            outShape.push_back(pooledSize.height);
+            outShape.push_back(pooledSize.width);
         }
         else if (padMode.empty())
        {
-            float height = (float)(in.height + pad_t + pad_b - kernel.height) / stride.height;
-            float width = (float)(in.width + pad_l + pad_r - kernel.width) / stride.width;
-            out.height = 1 + (ceilMode ? ceil(height) : floor(height));
-            out.width = 1 + (ceilMode ? ceil(width) : floor(width));
+            for (int i = 0; i < kernel_size.size(); i++) {
+                float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - kernel_size[i]) / strides[i];
+                outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
+            }

-            if (pad_r || pad_b)
-            {
-                // If we have padding, ensure that the last pooling starts strictly
-                // inside the image (instead of at the padding); otherwise clip the last.
-                if ((out.height - 1) * stride.height >= in.height + pad_b)
-                    --out.height;
-                if ((out.width - 1) * stride.width >= in.width + pad_r)
-                    --out.width;
-                CV_Assert((out.height - 1) * stride.height < in.height + pad_b);
-                CV_Assert((out.width - 1) * stride.width < in.width + pad_r);
+            // If we have padding, ensure that the last pooling starts strictly
+            // inside the image (instead of at the padding); otherwise clip the last.
+            for (int i = 0; i < pads_end.size(); i++) {
+                if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) {
+                    --outShape[2 + i];
+                    CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]);
+                }
             }
         }
         else
         {
-            getConvPoolOutParams(in, kernel, stride, padMode, Size(1, 1), out);
+            getConvPoolOutParams(inpShape, kernel_size, strides, padMode, std::vector<size_t>(kernel_size.size(), 1), outShape);
         }
-
-        int dims[] = {inputs[0][0], inputs[0][1], out.height, out.width};
         if (type == ROI)
         {
             CV_Assert(inputs.size() == 2);
-            dims[0] = inputs[1][0];  // Number of proposals;
+            outShape[0] = inputs[1][0];  // Number of proposals;
         }
         else if (type == PSROI)
         {
             CV_Assert(inputs.size() == 2);
             CV_Assert(psRoiOutChannels * pooledSize.width * pooledSize.height == inputs[0][1]);
-            dims[0] = inputs[1][0];  // Number of proposals;
-            dims[1] = psRoiOutChannels;
+            outShape[0] = inputs[1][0];  // Number of proposals;
+            outShape[1] = psRoiOutChannels;
         }
         int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1);
         CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX));
-        outputs.assign(numOutputs, shape(dims, 4));
+
+        outputs.assign(numOutputs, outShape);
         return false;
     }
...
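The per-dimension loop above generalizes the familiar pooling output-size formula, out = 1 + (in + pad_begin + pad_end - kernel) / stride with ceil or floor rounding, followed by the Caffe-style clip that keeps the last window from starting entirely in the padding. A self-contained check of that arithmetic with invented numbers:

    #include <cmath>
    #include <cstdio>

    int main() {
        // One spatial dim: in = 7, kernel = 3, pads = 1/1, stride = 2, floor mode.
        int inp = 7, kernel = 3, pad_begin = 1, pad_end = 1, stride = 2;
        bool ceilMode = false;
        float dst = (float)(inp + pad_begin + pad_end - kernel) / stride;  // 3.0
        int out = 1 + (int)(ceilMode ? std::ceil(dst) : std::floor(dst));  // 4
        // Clip: the last window must start inside the padded input.
        if (pad_end && (out - 1) * stride >= inp + pad_end)
            --out;  // not triggered here: (4-1)*2 = 6 < 7+1 = 8
        std::printf("out = %d\n", out);  // out = 4
        return 0;
    }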
@@ -184,6 +184,12 @@ std::map<std::string, Mat> ONNXImporter::getGraphTensors(
     return layers_weights;
 }

+static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) {
+    std::vector<int32_t> dst(src.size());
+    convertInt64ToInt32(src, dst, src.size());
+    return DictValue::arrayInt(&dst[0], src.size());
+}
+
 LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto)
 {
     LayerParams lp;
@@ -194,15 +200,13 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto)
         if(attribute_name == "kernel_shape")
         {
-            CV_Assert(attribute_proto.ints_size() == 2);
-            lp.set("kernel_h", saturate_cast<int32_t>(attribute_proto.ints(0)));
-            lp.set("kernel_w", saturate_cast<int32_t>(attribute_proto.ints(1)));
+            CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
+            lp.set("kernel_size", parse(attribute_proto.ints()));
         }
         else if(attribute_name == "strides")
         {
-            CV_Assert(attribute_proto.ints_size() == 2);
-            lp.set("stride_h", saturate_cast<int32_t>(attribute_proto.ints(0)));
-            lp.set("stride_w", saturate_cast<int32_t>(attribute_proto.ints(1)));
+            CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
+            lp.set("stride", parse(attribute_proto.ints()));
         }
         else if(attribute_name == "pads")
         {
@@ -225,11 +229,8 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto)
             else
             {
                 // Convolution or pooling.
-                CV_Assert(attribute_proto.ints_size() == 4);
-                lp.set("pad_t", saturate_cast<int32_t>(attribute_proto.ints(0)));
-                lp.set("pad_l", saturate_cast<int32_t>(attribute_proto.ints(1)));
-                lp.set("pad_b", saturate_cast<int32_t>(attribute_proto.ints(2)));
-                lp.set("pad_r", saturate_cast<int32_t>(attribute_proto.ints(3)));
+                CV_Assert(attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6);
+                lp.set("pad", parse(attribute_proto.ints()));
             }
         }
         else if(attribute_name == "auto_pad")
@@ -243,9 +244,8 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto)
         }
         else if(attribute_name == "dilations")
         {
-            CV_Assert(attribute_proto.ints_size() == 2);
-            lp.set("dilation_h", saturate_cast<int32_t>(attribute_proto.ints(0)));
-            lp.set("dilation_w", saturate_cast<int32_t>(attribute_proto.ints(1)));
+            CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
+            lp.set("dilation", parse(attribute_proto.ints()));
         }
         else if (attribute_proto.has_i())
         {
@@ -270,10 +270,7 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto)
         }
         else if (attribute_proto.ints_size() > 0)
         {
-            const ::google::protobuf::RepeatedField< ::google::protobuf::int64> src = attribute_proto.ints();
-            std::vector<int32_t> dst(attribute_proto.ints_size());
-            convertInt64ToInt32(src, dst, attribute_proto.ints_size());
-            lp.set(attribute_proto.name(), DictValue::arrayInt(&dst[0], attribute_proto.ints_size()));
+            lp.set(attribute_proto.name(), parse(attribute_proto.ints()));
         }
         else if (attribute_proto.has_t())
         {
@@ -305,19 +302,6 @@ Mat ONNXImporter::getBlob(const opencv_onnx::NodeProto& node_proto,
     return constBlob->second;
 }

-bool ONNXImporter::isCeilMode(const LayerParams& layerParams) {
-    if (!layerParams.has("pad_mode")) {
-        if (layerParams.has("pad_h")) {
-            return layerParams.get<int>("pad_h") != layerParams.get<int>("pad_b") ||
-                   layerParams.get<int>("pad_w") != layerParams.get<int>("pad_r");
-        }
-        else
-            return false;  // all pads == 0
-    }
-    return true;
-}
-
 void ONNXImporter::populateNet(Net dstNet)
 {
     CV_Assert(model_proto.has_graph());
@@ -384,13 +368,13 @@ void ONNXImporter::populateNet(Net dstNet)
         {
             layerParams.type = "Pooling";
             layerParams.set("pool", "MAX");
-            layerParams.set("ceil_mode", isCeilMode(layerParams));
+            layerParams.set("ceil_mode", layerParams.has("pad_mode"));
         }
         else if (layer_type == "AveragePool")
         {
             layerParams.type = "Pooling";
             layerParams.set("pool", "AVE");
-            layerParams.set("ceil_mode", isCeilMode(layerParams));
+            layerParams.set("ceil_mode", layerParams.has("pad_mode"));
             layerParams.set("ave_pool_padded_area", framework_name == "pytorch");
         }
         else if (layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool")
@@ -600,8 +584,9 @@ void ONNXImporter::populateNet(Net dstNet)
                 if (outShape.size() != 4)
                     CV_Error(Error::StsNotImplemented, "Output shape must have 4 elements.");

-                const int strideY = layerParams.get<int>("stride_h", 1);
-                const int strideX = layerParams.get<int>("stride_w", 1);
+                DictValue stride = layerParams.get("stride");
+                const int strideY = stride.getIntValue(0);
+                const int strideX = stride.getIntValue(1);
                 const int outH = outShape.getIntValue(2);
                 const int outW = outShape.getIntValue(3);
@@ -612,15 +597,13 @@ void ONNXImporter::populateNet(Net dstNet)
                 }
                 else if (layerParams.get<String>("pad_mode") == "VALID")
                 {
-                    if (!layerParams.has("kernel_h") || !layerParams.has("kernel_w"))
+                    if (!layerParams.has("kernel_size"))
                         CV_Error(Error::StsNotImplemented,
-                                 "Required attributes 'kernel_h' and 'kernel_w' are not present.");
-
-                    int kernelH = layerParams.get<int>("kernel_h");
-                    int kernelW = layerParams.get<int>("kernel_w");
-
-                    layerParams.set("adj_w", (outW - kernelW) % strideX);
-                    layerParams.set("adj_h", (outH - kernelH) % strideY);
+                                 "Required attribute 'kernel_size' is not present.");
+
+                    DictValue kernel = layerParams.get("kernel_size");
+                    layerParams.set("adj_h", (outH - kernel.getIntValue(0)) % strideY);
+                    layerParams.set("adj_w", (outW - kernel.getIntValue(1)) % strideX);
                 }
             }
             else if (layerParams.has("output_padding"))
...
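The static parse() helper introduced at the top of this file deduplicates the int64-to-int32 conversion that the generic attribute branch used to inline: every repeated-int64 ONNX attribute now becomes a single DictValue array. A hedged sketch of the resulting flow for a 3D node, with invented attribute values:

    // An ONNX Conv node with kernel_shape = [3, 3, 3] now yields:
    //   lp.set("kernel_size", parse(attribute_proto.ints()));
    // and a layer reads it back element-wise:
    //   DictValue kernel = lp.get("kernel_size");
    //   kernel.size()         == 3
    //   kernel.getIntValue(0) == 3   // depth comes first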
@@ -51,6 +51,7 @@ enum DataLayout
 {
     DATA_LAYOUT_NHWC,
     DATA_LAYOUT_NCHW,
+    DATA_LAYOUT_NDHWC,
     DATA_LAYOUT_UNKNOWN,
     DATA_LAYOUT_PLANAR  // 2-dimensional outputs (matmul, flatten, reshape to 2d)
 };
@@ -258,6 +259,8 @@ static int getDataLayout(const tensorflow::NodeDef& layer)
         return DATA_LAYOUT_NHWC;
     else if (format == "NCHW" || format == "channels_first")
         return DATA_LAYOUT_NCHW;
+    else if (format == "NDHWC")
+        return DATA_LAYOUT_NDHWC;
     else
         CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
 }
@@ -281,21 +284,34 @@ void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
     if (hasLayerAttr(layer, "strides"))
     {
         const tensorflow::AttrValue& val = getLayerAttr(layer, "strides");
-        int dimX, dimY, dimC;
+        int dimX, dimY, dimC, dimD;
         int layout = getDataLayout(layer);
         if (layout == DATA_LAYOUT_NCHW)
         {
             dimC = 1; dimY = 2; dimX = 3;
         }
+        else if (layout == DATA_LAYOUT_NDHWC)
+        {
+            dimD = 1; dimY = 2; dimX = 3; dimC = 4;
+        }
         else
         {
             dimY = 1; dimX = 2; dimC = 3;
         }
-        if (val.list().i_size() != 4 ||
+        if (!(val.list().i_size() == 4 || val.list().i_size() == 5) ||
             val.list().i(0) != 1 || val.list().i(dimC) != 1)
             CV_Error(Error::StsError, "Unsupported strides");
-        layerParams.set("stride_h", static_cast<int>(val.list().i(dimY)));
-        layerParams.set("stride_w", static_cast<int>(val.list().i(dimX)));
+        if (layout == DATA_LAYOUT_NDHWC) {
+            int strides[] = {static_cast<int>(val.list().i(dimD)),
+                             static_cast<int>(val.list().i(dimY)),
+                             static_cast<int>(val.list().i(dimX))};
+            layerParams.set("stride", DictValue::arrayInt(strides, 3));
+        }
+        else
+        {
+            layerParams.set("stride_h", static_cast<int>(val.list().i(dimY)));
+            layerParams.set("stride_w", static_cast<int>(val.list().i(dimX)));
+        }
     }
 }
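For NDHWC nodes, the strides attribute above (and the ksize attribute in the mirrored setKSize hunk that follows) carries five entries: batch first, channels last, both required to be 1, with the three middle entries becoming the layer's 3D parameters. For example, with invented values:

    // A TF MaxPool3D node with data_format == "NDHWC" and ksize = {1, 2, 2, 2, 1}:
    //   val.list().i(0)    == 1   (batch, required)
    //   val.list().i(dimC) == 1   (channels, dimC == 4, required)
    // setKSize() reads indices dimD = 1, dimY = 2, dimX = 3 and stores
    //   kernel_size = {2, 2, 2} as a DictValue array.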
@@ -318,21 +334,35 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
     if (hasLayerAttr(layer, "ksize"))
     {
         const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize");
-        int dimX, dimY, dimC;
+        int dimX, dimY, dimC, dimD;
         int layout = getDataLayout(layer);
         if (layout == DATA_LAYOUT_NCHW)
         {
             dimC = 1; dimY = 2; dimX = 3;
         }
+        else if (layout == DATA_LAYOUT_NDHWC)
+        {
+            dimD = 1; dimY = 2; dimX = 3; dimC = 4;
+        }
         else
         {
             dimY = 1; dimX = 2; dimC = 3;
         }
-        if (val.list().i_size() != 4 ||
+        if (!(val.list().i_size() == 4 || val.list().i_size() == 5) ||
             val.list().i(0) != 1 || val.list().i(dimC) != 1)
             CV_Error(Error::StsError, "Unsupported ksize");
-        layerParams.set("kernel_h", static_cast<int>(val.list().i(dimY)));
-        layerParams.set("kernel_w", static_cast<int>(val.list().i(dimX)));
+
+        if (layout == DATA_LAYOUT_NDHWC) {
+            int kernel[] = {static_cast<int>(val.list().i(dimD)),
+                            static_cast<int>(val.list().i(dimY)),
+                            static_cast<int>(val.list().i(dimX))};
+            layerParams.set("kernel_size", DictValue::arrayInt(kernel, 3));
+        }
+        else
+        {
+            layerParams.set("kernel_h", static_cast<int>(val.list().i(dimY)));
+            layerParams.set("kernel_w", static_cast<int>(val.list().i(dimX)));
+        }
     }
     else
     {
@@ -456,12 +486,26 @@ void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
     // TODO: other blob types
     CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT ||
               tensor.dtype() == tensorflow::DT_HALF);
-    CV_Assert(dims == 4);
-
-    // REORDER kernel HWIO to OIHW
-    swap(shape[0], shape[2]); // IWHO
-    swap(shape[1], shape[3]); // IOHW
-    swap(shape[0], shape[1]); // OIHW
+    CV_Assert(dims == 4 || dims == 5);
+
+    int out_c, input_c, depth, height, width;
+    if (dims == 4)
+    {
+        // REORDER kernel HWIO to OIHW
+        swap(shape[0], shape[2]); // IWHO
+        swap(shape[1], shape[3]); // IOHW
+        swap(shape[0], shape[1]); // OIHW
+        depth = 1; height = shape[2]; width = shape[3];
+    }
+    else
+    {
+        // REORDER kernel DHWIO to OIDHW
+        swap(shape[0], shape[4]); // OHWID
+        swap(shape[1], shape[3]); // OIWHD
+        swap(shape[2], shape[4]); // OIDHW
+        depth = shape[2]; height = shape[3]; width = shape[4];
+    }
+    out_c = shape[0]; input_c = shape[1];

     dstBlob.create(shape, CV_32F);
@@ -472,17 +516,20 @@ void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
     float *dstData = dstBlob.ptr<float>();
     const float *data = reinterpret_cast<const float*>(tensorContent.data);

-    int out_c = shape[0], input_c = shape[1], height = shape[2], width = shape[3];
-    int total = out_c*input_c*height*width;
-    for(int i_oc = 0; i_oc < out_c; i_oc++) {
-        for(int i_ic = 0; i_ic < input_c; i_ic++) {
-            for(int i_h = 0; i_h < height; i_h++) {
-                for(int i_w = 0; i_w < width; i_w++) {
-                    int dst_i = input_c*height*width*i_oc + height*width*i_ic + width*i_h + i_w;
-                    int src_i = out_c*input_c*width*i_h + out_c*input_c*i_w + out_c*i_ic + i_oc;
-                    CV_Assert(dst_i < total);
-                    CV_Assert(src_i < total);
-                    dstData[dst_i] = data[src_i];
+    int total = out_c * input_c * depth * height * width;
+    for (int i_oc = 0; i_oc < out_c; i_oc++) {
+        for (int i_ic = 0; i_ic < input_c; i_ic++) {
+            for (int i_d = 0; i_d < depth; i_d++) {
+                for (int i_h = 0; i_h < height; i_h++) {
+                    for (int i_w = 0; i_w < width; i_w++) {
+                        int dst_i = input_c * depth * height * width * i_oc +
+                                    depth * height * width * i_ic + height * width * i_d + width * i_h + i_w;
+                        int src_i = out_c * input_c * width * height * i_d +
+                                    out_c * input_c * width * i_h + out_c * input_c * i_w + out_c * i_ic + i_oc;
+                        CV_Assert(dst_i < total);
+                        CV_Assert(src_i < total);
+                        dstData[dst_i] = data[src_i];
+                    }
                 }
             }
         }
@@ -745,7 +792,7 @@ void TFImporter::populateNet(Net dstNet)
         int predictedLayout = predictOutputDataLayout(net, layer, data_layouts);
         data_layouts[name] = predictedLayout;

-        if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad")
+        if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad" || type == "Conv3D")
         {
             // The first node of dilated convolution subgraph.
             // Extract input node, dilation rate and paddings.
@@ -917,9 +964,9 @@ void TFImporter::populateNet(Net dstNet)
             {
                 layerParams.blobs[0] = sharedWeightsIt->second;
             }
+            Mat weights = layerParams.blobs[0];
+            layerParams.set("kernel_size", DictValue::arrayInt(&weights.size[2], weights.dims - 2));

-            layerParams.set("kernel_h", layerParams.blobs[0].size[2]);
-            layerParams.set("kernel_w", layerParams.blobs[0].size[3]);
             layerParams.set("num_output", layerParams.blobs[0].size[0]);

             setStrides(layerParams, layer);
@@ -1290,7 +1337,7 @@ void TFImporter::populateNet(Net dstNet)
             connect(layer_id, dstNet, inp, id, ii - from);
         }
     }
-    else if (type == "MaxPool")
+    else if (type == "MaxPool" || type == "MaxPool3D")
     {
         layerParams.set("pool", "max");
@@ -1303,11 +1350,10 @@ void TFImporter::populateNet(Net dstNet)
         connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
     }
-    else if (type == "AvgPool")
+    else if (type == "AvgPool" || type == "AvgPool3D")
     {
         layerParams.set("pool", "ave");
         layerParams.set("ave_pool_padded_area", false);
-
         setKSize(layerParams, layer);
         setStrides(layerParams, layer);
         setPadding(layerParams, layer);
...
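The three swaps in kernelFromTensor implement the DHWIO-to-OIDHW axis permutation on the shape array. A quick standalone check of that permutation, illustrative rather than part of the patch:

    #include <utility>
    #include <cassert>

    int main() {
        // Start from TensorFlow's 3D kernel layout D, H, W, I, O.
        char shape[5] = {'D', 'H', 'W', 'I', 'O'};
        std::swap(shape[0], shape[4]); // O H W I D
        std::swap(shape[1], shape[3]); // O I W H D
        std::swap(shape[2], shape[4]); // O I D H W
        assert(shape[0] == 'O' && shape[1] == 'I' && shape[2] == 'D' &&
               shape[3] == 'H' && shape[4] == 'W');
        return 0;
    }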
@@ -81,6 +81,13 @@ TEST_P(Test_ONNX_layers, Convolution)
     testONNXModels("convolution");
 }

+TEST_P(Test_ONNX_layers, Convolution3D)
+{
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    testONNXModels("conv3d");
+    testONNXModels("conv3d_bias");
+}
+
 TEST_P(Test_ONNX_layers, Two_convolution)
 {
@@ -138,6 +145,20 @@ TEST_P(Test_ONNX_layers, AveragePooling)
     testONNXModels("average_pooling");
 }

+TEST_P(Test_ONNX_layers, MaxPooling3D)
+{
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    testONNXModels("max_pool3d");
+}
+
+TEST_P(Test_ONNX_layers, AvePooling3D)
+{
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    testONNXModels("ave_pool3d");
+}
+
 TEST_P(Test_ONNX_layers, BatchNormalization)
 {
     testONNXModels("batch_norm");
...
@@ -131,6 +131,13 @@ TEST_P(Test_TensorFlow_layers, conv)
     runTensorFlowNet("conv_pool_nchw");
 }

+TEST_P(Test_TensorFlow_layers, Convolution3D)
+{
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    runTensorFlowNet("conv3d");
+}
+
 TEST_P(Test_TensorFlow_layers, padding)
 {
     runTensorFlowNet("padding_valid");
@@ -212,6 +219,20 @@ TEST_P(Test_TensorFlow_layers, ave_pool_same)
     runTensorFlowNet("ave_pool_same");
 }

+TEST_P(Test_TensorFlow_layers, MaxPooling3D)
+{
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    runTensorFlowNet("max_pool3d");
+}
+
+TEST_P(Test_TensorFlow_layers, AvePooling3D)
+{
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    runTensorFlowNet("ave_pool3d");
+}
+
 TEST_P(Test_TensorFlow_layers, deconvolution)
 {
     runTensorFlowNet("deconvolution");
...