Commit 5127e7f2 authored by Vadim Pisarevsky's avatar Vadim Pisarevsky

Merge pull request #916 from arrybn:torch_enet

parents 99294995 e784f137
......@@ -29,6 +29,10 @@ else()
)
endif()
if(ANDROID)
add_definitions(-DDISABLE_POSIX_MEMALIGN -DTH_DISABLE_HEAP_TRACKING)
endif()
# ----------------------------------------------------------------------------
# Resolve libprotobuf dependency
# ----------------------------------------------------------------------------
......@@ -55,7 +59,7 @@ endif()
# ----------------------------------------------------------------------------
# Torch7 importer of blobs and models, produced by Torch.nn module
# ----------------------------------------------------------------------------
OCV_OPTION(${the_module}_BUILD_TORCH_IMPORTER "Build Torch model importer (experimental functionality!)" OFF)
OCV_OPTION(${the_module}_BUILD_TORCH_IMPORTER "Build Torch model importer" ON)
if(${the_module}_BUILD_TORCH_IMPORTER)
add_definitions(-DENABLE_TORCH_IMPORTER=1)
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4702 /wd4127 /wd4267) # suppress warnings in original torch files
......
......@@ -209,7 +209,7 @@ namespace dnn
{
public:
CV_PROP_RW Size kernel, stride, pad, dilation;
CV_PROP_RW Size kernel, stride, pad, dilation, adjustPad;
CV_PROP_RW String padMode;
};
......@@ -224,7 +224,7 @@ namespace dnn
{
public:
static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1));
static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1), Size adjustPad = Size());
};
class CV_EXPORTS_W LRNLayer : public Layer
......@@ -341,6 +341,12 @@ namespace dnn
static CV_WRAP Ptr<ReLULayer> create(double negativeSlope = 0);
};
/** @brief Activation layer with a separate slope coefficient for every channel.
 *
 * The implementation added alongside this declaration reads one float per
 * input channel from the first learned blob and uses it as the slope.
 */
class CV_EXPORTS_W ChannelsPReLULayer : public Layer
{
public:
/** @brief Creates the layer; it takes no construction-time parameters. */
static CV_WRAP Ptr<ChannelsPReLULayer> create();
};
class CV_EXPORTS_W TanHLayer : public Layer
{
public:
......@@ -397,6 +403,18 @@ namespace dnn
static Ptr<EltwiseLayer> create(EltwiseOp op, const std::vector<int> &coeffs);
};
/** @brief Batch normalization layer working on precomputed per-channel statistics.
 *
 * The implementation added alongside this declaration expects four learned
 * blobs: blobs[0] is subtracted from the input, blobs[1] (plus eps) goes under
 * the square root of the divisor, blobs[2] is an optional per-channel scale and
 * blobs[3] an optional per-channel shift.
 */
class CV_EXPORTS_W BatchNormLayer : public Layer
{
public:
/** @brief Creates the layer.
 * @param eps value added to blobs[1] before the square root is taken.
 * @param has_weights if true, the result is multiplied by blobs[2].
 * @param has_bias if true, blobs[3] is added to the result.
 */
static CV_WRAP Ptr<BatchNormLayer> create(float eps, bool has_weights, bool has_bias);
};
/** @brief Layer that scatters input values to positions given by a second "indices" input.
 *
 * The implementation added alongside this declaration takes exactly two
 * inputs (values and indices) and produces a single output.
 */
class CV_EXPORTS_W MaxUnpoolLayer : public Layer
{
public:
/** @brief Creates the layer.
 * @param unpoolSize spatial size (width, height) of the output planes.
 */
static CV_WRAP Ptr<MaxUnpoolLayer> create(Size unpoolSize);
};
//! @}
//! @}
......
......@@ -270,6 +270,9 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
*/
CV_WRAP Blob getParam(LayerId layer, int numParam = 0);
/** @brief Returns indexes of layers with unconnected outputs.
*/
CV_WRAP std::vector<int> getUnconnectedOutLayers() const;
private:
struct Impl;
......
/*
Sample of using OpenCV dnn module with Torch ENet model.
*/
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
#include <sstream>
using namespace std;
// Command line description consumed by cv::CommandLineParser:
// "{name alias | default | help text }" per option.
const String keys =
    "{help h || Sample app for loading ENet Torch model. "
    "The model and class names list can be downloaded here: "
    "https://www.dropbox.com/sh/dywzk3gyb12hpe5/AAD5YkUa8XgMpHs2gCRgmCVCa }"
    "{model m || path to Torch .net model file (model_best.net) }"
    "{image i || path to image file }"
    "{i_blob | .0 | input blob name }"
    "{o_blob || output blob name }"
    "{c_names c || path to file with classnames for channels (categories.txt) }"
    "{result r || path to save output blob (optional, binary format, NCHW order) }"
;
std::vector<String> readClassNames(const char *filename);
/**
 * Sample entry point: imports a Torch ENet model, runs a single forward pass
 * on one image and shows every channel of the output blob in its own window.
 *
 * Returns 0 after the windows are closed, after printing help, or after
 * printing command line errors. Terminates with exit(-1) when the model or the
 * image cannot be loaded.
 */
int main(int argc, char **argv)
{
    cv::CommandLineParser parser(argc, argv, keys);

    if (parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }

    String modelFile = parser.get<String>("model");
    String imageFile = parser.get<String>("image");
    String inBlobName = parser.get<String>("i_blob");
    String outBlobName = parser.get<String>("o_blob");
    String classNamesFile = parser.get<String>("c_names");
    String resultFile = parser.get<String>("result");

    // Validate only after every argument has been requested, so that errors
    // raised by any of the get() calls above are reported.
    if (!parser.check())
    {
        parser.printErrors();
        return 0;
    }

    //! [Create the importer of Torch model]
    Ptr<dnn::Importer> importer;
    try                                     //Try to import Torch ENet model
    {
        importer = dnn::createTorchImporter(modelFile);
    }
    catch (const cv::Exception &err)        //Importer can throw errors, we will catch them
    {
        std::cerr << err.msg << std::endl;
    }
    //! [Create the importer of Torch model]

    if (!importer)
    {
        std::cerr << "Can't load network by using the model file: " << std::endl;
        std::cerr << modelFile << std::endl;
        exit(-1);
    }

    //! [Initialize network]
    dnn::Net net;
    importer->populateNet(net);
    importer.release();                     //We don't need importer anymore
    //! [Initialize network]

    //! [Prepare blob]
    Mat img = imread(imageFile);
    if (img.empty())
    {
        std::cerr << "Can't read image from the file: " << imageFile << std::endl;
        exit(-1);
    }

    cv::Size inputImgSize = cv::Size(512, 512);

    if (inputImgSize != img.size())
        resize(img, img, inputImgSize);     //Resize image to input size

    if (img.channels() == 3)
        cv::cvtColor(img, img, cv::COLOR_BGR2RGB);

    img.convertTo(img, CV_32F, 1/255.0);

    dnn::Blob inputBlob = dnn::Blob::fromImages(img);   //Convert Mat to dnn::Blob image batch
    //! [Prepare blob]

    //! [Set input blob]
    net.setBlob(inBlobName, inputBlob);     //set the network input
    //! [Set input blob]

    cv::TickMeter tm;
    tm.start();

    //! [Make forward pass]
    net.forward();                          //compute output
    //! [Make forward pass]

    tm.stop();

    //! [Gather output]
    dnn::Blob prob = net.getBlob(outBlobName);   //gather output of the requested blob

    Mat& result = prob.matRef();

    BlobShape shape = prob.shape();

    if (!resultFile.empty()) {
        // Raw dump of the output; the element size is taken to be sizeof(float).
        CV_Assert(result.isContinuous());

        ofstream fout(resultFile.c_str(), ios::out | ios::binary);
        fout.write((char*)result.data, result.total() * sizeof(float));
        fout.close();
    }

    std::cout << "Output blob shape " << shape << std::endl;
    std::cout << "Inference time, ms: " << tm.getTimeMilli() << std::endl;

    const int numChannels = prob.channels();

    std::vector<String> classNames;
    if (!classNamesFile.empty()) {
        classNames = readClassNames(classNamesFile.c_str());
        // When the list is longer than the output, keep only its tail.
        if (classNames.size() > (size_t)numChannels)
            classNames = std::vector<String>(classNames.end() - numChannels,
                                             classNames.end());
    }

    for (int i_c = 0; i_c < numChannels; i_c++) {
        ostringstream convert;
        convert << "Channel #" << i_c;

        if (classNames.size() == (size_t)numChannels)
            convert << ": " << classNames[i_c];

        imshow(convert.str().c_str(), prob.getPlane(0, i_c));
    }
    waitKey();

    return 0;
} //main
/**
 * Reads class names from a text file, one name per line; empty lines are skipped.
 *
 * @param filename path to the text file.
 * @return the non-empty lines in file order.
 *
 * Prints a message and calls exit(-1) when the file cannot be opened.
 */
std::vector<String> readClassNames(const char *filename)
{
    std::vector<String> classNames;

    std::ifstream fp(filename);
    if (!fp.is_open())
    {
        std::cerr << "File with classes labels not found: " << filename << std::endl;
        exit(-1);
    }

    std::string name;
    // Loop on the result of getline itself: testing eof() alone never ends
    // when the stream fails without reaching end-of-file.
    while (std::getline(fp, name))
    {
        // Drop the carriage return left over from CRLF line endings.
        if (!name.empty() && name[name.size() - 1] == '\r')
            name.erase(name.size() - 1);

        if (name.length())
            classNames.push_back(name);
    }

    fp.close();
    return classNames;
}
......@@ -23,6 +23,9 @@ static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, LayerParam
int numOutput = params.get<int>("num_output");
int group = params.get<int>("group", 1);
l->adjustPad.height = params.get<int>("adj_h", 0);
l->adjustPad.width = params.get<int>("adj_w", 0);
CV_Assert(numOutput % group == 0);
CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1));
}
......@@ -40,6 +43,7 @@ Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams &params)
{
Ptr<BaseConvolutionLayer> l = DeconvolutionLayer::create();
initConvDeconvLayerFromCaffe(l, params);
return Ptr<Layer>(l);
}
......@@ -248,7 +252,7 @@ Ptr<Layer> createLayerFromCaffe<CropLayer>(LayerParams& params)
return Ptr<Layer>(CropLayer::create(start_axis, offset));
}
template<> //Power specialization
template<> //Eltwise specialization
Ptr<Layer> createLayerFromCaffe<EltwiseLayer>(LayerParams& params)
{
EltwiseLayer::EltwiseOp op = EltwiseLayer::SUM;
......@@ -278,6 +282,42 @@ Ptr<Layer> createLayerFromCaffe<EltwiseLayer>(LayerParams& params)
return Ptr<Layer>(EltwiseLayer::create(op, coeffs));
}
template<> //BatchNormLayer specialization
Ptr<Layer> createLayerFromCaffe<BatchNormLayer>(LayerParams& params)
{
    // The layer is only valid with exactly four learned blobs.
    CV_Assert(params.blobs.size() == 4);

    const float epsilon = params.get<float>("eps");              // mandatory
    const bool withWeights = params.get<bool>("has_weight", false);
    const bool withBias = params.get<bool>("has_bias", false);

    Ptr<BatchNormLayer> layer = BatchNormLayer::create(epsilon, withWeights, withBias);
    layer->setParamsFrom(params);

    return Ptr<Layer>(layer);
}
template<> //ChannelsPReLULayer specialization
Ptr<Layer> createLayerFromCaffe<ChannelsPReLULayer>(LayerParams& params)
{
    // Exactly one learned blob is required.
    CV_Assert(params.blobs.size() == 1);

    Ptr<ChannelsPReLULayer> layer = ChannelsPReLULayer::create();
    layer->setParamsFrom(params);

    return Ptr<Layer>(layer);
}
template<> //MaxUnpoolLayer specialization
Ptr<Layer> createLayerFromCaffe<MaxUnpoolLayer>(LayerParams& params)
{
    // Both output dimensions are mandatory parameters.
    const int outWidth = params.get<int>("out_w");
    const int outHeight = params.get<int>("out_h");

    return Ptr<Layer>(MaxUnpoolLayer::create(Size(outWidth, outHeight)));
}
//Explicit instantiation
template Ptr<Layer> createLayerFromCaffe<ConvolutionLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams&);
......@@ -299,6 +339,9 @@ template Ptr<Layer> createLayerFromCaffe<PowerLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<CropLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<EltwiseLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<BatchNormLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<ChannelsPReLULayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<MaxUnpoolLayer>(LayerParams&);
}
}
......@@ -592,6 +592,24 @@ bool Net::empty() const
return impl->layers.size() <= 1; //first layer is default Data layer
}
std::vector<int> Net::getUnconnectedOutLayers() const
{
std::vector<int> layersIds;
Impl::MapIdToLayerData::iterator it;
for (it = impl->layers.begin(); it != impl->layers.end(); it++)
{
int lid = it->first;
LayerData &ld = it->second;
if (ld.requiredOutputs.size() == 0)
layersIds.push_back(lid);
}
return layersIds;
}
//////////////////////////////////////////////////////////////////////////
Importer::~Importer() {}
......
......@@ -51,6 +51,7 @@
#include "layers/detection_output_layer.hpp"
#include "layers/normalize_bbox_layer.hpp"
#include "layers/shift_layer.hpp"
#include "layers/padding_layer.hpp"
namespace cv
{
......@@ -89,11 +90,14 @@ void initModule()
REG_RUNTIME_LAYER_FUNC(MVN, createLayerFromCaffe<MVNLayer>);
REG_RUNTIME_LAYER_FUNC(ReLU, createLayerFromCaffe<ReLULayer>);
REG_RUNTIME_LAYER_FUNC(ChannelsPReLU, createLayerFromCaffe<ChannelsPReLULayer>);
REG_RUNTIME_LAYER_FUNC(Sigmoid, createLayerFromCaffe<SigmoidLayer>);
REG_RUNTIME_LAYER_FUNC(TanH, createLayerFromCaffe<TanHLayer>);
REG_RUNTIME_LAYER_FUNC(BNLL, createLayerFromCaffe<BNLLLayer>);
REG_RUNTIME_LAYER_FUNC(AbsVal, createLayerFromCaffe<AbsLayer>);
REG_RUNTIME_LAYER_FUNC(Power, createLayerFromCaffe<PowerLayer>);
REG_RUNTIME_LAYER_FUNC(BatchNorm, createLayerFromCaffe<BatchNormLayer>);
REG_RUNTIME_LAYER_FUNC(MaxUnpool, createLayerFromCaffe<MaxUnpoolLayer>);
REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer);
REG_RUNTIME_LAYER_CLASS(Identity, BlankLayer);
......@@ -104,6 +108,7 @@ void initModule()
REG_RUNTIME_LAYER_CLASS(DetectionOutput, DetectionOutputLayer);
REG_RUNTIME_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer);
REG_RUNTIME_LAYER_CLASS(Shift, ShiftLayer);
REG_RUNTIME_LAYER_CLASS(Padding, PaddingLayer);
init.status = true;
}
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Implementation of Batch Normalization layer.
*/
#include "batch_norm_layer.hpp"
namespace cv
{
namespace dnn
{
// Stores the configuration; the learned blobs are supplied separately.
BatchNormLayerImpl::BatchNormLayerImpl(float eps_, bool hasWeights_, bool hasBias_)
    : eps(eps_), hasWeights(hasWeights_), hasBias(hasBias_)
{
}
// Creates one output per input, each with the shape of its input.
void BatchNormLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(blobs.size() == 4);

    const size_t count = inputs.size();
    outputs.resize(count);

    for (size_t idx = 0; idx != count; ++idx)
        outputs[idx].create(inputs[idx]->shape());
}
/**
 * Normalizes every channel plane of the single input:
 *     out = (in - blobs[0][c]) * w / sqrt(blobs[1][c] + eps) + b
 * where w is blobs[2][c] when hasWeights is set (otherwise 1) and b is
 * blobs[3][c] when hasBias is set (otherwise 0).
 *
 * All samples of the batch are processed, not only the first one.
 */
void BatchNormLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(inputs.size() == 1);

    Blob &inpBlob = *inputs[0];
    const int numImages = inpBlob.num();
    const int numChannels = inpBlob.channels();

    // Every per-channel parameter is indexed by channel below, so each blob
    // that is read must hold exactly one value per channel.
    CV_Assert(blobs[0].total() == (size_t)numChannels);
    CV_Assert(blobs[1].total() == (size_t)numChannels);
    if (hasWeights)
        CV_Assert(blobs[2].total() == (size_t)numChannels);
    if (hasBias)
        CV_Assert(blobs[3].total() == (size_t)numChannels);

    for (size_t ii = 0; ii < outputs.size(); ii++)
    {
        Blob &outBlob = outputs[ii];

        for (int c = 0; c < numChannels; c++)
        {
            float mean = blobs[0].matRefConst().at<float>(c);
            float invstd = 1 / sqrt(blobs[1].matRefConst().at<float>(c) + eps);
            float w = hasWeights ? blobs[2].matRefConst().at<float>(c) : 1;
            float b = hasBias ? blobs[3].matRefConst().at<float>(c) : 0;

            // The expression is evaluated directly into the output plane.
            for (int n = 0; n < numImages; n++)
                outBlob.getPlane(n, c) = (inpBlob.getPlane(n, c) - mean)*(w*invstd) + b;
        }
    }
}
// Factory: builds the concrete implementation behind the public interface.
Ptr<BatchNormLayer> BatchNormLayer::create(float eps, bool has_weights, bool has_bias)
{
    BatchNormLayerImpl *impl = new BatchNormLayerImpl(eps, has_weights, has_bias);
    return Ptr<BatchNormLayer>(impl);
}
} // namespace dnn
} // namespace cv
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Declaration of Batch Normalization layer.
*/
#ifndef __OPENCV_DNN_LAYERS_BATCH_NORM_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_BATCH_NORM_LAYER_HPP__
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
// Concrete implementation of the BatchNormLayer interface.
class BatchNormLayerImpl : public BatchNormLayer
{
public:
// Stores the three configuration values.
BatchNormLayerImpl(float eps_, bool hasWeights_, bool hasBias_);
// Creates one output blob per input blob.
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
// Computes the normalized output from the input and the learned blobs.
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
private:
// Value added under the square root of the divisor.
float eps;
// Whether the scale blob / the shift blob are applied.
bool hasWeights, hasBias;
};
}
}
#endif // __OPENCV_DNN_LAYERS_BATCH_NORM_LAYER_HPP__
......@@ -53,12 +53,14 @@ namespace cv
namespace dnn
{
ConvolutionLayerImpl::ConvolutionLayerImpl()
BaseConvolutionLayerImpl::BaseConvolutionLayerImpl():
numOutput(-1), group(-1),
inpH(0), inpW(0), inpCn(0),
outH(0), outW(0), outCn(0),
inpGroupCn(0), outGroupCn(0),
ksize(0), colBlobCols(0),
bias(false), tryUseOpenCL(false)
{
tryUseOpenCL = false; //true;
numOutput = -1;
group = -1;
#if HAVE_CBLAS
if (getBlasThreads() != cv::getThreadNum())
{
......@@ -67,37 +69,23 @@ ConvolutionLayerImpl::ConvolutionLayerImpl()
#endif
}
void ConvolutionLayerImpl::init()
void BaseConvolutionLayerImpl::init()
{
CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
bias = (blobs.size() >= 2);
numOutput = blobs[0].num();
CV_Assert(blobs.size() >= 1 && blobs.size() <= 2);
CV_Assert(blobs[0].dims() == 4 && blobs[0].cols() == kernel.width && blobs[0].rows() == kernel.height);
CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num());
//TODO: dilation in OCL mode
bias = (blobs.size() >= 2);
useOpenCL = ocl::useOpenCL() && tryUseOpenCL && dilation == Size(1, 1);
}
void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
void BaseConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() > 0);
init();
CV_Assert(inputs.size() > 0);
const Blob &input = *inputs[0];
CV_Assert(input.dims() == 4 && (input.type() == CV_32F || input.type() == CV_64F));
computeInpOutShape(input);
group = inpCn / blobs[0].channels();
CV_Assert(inpCn % group == 0 && outCn % group == 0);
CV_Assert(blobs[0].num() == outCn && blobs[0].channels() == inpCn / group);
outGroupCn = outCn / group;
inpGroupCn = inpCn / group;
ksize = inpGroupCn * kernel.height * kernel.width;
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i]->type() == input.type());
......@@ -105,36 +93,73 @@ void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vecto
CV_Assert(inputs[i]->rows() == input.rows() && inputs[i]->cols() == input.cols());
}
int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
computeInpOutShape(input);
if (!is1x1())
{
colBlob.create(Shape(ksize, outH * outW), input.type(), allocFlags);
}
int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
if (bias)
{
biasOnesBlob.create(Shape(1, topH * topW), input.type(), allocFlags);
biasOnesBlob.create(Shape(1, outH * outW), input.type(), allocFlags);
biasOnesBlob.setTo(1);
}
outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
{
outputs[i].create(Shape(inputs[i]->num(), topCn, topH, topW), input.type(), allocFlags);
outputs[i].create(Shape(inputs[i]->num(), outCn, outH, outW), input.type(), allocFlags);
}
if (!is1x1())
{
colBlob.create(Shape(ksize, colBlobCols), input.type(), allocFlags);
}
}
bool ConvolutionLayerImpl::is1x1() const
bool BaseConvolutionLayerImpl::is1x1() const
{
return (kernel.height == 1 && kernel.width == 1) &&
(stride.height == 1 && stride.width == 1) &&
(dilation.height == 1 && dilation.width == 1);
}
/**
 * Derives all size-related members (input/output dimensions, group count,
 * per-group channel counts, im2col buffer geometry) from the input blob and
 * the weights blob.
 */
void ConvolutionLayerImpl::computeInpOutShape(const Blob &input)
{
    CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num());

    numOutput = blobs[0].num();

    inpH = input.rows();
    inpW = input.cols();
    inpCn = input.channels();
    outCn = numOutput;

    if (padMode.empty())
    {
        outH = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1;
        outW = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1;
    }
    else
    {
        getConvPoolOutParams(inpH, inpW, kernel, stride, pad, padMode, outH, outW);
    }

    // Reject shapes that would make the divisor or the resulting group count
    // zero: the divisions below would otherwise be integer divisions by zero
    // instead of a regular assertion failure.
    const int weightsCn = blobs[0].channels();
    CV_Assert(weightsCn > 0 && inpCn >= weightsCn);

    group = inpCn / weightsCn;

    CV_Assert(inpCn % group == 0 && outCn % group == 0);
    CV_Assert(blobs[0].num() == outCn && blobs[0].channels() == inpCn / group);

    outGroupCn = outCn / group;
    inpGroupCn = inpCn / group;
    ksize = inpGroupCn * kernel.height * kernel.width;

    colBlobCols = outH * outW;
}
template<typename XMat>
void ConvolutionLayerImpl::forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() > 0);
XMat weightsMat = reshaped(blobs[0].getRefConst<XMat>(), Shape(outCn, ksize));
XMat biasesMat = (bias) ? reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1)) : XMat();
......@@ -213,44 +238,33 @@ void ConvolutionLayerImpl::im2col(const Mat &srcImg, Mat &dstCol)
dstCol = colMat;
}
void ConvolutionLayerImpl::computeInpOutShape(const Blob &input)
{
inpH = input.rows();
inpW = input.cols();
inpCn = input.channels();
outCn = numOutput;
if (padMode.empty())
{
outH = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1;
outW = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1;
}
else
{
getConvPoolOutParams(inpH, inpW, kernel, stride, pad, padMode, outH, outW);
}
topH = outH; topW = outW; topCn = outCn;
}
//Deconvolution
DeConvolutionLayerImpl::DeConvolutionLayerImpl()
void DeConvolutionLayerImpl::computeInpOutShape(const Blob &inpBlob)
{
BlobShape bs0 = blobs[0].shape();
BlobShape bs1 = blobs[1].shape();
CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].channels());
}
numOutput = blobs[0].channels();
void DeConvolutionLayerImpl::computeInpOutShape(const Blob &inpBlob)
{
outH = inpBlob.rows();
outW = inpBlob.cols();
outCn = inpBlob.channels();
inpH = inpBlob.rows();
inpW = inpBlob.cols();
inpCn = inpBlob.channels();
inpH = stride.height * (outH - 1) + kernel.height - 2 * pad.height;
inpW = stride.width * (outW - 1) + kernel.width - 2 * pad.width;
inpCn = numOutput;
outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
outCn = numOutput;
topH = inpH; topW = inpW; topCn = inpCn;
group = inpCn / blobs[0].num();
outGroupCn = outCn / group;
inpGroupCn = inpCn / group;
ksize = outGroupCn * kernel.height * kernel.width;
CV_Assert(inpCn % group == 0 && outCn % group == 0);
CV_Assert(blobs[0].channels() == outCn && blobs[0].num() == inpCn / group);
colBlobCols = inpH * inpW;
}
void DeConvolutionLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
......@@ -264,24 +278,24 @@ void DeConvolutionLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blo
template<typename XMat>
void DeConvolutionLayerImpl::forward_(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
XMat weightsMat = reshaped(blobs[0].getRefConst<XMat>(), Shape(outCn, ksize));
XMat weightsMat = reshaped(blobs[0].getRefConst<XMat>(), Shape(inpCn, ksize));
XMat biasesMat = (bias) ? reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1)) : XMat();
for (size_t ii = 0; ii < outputs.size(); ii++)
{
int numImg = inputs[ii]->size(0);
XMat convBlob = reshaped(inputs[ii]->getRefConst<XMat>(), Shape(numImg*outCn, outH*outW));
XMat decnBlob = reshaped(outputs[ii].getRef<XMat>(), Shape(numImg*inpCn, inpH*inpW));
XMat convBlob = reshaped(inputs[ii]->getRefConst<XMat>(), Shape(numImg*inpCn, inpH*inpW));
XMat decnBlob = reshaped(outputs[ii].getRef<XMat>(), Shape(numImg*outCn, outH*outW));
for (int n = 0; n < numImg; n++)
{
for (int g = 0; g < group; g++)
{
XMat dstMat = decnBlob.rowRange(_Range((g + n * group) * inpGroupCn, inpGroupCn));
XMat dstMat = decnBlob.rowRange(_Range((g + n * group) * outGroupCn, outGroupCn));
XMat &colMat = (is1x1()) ? dstMat : colBlob.getRef<XMat>();
XMat convMat = convBlob.rowRange(_Range((g + n * group) * outGroupCn, outGroupCn));
XMat wghtMat = weightsMat.rowRange(_Range(g * outGroupCn, outGroupCn));
XMat convMat = convBlob.rowRange(_Range((g + n * group) * inpGroupCn, inpGroupCn));
XMat wghtMat = weightsMat.rowRange(_Range(g * inpGroupCn, inpGroupCn));
dnn::gemm(wghtMat, convMat, 1, colMat, 0, GEMM_1_T);
......@@ -306,7 +320,7 @@ void DeConvolutionLayerImpl::col2im(const Mat &colMat, Mat &dstImg)
return;
}
if (dstImg.type() == CV_32F)
col2im_CpuPBody<float>::run(colMat.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg.ptr<float>());
col2im_CpuPBody<float>::run(colMat.ptr<float>(), outGroupCn, outH, outW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg.ptr<float>());
if (dstImg.type() == CV_64F)
col2im_CpuPBody<double>::run(colMat.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg.ptr<double>());
}
......@@ -338,13 +352,15 @@ Ptr<BaseConvolutionLayer> ConvolutionLayer::create(Size kernel, Size stride, Siz
return Ptr<BaseConvolutionLayer>(l);
}
Ptr<BaseConvolutionLayer> DeconvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation)
Ptr<BaseConvolutionLayer> DeconvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation, Size adjustPad)
{
DeConvolutionLayerImpl *l = new DeConvolutionLayerImpl();
l->kernel = kernel;
l->pad = pad;
l->stride = stride;
l->dilation = dilation;
l->adjustPad = adjustPad;
return Ptr<BaseConvolutionLayer>(l);
}
......
......@@ -49,30 +49,38 @@ namespace cv
namespace dnn
{
//TODO: simultaneously convolution and bias addition for cache optimization
class ConvolutionLayerImpl : public ConvolutionLayer
class BaseConvolutionLayerImpl : public ConvolutionLayer
{
public:
ConvolutionLayerImpl();
BaseConvolutionLayerImpl();
virtual void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
virtual void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
virtual void init();
protected:
void init();
virtual void computeInpOutShape(const Blob &inpBlob) = 0;
bool is1x1() const;
int numOutput, group;
int inpH, inpW, inpCn;
int outH, outW, outCn;
int topH, topW, topCn; //switched between inp/out on deconv/conv
int inpGroupCn, outGroupCn;
int ksize;
int colBlobCols;
bool bias;
bool tryUseOpenCL, useOpenCL;
Blob colBlob, biasOnesBlob;
bool is1x1() const;
};
//TODO: simultaneously convolution and bias addition for cache optimization
class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
{
public:
virtual void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
protected:
virtual void computeInpOutShape(const Blob &inpBlob);
template<typename XMat>
......@@ -81,10 +89,9 @@ protected:
void im2col(const UMat &srcImg, UMat &dstCol);
};
class DeConvolutionLayerImpl : public ConvolutionLayerImpl
class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
{
public:
DeConvolutionLayerImpl();
virtual void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
protected:
......
#include "../precomp.hpp"
#include "elementwise_layers.hpp"
#include "opencv2/imgproc.hpp"
namespace cv
{
......@@ -42,5 +43,45 @@ Ptr<PowerLayer> PowerLayer::create(double power /*= 1*/, double scale /*= 1*/, d
return Ptr<PowerLayer>(new ElementWiseLayer<PowerFunctor>(f));
}
////////////////////////////////////////////////////////////////////////////
// Creates one output per input, each with the shape of its input.
void ChannelsPReLULayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(blobs.size() == 1);

    const size_t count = inputs.size();
    outputs.resize(count);

    for (size_t idx = 0; idx != count; ++idx)
        outputs[idx].create(inputs[idx]->shape());
}
/**
 * Applies the per-channel slope stored in blobs[0] to the single input.
 * For every plane the thresholded copy of the input is computed first and
 * then combined as: out = in + (slope - 1) * thresholded.
 *
 * All samples of the batch are processed, not only the first one.
 */
void ChannelsPReLULayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(inputs.size() == 1);

    Blob &inpBlob = *inputs[0];
    const int numImages = inpBlob.num();
    const int numChannels = inpBlob.channels();

    // One slope value per channel is read below.
    CV_Assert(blobs[0].total() == (size_t)numChannels);

    for (size_t ii = 0; ii < outputs.size(); ii++)
    {
        Blob &outBlob = outputs[ii];

        for (int c = 0; c < numChannels; c++)
        {
            float slopeWeight = blobs[0].matRefConst().at<float>(c);

            for (int n = 0; n < numImages; n++)
            {
                cv::threshold(inpBlob.getPlane(n, c), outBlob.getPlane(n, c), 0, 0, cv::THRESH_TOZERO_INV);
                outBlob.getPlane(n, c) = inpBlob.getPlane(n, c) + (slopeWeight - 1)*outBlob.getPlane(n, c);
            }
        }
    }
}
// Factory: builds the concrete implementation behind the public interface.
Ptr<ChannelsPReLULayer> ChannelsPReLULayer::create()
{
    ChannelsPReLULayerImpl *impl = new ChannelsPReLULayerImpl();
    return Ptr<ChannelsPReLULayer>(impl);
}
}
}
}
\ No newline at end of file
......@@ -313,6 +313,16 @@ struct PowerFunctor
#endif
};
// Concrete implementation of the ChannelsPReLULayer interface.
class ChannelsPReLULayerImpl : public ChannelsPReLULayer
{
public:
ChannelsPReLULayerImpl() {}
// Creates one output blob per input blob.
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
// Computes the activation using the per-channel slopes from the learned blob.
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}
}
#endif
......@@ -62,7 +62,8 @@ namespace dnn
const BlobShape &shape0 = inputs[0]->shape();
for (size_t i = 1; i < inputs.size(); ++i)
{
CV_Assert(shape0 == inputs[i]->shape());
BlobShape iShape = inputs[i]->shape();
CV_Assert(shape0 == iShape);
}
outputs.resize(1);
outputs[0].create(shape0);
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Implementation of MaxUnpooling layer.
*/
#include "max_unpooling_layer.hpp"
namespace cv
{
namespace dnn
{
// Remembers the spatial size used for the output blob.
MaxUnpoolLayerImpl::MaxUnpoolLayerImpl(Size outSize_)
    : outSize(outSize_)
{
}
// Creates the single output: shape of the first input with its last two
// dimensions replaced by the configured height and width.
void MaxUnpoolLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(inputs.size() == 2);

    BlobShape unpooledShape = inputs[0]->shape();
    unpooledShape[2] = outSize.height;
    unpooledShape[3] = outSize.width;

    outputs.resize(1);
    Blob &unpooled = outputs[0];
    unpooled.create(unpooledShape);
}
/**
 * Scatters the values of inputs[0] into the output: element k of a channel
 * plane is written to the position (a flat offset inside the matching output
 * plane) stored as a float at element k of inputs[1].
 *
 * Output positions that no index refers to are set to zero.
 * Only a batch size of one is supported (asserted).
 */
void MaxUnpoolLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(inputs.size() == 2);
    Blob& input = *inputs[0];
    Blob& indices = *inputs[1];

    CV_Assert(input.total() == indices.total());
    CV_Assert(input.num() == 1);

    const int numChannels = input.channels();
    const int planeArea = input.size2().area();

    for (size_t i_n = 0; i_n < outputs.size(); i_n++)
    {
        Blob& outBlob = outputs[i_n];
        CV_Assert(numChannels == outBlob.channels());

        // The loop below only touches the indexed positions, so clear the
        // freshly created buffer first.
        outBlob.setTo(0);

        for (int i_c = 0; i_c < numChannels; i_c++)
        {
            // Plane headers are extracted once per channel, not per element.
            Mat outPlane = outBlob.getPlane(0, i_c);
            Mat inpPlane = input.getPlane(0, i_c);
            Mat idxPlane = indices.getPlane(0, i_c);

            for (int i_wh = 0; i_wh < planeArea; i_wh++)
            {
                int index = (int)idxPlane.at<float>(i_wh);
                CV_Assert(index >= 0 && (size_t)index < outPlane.total());
                outPlane.at<float>(index) = inpPlane.at<float>(i_wh);
            }
        }
    }
}
// Factory: builds the concrete implementation behind the public interface.
Ptr<MaxUnpoolLayer> MaxUnpoolLayer::create(Size unpoolSize)
{
    MaxUnpoolLayerImpl *impl = new MaxUnpoolLayerImpl(unpoolSize);
    return Ptr<MaxUnpoolLayer>(impl);
}
}
}
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Declaration of MaxUnpooling layer.
*/
#ifndef __OPENCV_DNN_LAYERS_MAX_UNPOOLING_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_MAX_UNPOOLING_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
// Concrete implementation of the MaxUnpoolLayer interface.
class MaxUnpoolLayerImpl : public MaxUnpoolLayer
{
public:
// outSize_ is the spatial size given to the output blob.
MaxUnpoolLayerImpl(Size outSize_);
// Creates the single output blob.
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
// Writes input values to the output positions given by the indices input.
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
private:
// Height and width of the output planes.
Size outSize;
};
}
}
#endif // __OPENCV_DNN_LAYERS_MAX_UNPOOLING_LAYER_HPP__
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Implementation of padding layer, which adds paddings to input blob.
*/
#include "padding_layer.hpp"
#include <vector>
namespace cv
{
namespace dnn
{
// Reads the layer configuration. "padding_dim" and "padding" are mandatory,
// the remaining parameters default to zero.
PaddingLayer::PaddingLayer(LayerParams &params)
    : paddingDim(params.get<int>("padding_dim")),
      padding(abs(params.get<int>("padding"))),
      inputDims(params.get<int>("input_dims", 0)),
      index(params.get<int>("index", 0)),
      paddingValue(params.get<double>("value", 0))
{
    // NOTE(review): padding already went through abs() above, so in practice
    // only a negative paddingDim can trigger this error.
    const bool negativeParam = (paddingDim < 0) || (padding < 0);
    if (negativeParam)
        CV_Error(cv::Error::StsNotImplemented, "Negative padding and dim aren't supported");
}
// Creates one output per input: same shape, with the padded dimension
// enlarged by `padding`.
void PaddingLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    const size_t numInputs = inputs.size();
    outputs.resize(numInputs);

    for (size_t k = 0; k < numInputs; ++k)
    {
        BlobShape paddedShape = inputs[k]->shape();

        const int padAxis = getPadDim(paddedShape);
        CV_Assert(padAxis < paddedShape.dims());

        paddedShape[padAxis] += padding;
        outputs[k].create(paddedShape);
    }
}
/**
 * Fills every output with paddingValue and copies the input into it in two
 * pieces along the padded dimension: [0, split) stays in place and
 * [split, end) is shifted by `padding`. The split position is `index`, or the
 * full input extent when index is 0 (the padding then ends up after the data).
 */
void PaddingLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    for (size_t i = 0; i < inputs.size(); i++)
    {
        outputs[i].matRef() = paddingValue;

        BlobShape inShape = inputs[i]->shape();
        BlobShape outShape = outputs[i].shape();
        const int dim = getPadDim(inShape);

        int actualIndex = index;
        if (index == 0)
            actualIndex = inShape[dim];

        // A split point outside the input would produce invalid ranges below.
        CV_Assert(actualIndex >= 0 && actualIndex <= inShape[dim]);

        std::vector<std::pair<Range, Range> > srcDstRanges;
        srcDstRanges.push_back(std::make_pair(Range(0, actualIndex), Range(0, actualIndex)));
        srcDstRanges.push_back(std::make_pair(Range(actualIndex, inShape[dim]),
                                              Range(actualIndex + padding, outShape[dim])));

        std::vector<Range> srcRanges(inShape.dims(), Range::all()), dstRanges = srcRanges;

        // The range counter must not reuse the name of the blob index `i`:
        // inputs/outputs are addressed by blob, not by range number.
        for (size_t r = 0; r < srcDstRanges.size(); r++)
        {
            if (srcDstRanges[r].first.empty())
                continue;

            srcRanges[dim] = srcDstRanges[r].first;
            dstRanges[dim] = srcDstRanges[r].second;

            Mat dst = outputs[i].matRef()(&dstRanges[0]);
            // Source and destination are separate blobs, so the region can be
            // copied directly without an intermediate clone.
            Mat src = inputs[i]->matRef()(&srcRanges[0]);
            src.copyTo(dst);
        }
    }
}
// Returns the dimension to pad: paddingDim, shifted by one when inputDims is
// set and the given shape has more dimensions than inputDims.
int PaddingLayer::getPadDim(const BlobShape& shape) const
{
    if (inputDims > 0 && shape.dims() > inputDims)
        return paddingDim + 1;

    return paddingDim;
}
}
}
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Declaration of padding layer, which adds paddings to input blob.
*/
#ifndef __OPENCV_DNN_LAYERS_PADDING_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_PADDING_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
// Layer that enlarges one dimension of its inputs and fills the added
// elements with a constant value.
class PaddingLayer : public Layer
{
public:
    // Zero-initializes every setting so a default-constructed layer never
    // reads indeterminate values.
    PaddingLayer() : paddingDim(0), padding(0), inputDims(0), index(0), paddingValue(0) {}
    // Reads the settings from the layer parameters.
    PaddingLayer(LayerParams &params);

    // Creates one output per input with the padded dimension enlarged.
    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    // Fills the outputs with the padding value and copies the input data in.
    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);

private:
    // Resolves the dimension to pad for a blob of the given shape.
    int getPadDim(const BlobShape& shape) const;

    // paddingDim: dimension to pad; padding: number of added elements;
    // inputDims: dimension count used to detect an extra leading dimension;
    // index: split position of the data (0 means "after all data").
    int paddingDim, padding, inputDims, index;
    // Value written to the added elements.
    float paddingValue;
};
}
}
#endif
......@@ -72,7 +72,7 @@ PoolingLayerImpl::PoolingLayerImpl(int type_, Size kernel_, Size stride_, Size p
void PoolingLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() > 0);
CV_Assert(inputs.size() == 1);
inp = inputs[0]->size2();
......@@ -85,11 +85,19 @@ void PoolingLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Bl
useOpenCL = ocl::useOpenCL();
outputs.resize(inputs.size());
outputs.resize(type == MAX ? 2 * inputs.size() : inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i]->rows() == inp.height && inputs[i]->cols() == inp.width);
outputs[i].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width));
if (type == MAX)
{
outputs[2 * i].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width));
outputs[2 * i + 1].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width));
}
else
{
outputs[i].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width));
}
}
}
......@@ -100,7 +108,7 @@ void PoolingLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &ou
switch (type)
{
case MAX:
maxPooling(*inputs[ii], outputs[ii]);
maxPooling(*inputs[ii], outputs[2 * ii], outputs[2 * ii + 1]);
break;
case AVE:
avePooling(*inputs[ii], outputs[ii]);
......@@ -112,17 +120,17 @@ void PoolingLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &ou
}
}
void PoolingLayerImpl::maxPooling(Blob &src, Blob &dst)
void PoolingLayerImpl::maxPooling(Blob &src, Blob &dst, Blob &mask)
{
if (!useOpenCL)
maxPooling_cpu(src, dst);
maxPooling_cpu(src, dst, mask);
else
{
CV_Assert(maxPooling_ocl(src, dst));
CV_Assert(maxPooling_ocl(src, dst, mask));
}
}
bool PoolingLayerImpl::maxPooling_ocl(Blob &src, Blob &dst)
bool PoolingLayerImpl::maxPooling_ocl(Blob &src, Blob &dst, Blob &mask)
{
return pooling_ocl("MaxPoolForward", src, dst);
}
......@@ -142,7 +150,7 @@ bool PoolingLayerImpl::avePooling_ocl(Blob &src, Blob &dst)
return pooling_ocl("AvePoolForward", src, dst);
}
void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst)
void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst, Blob &mask)
{
CV_DbgAssert(dst.rows() == out.height && dst.cols() == out.width);
......@@ -152,6 +160,7 @@ void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst)
{
const float *srcData = src.ptrf(n, c);
float *dstData = dst.ptrf(n, c);
float *dstMaskData = mask.ptrf(n, c);
for (int ph = 0; ph < out.height; ++ph)
{
......@@ -165,16 +174,21 @@ void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst)
wstart = max(wstart, 0);
const int poolIndex = ph * out.width + pw;
float max_val = -FLT_MAX;
int max_index = -1;
for (int h = hstart; h < hend; ++h)
for (int w = wstart; w < wend; ++w)
{
const int index = h * inp.width + w;
if (srcData[index] > max_val)
{
max_val = srcData[index];
max_index = index;
}
}
dstData[poolIndex] = max_val;
dstMaskData[poolIndex] = max_index;
}
}
}
......@@ -187,7 +201,9 @@ bool PoolingLayerImpl::pooling_ocl(const char *kname, const Blob &src, Blob &dst
{
const UMat &srcMat = src.umatRefConst();
UMat &dstMat = dst.umatRef();
CV_Assert(mask == NULL && srcMat.offset == 0 && dstMat.offset == 0);
UMat* indexesMat = mask == NULL ? NULL : &dst.umatRef();
CV_Assert(srcMat.offset == 0 && dstMat.offset == 0);
ocl::Kernel ker(kname, ocl::dnn::pooling_oclsrc, String("-DT=") + ocl::typeToStr(src.type()));
if (ker.empty())
......@@ -199,7 +215,8 @@ bool PoolingLayerImpl::pooling_ocl(const char *kname, const Blob &src, Blob &dst
ocl::KernelArg::PtrReadOnly(srcMat), s[0], s[1], s[2], s[3],
out.height, out.width, kernel.height, kernel.width,
stride.height, stride.width, pad.height, pad.width,
ocl::KernelArg::PtrWriteOnly(dstMat));
ocl::KernelArg::PtrWriteOnly(dstMat),
ocl::KernelArg(ocl::KernelArg::PTR_ONLY + ocl::KernelArg::WRITE_ONLY, indexesMat));
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
if (!ker.run(1, &nthreads, &wgSize, true))
......
......@@ -58,9 +58,9 @@ class PoolingLayerImpl : public PoolingLayer
bool pooling_ocl(const char *kname, const Blob &src, Blob &dst, Blob *mask = NULL);
void maxPooling(Blob &src, Blob &dst);
void maxPooling_cpu(Blob &src, Blob &dst);
bool maxPooling_ocl(Blob &src, Blob &dst);
void maxPooling(Blob &src, Blob &dst, Blob &mask);
void maxPooling_cpu(Blob &src, Blob &dst, Blob &mask);
bool maxPooling_ocl(Blob &src, Blob &dst, Blob &mask);
void avePooling(Blob &src, Blob &dst);
void avePooling_cpu(Blob &src, Blob &dst);
......
......@@ -39,17 +39,17 @@ public:
virtual void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs, const std::vector<Blob>& blobs) {
for (size_t ii = 0; ii < outputs.size(); ii++)
{
Blob &inpBlob = *inputs[ii];
Blob &outBlob = outputs[ii];
Blob &inpBlob = *inputs[ii];
Blob &outBlob = outputs[ii];
inpBlob.matRef().copyTo(outBlob.matRef());
inpBlob.matRef().copyTo(outBlob.matRef());
for (int n = 0; n < inpBlob.num(); n++)
{
Mat dstMat(inpBlob.channels(), inpBlob.rows() * inpBlob.cols(),
outBlob.type(), outBlob.ptr(n));
dnn::gemm(blobs[0].matRefConst(), biasOnesMat, 1, dstMat, 1); //TODO: gemv
}
for (int n = 0; n < inpBlob.num(); n++)
{
Mat dstMat(inpBlob.channels(), inpBlob.rows() * inpBlob.cols(),
outBlob.type(), outBlob.ptr(n));
dnn::gemm(blobs[0].matRefConst(), biasOnesMat, 1, dstMat, 1); //TODO: gemv
}
}
}
......
......@@ -22,13 +22,15 @@ class ShiftLayerImpl;
class ShiftLayer : public Layer
{
cv::Ptr<ShiftLayerImpl> impl;
public:
ShiftLayer() {}
ShiftLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
private:
cv::Ptr<ShiftLayerImpl> impl;
};
}
......
......@@ -24,10 +24,7 @@
* POSSIBILITY OF SUCH DAMAGE.
**************************************************************************************/
__kernel void MaxPoolForward(const int nthreads, __global T* bottom_data, const int num, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, __global T* top_data
#ifdef MASK
, __global int* mask, __global T* top_mask
#endif
__kernel void MaxPoolForward(const int nthreads, __global T* bottom_data, const int num, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, __global T* top_data, __global int* mask
) {
int index = get_global_id(0);
int tmp = get_global_size(0);
......@@ -55,13 +52,10 @@ __kernel void MaxPoolForward(const int nthreads, __global T* bottom_data, const
}
}
top_data[index] = maxval;
#ifdef MASK
if (mask) {
mask[index] = maxidx;
} else {
top_mask[index] = maxidx;
}
#endif
}
}
......
......@@ -45,11 +45,11 @@
#include <map>
#include <algorithm>
#include <iostream>
#include <fstream>
namespace cv {
namespace dnn {
#if defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER
#include "THDiskFile.h"
#ifdef NDEBUG
......@@ -91,6 +91,7 @@ static inline bool endsWith(const String &str, const char *substr)
struct TorchImporter : public ::cv::dnn::Importer
{
typedef std::map<String, std::pair<int, Blob> > TensorsMap;
Net net;
THFile *file;
......@@ -102,16 +103,10 @@ struct TorchImporter : public ::cv::dnn::Importer
{
String thName, apiType;
dnn::LayerParams params;
std::vector<Module*> modules;
std::vector<cv::Ptr<Module> > modules;
Module(const String &_thName, const String &_apiType = String())
: thName(_thName), apiType(_apiType) {}
~Module()
{
for (size_t i = 0; i < modules.size(); i++)
delete modules[i];
}
};
Module *rootModule;
......@@ -184,6 +179,7 @@ struct TorchImporter : public ::cv::dnn::Importer
readedIndexes.insert(index);
int size = readInt();
for (int i = 0; i < size; i++)
{
readObject(); //key
......@@ -271,7 +267,7 @@ struct TorchImporter : public ::cv::dnn::Importer
storages.insert(std::make_pair(index, storageMat));
}
void readTorchTable(Dict &scalarParams, std::map<String, Blob> &tensorParams)
void readTorchTable(Dict &scalarParams, TensorsMap &tensorParams)
{
int luaType = readInt();
int index = readInt();
......@@ -309,7 +305,7 @@ struct TorchImporter : public ::cv::dnn::Importer
if (tensors.count(index)) //tensor was readed
{
tensorParams.insert(std::make_pair(key, tensors[index]));
tensorParams.insert(std::make_pair(key, std::make_pair(index, tensors[index])));
}
else if (storages.count(index)) //storage was readed
{
......@@ -347,9 +343,9 @@ struct TorchImporter : public ::cv::dnn::Importer
std::cout << scalarParams;
std::cout << "#" << tensorParams.size() << " tensorParams:\n";
std::map<String,Blob>::const_iterator it;
std::map<String,std::pair<int, Blob> >::const_iterator it;
for (it = tensorParams.begin(); it != tensorParams.end(); it++)
std::cout << it->first << ": Tensor " << it->second.shape() << "\n";
std::cout << it->first << ": Tensor " << it->second.second.shape() << "\n";
}
}
......@@ -375,9 +371,11 @@ struct TorchImporter : public ::cv::dnn::Importer
int indexStorage = readInt();
if (readedIndexes.count(indexStorage) == 0)
{
int typeStorage = parseStorageType(readTorchClassName());
String className = readTorchClassName();
int typeStorage = parseStorageType(className);
CV_Assert(typeStorage >= 0 && typeTensor == typeStorage);
readTorchStorage(indexStorage, typeStorage);
readedIndexes.insert(indexStorage);
}
//small check
......@@ -396,8 +394,7 @@ struct TorchImporter : public ::cv::dnn::Importer
}
//allocate Blob
Mat srcMat(ndims, (int*)isizes, typeTensor , storages[indexStorage].ptr() + offset, (size_t*)ssteps);
//int dstType = (typeTensor == CV_64F) ? CV_64F : CV_32F;
Mat srcMat(ndims, (int*)isizes, typeTensor , storages[indexStorage].ptr() + offset*CV_ELEM_SIZE(typeTensor), (size_t*)ssteps);
int dstType = CV_32F;
Blob blob;
......@@ -436,12 +433,7 @@ struct TorchImporter : public ::cv::dnn::Importer
void readTorchObject(int index)
{
if(readedIndexes.count(index))
{
if(!storages.count(index) && !tensors.count(index))
CV_Error(Error::StsNotImplemented, "Objects which have multiple references are not supported");
else
return;
}
return;
String className = readTorchClassName();
String nnName;
......@@ -461,12 +453,15 @@ struct TorchImporter : public ::cv::dnn::Importer
else if (isNNClass(className, nnName))
{
Dict scalarParams;
std::map<String, Blob> tensorParams;
TensorsMap tensorParams;
Module *newModule = new Module(nnName);
cv::Ptr<Module> newModule(new Module(nnName));
cv::dnn::LayerParams &layerParams = newModule->params;
if (nnName == "Sequential" || nnName == "Parallel" || nnName == "Concat")
layerParams.set("torch_index", index);
if (nnName == "Sequential" || nnName == "Parallel" ||
nnName == "Concat" || nnName == "ConcatTable" || nnName == "JoinTable")
{
Module *parentModule = curModule;
curModule->modules.push_back(newModule);
......@@ -483,6 +478,10 @@ struct TorchImporter : public ::cv::dnn::Importer
{
layerParams.set("dimension", scalarParams.get<int>("dimension"));
}
if (nnName == "JoinTable")
{
layerParams.set("dimension", scalarParams.get<int>("dimension"));
}
}
else if (nnName == "SpatialConvolution")
{
......@@ -490,12 +489,12 @@ struct TorchImporter : public ::cv::dnn::Importer
readTorchTable(scalarParams, tensorParams);
CV_Assert(tensorParams.count("weight"));
layerParams.blobs.push_back(tensorParams["weight"]);
layerParams.blobs.push_back(tensorParams["weight"].second);
bool bias = tensorParams.count("bias") != 0;
layerParams.set("bias_term", bias);
if (bias)
layerParams.blobs.push_back(tensorParams["bias"]);
layerParams.blobs.push_back(tensorParams["bias"].second);
layerParams.set("num_output", scalarParams.get<int>("nOutputPlane"));
convertTorchKernelsParams(scalarParams, layerParams);
......@@ -507,8 +506,10 @@ struct TorchImporter : public ::cv::dnn::Importer
newModule->apiType = "Pooling";
readTorchTable(scalarParams, tensorParams);
if (nnName == "SpatialMaxPooling")
if (nnName == "SpatialMaxPooling") {
layerParams.set("pool", "MAX");
layerParams.set("indices_blob_id", tensorParams["indices"].first);
}
if (nnName == "SpatialAveragePooling")
layerParams.set("pool", "AVE");
convertTorchKernelsParams(scalarParams, layerParams);
......@@ -521,12 +522,12 @@ struct TorchImporter : public ::cv::dnn::Importer
readTorchTable(scalarParams, tensorParams);
CV_Assert(tensorParams.count("weight"));
Blob weightBlob = tensorParams["weight"];
Blob weightBlob = tensorParams["weight"].second;
layerParams.blobs.push_back(weightBlob);
bool bias = tensorParams.count("bias") != 0;
if (bias)
layerParams.blobs.push_back(tensorParams["bias"]);
layerParams.blobs.push_back(tensorParams["bias"].second);
layerParams.set("bias_term", bias);
layerParams.set("num_output", weightBlob.size(0));
......@@ -549,24 +550,205 @@ struct TorchImporter : public ::cv::dnn::Importer
}
else if (nnName == "ReLU")
{
curModule->modules.push_back(new Module(nnName, "ReLU"));
curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "ReLU")));
readObject();
}
else if (nnName == "Tanh")
{
curModule->modules.push_back(new Module(nnName, "TanH"));
curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "TanH")));
readObject();
}
else if (nnName == "Sigmoid")
{
curModule->modules.push_back(new Module(nnName, "Sigmoid"));
curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "Sigmoid")));
readObject();
}
else if (nnName == "SpatialBatchNormalization")
{
newModule->apiType = "BatchNorm";
readTorchTable(scalarParams, tensorParams);
CV_Assert(tensorParams.count("running_var") &&
tensorParams.count("running_mean"));
layerParams.blobs.push_back(tensorParams["running_mean"].second);
layerParams.blobs.push_back(tensorParams["running_var"].second);
CV_Assert(scalarParams.has("eps"));
layerParams.set("eps", float(scalarParams.get<double>("eps")));
layerParams.blobs.push_back(Blob());
layerParams.blobs.push_back(Blob());
if (tensorParams.count("weight"))
{
layerParams.set("has_weight", true);
layerParams.blobs[2] = tensorParams["weight"].second;
}
if (tensorParams.count("bias"))
{
layerParams.set("has_bias", true);
layerParams.blobs[3] = tensorParams["bias"].second;
}
curModule->modules.push_back(newModule);
}
else if (nnName == "PReLU")
{
readTorchTable(scalarParams, tensorParams);
CV_Assert(tensorParams.count("weight"));
size_t outputChannels = static_cast<int>(scalarParams.get<double>("nOutputPlane"));
if (outputChannels) {
CV_Assert(tensorParams["weight"].second.total() == outputChannels);
layerParams.blobs.push_back(tensorParams["weight"].second);
newModule->apiType = "ChannelsPReLU";
}
else {
CV_Assert(tensorParams["weight"].second.total() == 1);
float negative_slope = *tensorParams["weight"].second.ptrf();
layerParams.set("negative_slope", negative_slope);
newModule->apiType = "ReLU";
}
curModule->modules.push_back(newModule);
}
else if (nnName == "SpatialDropout")
{
readTorchTable(scalarParams, tensorParams);
CV_Assert(scalarParams.has("p"));
float scale = 1 - scalarParams.get<double>("p");
CV_Assert(scale > 0);
newModule->apiType = "Power";
layerParams.set("scale", scale);
curModule->modules.push_back(newModule);
}
else if (nnName == "Identity")
{
readTorchTable(scalarParams, tensorParams);
newModule->apiType = "Identity";
curModule->modules.push_back(newModule);
}
else if (nnName == "Padding")
{
readTorchTable(scalarParams, tensorParams);
newModule->apiType = "Padding";
CV_Assert(scalarParams.has("pad") &&
scalarParams.has("dim"));
layerParams.set("padding_dim",
static_cast<int>(scalarParams.get<double>("dim") - 1));
layerParams.set("padding", static_cast<int>(scalarParams.get<double>("pad")));
if (scalarParams.has("nInputDim"))
layerParams.set("input_dims",
static_cast<int>(scalarParams.get<double>("nInputDim")));
if (scalarParams.has("value"))
layerParams.set("value", scalarParams.get<double>("value"));
if (scalarParams.has("index"))
layerParams.set("index",
static_cast<int>(scalarParams.get<double>("index") - 1));
curModule->modules.push_back(newModule);
}
else if (nnName == "CAddTable")
{
curModule->modules.push_back(newModule);
readObject();
}
else if (nnName == "SpatialDilatedConvolution")
{
readTorchTable(scalarParams, tensorParams);
newModule->apiType = "Convolution";
CV_Assert(scalarParams.has("padW") &&
scalarParams.has("padH")&&
scalarParams.has("dW")&&
scalarParams.has("dH")&&
scalarParams.has("dilationW")&&
scalarParams.has("dilationH")&&
scalarParams.has("kW")&&
scalarParams.has("kH")&&
scalarParams.has("nOutputPlane"));
layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));
layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));
layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));
layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));
layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));
layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));
layerParams.set("dilation_w", static_cast<int>(scalarParams.get<double>("dilationW")));
layerParams.set("dilation_h", static_cast<int>(scalarParams.get<double>("dilationH")));
layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));
layerParams.blobs.push_back(tensorParams["weight"].second);
bool bias = tensorParams.count("bias");
layerParams.set("bias_term", bias);
if (bias)
layerParams.blobs.push_back(tensorParams["bias"].second);
curModule->modules.push_back(newModule);
}
else if (nnName == "SpatialFullConvolution")
{
readTorchTable(scalarParams, tensorParams);
newModule->apiType = "Deconvolution";
CV_Assert(scalarParams.has("padW") &&
scalarParams.has("padH")&&
scalarParams.has("dW")&&
scalarParams.has("dH")&&
scalarParams.has("adjW")&&
scalarParams.has("adjH")&&
scalarParams.has("kW")&&
scalarParams.has("kH")&&
scalarParams.has("nOutputPlane"));
layerParams.set("kernel_w", static_cast<int>(scalarParams.get<double>("kW")));
layerParams.set("kernel_h", static_cast<int>(scalarParams.get<double>("kH")));
layerParams.set("pad_w", static_cast<int>(scalarParams.get<double>("padW")));
layerParams.set("pad_h", static_cast<int>(scalarParams.get<double>("padH")));
layerParams.set("stride_w", static_cast<int>(scalarParams.get<double>("dW")));
layerParams.set("stride_h", static_cast<int>(scalarParams.get<double>("dH")));
layerParams.set("adj_w", static_cast<int>(scalarParams.get<double>("adjW")));
layerParams.set("adj_h", static_cast<int>(scalarParams.get<double>("adjH")));
layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));
layerParams.blobs.push_back(tensorParams["weight"].second);
bool bias = tensorParams.count("bias");
layerParams.set("bias_term", bias);
if (bias)
layerParams.blobs.push_back(tensorParams["bias"].second);
curModule->modules.push_back(newModule);
}
else if (nnName == "SpatialMaxUnpooling")
{
readTorchTable(scalarParams, tensorParams);
CV_Assert(scalarParams.has("oheight") &&
scalarParams.has("owidth"));
CV_Assert(tensorParams.count("indices"));
layerParams.set("out_h", static_cast<int>(scalarParams.get<double>("oheight")));
layerParams.set("out_w", static_cast<int>(scalarParams.get<double>("owidth"))/2);
layerParams.set("indices_blob_id", tensorParams["indices"].first);
curModule->modules.push_back(newModule);
}
else
{
delete newModule;
CV_Error(Error::StsNotImplemented, "Unknown nn class \"" + className + "\"");
readObject();
}
}
else
......@@ -606,15 +788,16 @@ struct TorchImporter : public ::cv::dnn::Importer
return "l" + toString(++this->moduleCounter) + "_" + label;
}
int fill(Module *module, int prevLayerId = 0, int prevOutNum = 0)
int fill(Module *module, std::vector<std::pair<int, Module*> >& addedModules, int prevLayerId = 0, int prevOutNum = 0)
{
if (module == NULL)
return prevLayerId;
if (module->apiType.length())
{
int newLayerId = this->net.addLayer(generateLayerName(module->apiType), module->apiType, module->params);
int newLayerId = net.addLayer(generateLayerName(module->apiType), module->apiType, module->params);
net.connect(prevLayerId, prevOutNum, newLayerId, 0);
addedModules.push_back(std::make_pair(newLayerId, module));
return newLayerId;
}
else
......@@ -623,7 +806,7 @@ struct TorchImporter : public ::cv::dnn::Importer
{
for (size_t i = 0; i < module->modules.size(); i++)
{
prevLayerId = fill(module->modules[i], prevLayerId, prevOutNum);
prevLayerId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
prevOutNum = 0;
}
return prevLayerId;
......@@ -640,10 +823,11 @@ struct TorchImporter : public ::cv::dnn::Importer
for (int i = 0; i < (int)module->modules.size(); i++)
{
newId = fill(module->modules[i], splitId, i);
newId = fill(module->modules[i], addedModules, splitId, i);
net.connect(newId, 0, mergeId, i);
}
addedModules.push_back(std::make_pair(mergeId, module));
return mergeId;
}
else if (module->thName == "Parallel")
......@@ -664,19 +848,92 @@ struct TorchImporter : public ::cv::dnn::Importer
for (int i = 0; i < (int)module->modules.size(); i++)
{
net.connect(splitId, i, reshapeId, i);
newId = fill(module->modules[i], reshapeId, i);
newId = fill(module->modules[i], addedModules, reshapeId, i);
net.connect(newId, 0, mergeId, i);
}
addedModules.push_back(std::make_pair(mergeId, module));
return mergeId;
}
else if (module->thName == "ConcatTable") {
int newId, splitId;
LayerParams splitParams;
splitId = net.addLayer(generateLayerName("torchSplit"), "Split", splitParams);
net.connect(prevLayerId, prevOutNum, splitId, 0);
addedModules.push_back(std::make_pair(splitId, module));
for (int i = 0; i < (int)module->modules.size(); i++)
{
newId = fill(module->modules[i], addedModules, splitId, i);
}
return newId;
}
else if (module->thName == "JoinTable") {
std::vector<int> ids = net.getUnconnectedOutLayers();
int mergeId;
LayerParams mergeParams;
mergeParams.set("axis", module->params.get<int>("dimension") - 1);
mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
addedModules.push_back(std::make_pair(mergeId, module));
for (int i = 0; i < ids.size(); i++)
{
net.connect(ids[i], 0, mergeId, i);
}
return mergeId;
}
else if (module->thName == "CAddTable") {
String name = generateLayerName("torchCAddTable");
std::vector<int> ids = net.getUnconnectedOutLayers();
LayerParams params;
params.set("operation", "sum");
int id = net.addLayer(name, "Eltwise", params);
for (int i = 0; i < ids.size(); i++)
{
net.connect(ids[i], 0, id, i);
}
addedModules.push_back(std::make_pair(id, module));
return id;
}
else if (module->thName == "SpatialMaxUnpooling") {
String name = generateLayerName("torchMaxUnpooling");
int id = net.addLayer(name, "MaxUnpool", module->params);
net.connect(prevLayerId, 0, id, 0);
CV_Assert(module->params.has("indices_blob_id"));
int indicesBlobId = module->params.get<int>("indices_blob_id");
for(int i = 0; i < addedModules.size(); i++)
{
if (addedModules[i].second->apiType == "Pooling" &&
addedModules[i].second->params.has("indices_blob_id") &&
addedModules[i].second->params.get<int>("indices_blob_id") == indicesBlobId)
{
net.connect(addedModules[i].first, 1, id, 1);
break;
}
}
return id;
}
}
CV_Error(Error::StsInternal, "Unexpected torch container: " + module->thName);
return -1;
}
void populateNet(Net net)
void populateNet(Net net_)
{
if (rootModule == NULL)
{
......@@ -687,8 +944,9 @@ struct TorchImporter : public ::cv::dnn::Importer
readObject();
}
this->net = net;
fill(rootModule);
net = net_;
std::vector<std::pair<int, Module*> > addedModules;
fill(rootModule, addedModules);
}
};
......@@ -707,20 +965,5 @@ Blob readTorchBlob(const String &filename, bool isBinary)
return importer->tensors.begin()->second;
}
#else //ENABLE_TORCH_IMPORTER
// Stub for builds where ENABLE_TORCH_IMPORTER is not set: always raises
// Error::StsNotImplemented. The trailing return only satisfies the signature.
Ptr<Importer> createTorchImporter(const String&, bool)
{
    const String message("Module was build without Torch importer");
    CV_Error(Error::StsNotImplemented, message);
    return Ptr<Importer>();
}
// Stub for builds where ENABLE_TORCH_IMPORTER is not set: always raises
// Error::StsNotImplemented. The trailing return only satisfies the signature.
Blob readTorchBlob(const String&, bool)
{
    const String message("Module was build without Torch importer");
    CV_Error(Error::StsNotImplemented, message);
    return Blob();
}
#endif //ENABLE_TORCH_IMPORTER
}
}
......@@ -154,6 +154,7 @@ TEST(Layer_Test_DeConvolution, Accuracy)
{
OCL_OFF(testLayerUsingCaffeModels("layer_deconvolution", true, false));
}
OCL_TEST(Layer_Test_DeConvolution, Accuracy)
{
OCL_ON(testLayerUsingCaffeModels("layer_deconvolution", true, false););
......
......@@ -38,13 +38,12 @@ TEST(Test_TensorFlow, read_inception)
resize(sample, input, Size(224, 224));
input -= 128; // mean sub
std::vector<Mat> inpMats;
inpMats.push_back(input);
dnn::Blob inputBlob = dnn::Blob::fromImages(input);
net.setBlob("_input.input", Blob(inpMats));
net.setBlob("_input.input", inputBlob);
net.forward();
Blob out = net.getBlob("output");
Blob out = net.getBlob("softmax2");
std::cout << out.dims() << std::endl;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment