Commit 9db548c6 authored by Pruthvi, committed by Scott Cyphers

bn fprop mkldnn optimized implementation (#581)

* - Added support for the optimized bn MKLDNN implementation in the CPU emitter
- Modified the bn unit tests to support the new implementation
- Added layout assignment for the bn op
- Style fix

(cherry picked from commit 7747a40806d62c126059d5c873adcd2e61a0adb0)

* modified value initialization in cpu_fusion to use explicit float literals

(cherry picked from commit 03499d380073d0197ab8cbd154eb03f63b042a48)

* fix compilation issue

* Addressed PR comments
- added an exception if the gamma and beta layout is not memory::format::x
- throw an exception if the bn op is not an MKLDNN op

* fix compilation issue

* added support to handle multiple outputs in fprop bn fusion (see the usage sketch after the commit message)

* - Removed the layout pass for bn
- fixed an autodiff bug in bn
- added "Add" to the dispatcher in the CPU layout pass

* style fix

* Fix bprop batchnorm test with get_output_elements

* Style fix
parent f2e6b48b
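Before the diff: a minimal sketch (mirroring the updated unit tests below; names, shapes, and the helper function are illustrative) of the new BatchNorm usage this commit introduces. Mean and variance are no longer inputs, and the op's three results (normalized output, mean, and variance) are retrieved with op::GetOutputElement.

#include "ngraph/ngraph.hpp"
#include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/get_output_element.hpp"

using namespace ngraph;

std::shared_ptr<Function> make_bn_function()
{
    // NCHW input with C = 2; gamma and beta are per-channel vectors of length C
    auto input = std::make_shared<op::Parameter>(element::f32, Shape{2, 2, 2, 1});
    auto gamma = std::make_shared<op::Parameter>(element::f32, Shape{2});
    auto beta = std::make_shared<op::Parameter>(element::f32, Shape{2});
    double eps = 0.001;

    // mean and variance are now computed by the op instead of being passed in
    auto bn = std::make_shared<op::BatchNorm>(eps, gamma, beta, input);

    // the three outputs are accessed through GetOutputElement
    auto normalized = std::make_shared<op::GetOutputElement>(bn, 0);
    auto mean = std::make_shared<op::GetOutputElement>(bn, 1);
    auto variance = std::make_shared<op::GetOutputElement>(bn, 2);

    return std::make_shared<Function>(NodeVector{normalized, mean, variance},
                                      op::ParameterVector{input, gamma, beta});
}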
@@ -21,21 +21,20 @@
ngraph::op::BatchNorm::BatchNorm(double eps,
std::shared_ptr<ngraph::Node> gamma,
std::shared_ptr<ngraph::Node> beta,
std::shared_ptr<ngraph::Node> input,
std::shared_ptr<ngraph::Node> mean,
std::shared_ptr<ngraph::Node> variance)
: RequiresTensorViewArgs("BatchNorm", {gamma, beta, input, mean, variance})
std::shared_ptr<ngraph::Node> input)
: RequiresTensorViewArgs("BatchNorm", {gamma, beta, input})
, m_bn_input_shape(input->get_shape())
, m_bn_variance_shape(variance->get_shape())
, m_bn_mean_shape(mean->get_shape())
, m_epsilon(eps)
{
add_output(input->get_element_type(), m_bn_input_shape);
if (m_bn_input_shape.size() < 2)
{
throw ngraph_error("input tensor to batchnorm much have tensor of atleast rank 2");
}
else
{
this->m_bn_variance_shape.push_back(input->get_shape()[1]);
this->m_bn_mean_shape.push_back(input->get_shape()[1]);
}
if (m_bn_input_shape[1] == 0)
{
@@ -49,51 +48,27 @@ ngraph::op::BatchNorm::BatchNorm(double eps,
throw ngraph_error("gamma, beta, mean, variance shoud have all rank 1");
}
// assuming input shape (N, C, H, W), check that the size of mean and
// variance equals the channel (C) dimension
if (mean->get_shape()[0] != m_bn_input_shape[1])
{
throw ngraph_error("mean size is not equal to input channel size");
}
if (variance->get_shape()[0] != m_bn_input_shape[1])
{
throw ngraph_error("variance size is not equal to input channel size");
}
if (variance->get_shape().size() != mean->get_shape().size())
{
throw ngraph_error("mean and variance rank does not match");
}
if (gamma->get_shape().size() != beta->get_shape().size())
{
throw ngraph_error("gamma and beta rank does not match");
}
if (input->get_element_type() != mean->get_element_type())
{
throw ngraph_error("input tensor and mean element type does not match");
}
if (input->get_element_type() != variance->get_element_type())
{
throw ngraph_error("input tensor and variance element type does not match");
}
if (gamma->get_element_type() != beta->get_element_type())
{
throw ngraph_error("gamma and beta element type does not match");
}
add_output(input->get_element_type(), m_bn_input_shape);
add_output(input->get_element_type(), m_bn_mean_shape);
add_output(input->get_element_type(), m_bn_variance_shape);
}
std::shared_ptr<ngraph::Node>
ngraph::op::BatchNorm::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 5)
if (new_args.size() != 3)
throw ngraph_error("Incorrect number of new arguments");
return std::make_shared<BatchNorm>(
m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), new_args.at(4));
return std::make_shared<BatchNorm>(m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2));
}
ngraph::op::BatchNormBackprop::BatchNormBackprop(double eps,
@@ -174,10 +149,10 @@ void ngraph::op::BatchNorm::generate_adjoints(autodiff::Adjoints& adjoints,
auto gamma = get_input_op(0);
auto beta = get_input_op(1);
auto input = get_input_op(2);
auto mean = get_input_op(3);
auto variance = get_input_op(4);
auto mean = std::make_shared<op::GetOutputElement>(shared_from_this(), 1);
auto var = std::make_shared<op::GetOutputElement>(shared_from_this(), 2);
auto bbn = std::make_shared<op::BatchNormBackprop>(
get_eps_value(), gamma, beta, input, mean, variance, delta);
get_eps_value(), gamma, beta, input, mean, var, delta);
auto dinput = std::make_shared<op::GetOutputElement>(bbn, 0);
auto dgamma = std::make_shared<op::GetOutputElement>(bbn, 1);
auto dbeta = std::make_shared<op::GetOutputElement>(bbn, 2);
@@ -33,9 +33,7 @@ namespace ngraph
BatchNorm(double eps,
std::shared_ptr<Node> gamma,
std::shared_ptr<Node> beta,
std::shared_ptr<Node> input,
std::shared_ptr<Node> mean,
std::shared_ptr<Node> variance);
std::shared_ptr<Node> input);
const Shape& get_inputs_shape() const { return m_bn_input_shape; }
const Shape& get_variance_shape() const { return m_bn_variance_shape; }
@@ -301,14 +301,26 @@ namespace ngraph
auto gamma_shape = args[0].get_shape();
auto beta_shape = args[1].get_shape();
auto input_shape = args[2].get_shape();
auto mean_shape = args[3].get_shape();
auto variance_shape = args[4].get_shape();
auto result_shape = out[0].get_shape();
auto mean_shape = out[1].get_shape();
auto variance_shape = out[2].get_shape();
// get input element type
const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
args[2].get_element_type());
const string& gamma_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 0));
const string& beta_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 1));
if (gamma_format.compare("memory::format::x") != 0 ||
beta_format.compare("memory::format::x") != 0)
{
throw std::runtime_error(
"gamma layout->" + gamma_format + ", beta layout->" + beta_format +
" should match and both should have memory::format::x format");
}
writer << "{\n";
writer.indent++;
@@ -329,16 +341,20 @@ namespace ngraph
// get the eps value from the bn node
writer << "auto epsilon = " << batchnorm->get_eps_value() << ";\n";
const string& input_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 2));
const string& result_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_output_mkldnn_format(node, 0));
// Bind to CPU engine
writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
// create memory descriptors
writer << "memory::desc input_data_desc = memory::desc({" << join(input_shape)
<< "}, " << et << ", memory::format::nchw);\n";
<< "}, " << et << ", " << input_format << ");\n";
// TODO define weights by stacking gamma and beta values
writer << "memory::desc weights_desc = memory::desc({" << join(weights_shape)
<< "}, " << et << ", memory::format::nc);\n";
writer << "memory::desc result_desc = memory::desc({" << join(result_shape) << "}, "
<< et << ", memory::format::nchw);\n";
<< et << ", " << result_format << ");\n";
writer << "memory::desc mean_desc = memory::desc({" << join(mean_shape) << "}, "
<< et << ", memory::format::x);\n";
writer << "memory::desc variance_desc = memory::desc({" << join(variance_shape)
@@ -349,17 +365,17 @@ namespace ngraph
<< args[2].get_name() << ");\n";
writer << "memory weights = memory({weights_desc, cpu_engine}, bn_weights.data()"
<< ");\n";
writer << "memory mean = memory({mean_desc, cpu_engine}, " << args[3].get_name()
<< ");\n";
writer << "memory variance = memory({variance_desc, cpu_engine}, "
<< args[4].get_name() << ");\n";
writer << "memory result = memory({result_desc, cpu_engine}, " << out[0].get_name()
<< ");\n";
writer << "memory mean = memory({mean_desc, cpu_engine}, " << out[1].get_name()
<< ");\n";
writer << "memory variance = memory({variance_desc, cpu_engine}, "
<< out[2].get_name() << ");\n";
// create batchnorm descriptor
writer << "batch_normalization_forward::desc bn_fprop_desc = "
"batch_normalization_forward::desc(forward_training,"
<< "input_data_desc, epsilon, use_global_stats|use_scale_shift);\n";
<< "input_data_desc, epsilon, use_scale_shift);\n";
// bn fprop primitive descriptor
writer
<< "batch_normalization_forward::primitive_desc bn_fprop_prim_desc = "
@@ -368,8 +384,8 @@ namespace ngraph
// create a batchnorm fprop primitive
writer << "batch_normalization_forward bn_fprop = "
"batch_normalization_forward(bn_fprop_prim_desc, "
"primitive::at(input_data),primitive::at(mean), primitive::at(variance),"
<< "primitive::at(weights), result); \n";
"primitive::at(input_data),"
<< "primitive::at(weights), result, mean, variance); \n";
// create stream and execute
writer << "stream s = stream(stream::kind::eager);\n"
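For readability, a rough, self-contained sketch of what the emitted code now amounts to, assuming the MKLDNN 0.x C++ API and illustrative shapes (1x2x2x2 input, C = 2): the forward-training primitive takes only the input and the stacked gamma/beta weights, drops use_global_stats, and writes mean and variance as outputs.

#include <mkldnn.hpp>

using namespace mkldnn;

void batchnorm_fprop_sketch(float* input_ptr, float* weights_ptr, float* result_ptr,
                            float* mean_ptr, float* variance_ptr, double epsilon)
{
    engine cpu_engine = engine(engine::cpu, 0);

    // descriptors: input/result use the layout chosen by the layout pass (nchw here),
    // weights stack gamma and beta as a 2 x C matrix, mean/variance are 1-D (format x)
    memory::desc input_data_desc = memory::desc({1, 2, 2, 2}, memory::data_type::f32,
                                                memory::format::nchw);
    memory::desc weights_desc = memory::desc({2, 2}, memory::data_type::f32,
                                             memory::format::nc);
    memory::desc result_desc = memory::desc({1, 2, 2, 2}, memory::data_type::f32,
                                            memory::format::nchw);
    memory::desc mean_desc = memory::desc({2}, memory::data_type::f32, memory::format::x);
    memory::desc variance_desc = memory::desc({2}, memory::data_type::f32, memory::format::x);

    memory input_data = memory({input_data_desc, cpu_engine}, input_ptr);
    memory weights = memory({weights_desc, cpu_engine}, weights_ptr);
    memory result = memory({result_desc, cpu_engine}, result_ptr);
    memory mean = memory({mean_desc, cpu_engine}, mean_ptr);
    memory variance = memory({variance_desc, cpu_engine}, variance_ptr);

    // forward training with use_scale_shift only: mean and variance are computed
    // by the primitive and passed as outputs, not as inputs
    batch_normalization_forward::desc bn_fprop_desc = batch_normalization_forward::desc(
        forward_training, input_data_desc, epsilon, use_scale_shift);
    batch_normalization_forward::primitive_desc bn_fprop_prim_desc =
        batch_normalization_forward::primitive_desc(bn_fprop_desc, cpu_engine);

    batch_normalization_forward bn_fprop =
        batch_normalization_forward(bn_fprop_prim_desc, primitive::at(input_data),
                                    primitive::at(weights), result, mean, variance);

    stream s = stream(stream::kind::eager);
    s.submit({bn_fprop}).wait();
}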
@@ -30,6 +30,7 @@
#include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/divide.hpp"
#include "ngraph/ops/dot.hpp"
#include "ngraph/ops/get_output_element.hpp"
#include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/pad.hpp"
#include "ngraph/ops/parameter.hpp"
@@ -301,14 +302,12 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_fprop_bn()
// get epsilon value
auto eps_ptr = std::dynamic_pointer_cast<op::Constant>(pattern_map[eps_label]);
double epsilon = *(reinterpret_cast<const double*>(eps_ptr->get_data_ptr()));
auto bn_node = std::shared_ptr<Node>(new op::BatchNorm(epsilon,
pattern_map[gamma_label],
pattern_map[beta_label],
pattern_map[input],
pattern_map[mean_label],
pattern_map[variance_label]));
return bn_node;
auto bn_node = std::make_shared<op::BatchNorm>(
epsilon, pattern_map[gamma_label], pattern_map[beta_label], pattern_map[input]);
auto normalized_output = std::shared_ptr<Node>(new op::GetOutputElement(bn_node, 0));
return normalized_output;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(add_beta, callback);
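As a usage note, a fusion pass like this is exercised by registering it with the pass manager and running it over a function containing the hand-built mean/variance/normalize subgraph; a minimal sketch follows (the header path and function name are assumptions, not part of this diff):

#include "ngraph/pass/manager.hpp"
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"

using namespace ngraph;

// Run the CPU fusion pass over a function that contains the unfused
// batchnorm subgraph; on a match it is replaced by a multi-output
// op::BatchNorm whose first output feeds the original consumers.
void run_bn_fusion(std::shared_ptr<Function> func)
{
    pass::Manager pass_manager;
    pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
    pass_manager.run_passes(func);
}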
@@ -710,6 +710,7 @@ namespace ngraph
#define TI(x) type_index(typeid(x))
static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
{TI(ngraph::op::Add), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Add>},
{TI(ngraph::op::Convolution), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Convolution>},
{TI(ngraph::op::ConvolutionBackpropData),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::ConvolutionBackpropData>},
@@ -328,7 +328,7 @@ static shared_ptr<ngraph::Function>
else if (node_op == "BatchNorm")
{
auto epsilon = node_js.at("eps").get<double>();
node = make_shared<op::BatchNorm>(epsilon, args[0], args[1], args[2], args[3], args[4]);
node = make_shared<op::BatchNorm>(epsilon, args[0], args[1], args[2]);
}
else if (node_op == "BatchNormBackprop")
{
@@ -25,6 +25,7 @@
#include "ngraph/log.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/get_output_element.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/pass/graph_rewrite.hpp"
#include "ngraph/pass/manager.hpp"
@@ -254,18 +255,21 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2)
auto input_shape = Shape{1, 2, 2, 2};
auto input = make_shared<op::Parameter>(element::f32, input_shape);
auto mean_shape = Shape{2};
auto mean = make_shared<op::Parameter>(element::f32, mean_shape);
auto var_shape = Shape{2};
auto var = make_shared<op::Parameter>(element::f32, var_shape);
auto gamma_shape = Shape{2};
auto gamma = make_shared<op::Parameter>(element::f32, gamma_shape);
auto beta_shape = Shape{2};
auto beta = make_shared<op::Parameter>(element::f32, beta_shape);
double eps = 0.001;
auto shape_r = Shape{1, 2, 2, 2};
auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input, mean, var);
auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input);
auto f = make_shared<Function>(bn, op::ParameterVector{mean, var, input, gamma, beta});
auto output_rt = std::make_shared<op::GetOutputElement>(bn, 0);
auto mean_rt = std::make_shared<op::GetOutputElement>(bn, 1);
auto variance_rt = std::make_shared<op::GetOutputElement>(bn, 2);
auto f = make_shared<Function>(NodeVector{output_rt, mean_rt, variance_rt},
op::ParameterVector{input, gamma, beta});
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
@@ -283,15 +287,13 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2)
0.64589411f,
0.4375872f,
0.89177299f});
auto _mean = backend->make_primary_tensor_view(element::f32, mean_shape);
copy_data(_mean, vector<float>{0.60291237f, 0.59972727f});
auto _var = backend->make_primary_tensor_view(element::f32, var_shape);
copy_data(_var, vector<float>{0.00472505f, 0.03617825f});
auto _gamma = backend->make_primary_tensor_view(element::f32, gamma_shape);
copy_data(_gamma, vector<float>{1.0f, 1.0f});
auto _beta = backend->make_primary_tensor_view(element::f32, beta_shape);
copy_data(_beta, vector<float>{0.0f, 0.0f});
auto result = backend->make_primary_tensor_view(element::f32, shape_r);
auto bn_output = backend->make_primary_tensor_view(element::f32, shape_r);
auto result_mean = backend->make_primary_tensor_view(element::f32, mean_shape);
auto result_variance = backend->make_primary_tensor_view(element::f32, var_shape);
vector<float> expected_result{-0.71498716f,
1.48388731f,
@@ -301,8 +303,14 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2)
0.23943391f,
-0.84090298f,
1.51462936f};
cf->call({_mean, _var, _input, _gamma, _beta}, {result});
EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(result)));
vector<float> expected_mean{0.602912f, 0.599727f};
vector<float> expected_variance{0.00472505f, 0.0361782f};
cf->call({_input, _gamma, _beta}, {bn_output, result_mean, result_variance});
EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(bn_output)));
EXPECT_TRUE(test::all_close(expected_mean, read_vector<float>(result_mean)));
EXPECT_TRUE(test::all_close(expected_variance, read_vector<float>(result_variance)));
}
TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1)
@@ -310,18 +318,21 @@ TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1)
auto input_shape = Shape{2, 2, 2, 1};
auto input = make_shared<op::Parameter>(element::f32, input_shape);
auto mean_shape = Shape{2};
auto mean = make_shared<op::Parameter>(element::f32, mean_shape);
auto var_shape = Shape{2};
auto var = make_shared<op::Parameter>(element::f32, var_shape);
auto gamma_shape = Shape{2};
auto gamma = make_shared<op::Parameter>(element::f32, gamma_shape);
auto beta_shape = Shape{2};
auto beta = make_shared<op::Parameter>(element::f32, beta_shape);
double eps = 0.001;
auto shape_r = Shape{2, 2, 2, 1};
auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input, mean, var);
auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input);
auto f = make_shared<Function>(bn, op::ParameterVector{mean, var, input, gamma, beta});
auto output_rt = std::make_shared<op::GetOutputElement>(bn, 0);
auto mean_rt = std::make_shared<op::GetOutputElement>(bn, 1);
auto variance_rt = std::make_shared<op::GetOutputElement>(bn, 2);
auto f = make_shared<Function>(NodeVector{output_rt, mean_rt, variance_rt},
op::ParameterVector{input, gamma, beta});
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
@@ -337,20 +348,24 @@ TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1)
0.64589411f,
0.4375872f,
0.89177299f});
auto _mean = backend->make_primary_tensor_view(element::f32, mean_shape);
copy_data(_mean, vector<float>{0.60291237f, 0.59972727f});
auto _var = backend->make_primary_tensor_view(element::f32, var_shape);
copy_data(_var, vector<float>{0.00472505f, 0.03617825f});
auto _gamma = backend->make_primary_tensor_view(element::f32, gamma_shape);
copy_data(_gamma, vector<float>{1.0f, 1.0f});
auto _beta = backend->make_primary_tensor_view(element::f32, beta_shape);
copy_data(_beta, vector<float>{0.0f, 0.0f});
auto result = backend->make_primary_tensor_view(element::f32, shape_r);
auto bn_output = backend->make_primary_tensor_view(element::f32, shape_r);
auto result_mean = backend->make_primary_tensor_view(element::f32, mean_shape);
auto result_variance = backend->make_primary_tensor_view(element::f32, var_shape);
vector<float> expected_result{
-0.714987f, 1.48389f, 0.015746f, -0.284436f, -2.36912f, 0.56806f, -0.840903f, 1.51463f};
cf->call({_mean, _var, _input, _gamma, _beta}, {result});
EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(result)));
-0.30327f, 1.1561f, -0.0963782f, -0.434702f, -1.4011f, 0.548275f, -1.06187f, 1.59295f};
vector<float> expected_mean{0.583388f, 0.619252f};
vector<float> expected_variance{0.0119972f, 0.0282681f};
cf->call({_input, _gamma, _beta}, {bn_output, result_mean, result_variance});
EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(bn_output)));
EXPECT_TRUE(test::all_close(expected_mean, read_vector<float>(result_mean)));
EXPECT_TRUE(test::all_close(expected_variance, read_vector<float>(result_variance)));
}
TEST(cpu_fusion, fuse_fprop_bn)
......@@ -404,7 +419,10 @@ TEST(cpu_fusion, bn_bprop_n4c3h2w2)
auto beta = make_shared<op::Parameter>(element::f32, beta_shape);
double eps = 0.001;
auto shape_r = Shape{4, 3, 2, 2};
auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input, mean, var);
auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input);
auto bn_dx = make_shared<op::GetOutputElement>(bn, 0);
auto bn_dgamma = make_shared<op::GetOutputElement>(bn, 1);
auto bn_dbeta = make_shared<op::GetOutputElement>(bn, 2);
auto manager = runtime::Manager::get("CPU");
auto backend = manager->allocate_backend();
@@ -436,7 +454,8 @@ TEST(cpu_fusion, bn_bprop_n4c3h2w2)
vector<float> deltaData(shape_size(shape_r), 20.0f);
copy_data(_delta, deltaData);
auto f = make_shared<Function>(bn, op::ParameterVector{mean, var, input, gamma, beta});
auto f = make_shared<Function>(NodeVector{bn_dx, bn_dgamma, bn_dbeta},
op::ParameterVector{mean, var, input, gamma, beta});
auto C = std::make_shared<op::Parameter>(element::f32, shape_r);
auto dinput = bn->backprop_node(input, C);