Commit dd8017f9 authored by Jayaram Bobba

Merge branch 'master' into jmenon/cpu_layout_infra

parents 6bf066d7 00fb503f
......@@ -2036,8 +2036,87 @@ void runtime::cpu::CPU_Emitter::EmitConvolutionBackpropFilters(
auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape();
auto result_shape = out[0].get_shape();
auto arg0_rank = arg0_shape.size();
auto arg1_rank = arg1_shape.size();
bool data_dilated = false;
for (size_t s : convolution->get_data_dilation_strides_forward())
{
data_dilated = data_dilated || (s != 1);
}
if (!data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
args[0].get_element_type() == element::f32)
{
const string& elem_type = get_mkldnn_data_type(args[0].get_element_type().c_type_string());
Strides window_dilation_strides_adjusted;
writer << "kernel::convolution<" << out[0].get_type() << ">(" << args[0].get_name() << ",\n";
for (size_t s : convolution->get_window_dilation_strides_forward())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto emit_memory_desc = [&writer](const std::string& var,
const std::string& shape,
const std::string& type,
const std::string& layout) {
writer << "memory::desc " << var << " = memory::desc({" << shape << "}, " << type
<< ", memory::format::" << layout << ");\n";
};
auto emit_memory =
[&writer](const std::string& var, const std::string& desc, const std::string& data) {
writer << "memory " << var << " = memory({" << desc << ", cpu_engine}, " << data
<< ");\n";
};
auto emit_memory_dims = [&writer](const std::string& var, const std::string& dims) {
writer << "memory::dims " << var << "{" << dims << "};\n";
};
writer << "{\n";
writer.indent++;
writer << "try {\n";
writer.indent++;
writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
emit_memory_desc("data_desc", join(arg0_shape), elem_type, "nchw");
emit_memory_desc("delta_desc", join(arg1_shape), elem_type, "nchw");
emit_memory_desc("result_desc", join(result_shape), elem_type, "oihw");
emit_memory("data", "data_desc", args[0].get_name());
emit_memory("delta", "delta_desc", args[1].get_name());
emit_memory("result", "result_desc", out[0].get_name());
emit_memory_dims("dilates", join(window_dilation_strides_adjusted));
emit_memory_dims("strides", join(convolution->get_window_movement_strides_forward()));
emit_memory_dims("padding_l", join(convolution->get_padding_below_forward()));
emit_memory_dims("padding_r", join(convolution->get_padding_above_forward()));
writer << "convolution_backward_weights::desc bwd_weights_desc("
"algorithm::convolution_direct, "
"data_desc, result_desc, delta_desc, strides, dilates,"
"padding_l, padding_r, padding_kind::zero);\n"
"convolution_forward::primitive_desc fwd_pd({prop_kind::forward, "
"algorithm::convolution_direct, data_desc, "
"result_desc, delta_desc, strides, dilates, padding_l, padding_r, "
"padding_kind::zero}, cpu_engine);\n"
"convolution_backward_weights::primitive_desc bwd_weights_pd(bwd_weights_desc, "
"cpu_engine, fwd_pd);\n"
"convolution_backward_weights bwd_weights(bwd_weights_pd, data, delta, "
"result);\n"
"stream s = stream(stream::kind::eager);\n"
"s.submit({bwd_weights}).wait();\n";
writer.indent--;
writer << "} catch (const mkldnn::error& e) {\n";
writer.indent++;
writer << "throw ngraph::ngraph_error(\"MKLDNN ERROR (\" + std::to_string("
"e.status) + \"): \" + e.message);\n";
writer.indent--;
writer << "}\n";
writer.indent--;
writer << "}\n";
}
else
{
writer << "kernel::convolution<" << out[0].get_type() << ">(" << args[0].get_name()
<< ",\n";
writer << " " << args[1].get_name() << ",\n";
writer << " " << out[0].get_name() << ",\n";
writer << " {" << join(arg0_shape) << "},\n";
......@@ -2054,6 +2133,7 @@ void runtime::cpu::CPU_Emitter::EmitConvolutionBackpropFilters(
writer << " {"
<< join(convolution->get_data_dilation_strides_backward()) << "},\n";
writer << " 1, 0, 0, 1, 1, 0, false);\n";
}
}
void runtime::cpu::CPU_Emitter::EmitConvolutionBackpropData(
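For orientation, this is roughly the C++ that the MKLDNN branch of EmitConvolutionBackpropFilters above generates once its emitted strings are assembled. All concrete values are illustrative: the shapes and strides stand in for arg0_shape, arg1_shape, result_shape and the forward strides; input0, input1, and output0 stand in for the buffer names returned by args[i].get_name() and out[0].get_name(); and memory::data_type::f32 is assumed to be what get_mkldnn_data_type returns for float. The fragment also assumes the generated translation unit brings the mkldnn namespace into scope, since the emitted code uses unqualified MKLDNN names.

// Illustrative emitter output: 8x3x28x28 input, 8x16x26x26 delta, 16x3x3x3 filter
// gradient, unit strides, unit window dilation (so "dilates" is all zeros), no padding.
{
    try {
        engine cpu_engine = engine(engine::cpu, 0);
        memory::desc data_desc = memory::desc({8, 3, 28, 28}, memory::data_type::f32, memory::format::nchw);
        memory::desc delta_desc = memory::desc({8, 16, 26, 26}, memory::data_type::f32, memory::format::nchw);
        memory::desc result_desc = memory::desc({16, 3, 3, 3}, memory::data_type::f32, memory::format::oihw);
        memory data = memory({data_desc, cpu_engine}, input0);
        memory delta = memory({delta_desc, cpu_engine}, input1);
        memory result = memory({result_desc, cpu_engine}, output0);
        memory::dims dilates{0, 0};
        memory::dims strides{1, 1};
        memory::dims padding_l{0, 0};
        memory::dims padding_r{0, 0};
        convolution_backward_weights::desc bwd_weights_desc(algorithm::convolution_direct,
            data_desc, result_desc, delta_desc, strides, dilates,
            padding_l, padding_r, padding_kind::zero);
        convolution_forward::primitive_desc fwd_pd({prop_kind::forward, algorithm::convolution_direct,
            data_desc, result_desc, delta_desc, strides, dilates,
            padding_l, padding_r, padding_kind::zero}, cpu_engine);
        convolution_backward_weights::primitive_desc bwd_weights_pd(bwd_weights_desc, cpu_engine, fwd_pd);
        convolution_backward_weights bwd_weights(bwd_weights_pd, data, delta, result);
        stream s = stream(stream::kind::eager);
        s.submit({bwd_weights}).wait();
    } catch (const mkldnn::error& e) {
        throw ngraph::ngraph_error("MKLDNN ERROR (" + std::to_string(e.status) + "): " + e.message);
    }
}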
......@@ -2067,9 +2147,86 @@ void runtime::cpu::CPU_Emitter::EmitConvolutionBackpropData(
auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape();
auto result_shape = out[0].get_shape();
auto arg0_rank = arg0_shape.size();
auto arg1_rank = arg1_shape.size();
bool data_dilated = false;
for (size_t s : convolution->get_data_dilation_strides_forward())
{
data_dilated = data_dilated || (s != 1);
}
if (!data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
args[0].get_element_type() == element::f32)
{
const string& elem_type = get_mkldnn_data_type(args[0].get_element_type().c_type_string());
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides_forward())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
auto emit_memory_desc = [&writer](const std::string& var,
const std::string& shape,
const std::string& type,
const std::string& layout) {
writer << "memory::desc " << var << " = memory::desc({" << shape << "}, " << type
<< ", memory::format::" << layout << ");\n";
};
auto emit_memory =
[&writer](const std::string& var, const std::string& desc, const std::string& data) {
writer << "memory " << var << " = memory({" << desc << ", cpu_engine}, " << data
<< ");\n";
};
auto emit_memory_dims = [&writer](const std::string& var, const std::string& dims) {
writer << "memory::dims " << var << "{" << dims << "};\n";
};
writer << "{\n";
writer.indent++;
writer << "try {\n";
writer.indent++;
writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
emit_memory_desc("weight_desc", join(arg0_shape), elem_type, "oihw");
emit_memory_desc("delta_desc", join(arg1_shape), elem_type, "nchw");
emit_memory_desc("result_desc", join(result_shape), elem_type, "nchw");
emit_memory("weight", "weight_desc", args[0].get_name());
emit_memory("delta", "delta_desc", args[1].get_name());
emit_memory("result", "result_desc", out[0].get_name());
emit_memory_dims("dilates", join(window_dilation_strides_adjusted));
emit_memory_dims("strides", join(convolution->get_window_movement_strides_forward()));
emit_memory_dims("padding_l", join(convolution->get_padding_below_forward()));
emit_memory_dims("padding_r", join(convolution->get_padding_above_forward()));
writer << "convolution_backward_data::desc bwd_data_desc(algorithm::convolution_direct, "
"result_desc, weight_desc, delta_desc, strides, dilates, "
"padding_l, padding_r, padding_kind::zero);\n"
"convolution_forward::primitive_desc fwd_pd({prop_kind::forward, "
"algorithm::convolution_direct, result_desc, weight_desc, delta_desc, "
"strides, dilates, padding_l, padding_r, padding_kind::zero}, cpu_engine);\n"
"convolution_backward_data::primitive_desc bwd_data_pd(bwd_data_desc, "
"cpu_engine, fwd_pd);\n"
"convolution_backward_data bwd_data(bwd_data_pd, delta, weight, result);\n"
"stream s = stream(stream::kind::eager);\n"
"s.submit({bwd_data}).wait();\n";
writer.indent--;
writer << "} catch (const mkldnn::error& e) {\n";
writer.indent++;
writer << "throw ngraph::ngraph_error(\"MKLDNN ERROR (\" + std::to_string("
"e.status) + \"): \" + e.message);\n";
writer.indent--;
writer << "}\n";
writer.indent--;
writer << "}\n";
}
else
{
// Note that args[1] and args[0] are switched here from the usual order.
writer << "kernel::convolution<" << out[0].get_type() << ">(" << args[1].get_name() << ",\n";
writer << "kernel::convolution<" << out[0].get_type() << ">(" << args[1].get_name()
<< ",\n";
writer << " " << args[0].get_name() << ",\n";
writer << " " << out[0].get_name() << ",\n";
writer << " {" << join(arg1_shape) << "},\n";
......@@ -2086,6 +2243,7 @@ void runtime::cpu::CPU_Emitter::EmitConvolutionBackpropData(
writer << " {"
<< join(convolution->get_data_dilation_strides_backward()) << "},\n";
writer << " 0, 1, 0, 1, 0, 1, true);\n";
}
}
void runtime::cpu::CPU_Emitter::EmitNot(codegen::CodeWriter& writer,
......
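Both of the backprop emitters above gate the MKLDNN path on the same condition: no forward data dilation, both arguments of rank 4, and f32 element type; anything else falls through to the reference kernel::convolution call. A hypothetical helper capturing that predicate (a sketch for clarity only; the source performs this check inline, and the header paths are assumed for this revision):

#include "ngraph/common.hpp"              // ngraph::Strides (assumed header location)
#include "ngraph/types/element_type.hpp"  // ngraph::element::Type, ngraph::element::f32 (assumed header location)

static bool mkldnn_conv_backprop_usable(const ngraph::Strides& data_dilation_strides_forward,
                                        size_t arg0_rank,
                                        size_t arg1_rank,
                                        const ngraph::element::Type& element_type)
{
    bool data_dilated = false;
    for (size_t s : data_dilation_strides_forward)
    {
        // Any non-unit forward data dilation disqualifies the MKLDNN path.
        data_dilated = data_dilated || (s != 1);
    }
    return !data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
           element_type == ngraph::element::f32;
}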
......@@ -246,6 +246,8 @@ void runtime::cpu::CPU_ExternalFunction::compile()
for (shared_ptr<Node> node : current_function->get_ordered_ops())
{
if (dynamic_cast<op::Convolution*>(node.get()) ||
dynamic_cast<op::ConvolutionBackpropData*>(node.get()) ||
dynamic_cast<op::ConvolutionBackpropFilters*>(node.get()) ||
dynamic_cast<op::AvgPool*>(node.get()) || dynamic_cast<op::MaxPool*>(node.get()) ||
dynamic_cast<op::AvgPoolBackprop*>(node.get()))
{
......@@ -263,6 +265,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
writer +=
R"(#include <Eigen/Dense>
#include "ngraph/except.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
......
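The compile() hunk above adds ConvolutionBackpropData and ConvolutionBackpropFilters to the set of ops that make the generated source pull in MKLDNN support. Written as a hypothetical helper rather than the inline chain of dynamic_casts (illustration only; it assumes the op headers already included by this file):

// Sketch of the per-node check performed inline in CPU_ExternalFunction::compile().
static bool node_uses_mkldnn(const ngraph::Node* node)
{
    return dynamic_cast<const ngraph::op::Convolution*>(node) != nullptr ||
           dynamic_cast<const ngraph::op::ConvolutionBackpropData*>(node) != nullptr ||
           dynamic_cast<const ngraph::op::ConvolutionBackpropFilters*>(node) != nullptr ||
           dynamic_cast<const ngraph::op::AvgPool*>(node) != nullptr ||
           dynamic_cast<const ngraph::op::MaxPool*>(node) != nullptr ||
           dynamic_cast<const ngraph::op::AvgPoolBackprop*>(node) != nullptr;
}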
......@@ -69,7 +69,7 @@ ngraph::op::MatmulBias::MatmulBias(std::shared_ptr<ngraph::Node> W,
throw ngraph_error("product dimensions are not equal while creating MatmulBias");
}
-auto dot_shape = Shape{shape_w.at(1 - dot_dimension_w), shape_x.at(1 - dot_dimension_x)};
+Shape dot_shape{shape_w.at(1 - dot_dimension_w), shape_x.at(1 - dot_dimension_x)};
NGRAPH_DEBUG << "dot_shape shape = " << vector_to_string(dot_shape)
<< " , b shape = " << vector_to_string(b->get_shape());
......
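The MatmulBias change only switches dot_shape to direct list-initialization; the computed value is the same. As a worked example, using the 2x4 / 4x1 shapes that appear in the fusion pattern below and assuming no transpose flags (so dot_dimension_w == 1 and dot_dimension_x == 0; the flag values are an assumption for illustration):

Shape shape_w{2, 4};                 // W is 2x4
Shape shape_x{4, 1};                 // x is 4x1
size_t dot_dimension_w = 1;          // contracted axis of W
size_t dot_dimension_x = 0;          // contracted axis of x
Shape dot_shape{shape_w.at(1 - dot_dimension_w),   // shape_w.at(0) == 2
                shape_x.at(1 - dot_dimension_x)};  // shape_x.at(1) == 1
// dot_shape == Shape{2, 1}: the 2x1 result of a 2x4 by 4x1 product.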
......@@ -99,10 +99,10 @@ static std::vector<T> apply_permutation(std::vector<T> input, ngraph::AxisVector
void ngraph::runtime::cpu::pass::CPUFusion::construct_gemm_pattern()
{
-auto shape_w = Shape{2, 4};
-auto shape_x = Shape{4, 1};
-auto shape_b = Shape{1};
-auto shape_dot = Shape{2, 1};
+Shape shape_w{2, 4};
+Shape shape_x{4, 1};
+Shape shape_b{1};
+Shape shape_dot{2, 1};
auto W = std::make_shared<pattern::op::Label>(element::f32, shape_w);
auto x = std::make_shared<pattern::op::Label>(element::f32, shape_x);
......
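This hunk and the remaining test hunks repeat one mechanical change: "auto v = Shape{...}" becomes "Shape v{...}". Both forms yield the same Shape; the new form list-initializes the variable directly instead of copy-initializing it from a temporary (which compilers typically elide anyway), and reads slightly tighter. A minimal before/after sketch (the header path is assumed for this revision):

#include "ngraph/shape.hpp" // ngraph::Shape

void shape_init_styles()
{
    auto shape_old = ngraph::Shape{2, 4}; // old style: temporary Shape, then copy/move-init (usually elided)
    ngraph::Shape shape_new{2, 4};        // new style: direct list-initialization
    // Both variables hold the dimensions {2, 4}.
}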
......@@ -45,11 +45,11 @@ using namespace std;
TEST(Argon_fusion, fuse_max_with_constant_zero_input_as_relu)
{
-auto shape_a = Shape{1, 5};
+Shape shape_a{1, 5};
auto A = op::Constant::create(element::f32, shape_a, {0, 0, 0, 0, 0});
auto B = make_shared<op::Parameter>(element::f32, shape_a);
auto max = make_shared<op::Maximum>(A, B);
-auto shape_rt = Shape{1, 5};
+Shape shape_rt{1, 5};
auto f = make_shared<Function>(max, op::Parameters{B});
auto manager = runtime::Manager::get("ARGON");
......
......@@ -30,7 +30,7 @@ using namespace ngraph;
TEST(INTERPRETER, nan_check_input)
{
-auto shape = Shape{4};
+Shape shape{4};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Divide>(A, B), op::Parameters{A, B});
......@@ -56,7 +56,7 @@ TEST(INTERPRETER, nan_check_input)
TEST(INTERPRETER, nan_check_output)
{
-auto shape = Shape{4};
+Shape shape{4};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Divide>(A, B), op::Parameters{A, B});
......
......@@ -25,9 +25,9 @@ using namespace std;
shared_ptr<runtime::TensorView>
make_reduce_result(function<shared_ptr<Node>(const shared_ptr<Node>&, const AxisSet&)> func)
{
-auto shape_a = Shape{3, 2};
+Shape shape_a{3, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
-auto shape_rt = Shape{2};
+Shape shape_rt{2};
auto f = make_shared<Function>(func(A, {0}), op::Parameters{A});
auto manager = runtime::Manager::get("INTERPRETER");
auto external = manager->compile(f);
......@@ -45,9 +45,9 @@ shared_ptr<runtime::TensorView>
shared_ptr<runtime::TensorView> make_reduce_result_true(
function<shared_ptr<Node>(const shared_ptr<Node>&, const AxisSet&, bool)> func)
{
-auto shape_a = Shape{3, 2};
+Shape shape_a{3, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
-auto shape_rt = Shape{2};
+Shape shape_rt{2};
auto f = make_shared<Function>(func(A, {0}, true), op::Parameters{A});
auto manager = runtime::Manager::get("INTERPRETER");
auto external = manager->compile(f);
......@@ -65,9 +65,9 @@ shared_ptr<runtime::TensorView> make_reduce_result_true(
shared_ptr<runtime::TensorView> make_reduce_result_false(
function<shared_ptr<Node>(const shared_ptr<Node>&, const AxisSet&, bool)> func)
{
-auto shape_a = Shape{3, 2};
+Shape shape_a{3, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
-auto shape_rt = Shape{2};
+Shape shape_rt{2};
auto f = make_shared<Function>(func(A, {0}, false), op::Parameters{A});
auto manager = runtime::Manager::get("INTERPRETER");
auto external = manager->compile(f);
......
......@@ -27,7 +27,7 @@ using namespace ngraph;
TEST(builder_xla, simple)
{
-auto shape = Shape{2, 2};
+Shape shape{2, 2};
auto pA = make_shared<op::Parameter>(element::f32, shape);
auto pB = make_shared<op::Parameter>(element::f32, shape);
......
......@@ -45,9 +45,9 @@ using namespace std;
TEST(cpu_fusion, gemm_pattern)
{
-auto shape_w = Shape{2, 4};
-auto shape_x = Shape{4, 1};
-auto shape_b = Shape{1};
+Shape shape_w{2, 4};
+Shape shape_x{4, 1};
+Shape shape_b{1};
auto A = make_shared<op::Parameter>(element::f32, shape_w);
auto B = make_shared<op::Parameter>(element::f32, shape_x);
auto C = make_shared<op::Parameter>(element::f32, shape_b);
......@@ -92,9 +92,9 @@ TEST(cpu_fusion, gemm_pattern)
TEST(cpu_fusion, gemm_cpu)
{
-auto shapeA = Shape{3, 2};
-auto shapeB = Shape{2, 3};
-auto shapeC = Shape{2, 2};
+Shape shapeA{3, 2};
+Shape shapeB{2, 3};
+Shape shapeC{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shapeA);
auto B = make_shared<op::Parameter>(element::f32, shapeB);
......@@ -131,10 +131,10 @@ TEST(cpu_fusion, gemm_cpu)
TEST(cpu_fusion, cpu_fusion_pass_basic)
{
-auto shape = Shape{};
-auto shape_w = Shape{2, 4};
-auto shape_x = Shape{4, 1};
-auto shape_b = Shape{1};
+Shape shape{};
+Shape shape_w{2, 4};
+Shape shape_x{4, 1};
+Shape shape_b{1};
auto A = make_shared<op::Parameter>(element::f32, shape_w);
auto B = make_shared<op::Parameter>(element::f32, shape_x);
auto C = make_shared<op::Parameter>(element::f32, shape_b);
......
......@@ -28,7 +28,7 @@ using namespace std;
TEST(inline, basic)
{
-auto shape = Shape{2, 2};
+Shape shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto C = make_shared<op::Parameter>(element::f32, shape);
......@@ -53,7 +53,7 @@ TEST(inline, basic)
TEST(inline, recursive)
{
-auto shape = Shape{2, 2};
+Shape shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>((A + B), op::Parameters{A, B});
......
......@@ -37,7 +37,7 @@ namespace ng = ngraph;
TEST(liveness, constant)
{
-auto shape = Shape{1};
+Shape shape{1};
auto c = op::Constant::create(element::i32, shape, {5});
auto f = make_shared<Function>(make_shared<op::Negative>(c), op::Parameters{});
......
......@@ -45,7 +45,7 @@ TEST(pass_manager, add)
TEST(pass_manager, module_add_function)
{
// First create "f(A,B,C) = (A+B)*C".
-auto shape = Shape{2, 2};
+Shape shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto C = make_shared<op::Parameter>(element::f32, shape);
......
......@@ -227,7 +227,7 @@ TEST(memory_layout, constant)
pass_manager.register_pass<pass::MemoryLayout>();
pass_manager.register_pass<pass::DumpSorted>(dump_file);
-auto shape = Shape{1};
+Shape shape{1};
auto c = op::Constant::create(element::i32, shape, {5});
auto f = make_shared<Function>(make_shared<op::Negative>(c), op::Parameters{});
......
......@@ -257,7 +257,7 @@ static void run_passes(pass::Manager& pass_manager,
TEST(pattern, graph_rewrite)
{
-auto shape = Shape{};
+Shape shape{};
pass::Manager pass_manager;
pass_manager.register_pass<TestGraphRewrite>();
......@@ -374,7 +374,7 @@ TEST(pattern, graph_rewrite)
TEST(pattern, matcher)
{
-auto shape = Shape{};
+Shape shape{};
auto a = make_shared<op::Parameter>(element::i32, shape);
TestMatcher n(nullptr);
ASSERT_TRUE(n.match(a, a));
......
......@@ -212,11 +212,11 @@ def emit_test(t,f):
template = '''
TEST (${BACKEND_NAME}, %s)
{
-auto shape_a = Shape{%s};
+Shape shape_a{%s};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
-auto shape_b = Shape{%s};
+Shape shape_b{%s};
auto B = make_shared<op::Parameter>(element::f32, shape_b);
-auto shape_r = Shape{%s};
+Shape shape_r{%s};
auto make_graph = [A, B] {
return make_shared<Function>(make_shared<op::Convolution>(A, B,
Strides{%s}, // move_strides
......
......@@ -33,7 +33,7 @@ using json = nlohmann::json;
TEST(serialize, main)
{
// First create "f(A,B,C) = (A+B)*C".
-auto shape = Shape{2, 2};
+Shape shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto C = make_shared<op::Parameter>(element::f32, shape);
......
......@@ -206,7 +206,7 @@ TEST(util, all_close)
TEST(util, traverse_functions)
{
// First create "f(A,B,C) = (A+B)*C".
-auto shape = Shape{2, 2};
+Shape shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto C = make_shared<op::Parameter>(element::f32, shape);
......@@ -320,7 +320,7 @@ TEST_F(CloneTest, clone_function_full)
TEST(graph_util, clone_multiple_results)
{
-auto shape = Shape{2, 2};
+Shape shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto C = make_shared<op::Parameter>(element::f32, shape);
......