Unverified commit 9ebe7a0c, authored by Jayaram Bobba, committed by GitHub

Merge pull request #446 from NervanaSystems/jbobba/mkldnn-outlining

Dynamically create and destroy mkldnn engine objects to work around f…
parents f4d3089c a30ee67a
@@ -90,8 +90,9 @@ static const string& get_mkldnn_data_type(const string& type)
 void runtime::cpu::CPU_Emitter::EmitMKLDNNPreamble(codegen::CodeWriter& writer)
 {
-    writer << "using namespace mkldnn;\n";
-    writer << "auto cpu_engine = engine(engine::cpu, 0);\n";
+    writer << "// MKLDNN Preamble\n";
+    writer << "#include <mkldnn.hpp>\n";
+    writer << "using namespace mkldnn;\n\n";
 }

 void runtime::cpu::CPU_Emitter::EmitNop(codegen::CodeWriter& writer,
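As the hunk above shows, EmitMKLDNNPreamble no longer constructs a function-scoped cpu_engine; it only emits the include and the using-directive. Rendered out, the generated preamble is now just:

```cpp
// MKLDNN Preamble
#include <mkldnn.hpp>
using namespace mkldnn;
```

Engine construction moves instead into each emitted op block, as the convolution and pooling hunks below show.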
@@ -1926,19 +1927,21 @@ void runtime::cpu::CPU_Emitter::EmitConvolution(codegen::CodeWriter& writer,
         writer << "{\n";
         writer.indent++;
-        writer << "auto input_data_desc = memory::desc({" << join(arg0_shape) << "}, " << et
+        writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
+        writer << "memory::desc input_data_desc = memory::desc({" << join(arg0_shape) << "}, " << et
                << ", memory::format::nchw);\n";
-        writer << "auto weights_desc = memory::desc({" << join(arg1_shape) << "}, " << et
+        writer << "memory::desc weights_desc = memory::desc({" << join(arg1_shape) << "}, " << et
                << ", memory::format::oihw);\n";
-        writer << "auto result_desc = memory::desc({" << join(result_shape) << "}, " << et
+        writer << "memory::desc result_desc = memory::desc({" << join(result_shape) << "}, " << et
                << ", memory::format::nchw);\n";
-        writer << "auto input_data = memory({input_data_desc, cpu_engine}, " << args[0].get_name()
+        writer << "memory input_data = memory({input_data_desc, cpu_engine}, " << args[0].get_name()
                << ");\n";
-        writer << "auto weights = memory({weights_desc, cpu_engine}, " << args[1].get_name()
+        writer << "memory weights = memory({weights_desc, cpu_engine}, " << args[1].get_name()
                << ");\n";
-        writer << "auto result = memory({result_desc, cpu_engine}, " << out[0].get_name() << ");\n";
-        writer << "auto conv = convolution_forward({"
+        writer << "memory result = memory({result_desc, cpu_engine}, " << out[0].get_name()
+               << ");\n";
+        writer << "convolution_forward conv = convolution_forward({"
                << "{prop_kind::forward, algorithm::convolution_direct, input_data_desc, "
                   "weights_desc, result_desc, {"
                << join(convolution->get_window_movement_strides()) << "}, {"
@@ -1946,7 +1949,7 @@ void runtime::cpu::CPU_Emitter::EmitConvolution(codegen::CodeWriter& writer,
                << join(convolution->get_padding_above()) << "}, padding_kind::zero}, cpu_engine}, "
                << "input_data, weights, result);\n";
-        writer << "auto s = stream(stream::kind::eager);\n"
+        writer << "stream s = stream(stream::kind::eager);\n"
                << "s.submit({conv}).wait();\n";
         writer.indent--;
         writer << "}\n";
@@ -1968,19 +1971,21 @@ void runtime::cpu::CPU_Emitter::EmitConvolution(codegen::CodeWriter& writer,
         writer << "{\n";
         writer.indent++;
-        writer << "auto input_data_desc = memory::desc({" << join(arg0_shape) << "}, " << et
+        writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
+        writer << "memory::desc input_data_desc = memory::desc({" << join(arg0_shape) << "}, " << et
                << ", memory::format::nchw);\n";
-        writer << "auto weights_desc = memory::desc({" << join(arg1_shape) << "}, " << et
+        writer << "memory::desc weights_desc = memory::desc({" << join(arg1_shape) << "}, " << et
                << ", memory::format::oihw);\n";
-        writer << "auto result_desc = memory::desc({" << join(result_shape) << "}, " << et
+        writer << "memory::desc result_desc = memory::desc({" << join(result_shape) << "}, " << et
                << ", memory::format::nchw);\n";
-        writer << "auto input_data = memory({input_data_desc, cpu_engine}, " << args[0].get_name()
+        writer << "memory input_data = memory({input_data_desc, cpu_engine}, " << args[0].get_name()
                << ");\n";
-        writer << "auto weights = memory({weights_desc, cpu_engine}, " << args[1].get_name()
+        writer << "memory weights = memory({weights_desc, cpu_engine}, " << args[1].get_name()
                << ");\n";
-        writer << "auto result = memory({result_desc, cpu_engine}, " << out[0].get_name() << ");\n";
-        writer << "auto conv = convolution_forward({"
+        writer << "memory result = memory({result_desc, cpu_engine}, " << out[0].get_name()
+               << ");\n";
+        writer << "convolution_forward conv = convolution_forward({"
                << "{prop_kind::forward, algorithm::convolution_direct, input_data_desc, "
                   "weights_desc, result_desc, {"
                << join(convolution->get_window_movement_strides()) << "}, {"
@@ -1989,7 +1994,7 @@ void runtime::cpu::CPU_Emitter::EmitConvolution(codegen::CodeWriter& writer,
                << join(convolution->get_padding_above()) << "}, padding_kind::zero}, cpu_engine}, "
                << "input_data, weights, result);\n";
-        writer << "auto s = stream(stream::kind::eager);\n"
+        writer << "stream s = stream(stream::kind::eager);\n"
                << "s.submit({conv}).wait();\n";
         writer.indent--;
         writer << "}\n";
@@ -2111,17 +2116,19 @@ void runtime::cpu::CPU_Emitter::EmitMaxPool(codegen::CodeWriter& writer,
         writer << "{\n";
         writer.indent++;
-        writer << "auto input_data_desc = memory::desc({" << join(arg_shape) << "}, " << et
+        writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
+        writer << "memory::desc input_data_desc = memory::desc({" << join(arg_shape) << "}, " << et
                << ", memory::format::nchw);\n";
-        writer << "auto result_desc = memory::desc({" << join(result_shape) << "}, " << et
+        writer << "memory::desc result_desc = memory::desc({" << join(result_shape) << "}, " << et
                << ", memory::format::nchw);\n";
-        writer << "auto input_data = memory({input_data_desc, cpu_engine}, " << args[0].get_name()
+        writer << "memory input_data = memory({input_data_desc, cpu_engine}, " << args[0].get_name()
                << ");\n";
-        writer << "auto result = memory({result_desc, cpu_engine}, " << out[0].get_name() << ");\n";
+        writer << "memory result = memory({result_desc, cpu_engine}, " << out[0].get_name()
+               << ");\n";

         // TODO(jmenon): Use a workspace
-        writer << "auto max_pooling = pooling_forward({"
+        writer << "pooling_forward max_pooling = pooling_forward({"
                << "{prop_kind::forward_inference, algorithm::pooling_max, "
                << "input_data_desc, result_desc, {" << join(max_pool->get_window_movement_strides())
                << "}, {" << join(max_pool->get_window_shape()) << "}, {"
@@ -2130,7 +2137,7 @@ void runtime::cpu::CPU_Emitter::EmitMaxPool(codegen::CodeWriter& writer,
                << "}, padding_kind::zero}, cpu_engine}, "
                << "input_data, result);\n";
-        writer << "auto s = stream(stream::kind::eager);\n"
+        writer << "stream s = stream(stream::kind::eager);\n"
                << "s.submit({max_pooling}).wait();\n";
         writer.indent--;
         writer << "}\n";
@@ -2292,17 +2299,19 @@ void runtime::cpu::CPU_Emitter::EmitAvgPool(codegen::CodeWriter& writer,
         writer << "{\n";
         writer.indent++;
-        writer << "auto input_data_desc = memory::desc({" << join(arg_shape) << "}, " << et
+        writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
+        writer << "memory::desc input_data_desc = memory::desc({" << join(arg_shape) << "}, " << et
                << ", memory::format::nchw);\n";
-        writer << "auto result_desc = memory::desc({" << join(result_shape) << "}, " << et
+        writer << "memory::desc result_desc = memory::desc({" << join(result_shape) << "}, " << et
                << ", memory::format::nchw);\n";
-        writer << "auto input_data = memory({input_data_desc, cpu_engine}, " << args[0].get_name()
+        writer << "memory input_data = memory({input_data_desc, cpu_engine}, " << args[0].get_name()
                << ");\n";
-        writer << "auto result = memory({result_desc, cpu_engine}, " << out[0].get_name() << ");\n";
+        writer << "memory result = memory({result_desc, cpu_engine}, " << out[0].get_name()
+               << ");\n";

         // TODO(jmenon): Use a workspace
-        writer << "auto avg_pooling = pooling_forward({"
+        writer << "pooling_forward avg_pooling = pooling_forward({"
                << "{prop_kind::forward_inference, algorithm::pooling_avg, "
                << "input_data_desc, result_desc, {" << join(avg_pool->get_window_movement_strides())
                << "}, {" << join(avg_pool->get_window_shape()) << "}, "
@@ -2311,7 +2320,7 @@ void runtime::cpu::CPU_Emitter::EmitAvgPool(codegen::CodeWriter& writer,
                << "padding_kind::zero}, cpu_engine}, "
                << "input_data, result);\n";
-        writer << "auto s = stream(stream::kind::eager);\n"
+        writer << "stream s = stream(stream::kind::eager);\n"
                << "s.submit({avg_pooling}).wait();\n";
         writer.indent--;
         writer << "}\n";
@@ -233,20 +233,28 @@ void runtime::cpu::CPU_ExternalFunction::compile()
     codegen::CodeWriter writer;

+    bool include_mkldnn_headers = false;
+    for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
+    {
+        for (shared_ptr<Node> node : current_function->get_ordered_ops())
+        {
+            if (dynamic_cast<op::Convolution*>(node.get()) ||
+                dynamic_cast<op::AvgPool*>(node.get()) || dynamic_cast<op::MaxPool*>(node.get()))
+            {
+                include_mkldnn_headers = true;
+            }
+        }
+    }
+
     writer +=
         R"(// Generated by the NGraph CPU backend
#include <cmath>
)";
-#ifdef NGRAPH_TBB_ENABLE
-    writer += "#include <tbb/flow_graph.h>\n\n";
-#endif
     writer +=
         R"(#include <Eigen/Dense>
#include <mkldnn.hpp>
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
@@ -273,6 +281,17 @@ using namespace ngraph::runtime::cpu::eigen;
using namespace ngraph::runtime;
)";

+    if (m_use_tbb)
+    {
+        writer << "#include <tbb/flow_graph.h>\n";
+    }
+
+    if (include_mkldnn_headers)
+    {
+        runtime::cpu::CPU_Emitter::EmitMKLDNNPreamble(writer);
+    }
+
     string pch_header_source = writer.get_code();

     // The "dso_handle" symbol is required by __cxa_atexit()
@@ -465,8 +484,6 @@ using namespace ngraph::runtime;
             writer << "tbb::flow::graph G;\n\n";
         }

-        runtime::cpu::CPU_Emitter::EmitMKLDNNPreamble(writer);
-
         bool temporaries_used = false;
         size_t worst_case_tmp_size = 0;
         for (shared_ptr<Node> node : current_function->get_ordered_ops())
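With the per-function EmitMKLDNNPreamble call removed, the lifetime change this merge makes can be summarized in a before/after sketch (function and op names illustrative):

```cpp
// Before: one engine per generated function, alive for the whole call
void compiled_function(/* tensor views */)
{
    auto cpu_engine = engine(engine::cpu, 0);
    { /* emitted Convolution, uses the shared cpu_engine */ }
    { /* emitted MaxPool, uses the shared cpu_engine */ }
}

// After: each MKLDNN op block creates and destroys its own engine
void compiled_function(/* tensor views */)
{
    {
        engine cpu_engine = engine(engine::cpu, 0);
        /* emitted Convolution */
    } // engine destroyed here
    {
        engine cpu_engine = engine(engine::cpu, 0);
        /* emitted MaxPool */
    } // and here
}
```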
@@ -5700,6 +5700,47 @@ TEST(${BACKEND_NAME}, zero_sized_subtract)
     make_binary_empty_test<op::Subtract>("${BACKEND_NAME}");
 }

+TEST(${BACKEND_NAME}, convolution_outlining)
+{
+    auto shape_a = Shape{1, 2, 2, 2};
+    auto A = make_shared<op::Parameter>(element::f32, shape_a);
+    auto shape_b = Shape{2, 2, 1, 1};
+    auto B = make_shared<op::Parameter>(element::f32, shape_b);
+    auto shape_r = Shape{1, 2, 2, 2};
+    auto conv1 = make_shared<op::Convolution>(A,
+                                              B,
+                                              Strides{1, 1},
+                                              Strides{1, 1},
+                                              CoordinateDiff{0, 0},
+                                              CoordinateDiff{0, 0},
+                                              Strides{1, 1});
+    auto conv2 = make_shared<op::Convolution>(conv1,
+                                              B,
+                                              Strides{1, 1},
+                                              Strides{1, 1},
+                                              CoordinateDiff{0, 0},
+                                              CoordinateDiff{0, 0},
+                                              Strides{1, 1});
+    auto f = make_shared<Function>(conv2, op::Parameters{A, B});
+
+    auto manager = runtime::Manager::get("${BACKEND_NAME}");
+    auto external = manager->compile(f);
+    auto backend = manager->allocate_backend();
+    auto cf = backend->make_call_frame(external);
+
+    // Create some tensors for input/output
+    auto a = backend->make_primary_tensor_view(element::f32, shape_a);
+    copy_data(a, vector<float>{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
+    auto b = backend->make_primary_tensor_view(element::f32, shape_b);
+    copy_data(b, vector<float>{1.0f, 1.0f, 1.0f, 1.0f});
+    auto result = backend->make_primary_tensor_view(element::f32, shape_r);
+
+    vector<float> expected_result{4.0f, 4.0f, 4.0f, 4.0f, 4.0f, 4.0f, 4.0f, 4.0f};
+
+    cf->call({a, b}, {result});
+    EXPECT_EQ(vector<float>{expected_result}, read_vector<float>(result));
+}
+
 TEST(${BACKEND_NAME}, avg_pool_1d_1channel_1image)
 {
     auto shape_a = Shape{1, 1, 14};
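A note on the new convolution_outlining test: chaining two convolutions makes the generated function contain two independently emitted MKLDNN blocks, each constructing its own engine, which is exactly the code path this merge changes. The expected all-4s output follows from the all-ones data: both kernels are 1x1 with unit weights, so each convolution simply sums its two input channels at every spatial position.

```cpp
// Worked check of a single output element:
float in = 1.0f, w = 1.0f;
float after_conv1 = in * w + in * w;                   // 2 input channels -> 2.0f
float after_conv2 = after_conv1 * w + after_conv1 * w; // 2 channels of 2s -> 4.0f
```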