Unverified Commit 233e4b1b authored by Jayaram Bobba's avatar Jayaram Bobba Committed by GitHub

Merge pull request #513 from NervanaSystems/jmenon/mkldnn-compile

MKLDNN Emitter
parents 96cabff0 e05a356e
......@@ -176,6 +176,8 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/cpu_tensor_view_wrapper.cpp
runtime/cpu/cpu_layout_descriptor.cpp
runtime/cpu/cpu_tracing.cpp
runtime/cpu/mkldnn_emitter.cpp
runtime/cpu/mkldnn_invoke.cpp
runtime/cpu/mkldnn_utils.cpp
runtime/cpu/ops/convert_layout.cpp
runtime/cpu/ops/matmul_bias.cpp
......
......@@ -142,6 +142,8 @@ void runtime::cpu::CPU_CallFrame::setup_runtime_context()
{
ctx->op_durations = new int64_t[m_external_function->get_op_attrs().size()];
}
const auto& mkldnn_emitter = m_external_function->get_mkldnn_emitter();
ctx->mkldnn_primitives = mkldnn_emitter->get_mkldnn_primitives().data();
}
void runtime::cpu::CPU_CallFrame::cleanup_runtime_context()
......
......@@ -2036,95 +2036,59 @@ namespace ngraph
data_dilated = data_dilated || (s != 1);
}
// TODO(jmenon): MKLDNN streams should be static so we need to either implement
// codegen for statics or move primitive and stream construction out
// of the generated function and only generate code to run/rerun the stream
if (!filter_dilated && !data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
if (!data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
args[0].get_element_type() == element::f32)
{
const string& et =
get_mkldnn_data_type(args[0].get_element_type().c_type_string());
writer << "{\n";
writer.indent++;
writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
writer << "memory::desc input_data_desc = memory::desc({" << join(arg0_shape)
<< "}, " << et << ", memory::format::nchw);\n";
writer << "memory::desc weights_desc = memory::desc({" << join(arg1_shape)
<< "}, " << et << ", memory::format::oihw);\n";
writer << "memory::desc result_desc = memory::desc({" << join(result_shape)
<< "}, " << et << ", memory::format::nchw);\n";
writer << "memory input_data = memory({input_data_desc, cpu_engine}, "
<< args[0].get_name() << ");\n";
writer << "memory weights = memory({weights_desc, cpu_engine}, "
<< args[1].get_name() << ");\n";
writer << "memory result = memory({result_desc, cpu_engine}, "
<< out[0].get_name() << ");\n";
writer
<< "convolution_forward conv = convolution_forward({"
<< "{prop_kind::forward, algorithm::convolution_direct, input_data_desc, "
"weights_desc, result_desc, {"
<< join(convolution->get_window_movement_strides()) << "}, {"
<< join(convolution->get_padding_below()) << "}, {"
<< join(convolution->get_padding_above())
<< "}, padding_kind::zero}, cpu_engine}, "
<< "input_data, weights, result);\n";
writer << "stream s = stream(stream::kind::eager);\n"
<< "s.submit({conv}).wait();\n";
writer.indent--;
writer << "}\n";
}
else if (filter_dilated && !data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
args[0].get_element_type() == element::f32)
{
// For dilation, MKLDNN wants to know how many elements to insert between, not how far
// apart to space the elements like nGraph. So we have to subtract 1 from each pos.
Strides window_dilation_strides_adjusted;
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = mkldnn_emitter->build_memory_descriptor(
args[0], mkldnn::memory::format::nchw);
auto weights_desc = mkldnn_emitter->build_memory_descriptor(
args[1], mkldnn::memory::format::oihw);
auto result_desc = mkldnn_emitter->build_memory_descriptor(
out[0], mkldnn::memory::format::nchw);
size_t conv_index = 0;
for (size_t s : convolution->get_window_dilation_strides())
if (!filter_dilated)
{
window_dilation_strides_adjusted.push_back(s - 1);
conv_index = mkldnn_emitter->build_convolution_forward(
input_data_desc,
weights_desc,
result_desc,
convolution->get_window_movement_strides(),
convolution->get_padding_below(),
convolution->get_padding_above());
}
else
{
// For dilation, MKLDNN wants to know how many elements to insert between, not how far
// apart to space the elements like nGraph. So we have to subtract 1 from each pos.
Strides window_dilation_strides_adjusted;
const string& et =
get_mkldnn_data_type(args[0].get_element_type().c_type_string());
writer << "{\n";
writer.indent++;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
writer << "memory::desc input_data_desc = memory::desc({" << join(arg0_shape)
<< "}, " << et << ", memory::format::nchw);\n";
writer << "memory::desc weights_desc = memory::desc({" << join(arg1_shape)
<< "}, " << et << ", memory::format::oihw);\n";
writer << "memory::desc result_desc = memory::desc({" << join(result_shape)
<< "}, " << et << ", memory::format::nchw);\n";
conv_index = mkldnn_emitter->build_convolution_forward(
input_data_desc,
weights_desc,
result_desc,
convolution->get_window_movement_strides(),
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above());
}
writer << "memory input_data = memory({input_data_desc, cpu_engine}, "
<< args[0].get_name() << ");\n";
writer << "memory weights = memory({weights_desc, cpu_engine}, "
<< args[1].get_name() << ");\n";
writer << "memory result = memory({result_desc, cpu_engine}, "
<< out[0].get_name() << ");\n";
writer
<< "convolution_forward conv = convolution_forward({"
<< "{prop_kind::forward, algorithm::convolution_direct, input_data_desc, "
"weights_desc, result_desc, {"
<< join(convolution->get_window_movement_strides()) << "}, {"
<< join(window_dilation_strides_adjusted) << "}, {"
<< join(convolution->get_padding_below()) << "}, {"
<< join(convolution->get_padding_above())
<< "}, padding_kind::zero}, cpu_engine}, "
<< "input_data, weights, result);\n";
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << out[0].get_name() << ");\n";
writer << "stream s = stream(stream::kind::eager);\n"
<< "s.submit({conv}).wait();\n";
writer.indent--;
writer << "}\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(conv_index) << ");\n";
}
else
{
......
......@@ -249,6 +249,8 @@ void runtime::cpu::CPU_ExternalFunction::compile()
string function_name = m_function->get_name();
m_mkldnn_emitter.reset(new MKLDNNEmitter(shared_from_this()));
ngraph::pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
......@@ -284,6 +286,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/kernel/avg_pool.hpp"
#include "ngraph/runtime/kernel/broadcast.hpp"
#include "ngraph/runtime/kernel/concat.hpp"
......
......@@ -31,6 +31,7 @@
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_emitter.hpp"
#include "ngraph/runtime/external_function.hpp"
namespace ngraph
......@@ -80,6 +81,11 @@ namespace ngraph
const LayoutDescriptorPtrs& get_result_layout_descriptors();
const std::vector<OpAttributes>& get_op_attrs() const { return m_op_attrs; }
// Accessor for the MKL-DNN emitter that builds and owns the MKL-DNN
// primitives for this function (created during compile()).
const std::unique_ptr<MKLDNNEmitter>& get_mkldnn_emitter() const
{
    return m_mkldnn_emitter;
}
protected:
void compile();
......@@ -115,6 +121,8 @@ namespace ngraph
LayoutDescriptorPtrs parameter_layout_descriptors;
LayoutDescriptorPtrs result_layout_descriptors;
std::vector<OpAttributes> m_op_attrs;
std::unique_ptr<MKLDNNEmitter> m_mkldnn_emitter;
};
}
}
......
......@@ -17,6 +17,11 @@
#include <chrono>
#include <cstdint>
namespace mkldnn
{
class primitive;
}
namespace ngraph
{
namespace runtime
......@@ -31,6 +36,7 @@ namespace ngraph
// Per-call-frame state handed to generated code at runtime.
struct CPURuntimeContext
{
    // Per-op execution timings. NOTE(review): appears to be allocated
    // conditionally in CPU_CallFrame::setup_runtime_context — confirm it
    // may be null before dereferencing.
    int64_t* op_durations;
    // Raw, non-owning view of MKLDNNEmitter's primitive vector; generated
    // code indexes into it via set_memory_ptr/mkldnn_invoke_primitive.
    mkldnn::primitive* const* mkldnn_primitives;
};
}
}
......
......@@ -69,3 +69,9 @@ bool runtime::cpu::TensorViewWrapper::is_output() const
{
return m_tensor_view->get_tensor().is_output();
}
// Returns the wrapped descriptor::TensorView (a copy of the shared_ptr;
// this wrapper retains its own reference).
const std::shared_ptr<descriptor::TensorView>
    runtime::cpu::TensorViewWrapper::get_tensor_view() const
{
    return m_tensor_view;
}
......@@ -45,6 +45,7 @@ public:
const std::string& get_name() const;
const std::string& get_type() const;
bool is_output() const;
const std::shared_ptr<descriptor::TensorView> get_tensor_view() const;
private:
std::shared_ptr<descriptor::TensorView> m_tensor_view;
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <memory>
#include "mkldnn_emitter.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace ngraph::runtime::cpu;
// All primitives created so far, indexed by the value returned from
// insert_primitive(). CPU_CallFrame exposes this vector's data() to
// generated code through CPURuntimeContext.
const std::vector<mkldnn::primitive*>& MKLDNNEmitter::get_mkldnn_primitives() const
{
    return mkldnn_primitives;
}
// Appends a primitive to the emitter's table and returns its index, which
// is the handle generated code uses to refer to it at runtime.
size_t MKLDNNEmitter::insert_primitive(mkldnn::primitive* primitive)
{
    // The new primitive lands at the current tail of the vector.
    const size_t index = mkldnn_primitives.size();
    mkldnn_primitives.push_back(primitive);
    return index;
}
// Indices of the memory primitives associated with the primitive at
// `index` (e.g. {input, weights, result} for a convolution). Throws
// std::out_of_range if no dependencies were recorded for `index`.
const std::vector<size_t>& MKLDNNEmitter::get_primitive_deps(size_t index) const
{
    return primitive_deps.at(index);
}
// Builds an MKL-DNN memory descriptor for the tensor view using an
// explicitly requested layout format.
mkldnn::memory::desc MKLDNNEmitter::build_memory_descriptor(const TensorViewWrapper& tvw,
                                                            mkldnn::memory::format fmt) const
{
    // Translate the nGraph shape into MKL-DNN dims, then pair it with the
    // mapped element type and the requested format.
    const auto& shape = tvw.get_shape();
    mkldnn::memory::dims dims(shape.begin(), shape.end());
    return mkldnn::memory::desc(dims, mkldnn_utils::GetDataType(tvw.get_element_type()), fmt);
}
// Builds an MKL-DNN memory descriptor for the tensor view, taking the
// format from the layout already assigned to the view.
mkldnn::memory::desc MKLDNNEmitter::build_memory_descriptor(const TensorViewWrapper& tvw) const
{
    const auto layout = std::static_pointer_cast<LayoutDescriptor>(
        tvw.get_tensor_view()->get_tensor_view_layout());
    return build_memory_descriptor(tvw, layout->get_mkldnn_format());
}
// Constructs (but does not register) a memory primitive for the tensor
// view, with no data handle attached.
// NOTE(review): passes nullptr as the handle, while the overload below
// states the MKL-DNN C++ API rejects null handles — confirm this overload
// is actually usable/used.
mkldnn::memory MKLDNNEmitter::build_memory_primitive(const TensorViewWrapper& tvw) const
{
    return mkldnn::memory({build_memory_descriptor(tvw), mkldnn_utils::global_cpu_engine}, nullptr);
}
// Registers a memory primitive for `desc` and returns its index.
size_t MKLDNNEmitter::build_memory_primitive(const mkldnn::memory::desc& desc)
{
    // The MKL-DNN C++ API (unlike the C API) insists on a non-null data
    // handle at construction time. Real buffers are bound at runtime via
    // set_memory_ptr(), so a known-invalid sentinel address is passed here
    // purely to get past that check.
    void* const sentinel = reinterpret_cast<void*>(0x42);
    return insert_primitive(
        new mkldnn::memory({desc, mkldnn_utils::global_cpu_engine}, sentinel));
}
// Registers a non-dilated forward convolution and its three memory
// primitives; returns the convolution's index. The memory primitives'
// data handles are bound at runtime (see get_primitive_deps()).
size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
                                                const mkldnn::memory::desc& weights_desc,
                                                const mkldnn::memory::desc& result_desc,
                                                const ngraph::Strides& strides,
                                                const ngraph::CoordinateDiff& padding_below,
                                                const ngraph::CoordinateDiff& padding_above)
{
    const size_t input_index = build_memory_primitive(input_data_desc);
    const size_t weights_index = build_memory_primitive(weights_desc);
    const size_t result_index = build_memory_primitive(result_desc);

    mkldnn::convolution_forward::desc fwd_desc(
        mkldnn::prop_kind::forward,
        mkldnn::algorithm::convolution_direct,
        input_data_desc,
        weights_desc,
        result_desc,
        mkldnn::memory::dims(strides.begin(), strides.end()),
        mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
        mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
        mkldnn::padding_kind::zero);
    mkldnn::convolution_forward::primitive_desc prim_desc(fwd_desc,
                                                          mkldnn_utils::global_cpu_engine);

    const size_t conv_index =
        insert_primitive(new mkldnn::convolution_forward(prim_desc,
                                                         *mkldnn_primitives[input_index],
                                                         *mkldnn_primitives[weights_index],
                                                         *mkldnn_primitives[result_index]));
    // Record which memory primitives this convolution reads/writes so
    // generated code can rebind their data handles before invocation.
    primitive_deps[conv_index] = {input_index, weights_index, result_index};
    return conv_index;
}
// Registers a dilated forward convolution and its three memory primitives;
// returns the convolution's index. `dilation_strides` uses MKL-DNN
// semantics (number of elements inserted between filter taps), which is
// the nGraph window dilation stride minus one — callers adjust.
size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
                                                const mkldnn::memory::desc& weights_desc,
                                                const mkldnn::memory::desc& result_desc,
                                                const ngraph::Strides& strides,
                                                const ngraph::Strides& dilation_strides,
                                                const ngraph::CoordinateDiff& padding_below,
                                                const ngraph::CoordinateDiff& padding_above)
{
    const size_t input_index = build_memory_primitive(input_data_desc);
    const size_t weights_index = build_memory_primitive(weights_desc);
    const size_t result_index = build_memory_primitive(result_desc);

    mkldnn::convolution_forward::desc fwd_desc(
        mkldnn::prop_kind::forward,
        mkldnn::algorithm::convolution_direct,
        input_data_desc,
        weights_desc,
        result_desc,
        mkldnn::memory::dims(strides.begin(), strides.end()),
        mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
        mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
        mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
        mkldnn::padding_kind::zero);
    mkldnn::convolution_forward::primitive_desc prim_desc(fwd_desc,
                                                          mkldnn_utils::global_cpu_engine);

    const size_t conv_index =
        insert_primitive(new mkldnn::convolution_forward(prim_desc,
                                                         *mkldnn_primitives[input_index],
                                                         *mkldnn_primitives[weights_index],
                                                         *mkldnn_primitives[result_index]));
    // Record which memory primitives this convolution reads/writes so
    // generated code can rebind their data handles before invocation.
    primitive_deps[conv_index] = {input_index, weights_index, result_index};
    return conv_index;
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <memory>
#include <unordered_map>
#include <vector>
#include <mkldnn.hpp>
#include "ngraph/common.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
class CPU_ExternalFunction;
class TensorViewWrapper;
// Builds and owns MKL-DNN primitives on behalf of the CPU backend.
// Primitives are identified by the index at which they were inserted;
// CPURuntimeContext exposes a raw view of the same vector so generated
// code can invoke primitives by index at runtime.
class MKLDNNEmitter
{
public:
    // The owning external function is stored as a weak_ptr: the external
    // function holds this emitter via unique_ptr and constructs it with
    // shared_from_this(), so a member shared_ptr would form an ownership
    // cycle and keep the external function alive forever.
    explicit MKLDNNEmitter(const std::shared_ptr<CPU_ExternalFunction>& ef)
        : external_function(ef)
    {
    }

    // All primitives created so far, indexed by insert_primitive() order.
    const std::vector<mkldnn::primitive*>& get_mkldnn_primitives() const;

    // Takes ownership of `primitive` and returns its index.
    size_t insert_primitive(mkldnn::primitive* primitive);

    // Indices of the memory primitives the primitive at `index` uses.
    const std::vector<size_t>& get_primitive_deps(size_t index) const;

    // TODO(jmenon): Get rid of TensorViewWrappers at some point

    // Builds a memory descriptor for a tensor view in an explicit format.
    mkldnn::memory::desc build_memory_descriptor(const TensorViewWrapper& tvw,
                                                 mkldnn::memory::format fmt) const;
    // Builds a memory descriptor using the view's assigned layout format.
    mkldnn::memory::desc build_memory_descriptor(const TensorViewWrapper& tvw) const;
    // Constructs (without registering) a memory primitive for a view.
    mkldnn::memory build_memory_primitive(const TensorViewWrapper& tvw) const;
    // Registers a memory primitive for `desc`; returns its index.
    size_t build_memory_primitive(const mkldnn::memory::desc& desc);

    // Registers a non-dilated forward convolution; returns its index.
    size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
                                     const mkldnn::memory::desc& weights_desc,
                                     const mkldnn::memory::desc& result_desc,
                                     const ngraph::Strides& strides,
                                     const ngraph::CoordinateDiff& padding_below,
                                     const ngraph::CoordinateDiff& padding_above);
    // Registers a dilated forward convolution. `dilation_strides` follows
    // MKL-DNN semantics (elements inserted between taps, i.e. nGraph
    // window dilation stride minus one).
    size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
                                     const mkldnn::memory::desc& weights_desc,
                                     const mkldnn::memory::desc& result_desc,
                                     const ngraph::Strides& strides,
                                     const ngraph::Strides& dilation_strides,
                                     const ngraph::CoordinateDiff& padding_below,
                                     const ngraph::CoordinateDiff& padding_above);

private:
    // Non-owning back-reference; see constructor comment.
    std::weak_ptr<CPU_ExternalFunction> external_function;
    std::vector<mkldnn::primitive*> mkldnn_primitives;
    // NOTE(review): currently unused by any emitter method — confirm
    // before relying on it.
    std::vector<mkldnn::stream> mkldnn_streams;
    // Maps a primitive index to the indices of its memory primitives.
    std::unordered_map<size_t, std::vector<size_t>> primitive_deps;
};
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <mkldnn.hpp>
#include "mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
// Process-wide MKL-DNN CPU engine (device 0), shared by all emitted
// primitives; declared extern in mkldnn_utils.hpp.
mkldnn::engine ngraph::runtime::cpu::mkldnn_utils::global_cpu_engine(mkldnn::engine::cpu, 0);
// C-linkage entry point called from generated code: rebinds the data
// handle of the memory primitive at `primitive_index` to `ptr` so the
// primitive reads/writes the caller's buffer.
extern "C" void ngraph::runtime::cpu::mkldnn_utils::set_memory_ptr(CPURuntimeContext* ctx,
                                                                   size_t primitive_index,
                                                                   void* ptr)
{
    // The primitive stored at this index is always an mkldnn::memory
    // (see MKLDNNEmitter::build_memory_primitive), so the downcast is safe.
    auto* mem = static_cast<mkldnn::memory*>(ctx->mkldnn_primitives[primitive_index]);
    mem->set_data_handle(ptr);
}
// C-linkage entry point called from generated code: executes the single
// primitive at `primitive_index` and blocks until it completes.
extern "C" void ngraph::runtime::cpu::mkldnn_utils::mkldnn_invoke_primitive(CPURuntimeContext* ctx,
                                                                            size_t primitive_index)
{
    // Submit on an eager stream and wait synchronously.
    mkldnn::stream exec_stream{mkldnn::stream::kind::eager};
    exec_stream.submit({*ctx->mkldnn_primitives[primitive_index]}).wait();
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <cstddef>
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            struct CPURuntimeContext;

            namespace mkldnn_utils
            {
                // C-linkage entry points invoked from generated code.

                // Rebinds the data handle of the memory primitive at
                // `primitive_index` in `ctx` to `ptr`.
                extern "C" void
                    set_memory_ptr(CPURuntimeContext* ctx, size_t primitive_index, void* ptr);
                // Runs the primitive at `primitive_index` and waits for
                // it to complete.
                extern "C" void mkldnn_invoke_primitive(CPURuntimeContext* ctx,
                                                        size_t primitive_index);
            }
        }
    }
}
......@@ -17,6 +17,7 @@
#include <string>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
#include <unordered_set>
#include "ngraph/node.hpp"
......@@ -37,7 +38,7 @@ namespace ngraph
{
#define TI(x) std::type_index(typeid(x))
const std::unordered_set<std::type_index> s_op_registry{
static const std::unordered_set<std::type_index> s_op_registry{
TI(ngraph::op::AvgPool),
TI(ngraph::op::AvgPoolBackprop),
TI(ngraph::op::BatchNorm),
......@@ -47,6 +48,28 @@ namespace ngraph
TI(ngraph::op::MaxPool),
TI(ngraph::op::MaxPoolBackprop)};
// Maps nGraph element C type names to MKL-DNN data types. Element types
// MKL-DNN cannot represent (double, 64-bit integers, most unsigned types)
// map to data_undef and are rejected by GetDataType below.
// NOTE(review): "char" maps to s8, i.e. assumes char is signed — confirm
// on platforms where char is unsigned.
static const std::unordered_map<std::string, const mkldnn::memory::data_type>
    s_data_type_map{{"char", mkldnn::memory::data_type::s8},
                    {"float", mkldnn::memory::data_type::f32},
                    {"double", mkldnn::memory::data_type::data_undef},
                    {"int8_t", mkldnn::memory::data_type::s8},
                    {"int16_t", mkldnn::memory::data_type::s16},
                    {"int32_t", mkldnn::memory::data_type::s32},
                    {"int64_t", mkldnn::memory::data_type::data_undef},
                    {"uint8_t", mkldnn::memory::data_type::u8},
                    {"uint16_t", mkldnn::memory::data_type::data_undef},
                    {"uint32_t", mkldnn::memory::data_type::data_undef},
                    {"uint64_t", mkldnn::memory::data_type::data_undef}};
// Translates an nGraph element type to the corresponding MKL-DNN data
// type. Throws ngraph_error when the element type has no usable MKL-DNN
// representation.
mkldnn::memory::data_type GetDataType(const ngraph::element::Type& et)
{
    auto entry = s_data_type_map.find(et.c_type_string());
    if (entry != s_data_type_map.end() &&
        entry->second != mkldnn::memory::data_type::data_undef)
    {
        return entry->second;
    }
    throw ngraph_error("No MKLDNN data type exists for the given element type");
}
bool IsMKLDNNOp(ngraph::Node& op)
{
return (s_op_registry.find(TI(op)) != s_op_registry.end());
......
......@@ -16,15 +16,11 @@
#pragma once
#include <string>
#include <typeindex>
#include <typeinfo>
#include <unordered_set>
#include <mkldnn.hpp>
#include "ngraph/node.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/types/element_type.hpp"
namespace ngraph
{
......@@ -34,7 +30,12 @@ namespace ngraph
{
namespace mkldnn_utils
{
extern mkldnn::engine global_cpu_engine;
mkldnn::memory::data_type GetDataType(const ngraph::element::Type& et);
bool IsMKLDNNOp(ngraph::Node& op);
mkldnn::memory::format
CreateNativeDataFormat(const ngraph::runtime::cpu::LayoutDescriptor& layout);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment