Commit eabfebe5 authored by pthoreho's avatar pthoreho

Merge remote-tracking branch 'origin/master' into pruthvi/mkldnn_elementwise_add

parents 16a18d29 d0f8dff2
...@@ -183,6 +183,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND ...@@ -183,6 +183,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/mkldnn_utils.cpp runtime/cpu/mkldnn_utils.cpp
runtime/cpu/ops/convert_layout.cpp runtime/cpu/ops/convert_layout.cpp
runtime/cpu/ops/matmul_bias.cpp runtime/cpu/ops/matmul_bias.cpp
runtime/cpu/pass/cpu_assignment.cpp
runtime/cpu/pass/cpu_fusion.cpp runtime/cpu/pass/cpu_fusion.cpp
runtime/cpu/pass/cpu_layout.cpp runtime/cpu/pass/cpu_layout.cpp
) )
......
...@@ -14,8 +14,10 @@ ...@@ -14,8 +14,10 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#include "ngraph/builder/reduce_ops.hpp" #include <numeric>
#include "ngraph/builder/autobroadcast.hpp" #include "ngraph/builder/autobroadcast.hpp"
#include "ngraph/builder/reduce_ops.hpp"
#include "ngraph/ops/add.hpp" #include "ngraph/ops/add.hpp"
#include "ngraph/ops/divide.hpp" #include "ngraph/ops/divide.hpp"
#include "ngraph/ops/multiply.hpp" #include "ngraph/ops/multiply.hpp"
......
...@@ -40,7 +40,7 @@ std::string codegen::CodeWriter::generate_temporary_name(std::string prefix) ...@@ -40,7 +40,7 @@ std::string codegen::CodeWriter::generate_temporary_name(std::string prefix)
{ {
std::stringstream ss; std::stringstream ss;
ss << prefix << "__" << m_temporary_name_count; ss << prefix << m_temporary_name_count;
m_temporary_name_count++; m_temporary_name_count++;
return ss.str(); return ss.str();
......
...@@ -24,6 +24,6 @@ void ngraph::op::Divide::generate_adjoints(autodiff::Adjoints& adjoints, ...@@ -24,6 +24,6 @@ void ngraph::op::Divide::generate_adjoints(autodiff::Adjoints& adjoints,
auto x = get_input_op(0); auto x = get_input_op(0);
auto y = get_input_op(1); auto y = get_input_op(1);
adjoints.add_delta(x, delta * shared_from_this() / x); adjoints.add_delta(x, delta / y);
adjoints.add_delta(y, -delta * shared_from_this() / y); adjoints.add_delta(y, -delta * shared_from_this() / y);
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "ngraph/common.hpp" #include "ngraph/common.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/ops/util/op_annotations.hpp"
namespace ngraph namespace ngraph
{ {
...@@ -28,8 +29,21 @@ namespace ngraph ...@@ -28,8 +29,21 @@ namespace ngraph
/// Root of all actual ops /// Root of all actual ops
class Op : public Node class Op : public Node
{ {
public:
void set_op_annotations(std::shared_ptr<ngraph::op::util::OpAnnotations> op_annotations)
{
m_op_annotations = op_annotations;
}
std::shared_ptr<ngraph::op::util::OpAnnotations> get_op_annotations() const
{
return m_op_annotations;
}
protected: protected:
Op(const std::string& node_type, const Nodes& arguments); Op(const std::string& node_type, const Nodes& arguments);
private:
std::shared_ptr<ngraph::op::util::OpAnnotations> m_op_annotations;
}; };
} }
} }
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
namespace ngraph
{
namespace op
{
namespace util
{
/// \brief Abstract base class for annotations added to graph ops
class OpAnnotations
{
public:
OpAnnotations() {}
};
}
}
}
...@@ -15,13 +15,13 @@ ...@@ -15,13 +15,13 @@
*******************************************************************************/ *******************************************************************************/
#include <algorithm> #include <algorithm>
#include <iostream>
#include <unordered_set> #include <unordered_set>
#include "ngraph/pattern/core_fusion.hpp" #include "ngraph/pattern/core_fusion.hpp"
#include "ngraph/graph_util.hpp" #include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp" #include "ngraph/log.hpp"
#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/constant.hpp" #include "ngraph/ops/constant.hpp"
#include "ngraph/ops/maximum.hpp" #include "ngraph/ops/maximum.hpp"
#include "ngraph/ops/parameter.hpp" #include "ngraph/ops/parameter.hpp"
...@@ -51,7 +51,12 @@ void pass::CoreFusion::construct_relu_pattern() ...@@ -51,7 +51,12 @@ void pass::CoreFusion::construct_relu_pattern()
auto iconst0 = construct_constant_node(0); auto iconst0 = construct_constant_node(0);
auto val = make_shared<pattern::op::Label>(iconst0); auto val = make_shared<pattern::op::Label>(iconst0);
auto zero = make_shared<pattern::op::Label>(iconst0, nullptr, Nodes{iconst0}); auto zero = make_shared<pattern::op::Label>(iconst0, nullptr, Nodes{iconst0});
auto max = make_shared<op::Maximum>(zero, val);
auto broadcast_pred = [](std::shared_ptr<Node> n) {
return static_cast<bool>(std::dynamic_pointer_cast<op::Broadcast>(n));
};
auto skip_broadcast = std::make_shared<pattern::op::Any>(zero, broadcast_pred);
auto max = make_shared<op::Maximum>(skip_broadcast, val);
pattern::gr_callback_fn callback = [val, zero](pattern::Matcher& m) { pattern::gr_callback_fn callback = [val, zero](pattern::Matcher& m) {
NGRAPH_DEBUG << "In a callback for construct_relu_pattern against " NGRAPH_DEBUG << "In a callback for construct_relu_pattern against "
......
...@@ -87,6 +87,8 @@ ...@@ -87,6 +87,8 @@
#include "ngraph/ops/tanh.hpp" #include "ngraph/ops/tanh.hpp"
#include "ngraph/runtime/cpu/cpu_emitter.hpp" #include "ngraph/runtime/cpu/cpu_emitter.hpp"
#include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp" #include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/ops/convert_layout.hpp" #include "ngraph/runtime/cpu/ops/convert_layout.hpp"
#include "ngraph/runtime/cpu/ops/matmul_bias.hpp" #include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
#include "ngraph/types/element_type.hpp" #include "ngraph/types/element_type.hpp"
...@@ -116,32 +118,6 @@ static string eigen_matrix_format(const ngraph::Shape& shape, const ngraph::Stri ...@@ -116,32 +118,6 @@ static string eigen_matrix_format(const ngraph::Shape& shape, const ngraph::Stri
return ss.str(); return ss.str();
} }
// Mapping from POD types to MKLDNN data types
// An empty string implies the corresponding MKLDNN data type
// is not supported
static const unordered_map<string, const string> mkldnn_data_type_map{
{"char", "memory::data_type::s8"},
{"float", "memory::data_type::f32"},
{"double", ""},
{"int8_t", "memory::data_type::s8"},
{"int16_t", "memory::data_type::s16"},
{"int32_t", "memory::data_type::s32"},
{"int64_t", ""},
{"uint8_t", "memory::data_type::u8"},
{"uint16_t", ""},
{"uint32_t", ""},
{"uint64_t", ""}};
static const string& get_mkldnn_data_type(const string& type)
{
auto it = mkldnn_data_type_map.find(type);
if (it == mkldnn_data_type_map.end() || it->second.empty())
{
throw ngraph_error("No MKLDNN data type exists for the given element type");
}
return it->second;
}
void runtime::cpu::CPU_Emitter::emit_mkldnn_preamble(codegen::CodeWriter& writer) void runtime::cpu::CPU_Emitter::emit_mkldnn_preamble(codegen::CodeWriter& writer)
{ {
writer << "// MKLDNN Preamble\n"; writer << "// MKLDNN Preamble\n";
...@@ -340,7 +316,9 @@ namespace ngraph ...@@ -340,7 +316,9 @@ namespace ngraph
auto result_shape = out[0].get_shape(); auto result_shape = out[0].get_shape();
// get input element type // get input element type
const string& et = get_mkldnn_data_type(args[2].get_element_type().c_type_string()); const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
args[2].get_element_type());
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
...@@ -2081,53 +2059,45 @@ namespace ngraph ...@@ -2081,53 +2059,45 @@ namespace ngraph
auto arg0_shape = args[0].get_shape(); auto arg0_shape = args[0].get_shape();
auto arg1_shape = args[1].get_shape(); auto arg1_shape = args[1].get_shape();
auto result_shape = out[0].get_shape(); auto result_shape = out[0].get_shape();
auto arg0_rank = arg0_shape.size();
auto arg1_rank = arg1_shape.size();
bool filter_dilated = false;
for (size_t s : convolution->get_window_dilation_strides())
{
filter_dilated = filter_dilated || (s != 1);
}
bool data_dilated = false;
for (size_t s : convolution->get_data_dilation_strides())
{
data_dilated = data_dilated || (s != 1);
}
if (!data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
args[0].get_element_type() == element::f32)
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc = mkldnn_emitter->build_memory_descriptor(
args[0], mkldnn::memory::format::nchw);
auto weights_desc = mkldnn_emitter->build_memory_descriptor(
args[1], mkldnn::memory::format::oihw);
auto result_desc = mkldnn_emitter->build_memory_descriptor(
out[0], mkldnn::memory::format::nchw);
size_t conv_index = 0;
if (!filter_dilated) auto op_annotations =
{ static_cast<const ngraph::op::Op*>(node)->get_op_annotations();
conv_index = mkldnn_emitter->build_convolution_forward( if (op_annotations &&
input_data_desc, static_pointer_cast<ngraph::runtime::cpu::CPUOpAnnotations>(op_annotations)
weights_desc, ->is_mkldnn_op())
result_desc,
convolution->get_window_movement_strides(),
convolution->get_padding_below(),
convolution->get_padding_above());
}
else
{ {
// For dilation, MKLDNN wants to know how many elements to insert between, not how far // For dilation, MKLDNN wants to know how many elements to insert between, not how far
// apart to space the elements like nGraph. So we have to subtract 1 from each pos. // apart to space the elements like nGraph. So we have to subtract 1 from each pos.
Strides window_dilation_strides_adjusted; Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides()) for (size_t s : convolution->get_window_dilation_strides())
{ {
window_dilation_strides_adjusted.push_back(s - 1); window_dilation_strides_adjusted.push_back(s - 1);
} }
auto input_tvl = node->get_inputs()[0]
.get_output()
.get_tensor_view()
->get_tensor_view_layout();
auto weights_tvl = node->get_inputs()[1]
.get_output()
.get_tensor_view()
->get_tensor_view_layout();
auto output_tvl = node->get_output_tensor_view(0)->get_tensor_view_layout();
auto input_format = dynamic_cast<runtime::cpu::LayoutDescriptor&>(*input_tvl)
.get_mkldnn_format();
auto weights_format =
dynamic_cast<runtime::cpu::LayoutDescriptor&>(*weights_tvl)
.get_mkldnn_format();
auto output_format = dynamic_cast<runtime::cpu::LayoutDescriptor&>(*output_tvl)
.get_mkldnn_format();
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_data_desc =
mkldnn_emitter->build_memory_descriptor(args[0], input_format);
auto weights_desc =
mkldnn_emitter->build_memory_descriptor(args[1], weights_format);
auto result_desc =
mkldnn_emitter->build_memory_descriptor(out[0], output_format);
size_t conv_index = 0;
conv_index = mkldnn_emitter->build_convolution_forward( conv_index = mkldnn_emitter->build_convolution_forward(
input_data_desc, input_data_desc,
...@@ -2137,7 +2107,6 @@ namespace ngraph ...@@ -2137,7 +2107,6 @@ namespace ngraph
window_dilation_strides_adjusted, window_dilation_strides_adjusted,
convolution->get_padding_below(), convolution->get_padding_below(),
convolution->get_padding_above()); convolution->get_padding_above());
}
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index); auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0]) writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
...@@ -2194,7 +2163,8 @@ namespace ngraph ...@@ -2194,7 +2163,8 @@ namespace ngraph
args[0].get_element_type() == element::f32) args[0].get_element_type() == element::f32)
{ {
const string& elem_type = const string& elem_type =
get_mkldnn_data_type(args[0].get_element_type().c_type_string()); runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
args[0].get_element_type());
Strides window_dilation_strides_adjusted; Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides_forward()) for (size_t s : convolution->get_window_dilation_strides_forward())
...@@ -2304,7 +2274,8 @@ namespace ngraph ...@@ -2304,7 +2274,8 @@ namespace ngraph
args[0].get_element_type() == element::f32) args[0].get_element_type() == element::f32)
{ {
const string& elem_type = const string& elem_type =
get_mkldnn_data_type(args[0].get_element_type().c_type_string()); runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
args[0].get_element_type());
Strides window_dilation_strides_adjusted; Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides_forward()) for (size_t s : convolution->get_window_dilation_strides_forward())
...@@ -2418,8 +2389,8 @@ namespace ngraph ...@@ -2418,8 +2389,8 @@ namespace ngraph
if (arg_rank == 4 && max_pool->get_window_shape().size() == 2 && if (arg_rank == 4 && max_pool->get_window_shape().size() == 2 &&
args[0].get_element_type() == element::f32) args[0].get_element_type() == element::f32)
{ {
const string& et = const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
get_mkldnn_data_type(args[0].get_element_type().c_type_string()); args[0].get_element_type());
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
...@@ -2601,8 +2572,8 @@ namespace ngraph ...@@ -2601,8 +2572,8 @@ namespace ngraph
if (arg_rank == 4 && avg_pool->get_window_shape().size() == 2 && if (arg_rank == 4 && avg_pool->get_window_shape().size() == 2 &&
args[0].get_element_type() == element::f32) args[0].get_element_type() == element::f32)
{ {
const string& et = const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
get_mkldnn_data_type(args[0].get_element_type().c_type_string()); args[0].get_element_type());
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
...@@ -2681,8 +2652,8 @@ namespace ngraph ...@@ -2681,8 +2652,8 @@ namespace ngraph
if (delta_rank == 4 && apb->get_window_shape().size() == 2 && if (delta_rank == 4 && apb->get_window_shape().size() == 2 &&
args[0].get_element_type() == element::f32) args[0].get_element_type() == element::f32)
{ {
const string& et = const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
get_mkldnn_data_type(args[0].get_element_type().c_type_string()); args[0].get_element_type());
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
...@@ -2749,8 +2720,8 @@ namespace ngraph ...@@ -2749,8 +2720,8 @@ namespace ngraph
if (delta_rank == 4 && mpb->get_window_shape().size() == 2 && if (delta_rank == 4 && mpb->get_window_shape().size() == 2 &&
args[0].get_element_type() == element::f32) args[0].get_element_type() == element::f32)
{ {
const string& et = const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
get_mkldnn_data_type(args[1].get_element_type().c_type_string()); args[1].get_element_type());
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
...@@ -3068,13 +3039,51 @@ namespace ngraph ...@@ -3068,13 +3039,51 @@ namespace ngraph
writer << "}\n"; writer << "}\n";
} }
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::runtime::cpu::op::ConvertLayout)
{
auto input_tvl =
node->get_inputs()[0].get_output().get_tensor_view()->get_tensor_view_layout();
auto output_tvl = node->get_output_tensor_view(0)->get_tensor_view_layout();
auto input_format =
dynamic_cast<runtime::cpu::LayoutDescriptor&>(*input_tvl).get_mkldnn_format();
auto output_format =
dynamic_cast<runtime::cpu::LayoutDescriptor&>(*output_tvl).get_mkldnn_format();
const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
args[0].get_element_type());
writer << "{\n";
writer.indent++;
writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
writer << "memory::desc input_desc = memory::desc({" << join(args[0].get_shape())
<< "}, " << et << ", "
<< runtime::cpu::mkldnn_utils::get_mkldnn_format_string(input_format)
<< ");\n";
writer << "memory::desc output_desc = memory::desc({" << join(out[0].get_shape())
<< "}, " << et << ", "
<< runtime::cpu::mkldnn_utils::get_mkldnn_format_string(output_format)
<< ");\n";
writer << "memory input = memory({input_desc, cpu_engine}, " << args[0].get_name()
<< ");\n";
writer << "memory output = memory({output_desc, cpu_engine}, " << out[0].get_name()
<< ");\n";
writer << "reorder prim = reorder(input, output);\n";
writer << "stream s = stream(stream::kind::eager);\n"
<< "s.submit({prim}).wait();\n";
writer.indent--;
writer << "}\n";
}
template <> template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::ReluBackprop) void CPU_Emitter::EMITTER_DECL(ngraph::op::ReluBackprop)
{ {
const auto& arg_shape = args[0].get_shape(); const auto& arg_shape = args[0].get_shape();
const size_t arg_rank = arg_shape.size(); const size_t arg_rank = arg_shape.size();
const auto& result_shape = out[0].get_shape(); const auto& result_shape = out[0].get_shape();
const string& et = get_mkldnn_data_type(args[0].get_element_type().c_type_string()); const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
args[0].get_element_type());
if (arg_rank == 4 && args[0].get_element_type() == element::f32) if (arg_rank == 4 && args[0].get_element_type() == element::f32)
{ {
writer << "{\n"; writer << "{\n";
...@@ -3139,7 +3148,8 @@ namespace ngraph ...@@ -3139,7 +3148,8 @@ namespace ngraph
const auto& arg_shape = args[0].get_shape(); const auto& arg_shape = args[0].get_shape();
const size_t arg_rank = arg_shape.size(); const size_t arg_rank = arg_shape.size();
const auto& result_shape = out[0].get_shape(); const auto& result_shape = out[0].get_shape();
const string& et = get_mkldnn_data_type(args[0].get_element_type().c_type_string()); const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
args[0].get_element_type());
if (arg_rank == 4 && args[0].get_element_type() == element::f32) if (arg_rank == 4 && args[0].get_element_type() == element::f32)
{ {
writer << "{\n"; writer << "{\n";
......
...@@ -106,7 +106,9 @@ ...@@ -106,7 +106,9 @@
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp" #include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/cpu/cpu_tracing.hpp" #include "ngraph/runtime/cpu/cpu_tracing.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/ops/convert_layout.hpp"
#include "ngraph/runtime/cpu/ops/matmul_bias.hpp" #include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
#include "ngraph/runtime/cpu/pass/cpu_assignment.hpp"
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp" #include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_layout.hpp" #include "ngraph/runtime/cpu/pass/cpu_layout.hpp"
...@@ -218,6 +220,8 @@ static const runtime::cpu::OpMap dispatcher{ ...@@ -218,6 +220,8 @@ static const runtime::cpu::OpMap dispatcher{
&runtime::cpu::CPU_Emitter::emit<op::ConvolutionBackpropFilters>}, &runtime::cpu::CPU_Emitter::emit<op::ConvolutionBackpropFilters>},
{TI(ngraph::op::ConvolutionBackpropData), {TI(ngraph::op::ConvolutionBackpropData),
&runtime::cpu::CPU_Emitter::emit<op::ConvolutionBackpropData>}, &runtime::cpu::CPU_Emitter::emit<op::ConvolutionBackpropData>},
{TI(ngraph::runtime::cpu::op::ConvertLayout),
&runtime::cpu::CPU_Emitter::emit<runtime::cpu::op::ConvertLayout>},
{TI(ngraph::op::Not), &runtime::cpu::CPU_Emitter::emit<op::Not>}, {TI(ngraph::op::Not), &runtime::cpu::CPU_Emitter::emit<op::Not>},
{TI(ngraph::op::MaxPool), &runtime::cpu::CPU_Emitter::emit<op::MaxPool>}, {TI(ngraph::op::MaxPool), &runtime::cpu::CPU_Emitter::emit<op::MaxPool>},
{TI(ngraph::op::Reverse), &runtime::cpu::CPU_Emitter::emit<op::Reverse>}, {TI(ngraph::op::Reverse), &runtime::cpu::CPU_Emitter::emit<op::Reverse>},
...@@ -259,7 +263,8 @@ void runtime::cpu::CPU_ExternalFunction::compile() ...@@ -259,7 +263,8 @@ void runtime::cpu::CPU_ExternalFunction::compile()
pass_manager.register_pass<ngraph::pass::CoreFusion>(); pass_manager.register_pass<ngraph::pass::CoreFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>(); pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPULayout>(); pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(shared_from_this());
pass_manager.register_pass<runtime::cpu::pass::CPULayout>(shared_from_this());
pass_manager.register_pass<ngraph::pass::Liveness>(); pass_manager.register_pass<ngraph::pass::Liveness>();
pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment); pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment);
...@@ -279,13 +284,8 @@ void runtime::cpu::CPU_ExternalFunction::compile() ...@@ -279,13 +284,8 @@ void runtime::cpu::CPU_ExternalFunction::compile()
} }
writer += writer +=
R"(// Generated by the NGraph CPU backend R"(// Generated by the nGraph CPU backend
#include <cmath> #include <cmath>
)";
writer +=
R"(#include <Eigen/Dense>
#include "ngraph/except.hpp" #include "ngraph/except.hpp"
#include "ngraph/runtime/aligned_buffer.hpp" #include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp" #include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
...@@ -417,7 +417,7 @@ using namespace ngraph::runtime; ...@@ -417,7 +417,7 @@ using namespace ngraph::runtime;
{ {
for (shared_ptr<Node> node : current_function->get_ordered_ops()) for (shared_ptr<Node> node : current_function->get_ordered_ops())
{ {
const op::Constant* c = dynamic_cast<op::Constant*>(node.get()); const ngraph::op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
if (c) if (c)
{ {
shared_ptr<descriptor::TensorView> tv = node->get_outputs()[0].get_tensor_view(); shared_ptr<descriptor::TensorView> tv = node->get_outputs()[0].get_tensor_view();
...@@ -462,7 +462,14 @@ using namespace ngraph::runtime; ...@@ -462,7 +462,14 @@ using namespace ngraph::runtime;
{ {
continue; continue;
} }
Node& node = *op_list[i]; Node& node = *op_list[i];
auto handler = dispatcher.find(type_index(typeid(node)));
if (handler == dispatcher.end())
{
throw ngraph_error("Unhandled op during code generation : " + node.description());
}
string s = emit_op_as_function(node, "f"); string s = emit_op_as_function(node, "f");
node_cache.insert({&node, s}); node_cache.insert({&node, s});
} }
...@@ -509,7 +516,7 @@ using namespace ngraph::runtime; ...@@ -509,7 +516,7 @@ using namespace ngraph::runtime;
set<descriptor::TensorView*> constants; set<descriptor::TensorView*> constants;
for (shared_ptr<Node> node : current_function->get_ordered_ops()) for (shared_ptr<Node> node : current_function->get_ordered_ops())
{ {
if (dynamic_cast<op::Constant*>(node.get())) if (dynamic_cast<ngraph::op::Constant*>(node.get()))
{ {
shared_ptr<descriptor::TensorView> tv = node->get_outputs()[0].get_tensor_view(); shared_ptr<descriptor::TensorView> tv = node->get_outputs()[0].get_tensor_view();
constants.insert(tv.get()); constants.insert(tv.get());
...@@ -573,7 +580,7 @@ using namespace ngraph::runtime; ...@@ -573,7 +580,7 @@ using namespace ngraph::runtime;
// Add inputs to the variable name map // Add inputs to the variable name map
size_t arg_index = 0; size_t arg_index = 0;
for (shared_ptr<op::Parameter> param : current_function->get_parameters()) for (shared_ptr<ngraph::op::Parameter> param : current_function->get_parameters())
{ {
for (size_t i = 0; i < param->get_output_size(); ++i) for (size_t i = 0; i < param->get_output_size(); ++i)
{ {
...@@ -612,7 +619,7 @@ using namespace ngraph::runtime; ...@@ -612,7 +619,7 @@ using namespace ngraph::runtime;
shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view(); shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view();
const element::Type& et = tv->get_tensor_view_type()->get_element_type(); const element::Type& et = tv->get_tensor_view_type()->get_element_type();
bool parameter_as_output = false; bool parameter_as_output = false;
for (shared_ptr<op::Parameter> param : current_function->get_parameters()) for (shared_ptr<ngraph::op::Parameter> param : current_function->get_parameters())
{ {
for (const descriptor::Output& pout : param->get_outputs()) for (const descriptor::Output& pout : param->get_outputs())
{ {
...@@ -657,7 +664,8 @@ using namespace ngraph::runtime; ...@@ -657,7 +664,8 @@ using namespace ngraph::runtime;
throw ngraph_error("Unhandled op during code generation : " + node->description()); throw ngraph_error("Unhandled op during code generation : " + node->description());
} }
vector<TensorViewWrapper> in; vector<TensorViewWrapper> in;
vector<string> node_input_names, node_output_names; vector<string> node_input_names;
vector<string> node_output_names;
for (const descriptor::Input& input : node->get_inputs()) for (const descriptor::Input& input : node->get_inputs())
{ {
const descriptor::Output& output = input.get_output(); const descriptor::Output& output = input.get_output();
...@@ -702,19 +710,23 @@ using namespace ngraph::runtime; ...@@ -702,19 +710,23 @@ using namespace ngraph::runtime;
} }
} }
writer << "\n// " << node->get_name() << "(";
vector<string> parameter_nodes = node_input_names;
parameter_nodes.insert(
parameter_nodes.end(), node_output_names.begin(), node_output_names.end());
writer << join(parameter_nodes);
writer << ")\n";
// Emit operation body // Emit operation body
string func_name; string func_name;
auto it = match_functions.find(node.get()); auto it = match_functions.find(node.get());
if (it != match_functions.end()) if (it == match_functions.end())
{
func_name = it->second;
}
if (func_name.empty())
{ {
handler->second(this, writer, node.get(), in, out); handler->second(this, writer, node.get(), in, out);
} }
else else
{ {
func_name = it->second;
vector<string> names; vector<string> names;
for (const TensorViewWrapper& tv : in) for (const TensorViewWrapper& tv : in)
{ {
......
...@@ -30,9 +30,6 @@ string emit_bracketed_string(vector<T> data) ...@@ -30,9 +30,6 @@ string emit_bracketed_string(vector<T> data)
{ {
stringstream ss; stringstream ss;
if (data.size() == 0)
return "";
for (auto s : data) for (auto s : data)
{ {
ss << "[" << s << "]"; ss << "[" << s << "]";
...@@ -75,7 +72,7 @@ vector<string> ...@@ -75,7 +72,7 @@ vector<string>
vector<string> index_vars; vector<string> index_vars;
for (size_t i = 0; i < top.size(); i++) for (size_t i = 0; i < top.size(); i++)
{ {
string index_var = writer.generate_temporary_name("i"); string index_var = writer.generate_temporary_name("_i");
writer << runtime::cpu::kernel::start_index_loop(index_var, new_bottom[i], top[i], i == 0); writer << runtime::cpu::kernel::start_index_loop(index_var, new_bottom[i], top[i], i == 0);
writer.indent++; writer.indent++;
......
...@@ -149,7 +149,7 @@ string ngraph::runtime::cpu::kernel::end_index_loop(const string& index_var) ...@@ -149,7 +149,7 @@ string ngraph::runtime::cpu::kernel::end_index_loop(const string& index_var)
{ {
stringstream ss; stringstream ss;
ss << "} // end for(" << index_var << ")\n"; ss << "}\n";
return ss.str(); return ss.str();
} }
...@@ -209,7 +209,7 @@ void ngraph::runtime::cpu::kernel::emit_pointwise_copy(codegen::CodeWriter& writ ...@@ -209,7 +209,7 @@ void ngraph::runtime::cpu::kernel::emit_pointwise_copy(codegen::CodeWriter& writ
for (size_t i = 0; i < n_axes; i++) for (size_t i = 0; i < n_axes; i++)
{ {
string index_var = writer.generate_temporary_name("i"); string index_var = writer.generate_temporary_name("_j");
writer << start_index_loop(index_var, source_start_corner[i], source_end_corner[i], i == 0); writer << start_index_loop(index_var, source_start_corner[i], source_end_corner[i], i == 0);
writer.indent++; writer.indent++;
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/ops/util/op_annotations.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
/// \brief Annotations added to graph ops by CPU backend passes
class CPUOpAnnotations : public ngraph::op::util::OpAnnotations
{
public:
CPUOpAnnotations() { m_mkldnn_op = false; }
bool is_mkldnn_op() { return m_mkldnn_op; }
void set_mkldnn_op(bool val) { m_mkldnn_op = val; }
private:
bool m_mkldnn_op;
};
}
}
}
...@@ -22,8 +22,10 @@ ...@@ -22,8 +22,10 @@
#include "ngraph/descriptor/primary_tensor_view.hpp" #include "ngraph/descriptor/primary_tensor_view.hpp"
#include "ngraph/except.hpp" #include "ngraph/except.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp" #include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/shape.hpp" #include "ngraph/shape.hpp"
using namespace mkldnn;
using namespace ngraph; using namespace ngraph;
using namespace std; using namespace std;
...@@ -101,8 +103,34 @@ void runtime::cpu::CPUTensorView::read(void* target, size_t tensor_offset, size_ ...@@ -101,8 +103,34 @@ void runtime::cpu::CPUTensorView::read(void* target, size_t tensor_offset, size_
{ {
throw out_of_range("read access past end of tensor"); throw out_of_range("read access past end of tensor");
} }
auto tvl = this->get_tensor_view_layout();
auto cpu_tvl = dynamic_cast<runtime::cpu::LayoutDescriptor*>(tvl.get());
if (cpu_tvl && cpu_tvl->get_mkldnn_format() != memory::format::format_undef &&
cpu_tvl->get_mkldnn_format() !=
runtime::cpu::mkldnn_utils::CreateNativeDataFormat(*cpu_tvl))
{
auto tensor_shape = this->get_shape();
auto input_format = cpu_tvl->get_mkldnn_format();
auto output_format = runtime::cpu::mkldnn_utils::CreateNativeDataFormat(*cpu_tvl);
memory::data_type et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type(
this->get_descriptor()->get_tensor_view_type()->get_element_type());
engine cpu_engine{engine::cpu, 0};
memory::dims mkldnn_shape{tensor_shape.begin(), tensor_shape.end()};
memory::desc input_desc{mkldnn_shape, et, input_format};
memory::desc output_desc{mkldnn_shape, et, output_format};
memory input{{input_desc, cpu_engine}, aligned_buffer};
memory output{{output_desc, cpu_engine}, target};
reorder prim{input, output};
mkldnn::stream s(mkldnn::stream::kind::eager);
s.submit({prim}).wait();
}
else
{
const char* source = get_data_ptr(); const char* source = get_data_ptr();
memcpy(target, &source[tensor_offset], n); memcpy(target, &source[tensor_offset], n);
}
} }
size_t runtime::cpu::CPUTensorView::get_size() const size_t runtime::cpu::CPUTensorView::get_size() const
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
*******************************************************************************/ *******************************************************************************/
#include <memory> #include <memory>
#include <string>
#include "mkldnn_emitter.hpp" #include "mkldnn_emitter.hpp"
...@@ -45,7 +46,7 @@ mkldnn::memory::desc MKLDNNEmitter::build_memory_descriptor(const TensorViewWrap ...@@ -45,7 +46,7 @@ mkldnn::memory::desc MKLDNNEmitter::build_memory_descriptor(const TensorViewWrap
{ {
return mkldnn::memory::desc( return mkldnn::memory::desc(
mkldnn::memory::dims(tvw.get_shape().begin(), tvw.get_shape().end()), mkldnn::memory::dims(tvw.get_shape().begin(), tvw.get_shape().end()),
mkldnn_utils::GetDataType(tvw.get_element_type()), mkldnn_utils::get_mkldnn_data_type(tvw.get_element_type()),
fmt); fmt);
} }
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#include <string>
#include <mkldnn.hpp> #include <mkldnn.hpp>
#include "mkldnn_invoke.hpp" #include "mkldnn_invoke.hpp"
......
...@@ -17,9 +17,9 @@ ...@@ -17,9 +17,9 @@
#include <string> #include <string>
#include <typeindex> #include <typeindex>
#include <typeinfo> #include <typeinfo>
#include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include "ngraph/types/element_type.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/ops/add.hpp" #include "ngraph/ops/add.hpp"
#include "ngraph/ops/avg_pool.hpp" #include "ngraph/ops/avg_pool.hpp"
...@@ -30,17 +30,12 @@ ...@@ -30,17 +30,12 @@
#include "mkldnn_utils.hpp" #include "mkldnn_utils.hpp"
namespace ngraph using namespace mkldnn;
{ using namespace ngraph;
namespace runtime
{
namespace cpu
{
namespace mkldnn_utils
{
#define TI(x) std::type_index(typeid(x)) #define TI(x) std::type_index(typeid(x))
static const std::unordered_set<std::type_index> s_op_registry{ static const std::unordered_set<std::type_index> s_op_registry{
TI(ngraph::op::Add), TI(ngraph::op::Add),
TI(ngraph::op::AvgPool), TI(ngraph::op::AvgPool),
TI(ngraph::op::AvgPoolBackprop), TI(ngraph::op::AvgPoolBackprop),
...@@ -53,36 +48,71 @@ namespace ngraph ...@@ -53,36 +48,71 @@ namespace ngraph
TI(ngraph::op::Relu), TI(ngraph::op::Relu),
TI(ngraph::op::ReluBackprop)}; TI(ngraph::op::ReluBackprop)};
static const std::unordered_map<std::string, const mkldnn::memory::data_type> // Mapping from POD types to MKLDNN data types
s_data_type_map{{"char", mkldnn::memory::data_type::s8}, static const std::map<element::Type, const mkldnn::memory::data_type> s_mkldnn_data_type_map{
{"float", mkldnn::memory::data_type::f32}, {element::boolean, mkldnn::memory::data_type::s8},
{"double", mkldnn::memory::data_type::data_undef}, {element::f32, mkldnn::memory::data_type::f32},
{"int8_t", mkldnn::memory::data_type::s8}, {element::f64, mkldnn::memory::data_type::data_undef},
{"int16_t", mkldnn::memory::data_type::s16}, {element::i8, mkldnn::memory::data_type::s8},
{"int32_t", mkldnn::memory::data_type::s32}, {element::i16, mkldnn::memory::data_type::s16},
{"int64_t", mkldnn::memory::data_type::data_undef}, {element::i32, mkldnn::memory::data_type::s32},
{"uint8_t", mkldnn::memory::data_type::u8}, {element::i64, mkldnn::memory::data_type::data_undef},
{"uint16_t", mkldnn::memory::data_type::data_undef}, {element::u8, mkldnn::memory::data_type::u8},
{"uint32_t", mkldnn::memory::data_type::data_undef}, {element::u16, mkldnn::memory::data_type::data_undef},
{"uint64_t", mkldnn::memory::data_type::data_undef}}; {element::u32, mkldnn::memory::data_type::data_undef},
{element::u64, mkldnn::memory::data_type::data_undef}};
mkldnn::memory::data_type GetDataType(const ngraph::element::Type& et) static const std::map<element::Type, const std::string> s_mkldnn_data_type_string_map{
{ {element::boolean, "mkldnn::memory::data_type::s8"},
auto it = s_data_type_map.find(et.c_type_string()); {element::f32, "mkldnn::memory::data_type::f32"},
if (it == s_data_type_map.end() || {element::f64, "mkldnn::memory::data_type::data_undef"},
it->second == mkldnn::memory::data_type::data_undef) {element::i8, "mkldnn::memory::data_type::s8"},
throw ngraph_error("No MKLDNN data type exists for the given element type"); {element::i16, "mkldnn::memory::data_type::s16"},
return it->second; {element::i32, "mkldnn::memory::data_type::s32"},
} {element::i64, "mkldnn::memory::data_type::data_undef"},
{element::u8, "mkldnn::memory::data_type::u8"},
{element::u16, "mkldnn::memory::data_type::data_undef"},
{element::u32, "mkldnn::memory::data_type::data_undef"},
{element::u64, "mkldnn::memory::data_type::data_undef"}};
bool IsMKLDNNOp(ngraph::Node& op) // TODO (jbobba): Add the rest of memory formats to this map as well
{ static const std::map<memory::format, const std::string> s_mkldnn_format_string_map{
{memory::format::format_undef, "memory::format::format_undef"},
{memory::format::any, "memory::format::any"},
{memory::format::blocked, "memory::format::blocked"},
{memory::format::x, "memory::format::x"},
{memory::format::nc, "memory::format::nc"},
{memory::format::nchw, "memory::format::nchw"},
{memory::format::nhwc, "memory::format::nhwc"},
{memory::format::chwn, "memory::format::chwn"},
{memory::format::nChw8c, "memory::format::nChw8c"},
{memory::format::nChw16c, "memory::format::nChw16c"},
{memory::format::oi, "memory::format::oi"},
{memory::format::io, "memory::format::io"},
{memory::format::oihw, "memory::format::oihw"},
{memory::format::ihwo, "memory::format::ihwo"},
{memory::format::hwio, "memory::format::hwio"},
{memory::format::oIhw8i, "memory::format::oIhw8i"},
{memory::format::oIhw16i, "memory::format::oIhw16i"},
{memory::format::OIhw8i8o, "memory::format::OIhw8i8o"},
{memory::format::OIhw16i16o, "memory::format::OIhw16i16o"},
{memory::format::OIhw8o8i, "memory::format::OIhw8o8i"},
{memory::format::OIhw16o16i, "memory::format::OIhw16o16i"},
{memory::format::Oihw8o, "memory::format::Oihw8o"},
{memory::format::Oihw16o, "memory::format::Oihw16o"},
{memory::format::Ohwi8o, "memory::format::Ohwi8o"},
{memory::format::Ohwi16o, "memory::format::Ohwi16o"},
{memory::format::OhIw16o4i, "memory::format::OhIw16o4i"},
};
bool runtime::cpu::mkldnn_utils::IsMKLDNNOp(ngraph::Node& op)
{
return (s_op_registry.find(TI(op)) != s_op_registry.end()); return (s_op_registry.find(TI(op)) != s_op_registry.end());
} }
mkldnn::memory::format mkldnn::memory::format runtime::cpu::mkldnn_utils::CreateNativeDataFormat(
CreateNativeDataFormat(const ngraph::runtime::cpu::LayoutDescriptor& layout) const ngraph::runtime::cpu::LayoutDescriptor& layout)
{ {
switch (layout.get_shape().size()) switch (layout.get_shape().size())
{ {
case 1: return mkldnn::memory::format::x; case 1: return mkldnn::memory::format::x;
...@@ -90,8 +120,31 @@ namespace ngraph ...@@ -90,8 +120,31 @@ namespace ngraph
case 4: return mkldnn::memory::format::nchw; case 4: return mkldnn::memory::format::nchw;
default: return mkldnn::memory::format::format_undef; default: return mkldnn::memory::format::format_undef;
} }
}
const std::string& runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(const ngraph::element::Type& type)
{
auto it = s_mkldnn_data_type_string_map.find(type);
if (it == s_mkldnn_data_type_string_map.end() || it->second.empty())
throw ngraph_error("No MKLDNN data type exists for the given element type");
return it->second;
}
mkldnn::memory::data_type runtime::cpu::mkldnn_utils::get_mkldnn_data_type(const ngraph::element::Type& type)
{
auto it = s_mkldnn_data_type_map.find(type);
if (it == s_mkldnn_data_type_map.end() || it->second == memory::data_type::data_undef)
{
throw ngraph_error("No MKLDNN data type exists for the given element type");
} }
} return it->second;
} }
}
const std::string& runtime::cpu::mkldnn_utils::get_mkldnn_format_string(memory::format fmt)
{
auto it = s_mkldnn_format_string_map.find(fmt);
if (it == s_mkldnn_format_string_map.end())
throw ngraph_error("No MKLDNN format exists for the given format type " +
std::to_string(fmt));
return it->second;
} }
...@@ -32,12 +32,12 @@ namespace ngraph ...@@ -32,12 +32,12 @@ namespace ngraph
{ {
extern mkldnn::engine global_cpu_engine; extern mkldnn::engine global_cpu_engine;
mkldnn::memory::data_type GetDataType(const ngraph::element::Type& et);
bool IsMKLDNNOp(ngraph::Node& op); bool IsMKLDNNOp(ngraph::Node& op);
mkldnn::memory::format mkldnn::memory::format
CreateNativeDataFormat(const ngraph::runtime::cpu::LayoutDescriptor& layout); CreateNativeDataFormat(const ngraph::runtime::cpu::LayoutDescriptor& layout);
const std::string& get_mkldnn_data_type_string(const ngraph::element::Type& type);
mkldnn::memory::data_type get_mkldnn_data_type(const ngraph::element::Type& type);
const std::string& get_mkldnn_format_string(mkldnn::memory::format fmt);
} }
} }
} }
......
...@@ -39,7 +39,7 @@ runtime::cpu::op::ConvertLayout::ConvertLayout( ...@@ -39,7 +39,7 @@ runtime::cpu::op::ConvertLayout::ConvertLayout(
if (!arg_layout) if (!arg_layout)
{ {
throw ngraph_error("Layout conversion input tensor is missing layout information"); //throw ngraph_error("Layout conversion input tensor is missing layout information");
} }
add_output(layout->get_element_type(), layout->get_shape()); add_output(layout->get_element_type(), layout->get_shape());
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/cpu/pass/cpu_assignment.hpp"
#include <algorithm>
#include <cassert>
#include <memory>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
#include <mkldnn.hpp>
#include "ngraph/descriptor/output.hpp"
#include "ngraph/ops/convolution.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            namespace pass
            {
                /// Assignment handler for Convolution.
                ///
                /// Marks the node as MKLDNN-dispatched (via CPUOpAnnotations) when
                /// MKLDNN can execute it directly: 4-D data and filter tensors,
                /// f32 element type, and no data dilation. Otherwise the node is
                /// left unannotated and falls back to the default emitter.
                template <>
                void CPUAssignment::ASSIGN_DECL(ngraph::op::Convolution)
                {
                    auto convolution = static_cast<op::Convolution*>(node);

                    const auto arg0_rank = node->get_input_shape(0).size();
                    const auto arg1_rank = node->get_input_shape(1).size();

                    // Any data-dilation stride != 1 disqualifies the op from MKLDNN.
                    const auto& data_dilation = convolution->get_data_dilation_strides();
                    const bool data_dilated = std::any_of(data_dilation.begin(),
                                                          data_dilation.end(),
                                                          [](size_t s) { return s != 1; });

                    if (!data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
                        node->get_input_element_type(0) == element::f32)
                    {
                        auto op_annotations =
                            std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
                        op_annotations->set_mkldnn_op(true);
                        convolution->set_op_annotations(op_annotations);
                    }
                }
            }
        }
    }
}
// Shorthand: RTTI type of x as a std::type_index (map key).
#define TI(x) type_index(typeid(x))

// Dispatch table mapping an op's concrete type to the specialized
// assignment handler that decides whether it runs through MKLDNN.
// Ops absent from this table are skipped by run_on_call_graph.
static const runtime::cpu::pass::AssignOpMap s_dispatcher{
    {TI(ngraph::op::Convolution),
     &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Convolution>},
};
bool runtime::cpu::pass::CPUAssignment::run_on_call_graph(
const std::list<std::shared_ptr<Node>>& nodes)
{
for (const auto& node : nodes)
{
auto& n = *node;
auto handler = s_dispatcher.find(TI(n));
if (handler != s_dispatcher.end())
{
handler->second(m_external_function.get(), node.get());
}
}
return false;
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/pass/pass.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#define ASSIGN_DECL(op_name) \
assign<op_name>(ngraph::runtime::cpu::CPU_ExternalFunction * external_function, \
ngraph::Node * node)
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            namespace pass
            {
                // Signature of a per-op assignment handler.
                using AssignFunction = std::function<void(CPU_ExternalFunction*, ngraph::Node*)>;
                // Dispatch table type: concrete op type -> handler.
                using AssignOpMap = std::unordered_map<std::type_index, AssignFunction>;

                /// Call-graph pass that decides, per op, how the CPU backend should
                /// execute it (e.g. via MKLDNN), recording the decision as an op
                /// annotation rather than modifying the graph.
                class CPUAssignment : public ngraph::pass::CallGraphPass
                {
                public:
                    CPUAssignment(std::shared_ptr<CPU_ExternalFunction> external_function)
                        : m_external_function(external_function)
                    {
                    }

                    /// Dispatches each node to its type-specific assign<OP> handler.
                    virtual bool
                        run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes) override;

                    /// Primary template: only explicitly specialized ops are
                    /// supported; any other instantiation throws at runtime.
                    template <typename OP>
                    static void
                        assign(ngraph::runtime::cpu::CPU_ExternalFunction* external_function,
                               ngraph::Node* node)
                    {
                        throw std::runtime_error("Unimplemented op in CPU assignment");
                    }

                private:
                    std::shared_ptr<CPU_ExternalFunction> m_external_function;
                };
            }
        }
    }
}
...@@ -17,21 +17,82 @@ ...@@ -17,21 +17,82 @@
#include <algorithm> #include <algorithm>
#include <memory> #include <memory>
#include <string> #include <string>
#include <typeindex>
#include <typeinfo>
#include <mkldnn.hpp> #include <mkldnn.hpp>
#include "cpu_layout.hpp" #include "cpu_layout.hpp"
#include "ngraph/descriptor/output.hpp" #include "ngraph/descriptor/output.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/op.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp" #include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/ops/convert_layout.hpp"
//using namespace ngraph::runtime::cpu::pass; using namespace std;
using namespace mkldnn;
using namespace ngraph; using namespace ngraph;
bool runtime::cpu::pass::CPULayout::run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes) void runtime::cpu::pass::CPULayout::set_default_layouts(
runtime::cpu::CPU_ExternalFunction* external_function, std::shared_ptr<Node> node)
{ {
for (const auto& node : nodes) std::vector<shared_ptr<Node>> new_args;
bool replace_node = false;
uint index = 0;
for (const descriptor::Input& input : node->get_inputs())
{
const auto& output = input.get_output();
auto tv = output.get_tensor_view();
auto tvt = tv->get_tensor_view_type();
auto rank = tvt->get_shape().size();
auto tvl = tv->get_tensor_view_layout();
auto cpu_tvl = dynamic_cast<runtime::cpu::LayoutDescriptor*>(tvl.get());
if (cpu_tvl && cpu_tvl->get_mkldnn_format() != memory::format::format_undef &&
cpu_tvl->get_mkldnn_format() !=
runtime::cpu::mkldnn_utils::CreateNativeDataFormat(*cpu_tvl))
{
auto native_axis_order =
ngraph::runtime::cpu::LayoutDescriptor::create_native_axis_order(rank);
auto layout =
std::make_shared<ngraph::runtime::cpu::LayoutDescriptor>(*tv, native_axis_order);
layout->set_mkldnn_format(runtime::cpu::mkldnn_utils::CreateNativeDataFormat(*cpu_tvl));
auto new_node = std::shared_ptr<Node>(
new runtime::cpu::op::ConvertLayout(output.get_node(), output.get_index(), layout));
new_args.push_back(new_node);
replace_node = true;
NGRAPH_DEBUG << "Inserted conversion node " << new_node->get_name() << " between "
<< output.get_node()->get_name()
<< "(layout: " << cpu_tvl->get_mkldnn_format() << ") and "
<< node->get_name() << "(layout: default)";
}
else
{
new_args.push_back(node->get_input_op(index));
}
index++;
}
shared_ptr<Node> new_node;
if (replace_node)
{
new_node = node->copy_with_new_args(new_args);
if (node->is_output())
{
external_function->get_function()->replace_node(node, new_node);
}
else
{ {
ngraph::replace_node(node, new_node);
}
NGRAPH_DEBUG << "Replaced " << node->get_name() << " with " << new_node->get_name();
auto old_op_annotations = static_pointer_cast<ngraph::op::Op>(node)->get_op_annotations();
static_pointer_cast<ngraph::op::Op>(new_node)->set_op_annotations(old_op_annotations);
node = new_node;
}
for (size_t i = 0; i < node->get_output_size(); ++i) for (size_t i = 0; i < node->get_output_size(); ++i)
{ {
auto tv = node->get_output_tensor_view(i); auto tv = node->get_output_tensor_view(i);
...@@ -41,7 +102,6 @@ bool runtime::cpu::pass::CPULayout::run_on_call_graph(const std::list<std::share ...@@ -41,7 +102,6 @@ bool runtime::cpu::pass::CPULayout::run_on_call_graph(const std::list<std::share
} }
auto tvt = tv->get_tensor_view_type(); auto tvt = tv->get_tensor_view_type();
auto& tensor = tv->get_tensor();
auto rank = tvt->get_shape().size(); auto rank = tvt->get_shape().size();
auto native_axis_order = auto native_axis_order =
...@@ -50,34 +110,201 @@ bool runtime::cpu::pass::CPULayout::run_on_call_graph(const std::list<std::share ...@@ -50,34 +110,201 @@ bool runtime::cpu::pass::CPULayout::run_on_call_graph(const std::list<std::share
auto layout = auto layout =
std::make_shared<ngraph::runtime::cpu::LayoutDescriptor>(*tv, native_axis_order); std::make_shared<ngraph::runtime::cpu::LayoutDescriptor>(*tv, native_axis_order);
if (tensor.is_output() || tensor.is_input() || tensor.is_constant())
{
// Set the MKLDNN format to native row-major variants // Set the MKLDNN format to native row-major variants
layout->set_mkldnn_format(mkldnn_utils::CreateNativeDataFormat(*layout)); layout->set_mkldnn_format(mkldnn_utils::CreateNativeDataFormat(*layout));
tv->set_tensor_view_layout(layout);
} }
else }
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace pass
{ {
if (ngraph::runtime::cpu::mkldnn_utils::IsMKLDNNOp(*node)) template <>
void CPULayout::LAYOUT_DECL(ngraph::op::Convolution)
{ {
// TODO(jmenon): get_inputs is marked as to-be-deprecated auto op_annotations =
// but get_input_ops isn't a suitable API so this needs to be static_pointer_cast<ngraph::op::Op>(node)->get_op_annotations();
// reworked if (op_annotations &&
static_pointer_cast<ngraph::runtime::cpu::CPUOpAnnotations>(op_annotations)
->is_mkldnn_op())
{
auto convolution = static_cast<const ngraph::op::Convolution*>(node.get());
auto arg0_shape = node->get_input_shape(0);
auto arg1_shape = node->get_input_shape(1);
auto result_shape = node->get_output_shape(0);
auto filter_strides = convolution->get_window_movement_strides();
auto padding_below = convolution->get_padding_below();
auto padding_above = convolution->get_padding_above();
Strides window_dilation_strides_adjusted;
for (size_t s : convolution->get_window_dilation_strides())
{
window_dilation_strides_adjusted.push_back(s - 1);
}
memory::data_type et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type(
node->get_input_element_type(0));
engine cpu_engine(engine::cpu, 0);
memory::dims mkldnn_arg0_shape(arg0_shape.begin(), arg0_shape.end());
memory::dims mkldnn_arg1_shape(arg1_shape.begin(), arg1_shape.end());
memory::dims mkldnn_result_shape(result_shape.begin(), result_shape.end());
memory::dims mkldnn_filter_strides(filter_strides.begin(),
filter_strides.end());
memory::dims mkldnn_dilated_strides(
window_dilation_strides_adjusted.begin(),
window_dilation_strides_adjusted.end());
memory::dims mkldnn_padding_below(padding_below.begin(),
padding_below.end());
memory::dims mkldnn_padding_above(padding_above.begin(),
padding_above.end());
const memory::desc input_data_desc(
mkldnn_arg0_shape, et, memory::format::any);
const memory::desc weights_desc(mkldnn_arg1_shape, et, memory::format::any);
const memory::desc result_desc(
mkldnn_result_shape, et, memory::format::any);
convolution_forward::desc fwd_desc(prop_kind::forward,
algorithm::convolution_direct,
input_data_desc,
weights_desc,
result_desc,
mkldnn_filter_strides,
mkldnn_dilated_strides,
mkldnn_padding_below,
mkldnn_padding_above,
padding_kind::zero);
convolution_forward::primitive_desc prim_desc(fwd_desc, cpu_engine);
memory::format prim_input_formats[2];
memory::format prim_output_formats[1];
prim_input_formats[0] = static_cast<memory::format>(
prim_desc.src_primitive_desc().desc().data.format);
prim_output_formats[0] = static_cast<memory::format>(
prim_desc.dst_primitive_desc().desc().data.format);
prim_input_formats[1] = static_cast<memory::format>(
prim_desc.weights_primitive_desc().desc().data.format);
std::vector<shared_ptr<Node>> new_args;
bool replace_node = false;
uint index = 0;
for (const descriptor::Input& input : node->get_inputs()) for (const descriptor::Input& input : node->get_inputs())
{ {
const auto& output = input.get_output(); const auto& output = input.get_output();
auto output_tv = output.get_tensor_view(); auto tv = output.get_tensor_view();
auto output_tvl = output_tv->get_tensor_view_layout(); auto tvt = tv->get_tensor_view_type();
auto rank = tvt->get_shape().size();
// TODO(jmenon): Propagate layout based on inputs auto tvl = tv->get_tensor_view_layout();
// TODO(jmenon): Insert layout conversions when needed auto mkldnn_tvl =
dynamic_cast<runtime::cpu::LayoutDescriptor*>(tvl.get());
if (!mkldnn_tvl ||
mkldnn_tvl->get_mkldnn_format() != prim_input_formats[index])
{
auto native_axis_order = ngraph::runtime::cpu::LayoutDescriptor::
create_native_axis_order(rank);
auto layout =
std::make_shared<ngraph::runtime::cpu::LayoutDescriptor>(
*tv, native_axis_order);
layout->set_mkldnn_format(prim_input_formats[index]);
auto new_node =
std::shared_ptr<Node>(new runtime::cpu::op::ConvertLayout(
output.get_node(), output.get_index(), layout));
new_args.push_back(new_node);
replace_node = true;
NGRAPH_DEBUG << "Inserted conversion node " << new_node->get_name()
<< " between " << output.get_node()->get_name()
<< "(layout: " << mkldnn_tvl->get_mkldnn_format()
<< ") and " << node->get_name()
<< "(layout: " << prim_input_formats[index] << ")";
}
else
{
new_args.push_back(node->get_input_op(index));
} }
index++;
}
shared_ptr<Node> new_node;
if (replace_node)
{
new_node = node->copy_with_new_args(new_args);
if (node->is_output())
{
external_function->get_function()->replace_node(node, new_node);
} }
else else
{ {
layout->set_mkldnn_format(mkldnn::memory::format::format_undef); ngraph::replace_node(node, new_node);
}
NGRAPH_DEBUG << "Replaced " << node->get_name() << " with "
<< new_node->get_name();
auto old_op_annotations =
static_pointer_cast<ngraph::op::Op>(node)->get_op_annotations();
static_pointer_cast<ngraph::op::Op>(new_node)->set_op_annotations(
old_op_annotations);
node = new_node;
} }
// Set convolution output format
for (size_t i = 0; i < node->get_output_size(); ++i)
{
auto tv = node->get_output_tensor_view(i);
auto tvt = tv->get_tensor_view_type();
auto rank = tvt->get_shape().size();
auto tvl = tv->get_tensor_view_layout();
if (tvl)
{
throw ngraph_error("Convolution output layout already set");
} }
auto native_axis_order =
ngraph::runtime::cpu::LayoutDescriptor::create_native_axis_order(
rank);
auto layout = std::make_shared<ngraph::runtime::cpu::LayoutDescriptor>(
*tv, native_axis_order);
layout->set_mkldnn_format(prim_output_formats[i]);
tv->set_tensor_view_layout(layout); tv->set_tensor_view_layout(layout);
NGRAPH_DEBUG << "Setting Node: " << node->get_name()
<< " output layout: " << prim_output_formats[i] << endl;
}
}
else
{
set_default_layouts(external_function, node);
}
}
}
}
}
}
#define TI(x) type_index(typeid(x))
static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
{TI(ngraph::op::Convolution), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Convolution>},
};
bool runtime::cpu::pass::CPULayout::run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes)
{
for (const auto& node : nodes)
{
auto& n = *node;
auto handler = s_dispatcher.find(TI(n));
if (handler != s_dispatcher.end())
{
handler->second(m_external_function.get(), node);
}
else
{
set_default_layouts(m_external_function.get(), node);
} }
} }
......
...@@ -17,6 +17,11 @@ ...@@ -17,6 +17,11 @@
#pragma once #pragma once
#include "ngraph/pass/pass.hpp" #include "ngraph/pass/pass.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
#define LAYOUT_DECL(op_type) \
layout<op_type>(ngraph::runtime::cpu::CPU_ExternalFunction * external_function, \
std::shared_ptr<ngraph::Node> node)
namespace ngraph namespace ngraph
{ {
...@@ -26,11 +31,30 @@ namespace ngraph ...@@ -26,11 +31,30 @@ namespace ngraph
{ {
namespace pass namespace pass
{ {
using LayoutFunction =
std::function<void(CPU_ExternalFunction*, std::shared_ptr<ngraph::Node>)>;
using LayoutOpMap = std::unordered_map<std::type_index, LayoutFunction>;
class CPULayout : public ngraph::pass::CallGraphPass class CPULayout : public ngraph::pass::CallGraphPass
{ {
public: public:
CPULayout(std::shared_ptr<CPU_ExternalFunction> external_function)
: m_external_function(external_function)
{
}
virtual bool virtual bool
run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes) override; run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes) override;
template <typename OP>
static void
layout(ngraph::runtime::cpu::CPU_ExternalFunction* external_function,
std::shared_ptr<ngraph::Node> node);
private:
std::shared_ptr<CPU_ExternalFunction> m_external_function;
static void set_default_layouts(CPU_ExternalFunction* external_function,
std::shared_ptr<Node> node);
}; };
} }
} }
......
...@@ -46,6 +46,7 @@ namespace ngraph ...@@ -46,6 +46,7 @@ namespace ngraph
virtual ~ExternalFunction() {} virtual ~ExternalFunction() {}
virtual std::shared_ptr<CallFrame> make_call_frame() = 0; virtual std::shared_ptr<CallFrame> make_call_frame() = 0;
const std::shared_ptr<ngraph::Function> get_function() { return m_function; }
protected: protected:
std::shared_ptr<ngraph::Function> m_function; std::shared_ptr<ngraph::Function> m_function;
bool m_release_function; bool m_release_function;
......
...@@ -555,6 +555,53 @@ TEST(${BACKEND_NAME}, divide) ...@@ -555,6 +555,53 @@ TEST(${BACKEND_NAME}, divide)
EXPECT_EQ((vector<float>{2, 2, 2, 2}), read_vector<float>(result)); EXPECT_EQ((vector<float>{2, 2, 2, 2}), read_vector<float>(result));
} }
// Checks numerical stability of the autodiff adjoints for Divide.
// For Y = A / B with output adjoint C: dY/dA = C / B and dY/dB = -C * A / B^2.
// The expected -0.0 entries (where A == 0) pin down both the sign and the
// finiteness of the result — an unstable formulation could yield NaN here.
TEST(${BACKEND_NAME}, divide_adjoint_stability)
{
    SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
    auto manager = runtime::Manager::get("${BACKEND_NAME}");
    auto backend = manager->allocate_backend();

    Shape shape{2, 2};

    // Build the backprop function: outputs dY/dA and dY/dB, taking A, B and
    // the output adjoint C as parameters.
    auto make_external = [&]() {
        auto A = make_shared<op::Parameter>(element::f32, shape);
        auto B = make_shared<op::Parameter>(element::f32, shape);
        auto f = make_shared<Function>(make_shared<op::Divide>(A, B), op::Parameters{A, B});

        auto Y_out = f->get_output_op(0);
        auto Xs = f->get_parameters();
        auto C = std::make_shared<op::Parameter>(Y_out->get_element_type(), Y_out->get_shape());
        // One backprop output per input parameter.
        std::vector<std::shared_ptr<Node>> dYdXs(Xs.size());
        transform(Xs.begin(), Xs.end(), dYdXs.begin(), [C, Y_out](const std::shared_ptr<Node>& X) {
            return Y_out->backprop_node(X, C);
        });
        std::vector<std::shared_ptr<op::Parameter>> params(Xs);
        params.push_back(C);

        auto bf = std::make_shared<Function>(dYdXs, params);
        auto external = manager->compile(bf);
        return external;
    };

    auto cf = backend->make_call_frame(make_external());

    // Create some tensors for input/output
    auto a = backend->make_primary_tensor_view(element::f32, shape);
    copy_data(a, vector<float>{0, 0, 1, 1});
    auto b = backend->make_primary_tensor_view(element::f32, shape);
    copy_data(b, vector<float>{2, 2, 2, 2});
    auto c = backend->make_primary_tensor_view(element::f32, shape);
    copy_data(c, vector<float>{1, 1, 1, 1});

    auto resulta = backend->make_primary_tensor_view(element::f32, shape);
    auto resultb = backend->make_primary_tensor_view(element::f32, shape);

    cf->call({a, b, c}, {resulta, resultb});
    // dY/dA = C / B = 0.5 everywhere.
    EXPECT_EQ((vector<float>{0.5, 0.5, 0.5, 0.5}), read_vector<float>(resulta));
    // dY/dB = -C * A / B^2: -0.0 (not NaN) where A == 0, -0.25 where A == 1.
    EXPECT_EQ((vector<float>{-0.0, -0.0, -0.25, -0.25}), read_vector<float>(resultb));
}
TEST(${BACKEND_NAME}, divide_by_zero_float32) TEST(${BACKEND_NAME}, divide_by_zero_float32)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}"); SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
...@@ -6009,6 +6056,42 @@ TEST(${BACKEND_NAME}, convolution_outlining) ...@@ -6009,6 +6056,42 @@ TEST(${BACKEND_NAME}, convolution_outlining)
EXPECT_EQ(vector<float>{expected_result}, read_vector<float>(result)); EXPECT_EQ(vector<float>{expected_result}, read_vector<float>(result));
} }
// 1x1 convolution, 16 input channels -> 32 output channels over a 2x2 image.
// With all-ones data and weights each output element is the sum of 16 ones,
// exercising the backend's layout assignment/conversion path end to end.
TEST(${BACKEND_NAME}, convolution_layout)
{
    Shape data_shape{1, 16, 2, 2};
    Shape filter_shape{32, 16, 1, 1};
    Shape out_shape{1, 32, 2, 2};
    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    auto filters = make_shared<op::Parameter>(element::f32, filter_shape);
    auto conv = make_shared<op::Convolution>(data,
                                             filters,
                                             Strides{1, 1},
                                             Strides{1, 1},
                                             CoordinateDiff{0, 0},
                                             CoordinateDiff{0, 0},
                                             Strides{1, 1});
    auto f = make_shared<Function>(conv, op::Parameters{data, filters});

    auto manager = runtime::Manager::get("${BACKEND_NAME}");
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(manager->compile(f));

    // Input/output tensors: 64 data elements, 512 weights, 128 results.
    auto a = backend->make_primary_tensor_view(element::f32, data_shape);
    copy_data(a, vector<float>(64, 1.0f));
    auto b = backend->make_primary_tensor_view(element::f32, filter_shape);
    copy_data(b, vector<float>(512, 1.0f));
    auto result = backend->make_primary_tensor_view(element::f32, out_shape);

    cf->call({a, b}, {result});
    EXPECT_EQ(vector<float>(128, 16.0f), read_vector<float>(result));
}
TEST(${BACKEND_NAME}, avg_pool_1d_1channel_1image) TEST(${BACKEND_NAME}, avg_pool_1d_1channel_1image)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}"); SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
......
...@@ -288,3 +288,23 @@ TEST(cpu_fusion, fuse_fprop_bn) ...@@ -288,3 +288,23 @@ TEST(cpu_fusion, fuse_fprop_bn)
size_t ccg = count_ops_of_type<op::BatchNorm>(func); size_t ccg = count_ops_of_type<op::BatchNorm>(func);
ASSERT_EQ(ccg, 1); ASSERT_EQ(ccg, 1);
} }
// Minimal op type with no CPU-backend handler registered for it: its RTTI
// type differs from Abs, so type-index dispatch tables will not find it.
class UnhandledOp : public ngraph::op::Abs
{
public:
    // explicit: a test helper should never be created by implicit
    // conversion from a Node pointer.
    explicit UnhandledOp(const std::shared_ptr<Node>& arg)
        : Abs(arg)
    {
    }
};
// Compiling a graph containing an op the CPU backend does not implement
// must surface an ngraph_error rather than silently misbehaving.
TEST(cpu_fusion, unhandled_op)
{
    auto param = make_shared<op::Parameter>(element::f32, Shape{});
    auto func = make_shared<Function>(make_shared<UnhandledOp>(param), op::Parameters{param});

    auto manager = runtime::Manager::get("CPU");
    auto backend = manager->allocate_backend();
    auto compiled = manager->compile(func);
    ASSERT_THROW(backend->make_call_frame(compiled), ngraph_error);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment