Commit eabfebe5 authored by pthoreho

Merge remote-tracking branch 'origin/master' into pruthvi/mkldnn_elementwise_add

parents 16a18d29 d0f8dff2
@@ -183,6 +183,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
    runtime/cpu/mkldnn_utils.cpp
    runtime/cpu/ops/convert_layout.cpp
    runtime/cpu/ops/matmul_bias.cpp
+   runtime/cpu/pass/cpu_assignment.cpp
    runtime/cpu/pass/cpu_fusion.cpp
    runtime/cpu/pass/cpu_layout.cpp
)
...
@@ -14,8 +14,10 @@
 * limitations under the License.
 *******************************************************************************/
-#include "ngraph/builder/reduce_ops.hpp"
+#include <numeric>
#include "ngraph/builder/autobroadcast.hpp"
+#include "ngraph/builder/reduce_ops.hpp"
#include "ngraph/ops/add.hpp"
#include "ngraph/ops/divide.hpp"
#include "ngraph/ops/multiply.hpp"
...
@@ -40,7 +40,7 @@ std::string codegen::CodeWriter::generate_temporary_name(std::string prefix)
{
    std::stringstream ss;
-   ss << prefix << "__" << m_temporary_name_count;
+   ss << prefix << m_temporary_name_count;
    m_temporary_name_count++;
    return ss.str();
...
@@ -24,6 +24,6 @@ void ngraph::op::Divide::generate_adjoints(autodiff::Adjoints& adjoints,
    auto x = get_input_op(0);
    auto y = get_input_op(1);

-   adjoints.add_delta(x, delta * shared_from_this() / x);
+   adjoints.add_delta(x, delta / y);
    adjoints.add_delta(y, -delta * shared_from_this() / y);
}
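Reviewer note, not part of the patch: the rewritten x-adjoint is a numerical-stability fix. For z = x / y the exact partials are

    dz/dx = 1/y
    dz/dy = -x/y^2 = -z/y

so delta / y computes the x-adjoint directly, while the old expression delta * z / x = delta * (x/y) / x turns into 0/0 whenever x is 0, even though the true derivative 1/y is finite. The divide_adjoint_stability test added later in this diff exercises exactly that case.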
@@ -20,6 +20,7 @@
#include "ngraph/common.hpp"
#include "ngraph/node.hpp"
+#include "ngraph/ops/util/op_annotations.hpp"

namespace ngraph
{
@@ -28,8 +29,21 @@ namespace ngraph
        /// Root of all actual ops
        class Op : public Node
        {
+       public:
+           void set_op_annotations(std::shared_ptr<ngraph::op::util::OpAnnotations> op_annotations)
+           {
+               m_op_annotations = op_annotations;
+           }
+
+           std::shared_ptr<ngraph::op::util::OpAnnotations> get_op_annotations() const
+           {
+               return m_op_annotations;
+           }
+
        protected:
            Op(const std::string& node_type, const Nodes& arguments);
+
+       private:
+           std::shared_ptr<ngraph::op::util::OpAnnotations> m_op_annotations;
        };
    }
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
namespace ngraph
{
    namespace op
    {
        namespace util
        {
            /// \brief Abstract base class for annotations added to graph ops
            class OpAnnotations
            {
            public:
                OpAnnotations() {}
            };
        }
    }
}
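For context, a sketch (not part of the patch) of how the new annotation hooks are meant to be used together with the CPU-specific subclass added later in this diff (cpu_op_annotations.hpp). The two helper functions below are hypothetical; only set_op_annotations/get_op_annotations and CPUOpAnnotations come from the patch:

// Hypothetical helpers illustrating the intended flow.
// A backend pass tags an op that MKLDNN should handle:
void tag_as_mkldnn(const std::shared_ptr<ngraph::op::Op>& op)
{
    auto annotations = std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
    annotations->set_mkldnn_op(true);
    op->set_op_annotations(annotations);
}

// Later stages (layout pass, emitter) read the tag back through the base-class pointer:
bool is_tagged_mkldnn(const std::shared_ptr<ngraph::op::Op>& op)
{
    auto annotations = std::dynamic_pointer_cast<ngraph::runtime::cpu::CPUOpAnnotations>(
        op->get_op_annotations());
    return annotations && annotations->is_mkldnn_op();
}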
@@ -15,13 +15,13 @@
 *******************************************************************************/
#include <algorithm>
-#include <iostream>
#include <unordered_set>

#include "ngraph/pattern/core_fusion.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
+#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/maximum.hpp"
#include "ngraph/ops/parameter.hpp"

@@ -51,7 +51,12 @@ void pass::CoreFusion::construct_relu_pattern()
    auto iconst0 = construct_constant_node(0);
    auto val = make_shared<pattern::op::Label>(iconst0);
    auto zero = make_shared<pattern::op::Label>(iconst0, nullptr, Nodes{iconst0});
-   auto max = make_shared<op::Maximum>(zero, val);
+
+   auto broadcast_pred = [](std::shared_ptr<Node> n) {
+       return static_cast<bool>(std::dynamic_pointer_cast<op::Broadcast>(n));
+   };
+   auto skip_broadcast = std::make_shared<pattern::op::Any>(zero, broadcast_pred);
+   auto max = make_shared<op::Maximum>(skip_broadcast, val);

    pattern::gr_callback_fn callback = [val, zero](pattern::Matcher& m) {
        NGRAPH_DEBUG << "In a callback for construct_relu_pattern against "
...
[collapsed file diff not shown]
@@ -106,7 +106,9 @@
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/cpu/cpu_tracing.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
+#include "ngraph/runtime/cpu/ops/convert_layout.hpp"
#include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
+#include "ngraph/runtime/cpu/pass/cpu_assignment.hpp"
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_layout.hpp"

@@ -218,6 +220,8 @@ static const runtime::cpu::OpMap dispatcher{
     &runtime::cpu::CPU_Emitter::emit<op::ConvolutionBackpropFilters>},
    {TI(ngraph::op::ConvolutionBackpropData),
     &runtime::cpu::CPU_Emitter::emit<op::ConvolutionBackpropData>},
+   {TI(ngraph::runtime::cpu::op::ConvertLayout),
+    &runtime::cpu::CPU_Emitter::emit<runtime::cpu::op::ConvertLayout>},
    {TI(ngraph::op::Not), &runtime::cpu::CPU_Emitter::emit<op::Not>},
    {TI(ngraph::op::MaxPool), &runtime::cpu::CPU_Emitter::emit<op::MaxPool>},
    {TI(ngraph::op::Reverse), &runtime::cpu::CPU_Emitter::emit<op::Reverse>},

@@ -259,7 +263,8 @@ void runtime::cpu::CPU_ExternalFunction::compile()
    pass_manager.register_pass<ngraph::pass::CoreFusion>();
    pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
-   pass_manager.register_pass<runtime::cpu::pass::CPULayout>();
+   pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(shared_from_this());
+   pass_manager.register_pass<runtime::cpu::pass::CPULayout>(shared_from_this());
    pass_manager.register_pass<ngraph::pass::Liveness>();
    pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment);

@@ -279,13 +284,8 @@
    }

    writer +=
-       R"(// Generated by the NGraph CPU backend
+       R"(// Generated by the nGraph CPU backend
#include <cmath>
-)";
-
-   writer +=
-       R"(#include <Eigen/Dense>
#include "ngraph/except.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"

@@ -417,7 +417,7 @@ using namespace ngraph::runtime;
    {
        for (shared_ptr<Node> node : current_function->get_ordered_ops())
        {
-           const op::Constant* c = dynamic_cast<op::Constant*>(node.get());
+           const ngraph::op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
            if (c)
            {
                shared_ptr<descriptor::TensorView> tv = node->get_outputs()[0].get_tensor_view();

@@ -462,7 +462,14 @@ using namespace ngraph::runtime;
            {
                continue;
            }
            Node& node = *op_list[i];
+           auto handler = dispatcher.find(type_index(typeid(node)));
+           if (handler == dispatcher.end())
+           {
+               throw ngraph_error("Unhandled op during code generation : " + node.description());
+           }
            string s = emit_op_as_function(node, "f");
            node_cache.insert({&node, s});
        }

@@ -509,7 +516,7 @@ using namespace ngraph::runtime;
        set<descriptor::TensorView*> constants;
        for (shared_ptr<Node> node : current_function->get_ordered_ops())
        {
-           if (dynamic_cast<op::Constant*>(node.get()))
+           if (dynamic_cast<ngraph::op::Constant*>(node.get()))
            {
                shared_ptr<descriptor::TensorView> tv = node->get_outputs()[0].get_tensor_view();
                constants.insert(tv.get());

@@ -573,7 +580,7 @@ using namespace ngraph::runtime;
        // Add inputs to the variable name map
        size_t arg_index = 0;
-       for (shared_ptr<op::Parameter> param : current_function->get_parameters())
+       for (shared_ptr<ngraph::op::Parameter> param : current_function->get_parameters())
        {
            for (size_t i = 0; i < param->get_output_size(); ++i)
            {

@@ -612,7 +619,7 @@ using namespace ngraph::runtime;
            shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view();
            const element::Type& et = tv->get_tensor_view_type()->get_element_type();
            bool parameter_as_output = false;
-           for (shared_ptr<op::Parameter> param : current_function->get_parameters())
+           for (shared_ptr<ngraph::op::Parameter> param : current_function->get_parameters())
            {
                for (const descriptor::Output& pout : param->get_outputs())
                {

@@ -657,7 +664,8 @@ using namespace ngraph::runtime;
                throw ngraph_error("Unhandled op during code generation : " + node->description());
            }
            vector<TensorViewWrapper> in;
-           vector<string> node_input_names, node_output_names;
+           vector<string> node_input_names;
+           vector<string> node_output_names;
            for (const descriptor::Input& input : node->get_inputs())
            {
                const descriptor::Output& output = input.get_output();

@@ -702,19 +710,23 @@ using namespace ngraph::runtime;
                }
            }

+           writer << "\n// " << node->get_name() << "(";
+           vector<string> parameter_nodes = node_input_names;
+           parameter_nodes.insert(
+               parameter_nodes.end(), node_output_names.begin(), node_output_names.end());
+           writer << join(parameter_nodes);
+           writer << ")\n";
+
            // Emit operation body
            string func_name;
            auto it = match_functions.find(node.get());
-           if (it != match_functions.end())
-           {
-               func_name = it->second;
-           }
-           if (func_name.empty())
+           if (it == match_functions.end())
            {
                handler->second(this, writer, node.get(), in, out);
            }
            else
            {
+               func_name = it->second;
                vector<string> names;
                for (const TensorViewWrapper& tv : in)
                {
...
@@ -30,9 +30,6 @@ string emit_bracketed_string(vector<T> data)
{
    stringstream ss;
-   if (data.size() == 0)
-       return "";
-
    for (auto s : data)
    {
        ss << "[" << s << "]";

@@ -75,7 +72,7 @@ vector<string>
    vector<string> index_vars;
    for (size_t i = 0; i < top.size(); i++)
    {
-       string index_var = writer.generate_temporary_name("i");
+       string index_var = writer.generate_temporary_name("_i");
        writer << runtime::cpu::kernel::start_index_loop(index_var, new_bottom[i], top[i], i == 0);
        writer.indent++;
...
@@ -149,7 +149,7 @@ string ngraph::runtime::cpu::kernel::end_index_loop(const string& index_var)
{
    stringstream ss;
-   ss << "} // end for(" << index_var << ")\n";
+   ss << "}\n";
    return ss.str();
}

@@ -209,7 +209,7 @@ void ngraph::runtime::cpu::kernel::emit_pointwise_copy(codegen::CodeWriter& writ
    for (size_t i = 0; i < n_axes; i++)
    {
-       string index_var = writer.generate_temporary_name("i");
+       string index_var = writer.generate_temporary_name("_j");
        writer << start_index_loop(index_var, source_start_corner[i], source_end_corner[i], i == 0);
        writer.indent++;
...
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/ops/util/op_annotations.hpp"
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            /// \brief Annotations added to graph ops by CPU backend passes
            class CPUOpAnnotations : public ngraph::op::util::OpAnnotations
            {
            public:
                CPUOpAnnotations() { m_mkldnn_op = false; }
                bool is_mkldnn_op() { return m_mkldnn_op; }
                void set_mkldnn_op(bool val) { m_mkldnn_op = val; }
            private:
                bool m_mkldnn_op;
            };
        }
    }
}
@@ -22,8 +22,10 @@
#include "ngraph/descriptor/primary_tensor_view.hpp"
#include "ngraph/except.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
+#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/shape.hpp"

+using namespace mkldnn;
using namespace ngraph;
using namespace std;

@@ -101,8 +103,34 @@ void runtime::cpu::CPUTensorView::read(void* target, size_t tensor_offset, size_
    {
        throw out_of_range("read access past end of tensor");
    }
+
+   auto tvl = this->get_tensor_view_layout();
+   auto cpu_tvl = dynamic_cast<runtime::cpu::LayoutDescriptor*>(tvl.get());
+   if (cpu_tvl && cpu_tvl->get_mkldnn_format() != memory::format::format_undef &&
+       cpu_tvl->get_mkldnn_format() !=
+           runtime::cpu::mkldnn_utils::CreateNativeDataFormat(*cpu_tvl))
+   {
+       auto tensor_shape = this->get_shape();
+       auto input_format = cpu_tvl->get_mkldnn_format();
+       auto output_format = runtime::cpu::mkldnn_utils::CreateNativeDataFormat(*cpu_tvl);
+       memory::data_type et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type(
+           this->get_descriptor()->get_tensor_view_type()->get_element_type());
+       engine cpu_engine{engine::cpu, 0};
+       memory::dims mkldnn_shape{tensor_shape.begin(), tensor_shape.end()};
+       memory::desc input_desc{mkldnn_shape, et, input_format};
+       memory::desc output_desc{mkldnn_shape, et, output_format};
+       memory input{{input_desc, cpu_engine}, aligned_buffer};
+       memory output{{output_desc, cpu_engine}, target};
+       reorder prim{input, output};
+       mkldnn::stream s(mkldnn::stream::kind::eager);
+       s.submit({prim}).wait();
+   }
+   else
+   {
        const char* source = get_data_ptr();
        memcpy(target, &source[tensor_offset], n);
+   }
}

size_t runtime::cpu::CPUTensorView::get_size() const
...
@@ -15,6 +15,7 @@
 *******************************************************************************/
#include <memory>
+#include <string>

#include "mkldnn_emitter.hpp"

@@ -45,7 +46,7 @@ mkldnn::memory::desc MKLDNNEmitter::build_memory_descriptor(const TensorViewWrap
{
    return mkldnn::memory::desc(
        mkldnn::memory::dims(tvw.get_shape().begin(), tvw.get_shape().end()),
-       mkldnn_utils::GetDataType(tvw.get_element_type()),
+       mkldnn_utils::get_mkldnn_data_type(tvw.get_element_type()),
        fmt);
}
...
@@ -14,6 +14,8 @@
 * limitations under the License.
 *******************************************************************************/
+#include <string>
+
#include <mkldnn.hpp>

#include "mkldnn_invoke.hpp"
...
@@ -17,9 +17,9 @@
#include <string>
#include <typeindex>
#include <typeinfo>
-#include <unordered_map>
#include <unordered_set>

+#include "ngraph/types/element_type.hpp"
#include "ngraph/node.hpp"
#include "ngraph/ops/add.hpp"
#include "ngraph/ops/avg_pool.hpp"
@@ -30,17 +30,12 @@
#include "mkldnn_utils.hpp"

-namespace ngraph
-{
-    namespace runtime
-    {
-        namespace cpu
-        {
-            namespace mkldnn_utils
-            {
+using namespace mkldnn;
+using namespace ngraph;

#define TI(x) std::type_index(typeid(x))

static const std::unordered_set<std::type_index> s_op_registry{
    TI(ngraph::op::Add),
    TI(ngraph::op::AvgPool),
    TI(ngraph::op::AvgPoolBackprop),

@@ -53,36 +48,71 @@
    TI(ngraph::op::Relu),
    TI(ngraph::op::ReluBackprop)};

-static const std::unordered_map<std::string, const mkldnn::memory::data_type>
-    s_data_type_map{{"char", mkldnn::memory::data_type::s8},
-                    {"float", mkldnn::memory::data_type::f32},
-                    {"double", mkldnn::memory::data_type::data_undef},
-                    {"int8_t", mkldnn::memory::data_type::s8},
-                    {"int16_t", mkldnn::memory::data_type::s16},
-                    {"int32_t", mkldnn::memory::data_type::s32},
-                    {"int64_t", mkldnn::memory::data_type::data_undef},
-                    {"uint8_t", mkldnn::memory::data_type::u8},
-                    {"uint16_t", mkldnn::memory::data_type::data_undef},
-                    {"uint32_t", mkldnn::memory::data_type::data_undef},
-                    {"uint64_t", mkldnn::memory::data_type::data_undef}};
-
-mkldnn::memory::data_type GetDataType(const ngraph::element::Type& et)
-{
-    auto it = s_data_type_map.find(et.c_type_string());
-    if (it == s_data_type_map.end() ||
-        it->second == mkldnn::memory::data_type::data_undef)
-        throw ngraph_error("No MKLDNN data type exists for the given element type");
-    return it->second;
-}
-
-bool IsMKLDNNOp(ngraph::Node& op)
-{
+// Mapping from POD types to MKLDNN data types
+static const std::map<element::Type, const mkldnn::memory::data_type> s_mkldnn_data_type_map{
+    {element::boolean, mkldnn::memory::data_type::s8},
+    {element::f32, mkldnn::memory::data_type::f32},
+    {element::f64, mkldnn::memory::data_type::data_undef},
+    {element::i8, mkldnn::memory::data_type::s8},
+    {element::i16, mkldnn::memory::data_type::s16},
+    {element::i32, mkldnn::memory::data_type::s32},
+    {element::i64, mkldnn::memory::data_type::data_undef},
+    {element::u8, mkldnn::memory::data_type::u8},
+    {element::u16, mkldnn::memory::data_type::data_undef},
+    {element::u32, mkldnn::memory::data_type::data_undef},
+    {element::u64, mkldnn::memory::data_type::data_undef}};
+
+static const std::map<element::Type, const std::string> s_mkldnn_data_type_string_map{
+    {element::boolean, "mkldnn::memory::data_type::s8"},
+    {element::f32, "mkldnn::memory::data_type::f32"},
+    {element::f64, "mkldnn::memory::data_type::data_undef"},
+    {element::i8, "mkldnn::memory::data_type::s8"},
+    {element::i16, "mkldnn::memory::data_type::s16"},
+    {element::i32, "mkldnn::memory::data_type::s32"},
+    {element::i64, "mkldnn::memory::data_type::data_undef"},
+    {element::u8, "mkldnn::memory::data_type::u8"},
+    {element::u16, "mkldnn::memory::data_type::data_undef"},
+    {element::u32, "mkldnn::memory::data_type::data_undef"},
+    {element::u64, "mkldnn::memory::data_type::data_undef"}};
+
+// TODO (jbobba): Add the rest of memory formats to this map as well
+static const std::map<memory::format, const std::string> s_mkldnn_format_string_map{
+    {memory::format::format_undef, "memory::format::format_undef"},
+    {memory::format::any, "memory::format::any"},
+    {memory::format::blocked, "memory::format::blocked"},
+    {memory::format::x, "memory::format::x"},
+    {memory::format::nc, "memory::format::nc"},
+    {memory::format::nchw, "memory::format::nchw"},
+    {memory::format::nhwc, "memory::format::nhwc"},
+    {memory::format::chwn, "memory::format::chwn"},
+    {memory::format::nChw8c, "memory::format::nChw8c"},
+    {memory::format::nChw16c, "memory::format::nChw16c"},
+    {memory::format::oi, "memory::format::oi"},
+    {memory::format::io, "memory::format::io"},
+    {memory::format::oihw, "memory::format::oihw"},
+    {memory::format::ihwo, "memory::format::ihwo"},
+    {memory::format::hwio, "memory::format::hwio"},
+    {memory::format::oIhw8i, "memory::format::oIhw8i"},
+    {memory::format::oIhw16i, "memory::format::oIhw16i"},
+    {memory::format::OIhw8i8o, "memory::format::OIhw8i8o"},
+    {memory::format::OIhw16i16o, "memory::format::OIhw16i16o"},
+    {memory::format::OIhw8o8i, "memory::format::OIhw8o8i"},
+    {memory::format::OIhw16o16i, "memory::format::OIhw16o16i"},
+    {memory::format::Oihw8o, "memory::format::Oihw8o"},
+    {memory::format::Oihw16o, "memory::format::Oihw16o"},
+    {memory::format::Ohwi8o, "memory::format::Ohwi8o"},
+    {memory::format::Ohwi16o, "memory::format::Ohwi16o"},
+    {memory::format::OhIw16o4i, "memory::format::OhIw16o4i"},
+};
+
+bool runtime::cpu::mkldnn_utils::IsMKLDNNOp(ngraph::Node& op)
+{
    return (s_op_registry.find(TI(op)) != s_op_registry.end());
}

-mkldnn::memory::format
-    CreateNativeDataFormat(const ngraph::runtime::cpu::LayoutDescriptor& layout)
+mkldnn::memory::format runtime::cpu::mkldnn_utils::CreateNativeDataFormat(
+    const ngraph::runtime::cpu::LayoutDescriptor& layout)
{
    switch (layout.get_shape().size())
    {
        case 1: return mkldnn::memory::format::x;

@@ -90,8 +120,31 @@ namespace ngraph
        case 4: return mkldnn::memory::format::nchw;
        default: return mkldnn::memory::format::format_undef;
    }
}
-            }
-        }
-    }
-}
+
+const std::string&
+    runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(const ngraph::element::Type& type)
+{
+    auto it = s_mkldnn_data_type_string_map.find(type);
+    if (it == s_mkldnn_data_type_string_map.end() || it->second.empty())
+        throw ngraph_error("No MKLDNN data type exists for the given element type");
+    return it->second;
+}
+
+mkldnn::memory::data_type
+    runtime::cpu::mkldnn_utils::get_mkldnn_data_type(const ngraph::element::Type& type)
+{
+    auto it = s_mkldnn_data_type_map.find(type);
+    if (it == s_mkldnn_data_type_map.end() || it->second == memory::data_type::data_undef)
+    {
+        throw ngraph_error("No MKLDNN data type exists for the given element type");
+    }
+    return it->second;
+}
+
+const std::string& runtime::cpu::mkldnn_utils::get_mkldnn_format_string(memory::format fmt)
+{
+    auto it = s_mkldnn_format_string_map.find(fmt);
+    if (it == s_mkldnn_format_string_map.end())
+        throw ngraph_error("No MKLDNN format exists for the given format type " +
+                           std::to_string(fmt));
+    return it->second;
+}
@@ -32,12 +32,12 @@ namespace ngraph
        {
            extern mkldnn::engine global_cpu_engine;

-           mkldnn::memory::data_type GetDataType(const ngraph::element::Type& et);
            bool IsMKLDNNOp(ngraph::Node& op);
            mkldnn::memory::format
                CreateNativeDataFormat(const ngraph::runtime::cpu::LayoutDescriptor& layout);
+           const std::string& get_mkldnn_data_type_string(const ngraph::element::Type& type);
+           mkldnn::memory::data_type get_mkldnn_data_type(const ngraph::element::Type& type);
+           const std::string& get_mkldnn_format_string(mkldnn::memory::format fmt);
        }
    }
}
...
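Reviewer sketch, not part of the patch: how the reworked element::Type-keyed helpers behave. This is a fragment (it assumes the ngraph and mkldnn headers used above); the values follow from the maps and the throw logic in mkldnn_utils.cpp:

// The old string-keyed GetDataType(et.c_type_string()) lookup becomes a typed lookup:
mkldnn::memory::data_type dt =
    ngraph::runtime::cpu::mkldnn_utils::get_mkldnn_data_type(ngraph::element::f32);
// dt == mkldnn::memory::data_type::f32

// Codegen uses the string variants to emit MKLDNN literals into generated source:
const std::string& fmt_str =
    ngraph::runtime::cpu::mkldnn_utils::get_mkldnn_format_string(mkldnn::memory::format::nchw);
// fmt_str == "memory::format::nchw"

// Element types that map to data_undef (f64, i64, u16, u32, u64) have no MKLDNN
// equivalent, so the lookup throws ngraph_error instead of returning data_undef:
// ngraph::runtime::cpu::mkldnn_utils::get_mkldnn_data_type(ngraph::element::f64);  // throws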
@@ -39,7 +39,7 @@ runtime::cpu::op::ConvertLayout::ConvertLayout(
    if (!arg_layout)
    {
-       throw ngraph_error("Layout conversion input tensor is missing layout information");
+       //throw ngraph_error("Layout conversion input tensor is missing layout information");
    }

    add_output(layout->get_element_type(), layout->get_shape());
...
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/cpu/pass/cpu_assignment.hpp"
#include <algorithm>
#include <cassert>
#include <memory>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
#include <mkldnn.hpp>
#include "ngraph/descriptor/output.hpp"
#include "ngraph/ops/convolution.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            namespace pass
            {
                template <>
                void CPUAssignment::ASSIGN_DECL(ngraph::op::Convolution)
                {
                    auto convolution = static_cast<op::Convolution*>(node);

                    auto arg0_shape = node->get_input_shape(0);
                    auto arg1_shape = node->get_input_shape(1);
                    auto result_shape = node->get_output_shape(0);
                    auto arg0_rank = arg0_shape.size();
                    auto arg1_rank = arg1_shape.size();

                    bool data_dilated = false;
                    for (size_t s : convolution->get_data_dilation_strides())
                    {
                        data_dilated = data_dilated || (s != 1);
                    }

                    if (!data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
                        node->get_input_element_type(0) == element::f32)
                    {
                        auto op_annotations =
                            std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
                        op_annotations->set_mkldnn_op(true);
                        convolution->set_op_annotations(op_annotations);
                    }
                }
            }
        }
    }
}

#define TI(x) type_index(typeid(x))

static const runtime::cpu::pass::AssignOpMap s_dispatcher{
    {TI(ngraph::op::Convolution),
     &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Convolution>},
};

bool runtime::cpu::pass::CPUAssignment::run_on_call_graph(
    const std::list<std::shared_ptr<Node>>& nodes)
{
    for (const auto& node : nodes)
    {
        auto& n = *node;
        auto handler = s_dispatcher.find(TI(n));
        if (handler != s_dispatcher.end())
        {
            handler->second(m_external_function.get(), node.get());
        }
    }

    return false;
}
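Reviewer sketch, not part of the patch: extending the pass to another op would mirror the Convolution handler above. A hypothetical op::AvgPool handler, which would sit next to the Convolution specialization inside the same namespace block and get its own s_dispatcher entry, could look like this:

// Hypothetical handler; the real patch only registers Convolution.
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::AvgPool)
{
    auto avg_pool = static_cast<op::AvgPool*>(node);

    // Same style of eligibility check as the Convolution case: 4-D f32 inputs only.
    if (node->get_input_shape(0).size() == 4 &&
        node->get_input_element_type(0) == element::f32)
    {
        auto op_annotations = std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
        op_annotations->set_mkldnn_op(true);
        avg_pool->set_op_annotations(op_annotations);
    }
}

// ...plus a matching dispatcher entry:
// {TI(ngraph::op::AvgPool),
//  &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::AvgPool>},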
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/pass/pass.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#define ASSIGN_DECL(op_name) \
assign<op_name>(ngraph::runtime::cpu::CPU_ExternalFunction * external_function, \
ngraph::Node * node)
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            namespace pass
            {
                using AssignFunction = std::function<void(CPU_ExternalFunction*, ngraph::Node*)>;
                using AssignOpMap = std::unordered_map<std::type_index, AssignFunction>;

                class CPUAssignment : public ngraph::pass::CallGraphPass
                {
                public:
                    CPUAssignment(std::shared_ptr<CPU_ExternalFunction> external_function)
                        : m_external_function(external_function)
                    {
                    }

                    virtual bool
                        run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes) override;

                    template <typename OP>
                    static void
                        assign(ngraph::runtime::cpu::CPU_ExternalFunction* external_function,
                               ngraph::Node* node)
                    {
                        throw std::runtime_error("Unimplemented op in CPU assignment");
                    }

                private:
                    std::shared_ptr<CPU_ExternalFunction> m_external_function;
                };
            }
        }
    }
}
[collapsed file diff not shown]
@@ -17,6 +17,11 @@
#pragma once

#include "ngraph/pass/pass.hpp"
+#include "ngraph/runtime/cpu/cpu_external_function.hpp"
+
+#define LAYOUT_DECL(op_type) \
+    layout<op_type>(ngraph::runtime::cpu::CPU_ExternalFunction * external_function, \
+                    std::shared_ptr<ngraph::Node> node)

namespace ngraph
{
@@ -26,11 +31,30 @@ namespace ngraph
    {
        namespace pass
        {
+           using LayoutFunction =
+               std::function<void(CPU_ExternalFunction*, std::shared_ptr<ngraph::Node>)>;
+           using LayoutOpMap = std::unordered_map<std::type_index, LayoutFunction>;
+
            class CPULayout : public ngraph::pass::CallGraphPass
            {
            public:
+               CPULayout(std::shared_ptr<CPU_ExternalFunction> external_function)
+                   : m_external_function(external_function)
+               {
+               }
+
                virtual bool
                    run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes) override;
+
+               template <typename OP>
+               static void
+                   layout(ngraph::runtime::cpu::CPU_ExternalFunction* external_function,
+                          std::shared_ptr<ngraph::Node> node);
+
+           private:
+               std::shared_ptr<CPU_ExternalFunction> m_external_function;
+
+               static void set_default_layouts(CPU_ExternalFunction* external_function,
+                                               std::shared_ptr<Node> node);
            };
        }
    }
...
@@ -46,6 +46,7 @@ namespace ngraph
            virtual ~ExternalFunction() {}
            virtual std::shared_ptr<CallFrame> make_call_frame() = 0;
+           const std::shared_ptr<ngraph::Function> get_function() { return m_function; }
        protected:
            std::shared_ptr<ngraph::Function> m_function;
            bool m_release_function;
...
@@ -555,6 +555,53 @@ TEST(${BACKEND_NAME}, divide)
    EXPECT_EQ((vector<float>{2, 2, 2, 2}), read_vector<float>(result));
}
TEST(${BACKEND_NAME}, divide_adjoint_stability)
{
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
Shape shape{2, 2};
auto make_external = [&]() {
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Divide>(A, B), op::Parameters{A, B});
auto Y_out = f->get_output_op(0);
auto Xs = f->get_parameters();
auto C = std::make_shared<op::Parameter>(Y_out->get_element_type(), Y_out->get_shape());
std::vector<std::shared_ptr<Node>> dYdXs(Xs.size());
transform(Xs.begin(), Xs.end(), dYdXs.begin(), [C, Y_out](const std::shared_ptr<Node>& X) {
return Y_out->backprop_node(X, C);
});
std::vector<std::shared_ptr<op::Parameter>> params(Xs);
params.push_back(C);
auto bf = std::make_shared<Function>(dYdXs, params);
auto external = manager->compile(bf);
return external;
};
auto cf = backend->make_call_frame(make_external());
// Create some tensors for input/output
auto a = backend->make_primary_tensor_view(element::f32, shape);
copy_data(a, vector<float>{0, 0, 1, 1});
auto b = backend->make_primary_tensor_view(element::f32, shape);
copy_data(b, vector<float>{2, 2, 2, 2});
auto c = backend->make_primary_tensor_view(element::f32, shape);
copy_data(c, vector<float>{1, 1, 1, 1});
auto resulta = backend->make_primary_tensor_view(element::f32, shape);
auto resultb = backend->make_primary_tensor_view(element::f32, shape);
cf->call({a, b, c}, {resulta, resultb});
EXPECT_EQ((vector<float>{0.5, 0.5, 0.5, 0.5}), read_vector<float>(resulta));
EXPECT_EQ((vector<float>{-0.0, -0.0, -0.25, -0.25}), read_vector<float>(resultb));
}
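Reviewer note, not part of the patch: the expected values follow from the adjoints in the divide.cpp change earlier in this diff. With y = a / b, the backprop outputs are da = c * (1/b) and db = -c * (a/b) / b = -c * a / b^2. For a = {0, 0, 1, 1}, b = {2, 2, 2, 2}, c = {1, 1, 1, 1} that gives da = {0.5, 0.5, 0.5, 0.5} and db = {-0.0, -0.0, -0.25, -0.25}, matching the assertions above. Under the old x-adjoint, delta * (a/b) / a, the first two entries of da would have been 0/0 = NaN instead of 0.5.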
TEST(${BACKEND_NAME}, divide_by_zero_float32)
{
    SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");

@@ -6009,6 +6056,42 @@ TEST(${BACKEND_NAME}, convolution_outlining)
    EXPECT_EQ(vector<float>{expected_result}, read_vector<float>(result));
}
TEST(${BACKEND_NAME}, convolution_layout)
{
Shape shape_a{1, 16, 2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_b{32, 16, 1, 1};
auto B = make_shared<op::Parameter>(element::f32, shape_b);
Shape shape_r{1, 32, 2, 2};
auto conv1 = make_shared<op::Convolution>(A,
B,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
auto f = make_shared<Function>(conv1, op::Parameters{A, B});
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
// Create some tensors for input/output
auto a = backend->make_primary_tensor_view(element::f32, shape_a);
vector<float> input(64, 1.0f);
copy_data(a, input);
auto b = backend->make_primary_tensor_view(element::f32, shape_b);
vector<float> weights(512, 1.0f);
copy_data(b, weights);
auto result = backend->make_primary_tensor_view(element::f32, shape_r);
vector<float> expected_result(128, 16.0f);
cf->call({a, b}, {result});
EXPECT_EQ(vector<float>{expected_result}, read_vector<float>(result));
}
TEST(${BACKEND_NAME}, avg_pool_1d_1channel_1image)
{
    SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
...
@@ -288,3 +288,23 @@ TEST(cpu_fusion, fuse_fprop_bn)
    size_t ccg = count_ops_of_type<op::BatchNorm>(func);
    ASSERT_EQ(ccg, 1);
}
class UnhandledOp : public ngraph::op::Abs
{
public:
UnhandledOp(const std::shared_ptr<Node>& arg)
: Abs(arg)
{
}
};
TEST(cpu_fusion, unhandled_op)
{
auto A = make_shared<op::Parameter>(element::f32, Shape{});
auto unhandled = make_shared<UnhandledOp>(A);
auto f = make_shared<Function>(unhandled, op::Parameters{A});
auto manager = runtime::Manager::get("CPU");
auto backend = manager->allocate_backend();
auto external = manager->compile(f);
ASSERT_THROW(backend->make_call_frame(external), ngraph_error);
}