Unverified Commit 8f5b3e2e authored by Fenglei's avatar Fenglei Committed by GitHub

Merge branch 'master' into tfl/gpu_dot_back

parents 9fd64b6f 95312b8e
...@@ -97,6 +97,7 @@ set (SRC ...@@ -97,6 +97,7 @@ set (SRC
pass/memory_visualize.cpp pass/memory_visualize.cpp
pass/pass.cpp pass/pass.cpp
pass/reshape_elimination.cpp pass/reshape_elimination.cpp
pass/result_copy_elimination.cpp
pass/visualize_tree.cpp pass/visualize_tree.cpp
pattern/matcher.cpp pattern/matcher.cpp
pattern/core_fusion.cpp pattern/core_fusion.cpp
...@@ -186,6 +187,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND ...@@ -186,6 +187,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/mkldnn_invoke.cpp runtime/cpu/mkldnn_invoke.cpp
runtime/cpu/mkldnn_utils.cpp runtime/cpu/mkldnn_utils.cpp
runtime/cpu/ops/convert_layout.cpp runtime/cpu/ops/convert_layout.cpp
runtime/cpu/ops/sigmoid.cpp
runtime/cpu/ops/matmul_bias.cpp runtime/cpu/ops/matmul_bias.cpp
runtime/cpu/pass/cpu_assignment.cpp runtime/cpu/pass/cpu_assignment.cpp
runtime/cpu/pass/cpu_fusion.cpp runtime/cpu/pass/cpu_fusion.cpp
......
...@@ -49,5 +49,7 @@ std::shared_ptr<Node> op::Result::copy_with_new_args(const NodeVector& new_args) ...@@ -49,5 +49,7 @@ std::shared_ptr<Node> op::Result::copy_with_new_args(const NodeVector& new_args)
throw ngraph_error("Expected a single-output argument"); throw ngraph_error("Expected a single-output argument");
} }
return std::make_shared<Result>(new_args.at(0)); auto res = std::make_shared<Result>(new_args.at(0));
res->set_needs_copy(res->needs_copy());
return res;
} }
...@@ -36,12 +36,17 @@ namespace ngraph ...@@ -36,12 +36,17 @@ namespace ngraph
copy_with_new_args(const NodeVector& new_args) const override; copy_with_new_args(const NodeVector& new_args) const override;
virtual bool is_output() const override { return true; } virtual bool is_output() const override { return true; }
void set_needs_copy(bool val) { m_needs_copy = val; }
bool needs_copy() const { return m_needs_copy; }
protected: protected:
virtual void generate_adjoints(autodiff::Adjoints& adjoints, virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta) override const std::shared_ptr<Node>& delta) override
{ {
adjoints.add_delta(get_input_op(0), delta); adjoints.add_delta(get_input_op(0), delta);
} }
private:
bool m_needs_copy{true};
}; };
} }
} }
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "result_copy_elimination.hpp"
#include "ngraph/node.hpp"
#include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/util.hpp"
bool ngraph::pass::ResultCopyElimination::run_on_function(std::shared_ptr<ngraph::Function> f)
{
std::set<std::shared_ptr<Node>> seen;
for (auto res : f->get_results())
{
auto arg = res->get_input_op(0);
//we need a copy
if (arg->is_parameter() || arg->is_constant())
{
continue;
}
//TODO: check if broadcast replace op::Result w/ a copy of broadcast node
//TODO: consider other cases where it's easier to recompute than make a copy
//we will compute the result directly into output[]
if (seen.count(arg) == 0)
{
res->set_needs_copy(false);
seen.insert(arg);
}
}
return true;
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/pass/pass.hpp"
namespace ngraph
{
namespace pass
{
class ResultCopyElimination;
}
}
class ngraph::pass::ResultCopyElimination : public ngraph::pass::FunctionPass
{
public:
ResultCopyElimination()
: FunctionPass()
{
}
virtual bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
};
...@@ -92,6 +92,7 @@ ...@@ -92,6 +92,7 @@
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/ops/convert_layout.hpp" #include "ngraph/runtime/cpu/ops/convert_layout.hpp"
#include "ngraph/runtime/cpu/ops/matmul_bias.hpp" #include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
#include "ngraph/runtime/cpu/ops/sigmoid.hpp"
#include "ngraph/types/element_type.hpp" #include "ngraph/types/element_type.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
...@@ -272,13 +273,6 @@ namespace ngraph ...@@ -272,13 +273,6 @@ namespace ngraph
const char* cbeta = "0.0f"; const char* cbeta = "0.0f";
if (args.size() > 2)
{
writer << "memcpy(" << out[0].get_name() << ", " << args[2].get_name() << ", "
<< out[0].get_size() * out[0].get_element_type().size() << ");\n";
cbeta = "1.0f";
}
writer << "cblas::cblas_sgemm(" writer << "cblas::cblas_sgemm("
<< "cblas::Layout::RowMajor, " << tranpose_a << tranpose_b << m << ", " << n << "cblas::Layout::RowMajor, " << tranpose_a << tranpose_b << m << ", " << n
<< ", " << k << ",\n" << ", " << k << ",\n"
...@@ -287,6 +281,101 @@ namespace ngraph ...@@ -287,6 +281,101 @@ namespace ngraph
<< " " << out[0].get_name() << ", " << max(1UL, arg2_shape[1]) << " " << out[0].get_name() << ", " << max(1UL, arg2_shape[1])
<< ");\n"; << ");\n";
if (args.size() > 2)
{
auto axes = cg->get_broadcast_axes();
if (axes.size() == 1)
{
if (*(axes.begin()) == 0)
{
writer << "static " << out[0].get_element_type().c_type_string()
<< " ones_row[" << arg2_shape[0] << "]"
<< " = { 1.0f";
for (size_t i = 1; i < arg2_shape[0]; ++i)
{
writer << ", 1.0f";
}
writer << "};\n";
writer << "cblas::cblas_sgemm("
<< "cblas::Layout::RowMajor, " << cnotranspose << cnotranspose
<< arg2_shape[0] << ", " << arg2_shape[1] << ", 1"
<< ",\n"
<< " 1.0f, ones_row, "
<< "1"
<< ", " << args[2].get_name() << ", " << max(1UL, arg2_shape[1])
<< ", "
<< "1.0f"
<< ",\n"
<< " " << out[0].get_name() << ", "
<< max(1UL, arg2_shape[1]) << ");\n";
}
else
{
writer << "static " << out[0].get_element_type().c_type_string()
<< " ones_col[" << arg2_shape[1] << "]"
<< " = { 1.0f";
for (size_t i = 1; i < arg2_shape[1]; ++i)
{
writer << ", 1.0f";
}
writer << "};\n";
writer << "cblas::cblas_sgemm("
<< "cblas::Layout::RowMajor, " << cnotranspose << ctranspose
<< arg2_shape[0] << ", " << arg2_shape[1] << ", 1"
<< ",\n"
<< " 1.0f, ones_col," << max(1UL, arg2_shape[1]) << ", "
<< args[2].get_name() << ", "
<< "1"
<< ", "
<< "1.0f"
<< ",\n"
<< " " << out[0].get_name() << ", "
<< max(1UL, arg2_shape[1]) << ");\n";
}
}
else
{
if (axes.size() != 2)
{
throw ngraph_error("unexpected broadcast rank");
}
writer << out[0].get_element_type().c_type_string() << " bias["
<< arg2_shape[1] << "]"
<< " = { " << args[2].get_name() << "[0]";
for (size_t i = 1; i < arg2_shape[1]; ++i)
{
writer << "," << args[2].get_name() << "[0]";
}
writer << "};\n";
writer << "static " << out[0].get_element_type().c_type_string()
<< " ones_scalar[" << arg2_shape[0] << "]"
<< " = { 1.0f";
for (size_t i = 1; i < arg2_shape[0]; ++i)
{
writer << ", 1.0f";
}
writer << "};\n";
writer << "cblas::cblas_sgemm("
<< "cblas::Layout::RowMajor, " << cnotranspose << cnotranspose
<< arg2_shape[0] << ", " << arg2_shape[1] << ", 1"
<< ",\n"
<< " 1.0f, ones_scalar, "
<< "1"
<< ", "
<< "bias"
<< ", " << max(1UL, arg2_shape[1]) << ", "
<< "1.0f"
<< ",\n"
<< " " << out[0].get_name() << ", " << max(1UL, arg2_shape[1])
<< ");\n";
}
}
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
} }
...@@ -3081,6 +3170,19 @@ namespace ngraph ...@@ -3081,6 +3170,19 @@ namespace ngraph
auto output_format = auto output_format =
dynamic_cast<runtime::cpu::LayoutDescriptor&>(*output_tvl).get_mkldnn_format(); dynamic_cast<runtime::cpu::LayoutDescriptor&>(*output_tvl).get_mkldnn_format();
// MKLDNN relies on format names for selecting optimized kernel implementations
// Hacky way to deal with this until they move to using canonicalized layouts
if (input_format == mkldnn::memory::format::nchw &&
runtime::cpu::mkldnn_utils::is_mkldnn_filter_format(output_format))
{
input_format = mkldnn::memory::format::oihw;
}
if (output_format == mkldnn::memory::format::nchw &&
runtime::cpu::mkldnn_utils::is_mkldnn_filter_format(input_format))
{
output_format = mkldnn::memory::format::oihw;
}
auto& mkldnn_emitter = external_function->get_mkldnn_emitter(); auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_emitter->build_memory_descriptor(args[0], input_format); auto input_desc = mkldnn_emitter->build_memory_descriptor(args[0], input_format);
auto result_desc = mkldnn_emitter->build_memory_descriptor(out[0], output_format); auto result_desc = mkldnn_emitter->build_memory_descriptor(out[0], output_format);
...@@ -3217,6 +3319,37 @@ namespace ngraph ...@@ -3217,6 +3319,37 @@ namespace ngraph
} }
} }
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Sigmoid)
{
auto input_shape = args[0].get_shape();
auto result_shape = out[0].get_shape();
int input_1d_size = static_cast<int>(shape_size(input_shape));
int result_1d_size = static_cast<int>(shape_size(result_shape));
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn::memory::desc(
{input_1d_size},
mkldnn_utils::get_mkldnn_data_type(args[0].get_element_type()),
mkldnn::memory::format::x);
auto result_desc = mkldnn::memory::desc(
{result_1d_size},
mkldnn_utils::get_mkldnn_data_type(out[0].get_element_type()),
mkldnn::memory::format::x);
size_t sigmoid_index =
mkldnn_emitter->build_sigmoid_forward(input_desc, result_desc);
auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0]) << ", "
<< args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1]) << ", "
<< out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(sigmoid_index) << ");\n";
}
template <> template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Softmax) void CPU_Emitter::EMITTER_DECL(ngraph::op::Softmax)
{ {
...@@ -3417,6 +3550,13 @@ namespace ngraph ...@@ -3417,6 +3550,13 @@ namespace ngraph
template <> template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Result) void CPU_Emitter::EMITTER_DECL(ngraph::op::Result)
{ {
const ngraph::op::Result* result = static_cast<const ngraph::op::Result*>(node);
if (!result->needs_copy())
{
return;
}
writer << "kernel::result<" << out[0].get_type() << ">(" << args[0].get_name() writer << "kernel::result<" << out[0].get_type() << ">(" << args[0].get_name()
<< ",\n"; << ",\n";
writer << " " << out[0].get_name() << ",\n"; writer << " " << out[0].get_name() << ",\n";
......
...@@ -100,6 +100,7 @@ ...@@ -100,6 +100,7 @@
#include "ngraph/pass/liveness.hpp" #include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp" #include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp" #include "ngraph/pass/memory_layout.hpp"
#include "ngraph/pass/result_copy_elimination.hpp"
#include "ngraph/pattern/core_fusion.hpp" #include "ngraph/pattern/core_fusion.hpp"
#include "ngraph/runtime/cpu/cpu_backend.hpp" #include "ngraph/runtime/cpu/cpu_backend.hpp"
#include "ngraph/runtime/cpu/cpu_call_frame.hpp" #include "ngraph/runtime/cpu/cpu_call_frame.hpp"
...@@ -110,6 +111,7 @@ ...@@ -110,6 +111,7 @@
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/ops/convert_layout.hpp" #include "ngraph/runtime/cpu/ops/convert_layout.hpp"
#include "ngraph/runtime/cpu/ops/matmul_bias.hpp" #include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
#include "ngraph/runtime/cpu/ops/sigmoid.hpp"
#include "ngraph/runtime/cpu/pass/cpu_assignment.hpp" #include "ngraph/runtime/cpu/pass/cpu_assignment.hpp"
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp" #include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_layout.hpp" #include "ngraph/runtime/cpu/pass/cpu_layout.hpp"
...@@ -243,6 +245,7 @@ static const runtime::cpu::OpMap dispatcher{ ...@@ -243,6 +245,7 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::Min), &runtime::cpu::CPU_Emitter::emit<op::Min>}, {TI(ngraph::op::Min), &runtime::cpu::CPU_Emitter::emit<op::Min>},
{TI(ngraph::op::Relu), &runtime::cpu::CPU_Emitter::emit<op::Relu>}, {TI(ngraph::op::Relu), &runtime::cpu::CPU_Emitter::emit<op::Relu>},
{TI(ngraph::op::ReluBackprop), &runtime::cpu::CPU_Emitter::emit<op::ReluBackprop>}, {TI(ngraph::op::ReluBackprop), &runtime::cpu::CPU_Emitter::emit<op::ReluBackprop>},
{TI(ngraph::op::Sigmoid), &runtime::cpu::CPU_Emitter::emit<op::Sigmoid>},
{TI(ngraph::op::Softmax), &runtime::cpu::CPU_Emitter::emit<op::Softmax>}, {TI(ngraph::op::Softmax), &runtime::cpu::CPU_Emitter::emit<op::Softmax>},
}; };
...@@ -276,10 +279,11 @@ void runtime::cpu::CPU_ExternalFunction::compile() ...@@ -276,10 +279,11 @@ void runtime::cpu::CPU_ExternalFunction::compile()
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>(); pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this); pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this);
pass_manager.register_pass<runtime::cpu::pass::CPULayout>(this); pass_manager.register_pass<runtime::cpu::pass::CPULayout>(this);
pass_manager.register_pass<ngraph::pass::ResultCopyElimination>();
pass_manager.register_pass<ngraph::pass::Liveness>(); pass_manager.register_pass<ngraph::pass::Liveness>();
pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment); pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment);
pass_manager.run_passes(m_function); pass_manager.run_passes(m_function);
codegen::CodeWriter writer; codegen::CodeWriter writer;
bool include_mkldnn_headers = false; bool include_mkldnn_headers = false;
...@@ -636,6 +640,16 @@ using namespace ngraph::runtime; ...@@ -636,6 +640,16 @@ using namespace ngraph::runtime;
stringstream ss; stringstream ss;
ss << "((" << type << "*)(outputs[" << i << "]))"; ss << "((" << type << "*)(outputs[" << i << "]))";
m_variable_name_map[tv->get_tensor().get_name()] = ss.str(); m_variable_name_map[tv->get_tensor().get_name()] = ss.str();
//it should be safe to assign both descriptors to one output*
//since needs_copy == false makes `op::Result` an nop
auto res = std::dynamic_pointer_cast<ngraph::op::Result>(op);
if (!res->needs_copy())
{
shared_ptr<descriptor::TensorView> itv =
res->get_input_op(0)->get_output_tensor_view();
m_variable_name_map[itv->get_tensor().get_name()] = ss.str();
}
} }
for (shared_ptr<Node> node : current_function->get_ordered_ops()) for (shared_ptr<Node> node : current_function->get_ordered_ops())
...@@ -827,7 +841,6 @@ using namespace ngraph::runtime; ...@@ -827,7 +841,6 @@ using namespace ngraph::runtime;
} }
// TODO: Cleanup and make this a utility function // TODO: Cleanup and make this a utility function
file_util::make_directory(s_output_dir); file_util::make_directory(s_output_dir);
string filename = file_util::path_join(s_output_dir, m_function_name + "_codegen.cpp"); string filename = file_util::path_join(s_output_dir, m_function_name + "_codegen.cpp");
ofstream out(filename); ofstream out(filename);
......
...@@ -280,6 +280,26 @@ size_t MKLDNNEmitter::build_relu_forward(const mkldnn::memory::desc& input_desc, ...@@ -280,6 +280,26 @@ size_t MKLDNNEmitter::build_relu_forward(const mkldnn::memory::desc& input_desc,
return primitive_index; return primitive_index;
} }
size_t MKLDNNEmitter::build_sigmoid_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc)
{
size_t input_index = build_memory_primitive(input_desc);
size_t result_index = build_memory_primitive(result_desc);
size_t primitive_index =
insert_primitive(new mkldnn::eltwise_forward({{mkldnn::prop_kind::forward_training,
mkldnn::algorithm::eltwise_logistic,
input_desc,
0,
0},
mkldnn_utils::global_cpu_engine},
*m_mkldnn_primitives[input_index],
*m_mkldnn_primitives[result_index]));
m_primitive_deps[primitive_index] = {input_index, result_index};
return primitive_index;
}
size_t MKLDNNEmitter::build_elementwise_add( size_t MKLDNNEmitter::build_elementwise_add(
const mkldnn::memory::desc& input0_data_desc, const mkldnn::memory::desc& input0_data_desc,
const mkldnn::memory::desc& input1_data_desc, const mkldnn::memory::desc& input1_data_desc,
......
...@@ -97,6 +97,9 @@ namespace ngraph ...@@ -97,6 +97,9 @@ namespace ngraph
size_t build_relu_forward(const mkldnn::memory::desc& input_desc, size_t build_relu_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc); const mkldnn::memory::desc& result_desc);
size_t build_sigmoid_forward(const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc);
size_t build_elementwise_add( size_t build_elementwise_add(
const mkldnn::memory::desc& input0_data_desc, const mkldnn::memory::desc& input0_data_desc,
const mkldnn::memory::desc& input1_data_desc, const mkldnn::memory::desc& input1_data_desc,
......
...@@ -110,6 +110,23 @@ static const std::map<memory::format, const std::string> s_mkldnn_format_string_ ...@@ -110,6 +110,23 @@ static const std::map<memory::format, const std::string> s_mkldnn_format_string_
{memory::format::OhIw16o4i, "memory::format::OhIw16o4i"}, {memory::format::OhIw16o4i, "memory::format::OhIw16o4i"},
}; };
static const std::set<memory::format> s_filter_formats{
memory::format::oihw,
memory::format::ihwo,
memory::format::hwio,
// memory::format::oIhw8i, // These currently map to nChw8c and nChw16c
// memory::format::oIhw16i,
memory::format::OIhw8i8o,
memory::format::OIhw16i16o,
memory::format::IOhw16o16i,
memory::format::OIhw8o8i,
memory::format::OIhw16o16i,
memory::format::Oihw8o,
memory::format::Oihw16o,
memory::format::Ohwi8o,
memory::format::Ohwi16o,
memory::format::OhIw16o4i};
bool runtime::cpu::mkldnn_utils::IsMKLDNNOp(ngraph::Node& op) bool runtime::cpu::mkldnn_utils::IsMKLDNNOp(ngraph::Node& op)
{ {
return (s_op_registry.find(TI(op)) != s_op_registry.end()); return (s_op_registry.find(TI(op)) != s_op_registry.end());
...@@ -157,16 +174,16 @@ const std::string& runtime::cpu::mkldnn_utils::get_mkldnn_format_string(memory:: ...@@ -157,16 +174,16 @@ const std::string& runtime::cpu::mkldnn_utils::get_mkldnn_format_string(memory::
} }
mkldnn::memory::format runtime::cpu::mkldnn_utils::get_input_mkldnn_format(const Node* node, mkldnn::memory::format runtime::cpu::mkldnn_utils::get_input_mkldnn_format(const Node* node,
int index) size_t index)
{ {
auto tvl = node->get_inputs()[index].get_output().get_tensor_view()->get_tensor_view_layout(); auto tvl = node->get_inputs()[index].get_output().get_tensor_view()->get_tensor_view_layout();
return dynamic_cast<runtime::cpu::LayoutDescriptor&>(*tvl).get_mkldnn_format(); return dynamic_cast<runtime::cpu::LayoutDescriptor&>(*tvl).get_mkldnn_format();
} }
mkldnn::memory::format runtime::cpu::mkldnn_utils::get_output_mkldnn_format(const Node* node, mkldnn::memory::format runtime::cpu::mkldnn_utils::get_output_mkldnn_format(const Node* node,
int index) size_t index)
{ {
auto tvl = node->get_output_tensor_view(0)->get_tensor_view_layout(); auto tvl = node->get_output_tensor_view(index)->get_tensor_view_layout();
return dynamic_cast<runtime::cpu::LayoutDescriptor&>(*tvl).get_mkldnn_format(); return dynamic_cast<runtime::cpu::LayoutDescriptor&>(*tvl).get_mkldnn_format();
} }
...@@ -181,8 +198,8 @@ bool runtime::cpu::mkldnn_utils::use_mkldnn_kernel(const ngraph::Node* node) ...@@ -181,8 +198,8 @@ bool runtime::cpu::mkldnn_utils::use_mkldnn_kernel(const ngraph::Node* node)
bool runtime::cpu::mkldnn_utils::compare_mkldnn_formats(mkldnn::memory::format fmt1, bool runtime::cpu::mkldnn_utils::compare_mkldnn_formats(mkldnn::memory::format fmt1,
mkldnn::memory::format fmt2) mkldnn::memory::format fmt2)
{ {
set<mkldnn::memory::format> similar_4d_formats{mkldnn::memory::format::nchw, std::set<mkldnn::memory::format> similar_4d_formats{mkldnn::memory::format::nchw,
mkldnn::memory::format::oihw}; mkldnn::memory::format::oihw};
if ((fmt1 == fmt2) || (similar_4d_formats.find(fmt1) != similar_4d_formats.end() && if ((fmt1 == fmt2) || (similar_4d_formats.find(fmt1) != similar_4d_formats.end() &&
similar_4d_formats.find(fmt2) != similar_4d_formats.end())) similar_4d_formats.find(fmt2) != similar_4d_formats.end()))
{ {
...@@ -190,3 +207,12 @@ bool runtime::cpu::mkldnn_utils::compare_mkldnn_formats(mkldnn::memory::format f ...@@ -190,3 +207,12 @@ bool runtime::cpu::mkldnn_utils::compare_mkldnn_formats(mkldnn::memory::format f
} }
return false; return false;
} }
bool runtime::cpu::mkldnn_utils::is_mkldnn_filter_format(mkldnn::memory::format fmt)
{
if (s_filter_formats.find(fmt) != s_filter_formats.end())
{
return true;
}
return false;
}
...@@ -39,11 +39,12 @@ namespace ngraph ...@@ -39,11 +39,12 @@ namespace ngraph
mkldnn::memory::data_type get_mkldnn_data_type(const ngraph::element::Type& type); mkldnn::memory::data_type get_mkldnn_data_type(const ngraph::element::Type& type);
const std::string& get_mkldnn_format_string(mkldnn::memory::format fmt); const std::string& get_mkldnn_format_string(mkldnn::memory::format fmt);
mkldnn::memory::format get_input_mkldnn_format(const Node* node, int index); mkldnn::memory::format get_input_mkldnn_format(const Node* node, size_t index);
mkldnn::memory::format get_output_mkldnn_format(const Node* node, int index); mkldnn::memory::format get_output_mkldnn_format(const Node* node, size_t index);
bool use_mkldnn_kernel(const ngraph::Node* node); bool use_mkldnn_kernel(const ngraph::Node* node);
bool compare_mkldnn_formats(mkldnn::memory::format fmt1, bool compare_mkldnn_formats(mkldnn::memory::format fmt1,
mkldnn::memory::format fmt2); mkldnn::memory::format fmt2);
bool is_mkldnn_filter_format(mkldnn::memory::format fmt);
} }
} }
} }
......
...@@ -32,7 +32,8 @@ std::shared_ptr<ngraph::Node> ...@@ -32,7 +32,8 @@ std::shared_ptr<ngraph::Node>
m_shape_w, m_shape_w,
m_shape_x, m_shape_x,
m_transpose_w, m_transpose_w,
m_transpose_x); m_transpose_x,
m_broadcast_axes);
} }
ngraph::op::MatmulBias::MatmulBias(std::shared_ptr<ngraph::Node> W, ngraph::op::MatmulBias::MatmulBias(std::shared_ptr<ngraph::Node> W,
...@@ -41,7 +42,8 @@ ngraph::op::MatmulBias::MatmulBias(std::shared_ptr<ngraph::Node> W, ...@@ -41,7 +42,8 @@ ngraph::op::MatmulBias::MatmulBias(std::shared_ptr<ngraph::Node> W,
Shape shape_w, Shape shape_w,
Shape shape_x, Shape shape_x,
bool transpose_w, bool transpose_w,
bool transpose_x) bool transpose_x,
AxisSet axes)
: RequiresTensorViewArgs("MatMulBias", : RequiresTensorViewArgs("MatMulBias",
b == nullptr ? std::vector<std::shared_ptr<Node>>{W, x} b == nullptr ? std::vector<std::shared_ptr<Node>>{W, x}
: std::vector<std::shared_ptr<Node>>{W, x, b}) : std::vector<std::shared_ptr<Node>>{W, x, b})
...@@ -49,8 +51,24 @@ ngraph::op::MatmulBias::MatmulBias(std::shared_ptr<ngraph::Node> W, ...@@ -49,8 +51,24 @@ ngraph::op::MatmulBias::MatmulBias(std::shared_ptr<ngraph::Node> W,
, m_shape_x(shape_x) , m_shape_x(shape_x)
, m_transpose_w(transpose_w) , m_transpose_w(transpose_w)
, m_transpose_x(transpose_x) , m_transpose_x(transpose_x)
, m_broadcast_axes(axes)
{ {
if (axes.size() == 0 && b != nullptr)
{
throw ngraph_error("Bias but no broadcast axes");
}
if (b == nullptr && axes.size() != 0)
{
throw ngraph_error("Broadcast axes but no bias");
}
if (axes.size() > 2)
{
throw ngraph_error("Broadcasting to > 2D tensor");
}
if (shape_w.size() != 2) if (shape_w.size() != 2)
{ {
NGRAPH_DEBUG << "W shape = " << vector_to_string(shape_w); NGRAPH_DEBUG << "W shape = " << vector_to_string(shape_w);
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#pragma once #pragma once
#include "ngraph/axis_set.hpp"
#include "ngraph/ops/util/requires_tensor_view_args.hpp" #include "ngraph/ops/util/requires_tensor_view_args.hpp"
namespace ngraph namespace ngraph
...@@ -31,12 +32,14 @@ namespace ngraph ...@@ -31,12 +32,14 @@ namespace ngraph
Shape shape_w, Shape shape_w,
Shape shape_x, Shape shape_x,
bool transpose_w, bool transpose_w,
bool transpose_x); bool transpose_x,
AxisSet axes = AxisSet{});
bool get_is_arg0_transposed() const { return m_transpose_w; } bool get_is_arg0_transposed() const { return m_transpose_w; }
bool get_is_arg1_transposed() const { return m_transpose_x; } bool get_is_arg1_transposed() const { return m_transpose_x; }
Shape get_arg0_shape() const { return m_shape_w; } Shape get_arg0_shape() const { return m_shape_w; }
Shape get_arg1_shape() const { return m_shape_x; } Shape get_arg1_shape() const { return m_shape_x; }
const AxisSet& get_broadcast_axes() const { return m_broadcast_axes; }
virtual std::shared_ptr<Node> virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override; copy_with_new_args(const NodeVector& new_args) const override;
...@@ -45,6 +48,7 @@ namespace ngraph ...@@ -45,6 +48,7 @@ namespace ngraph
Shape m_shape_x; Shape m_shape_x;
bool m_transpose_w; bool m_transpose_w;
bool m_transpose_x; bool m_transpose_x;
AxisSet m_broadcast_axes;
}; };
} }
} }
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/cpu/ops/sigmoid.hpp"
#include "ngraph/log.hpp"
#include "ngraph/util.hpp"
std::shared_ptr<ngraph::Node>
ngraph::op::Sigmoid::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 1)
{
throw ngraph_error("Incorrect number of new arguments");
}
return std::make_shared<Sigmoid>(new_args.at(0));
}
ngraph::op::Sigmoid::Sigmoid(std::shared_ptr<ngraph::Node> input)
: RequiresTensorViewArgs("Sigmoid", {input})
, m_shape_input(input->get_shape())
{
add_output(input->get_element_type(), m_shape_input);
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/ops/util/requires_tensor_view_args.hpp"
namespace ngraph
{
namespace op
{
class Sigmoid : public util::RequiresTensorViewArgs
{
public:
Sigmoid(std::shared_ptr<Node> input);
Shape get_input_shape() const { return m_shape_input; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
private:
Shape m_shape_input;
};
}
}
...@@ -27,10 +27,12 @@ ...@@ -27,10 +27,12 @@
#include "ngraph/descriptor/output.hpp" #include "ngraph/descriptor/output.hpp"
#include "ngraph/ops/add.hpp" #include "ngraph/ops/add.hpp"
#include "ngraph/ops/avg_pool.hpp" #include "ngraph/ops/avg_pool.hpp"
#include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/convolution.hpp" #include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/relu.hpp" #include "ngraph/ops/relu.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp" #include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/ops/sigmoid.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
...@@ -208,6 +210,19 @@ namespace ngraph ...@@ -208,6 +210,19 @@ namespace ngraph
} }
} }
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::Sigmoid)
{
auto sigmoid = static_cast<op::Sigmoid*>(node);
if (node->get_input_element_type(0) == element::f32)
{
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
sigmoid->set_op_annotations(op_annotations);
}
}
template <> template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::ReluBackprop) void CPUAssignment::ASSIGN_DECL(ngraph::op::ReluBackprop)
{ {
...@@ -225,6 +240,16 @@ namespace ngraph ...@@ -225,6 +240,16 @@ namespace ngraph
avg_pool->set_op_annotations(op_annotations); avg_pool->set_op_annotations(op_annotations);
} }
} }
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::BatchNorm)
{
auto batchnorm = static_cast<op::BatchNorm*>(node);
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
batchnorm->set_op_annotations(op_annotations);
}
} }
} }
} }
...@@ -234,6 +259,7 @@ namespace ngraph ...@@ -234,6 +259,7 @@ namespace ngraph
static const runtime::cpu::pass::AssignOpMap s_dispatcher{ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
{TI(ngraph::op::Add), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Add>}, {TI(ngraph::op::Add), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Add>},
{TI(ngraph::op::BatchNorm), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::BatchNorm>},
{TI(ngraph::op::Convolution), {TI(ngraph::op::Convolution),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Convolution>}, &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Convolution>},
{TI(ngraph::op::ConvolutionBackpropData), {TI(ngraph::op::ConvolutionBackpropData),
...@@ -246,6 +272,7 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{ ...@@ -246,6 +272,7 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
{TI(ngraph::op::Relu), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Relu>}, {TI(ngraph::op::Relu), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Relu>},
{TI(ngraph::op::ReluBackprop), {TI(ngraph::op::ReluBackprop),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ReluBackprop>}, &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ReluBackprop>},
{TI(ngraph::op::Sigmoid), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Sigmoid>},
}; };
bool runtime::cpu::pass::CPUAssignment::run_on_call_graph( bool runtime::cpu::pass::CPUAssignment::run_on_call_graph(
......
...@@ -30,8 +30,10 @@ ...@@ -30,8 +30,10 @@
#include "ngraph/ops/convolution.hpp" #include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/divide.hpp" #include "ngraph/ops/divide.hpp"
#include "ngraph/ops/dot.hpp" #include "ngraph/ops/dot.hpp"
#include "ngraph/ops/exp.hpp"
#include "ngraph/ops/get_output_element.hpp" #include "ngraph/ops/get_output_element.hpp"
#include "ngraph/ops/multiply.hpp" #include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/negative.hpp"
#include "ngraph/ops/pad.hpp" #include "ngraph/ops/pad.hpp"
#include "ngraph/ops/parameter.hpp" #include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/reshape.hpp" #include "ngraph/ops/reshape.hpp"
...@@ -42,6 +44,7 @@ ...@@ -42,6 +44,7 @@
#include "ngraph/pattern/op/any.hpp" #include "ngraph/pattern/op/any.hpp"
#include "ngraph/pattern/op/label.hpp" #include "ngraph/pattern/op/label.hpp"
#include "ngraph/runtime/cpu/ops/matmul_bias.hpp" #include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
#include "ngraph/runtime/cpu/ops/sigmoid.hpp"
static bool init_cblas_arg(std::shared_ptr<ngraph::Node> reshape, static bool init_cblas_arg(std::shared_ptr<ngraph::Node> reshape,
std::shared_ptr<ngraph::Node> arg, std::shared_ptr<ngraph::Node> arg,
...@@ -134,12 +137,21 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_matmulbias_pattern() ...@@ -134,12 +137,21 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_matmulbias_pattern()
<< m.match_root()->get_name(); << m.match_root()->get_name();
auto mpattern = m.match_root(); //add auto mpattern = m.match_root(); //add
auto m_matmul = mpattern->get_input_op(0); auto m_matmul = std::dynamic_pointer_cast<op::MatmulBias>(mpattern->get_input_op(0));
auto m_broadcast = mpattern->get_input_op(1); auto m_broadcast = std::dynamic_pointer_cast<op::Broadcast>(mpattern->get_input_op(1));
auto m_bias = m_broadcast->get_input_op(0);
auto pattern_map = m.get_pattern_map(); auto pattern_map = m.get_pattern_map();
return m_matmul->copy_with_new_args( auto mmb = std::make_shared<op::MatmulBias>(pattern_map[W],
NodeVector{pattern_map[W], pattern_map[x], m_broadcast}); pattern_map[x],
m_bias,
m_matmul->get_arg0_shape(),
m_matmul->get_arg1_shape(),
m_matmul->get_is_arg0_transposed(),
m_matmul->get_is_arg1_transposed(),
m_broadcast->get_broadcast_axes());
return mmb;
}; };
auto m = std::make_shared<ngraph::pattern::Matcher>(padd, callback); auto m = std::make_shared<ngraph::pattern::Matcher>(padd, callback);
...@@ -512,3 +524,45 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_conv() ...@@ -512,3 +524,45 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_conv()
this->add_matcher(std::make_shared<ngraph::pattern::Matcher>(conv_label, callback)); this->add_matcher(std::make_shared<ngraph::pattern::Matcher>(conv_label, callback));
} }
void ngraph::runtime::cpu::pass::CPUFusion::construct_sigmoid()
{
//construct variance
auto input = std::make_shared<pattern::op::Label>(element::f32, Shape{3, 4});
auto neg_input = std::make_shared<op::Negative>(input);
auto exp_neg_input = std::make_shared<op::Exp>(neg_input);
// broadcast input
auto constant = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto broadcast_constant = std::make_shared<op::Broadcast>(constant, Shape{3, 4}, AxisSet{0, 1});
auto add_exp = std::make_shared<op::Add>(exp_neg_input, broadcast_constant);
auto divide_1_over_exp = std::make_shared<op::Divide>(broadcast_constant, add_exp);
//Define a call back that needs to called once the DFG matches the pattern
ngraph::pattern::gr_callback_fn callback =
[input](pattern::Matcher& m) -> std::shared_ptr<Node> {
NGRAPH_DEBUG << "In a callback for construct_fprop_sigmoid pattern against "
<< m.match_root()->get_name();
auto pattern_map = m.get_pattern_map();
if (m.match_root()->get_element_type() != element::f32)
{
NGRAPH_DEBUG << "mpattern = " << m.match_root()->get_name() << " type is not float!";
return nullptr;
}
if (m.match_root()->get_outputs().size() != pattern_map[input]->get_outputs().size())
{
NGRAPH_DEBUG << "mpattern = " << m.match_root()->get_name()
<< "input= " << pattern_map[input]->get_name() << "size dont match!";
return nullptr;
}
auto sigmoid_node = std::make_shared<op::Sigmoid>(pattern_map[input]);
return sigmoid_node;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(divide_1_over_exp, callback);
this->add_matcher(m);
}
...@@ -43,12 +43,14 @@ public: ...@@ -43,12 +43,14 @@ public:
construct_fprop_bn(); construct_fprop_bn();
construct_zero_padded_reshaped_conv(); construct_zero_padded_reshaped_conv();
construct_zero_padded_conv(); construct_zero_padded_conv();
construct_sigmoid();
} }
private: private:
void construct_matmul_pattern(); void construct_matmul_pattern();
void construct_matmulbias_pattern(); void construct_matmulbias_pattern();
void construct_fprop_bn(); void construct_fprop_bn();
void construct_sigmoid();
void construct_zero_padded_reshaped_conv(); void construct_zero_padded_reshaped_conv();
void construct_zero_padded_conv(); void construct_zero_padded_conv();
}; };
...@@ -28,7 +28,9 @@ ...@@ -28,7 +28,9 @@
#include "ngraph/log.hpp" #include "ngraph/log.hpp"
#include "ngraph/ops/add.hpp" #include "ngraph/ops/add.hpp"
#include "ngraph/ops/avg_pool.hpp" #include "ngraph/ops/avg_pool.hpp"
#include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/convolution.hpp" #include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/get_output_element.hpp"
#include "ngraph/ops/op.hpp" #include "ngraph/ops/op.hpp"
#include "ngraph/ops/relu.hpp" #include "ngraph/ops/relu.hpp"
#include "ngraph/ops/result.hpp" #include "ngraph/ops/result.hpp"
...@@ -36,6 +38,7 @@ ...@@ -36,6 +38,7 @@
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp" #include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/ops/convert_layout.hpp" #include "ngraph/runtime/cpu/ops/convert_layout.hpp"
#include "ngraph/runtime/cpu/ops/sigmoid.hpp"
using namespace std; using namespace std;
using namespace mkldnn; using namespace mkldnn;
...@@ -640,6 +643,17 @@ namespace ngraph ...@@ -640,6 +643,17 @@ namespace ngraph
set_output_layouts(node, prim_output_formats); set_output_layouts(node, prim_output_formats);
} }
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::GetOutputElement)
{
auto goe = static_cast<const ngraph::op::GetOutputElement*>(node.get());
auto input_layout = runtime::cpu::mkldnn_utils::get_input_mkldnn_format(
node.get(), goe->get_n());
vector<memory::format> prim_output_formats;
prim_output_formats.push_back(input_layout);
set_output_layouts(node, prim_output_formats);
}
template <> template <>
void CPULayout::LAYOUT_DECL(ngraph::op::Relu) void CPULayout::LAYOUT_DECL(ngraph::op::Relu)
{ {
...@@ -657,6 +671,23 @@ namespace ngraph ...@@ -657,6 +671,23 @@ namespace ngraph
} }
} }
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::Sigmoid)
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node.get()))
{
auto input_layout =
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node.get(), 0);
vector<memory::format> prim_output_formats;
prim_output_formats.push_back(input_layout);
set_output_layouts(node, prim_output_formats);
}
else
{
set_default_layouts(external_function, node);
}
}
template <> template <>
void CPULayout::LAYOUT_DECL(ngraph::op::ReluBackprop) void CPULayout::LAYOUT_DECL(ngraph::op::ReluBackprop)
{ {
...@@ -680,6 +711,32 @@ namespace ngraph ...@@ -680,6 +711,32 @@ namespace ngraph
} }
} }
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::BatchNorm)
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node.get()))
{
auto input_layout =
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node.get(), 2);
vector<memory::format> prim_input_formats;
vector<memory::format> prim_output_formats;
prim_input_formats.push_back(memory::format::x);
prim_input_formats.push_back(memory::format::x);
prim_input_formats.push_back(input_layout);
prim_output_formats.push_back(input_layout);
prim_output_formats.push_back(memory::format::x);
prim_output_formats.push_back(memory::format::x);
node =
insert_input_conversions(external_function, node, prim_input_formats);
set_output_layouts(node, prim_output_formats);
}
else
{
throw ngraph_error("Batchnorm only supported in MKLDNN for now");
}
}
template <> template <>
void CPULayout::LAYOUT_DECL(ngraph::op::Add) void CPULayout::LAYOUT_DECL(ngraph::op::Add)
{ {
...@@ -719,10 +776,14 @@ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{ ...@@ -719,10 +776,14 @@ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
{TI(ngraph::op::AvgPool), &runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPool>}, {TI(ngraph::op::AvgPool), &runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPool>},
{TI(ngraph::op::AvgPoolBackprop), {TI(ngraph::op::AvgPoolBackprop),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPoolBackprop>}, &runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPoolBackprop>},
{TI(ngraph::op::BatchNorm), &runtime::cpu::pass::CPULayout::layout<ngraph::op::BatchNorm>},
{TI(ngraph::op::GetOutputElement),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::GetOutputElement>},
{TI(ngraph::op::Relu), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Relu>}, {TI(ngraph::op::Relu), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Relu>},
{TI(ngraph::op::Result), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Result>}, {TI(ngraph::op::Result), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Result>},
{TI(ngraph::op::ReluBackprop), {TI(ngraph::op::ReluBackprop),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::ReluBackprop>}, &runtime::cpu::pass::CPULayout::layout<ngraph::op::ReluBackprop>},
{TI(ngraph::op::Sigmoid), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Sigmoid>},
}; };
bool runtime::cpu::pass::CPULayout::run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes) bool runtime::cpu::pass::CPULayout::run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes)
......
...@@ -288,7 +288,6 @@ cudnnSetOpTensorDescriptor(opTensorDesc, ...@@ -288,7 +288,6 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
writer << "int n = " << arg1_shape[1] << ";\n"; writer << "int n = " << arg1_shape[1] << ";\n";
writer << "int k = " << arg0_shape[0] << ";\n"; writer << "int k = " << arg0_shape[0] << ";\n";
writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_HOST);\n"; writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_HOST);\n";
writer << "cublasSgemm(" writer << "cublasSgemm("
<< "cublas_handle," << "cublas_handle,"
<< "CUBLAS_OP_N," << "CUBLAS_OP_N,"
...@@ -303,7 +302,6 @@ cudnnSetOpTensorDescriptor(opTensorDesc, ...@@ -303,7 +302,6 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
<< "&beta," // beta << "&beta," // beta
<< out[0].get_name() << "," << out[0].get_name() << ","
<< "n);\n"; << "n);\n";
writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n"; writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n";
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
......
...@@ -758,7 +758,6 @@ using namespace std; ...@@ -758,7 +758,6 @@ using namespace std;
} }
} }
} }
writer.indent--; writer.indent--;
// End generated function // End generated function
writer += "}\n\n"; writer += "}\n\n";
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include "ngraph/pass/reshape_elimination.hpp" #include "ngraph/pass/reshape_elimination.hpp"
#include "ngraph/pass/visualize_tree.hpp" #include "ngraph/pass/visualize_tree.hpp"
#include "ngraph/runtime/cpu/ops/matmul_bias.hpp" #include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
#include "ngraph/runtime/cpu/ops/sigmoid.hpp"
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp" #include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/serializer.hpp" #include "ngraph/serializer.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
...@@ -91,11 +92,89 @@ TEST(cpu_fusion, gemm_pattern) ...@@ -91,11 +92,89 @@ TEST(cpu_fusion, gemm_pattern)
ASSERT_EQ(n.get_pattern_map()[x], B); ASSERT_EQ(n.get_pattern_map()[x], B);
ASSERT_EQ(n.get_pattern_map()[b], C); ASSERT_EQ(n.get_pattern_map()[b], C);
auto cg = auto cg = make_shared<op::MatmulBias>(
make_shared<op::MatmulBias>(W, x, broadcast, W->get_shape(), x->get_shape(), false, false); W, x, C, W->get_shape(), x->get_shape(), false, false, AxisSet{0});
}
TEST(cpu_fusion, gemm_cpu_broadcast_row)
{
Shape shapeA{3, 2};
Shape shapeB{2, 3};
Shape shapeC{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shapeA);
auto B = make_shared<op::Parameter>(element::f32, shapeB);
auto reshape_w = make_shared<op::Reshape>(A, AxisVector{1, 0}, Shape{2, 3});
auto reshape_x = make_shared<op::Reshape>(B, AxisVector{1, 0}, Shape{3, 2});
auto one = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{1.0f, 1.0f});
auto broadcast = make_shared<op::Broadcast>(one, shapeC, AxisSet{0});
auto cg = make_shared<op::MatmulBias>(
A, B, one, A->get_shape(), B->get_shape(), true, true, AxisSet{0});
auto f = make_shared<Function>(cg, op::ParameterVector{A, B});
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
shared_ptr<runtime::TensorView> a = backend->make_primary_tensor_view(element::f32, shapeA);
shared_ptr<runtime::TensorView> b = backend->make_primary_tensor_view(element::f32, shapeB);
shared_ptr<runtime::TensorView> result =
backend->make_primary_tensor_view(element::f32, shapeC);
vector<float> dataA{1.0f, 4.0f, 1.0f, 4.0f, 1.0f, 4.0f};
vector<float> dataB{3.0f, 3.0f, 3.0f, 9.0f, 9.0f, 9.0f};
copy_data(a, dataA);
copy_data(b, dataB);
cf->call({a, b}, {result});
vector<float> expected{10, 28, 37, 109};
ASSERT_TRUE(read_vector<float>(result) == expected);
}
TEST(cpu_fusion, gemm_cpu_broadcast_column)
{
Shape shapeA{3, 2};
Shape shapeB{2, 3};
Shape shapeC{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shapeA);
auto B = make_shared<op::Parameter>(element::f32, shapeB);
auto reshape_w = make_shared<op::Reshape>(A, AxisVector{1, 0}, Shape{2, 3});
auto reshape_x = make_shared<op::Reshape>(B, AxisVector{1, 0}, Shape{3, 2});
auto one = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{1.0f, 1.0f});
auto broadcast = make_shared<op::Broadcast>(one, shapeC, AxisSet{1});
auto cg = make_shared<op::MatmulBias>(
A, B, one, A->get_shape(), B->get_shape(), true, true, AxisSet{1});
auto f = make_shared<Function>(cg, op::ParameterVector{A, B});
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
shared_ptr<runtime::TensorView> a = backend->make_primary_tensor_view(element::f32, shapeA);
shared_ptr<runtime::TensorView> b = backend->make_primary_tensor_view(element::f32, shapeB);
shared_ptr<runtime::TensorView> result =
backend->make_primary_tensor_view(element::f32, shapeC);
vector<float> dataA{1.0f, 4.0f, 1.0f, 4.0f, 1.0f, 4.0f};
vector<float> dataB{3.0f, 3.0f, 3.0f, 9.0f, 9.0f, 9.0f};
copy_data(a, dataA);
copy_data(b, dataB);
cf->call({a, b}, {result});
vector<float> expected{10, 28, 37, 109};
ASSERT_TRUE(read_vector<float>(result) == expected);
} }
TEST(cpu_fusion, gemm_cpu) TEST(cpu_fusion, gemm_cpu_broadcast_matrix)
{ {
Shape shapeA{3, 2}; Shape shapeA{3, 2};
Shape shapeB{2, 3}; Shape shapeB{2, 3};
...@@ -109,8 +188,8 @@ TEST(cpu_fusion, gemm_cpu) ...@@ -109,8 +188,8 @@ TEST(cpu_fusion, gemm_cpu)
auto one = op::Constant::create<float>(element::f32, Shape{}, std::vector<float>{1.0f}); auto one = op::Constant::create<float>(element::f32, Shape{}, std::vector<float>{1.0f});
auto broadcast = make_shared<op::Broadcast>(one, shapeC, AxisSet{0, 1}); auto broadcast = make_shared<op::Broadcast>(one, shapeC, AxisSet{0, 1});
auto cg = auto cg = make_shared<op::MatmulBias>(
make_shared<op::MatmulBias>(A, B, broadcast, A->get_shape(), B->get_shape(), true, true); A, B, one, A->get_shape(), B->get_shape(), true, true, AxisSet{0, 1});
auto f = make_shared<Function>(cg, op::ParameterVector{A, B}); auto f = make_shared<Function>(cg, op::ParameterVector{A, B});
...@@ -212,7 +291,7 @@ TEST(cpu_fusion, cpu_fusion_pass_matmul_bias) ...@@ -212,7 +291,7 @@ TEST(cpu_fusion, cpu_fusion_pass_matmul_bias)
pass_manager.run_passes(func); pass_manager.run_passes(func);
auto gmm = graph->get_input_op(0); auto gmm = graph->get_input_op(0);
ASSERT_TRUE(std::dynamic_pointer_cast<op::MatmulBias>(gmm)); ASSERT_TRUE(std::dynamic_pointer_cast<op::MatmulBias>(gmm));
ASSERT_EQ(gmm->get_input_op(2), broadcast); ASSERT_EQ(gmm->get_input_op(2), b);
} }
TEST(cpu_fusion, cpu_fusion_pass_matmul_no_bias) TEST(cpu_fusion, cpu_fusion_pass_matmul_no_bias)
...@@ -593,3 +672,64 @@ TEST(cpu_fusion, non_zero_padded_conv) ...@@ -593,3 +672,64 @@ TEST(cpu_fusion, non_zero_padded_conv)
ASSERT_EQ(count_ops_of_type<op::Pad>(func), 1); ASSERT_EQ(count_ops_of_type<op::Pad>(func), 1);
} }
TEST(cpu_fusion, sigmoid_fprop_fusion)
{
pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/Graph_fprop_sigmoid.json");
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> func = ngraph::deserialize(ss);
pass_manager.run_passes(func);
size_t ccg = count_ops_of_type<op::Sigmoid>(func);
ASSERT_EQ(ccg, 1);
}
TEST(cpu_fusion, sigmoid_n1c1h2w2)
{
auto input = make_shared<op::Parameter>(element::f32, Shape{1, 1, 2, 2});
auto sigmoid_node = make_shared<op::Sigmoid>(input);
auto func = make_shared<Function>(sigmoid_node, op::ParameterVector{input});
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(func);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
shared_ptr<runtime::TensorView> a =
backend->make_primary_tensor_view(element::f32, input->get_shape());
shared_ptr<runtime::TensorView> result =
backend->make_primary_tensor_view(element::f32, input->get_shape());
vector<float> dataA{1.0f, 4.0f, 1.0f, 4.0f};
copy_data(a, dataA);
cf->call({a}, {result});
vector<float> expected{0.73105858f, 0.98201379f, 0.73105858f, 0.98201379f};
ASSERT_TRUE(read_vector<float>(result) == expected);
}
TEST(cpu_fusion, sigmoid_n1c1h4)
{
auto input = make_shared<op::Parameter>(element::f32, Shape{1, 1, 4});
auto sigmoid_node = make_shared<op::Sigmoid>(input);
auto func = make_shared<Function>(sigmoid_node, op::ParameterVector{input});
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(func);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
shared_ptr<runtime::TensorView> a =
backend->make_primary_tensor_view(element::f32, input->get_shape());
shared_ptr<runtime::TensorView> result =
backend->make_primary_tensor_view(element::f32, input->get_shape());
vector<float> dataA{1.0f, 4.0f, 1.0f, 4.0f};
copy_data(a, dataA);
cf->call({a}, {result});
vector<float> expected{0.73105858f, 0.98201379f, 0.73105858f, 0.98201379f};
ASSERT_TRUE(read_vector<float>(result) == expected);
}
[{"name":"Function_0","ops":[{"element_type":"float","inputs":[],"name":"Parameter_0","op":"Parameter","outputs":["Parameter_0_0"],"shape":[3,4]},{"element_type":"float","inputs":[],"name":"Constant_1","op":"Constant","outputs":["Constant_1_0"],"shape":[],"value":["1"]},{"inputs":["Parameter_0"],"name":"Negative_3","op":"Negative","outputs":["Negative_3_0"]},{"axes":[0,1],"inputs":["Constant_1"],"name":"Broadcast_2","op":"Broadcast","outputs":["Broadcast_2_0"],"shape":[3,4]},{"inputs":["Negative_3"],"name":"Exp_4","op":"Exp","outputs":["Exp_4_0"]},{"inputs":["Broadcast_2","Exp_4"],"name":"Add_5","op":"Add","outputs":["Add_5_0"]},{"inputs":["Broadcast_2","Add_5"],"name":"Divide_6","op":"Divide","outputs":["Divide_6_0"]}],"parameters":["Parameter_0"],"result":["Divide_6"]}]
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "ngraph/ops/batch_norm.hpp"
#include <memory> #include <memory>
using namespace std; using namespace std;
...@@ -57,6 +58,96 @@ TEST(type_prop, broadcast_deduce_incorrect) ...@@ -57,6 +58,96 @@ TEST(type_prop, broadcast_deduce_incorrect)
} }
} }
TEST(type_prop, batchnorm_backprop_4d_check)
{
auto dummy = make_shared<op::Parameter>(element::f32, Shape{});
auto param = make_shared<op::Parameter>(element::f32, Shape{2, 4});
try
{
auto bc =
make_shared<op::BatchNormBackprop>(0.001, dummy, dummy, param, dummy, dummy, dummy);
FAIL() << "Deduced type should disagree with c-tor arguments";
}
catch (const ngraph_error& error)
{
EXPECT_EQ(error.what(), std::string("Input expected to be a 4D tensor"));
}
catch (...)
{
FAIL() << "Deduced type check failed for unexpected reason";
}
}
TEST(type_prop, batchnorm_backprop_et_check)
{
auto dummy_f32 = make_shared<op::Parameter>(element::f32, Shape{3});
auto dummy_f64 = make_shared<op::Parameter>(element::f64, Shape{3});
auto param = make_shared<op::Parameter>(element::f32, Shape{4, 3, 2, 2});
try
{
auto bc = make_shared<op::BatchNormBackprop>(
0.001, dummy_f32, dummy_f64, param, dummy_f32, dummy_f32, dummy_f32);
FAIL() << "Deduced type should disagree with c-tor arguments";
}
catch (const ngraph_error& error)
{
EXPECT_EQ(error.what(),
std::string("The element type of beta isn't equal to input data's type"));
}
catch (...)
{
FAIL() << "Deduced type check failed for unexpected reason";
}
}
TEST(type_prop, batchnorm_backprop_shape_check)
{
auto dummy = make_shared<op::Parameter>(element::f32, Shape{3});
auto dummy2 = make_shared<op::Parameter>(element::f32, Shape{4});
auto param = make_shared<op::Parameter>(element::f32, Shape{4, 3, 2, 2});
try
{
auto bc =
make_shared<op::BatchNormBackprop>(0.001, dummy, dummy2, param, dummy2, dummy2, dummy2);
FAIL() << "Deduced type should disagree with c-tor arguments";
}
catch (const ngraph_error& error)
{
EXPECT_EQ(error.what(),
std::string("The shape of beta isn't equal to input channel's shape"));
}
catch (...)
{
FAIL() << "Deduced type check failed for unexpected reason";
}
}
TEST(type_prop, batchnorm_backprop_delta_check)
{
auto dummy = make_shared<op::Parameter>(element::f32, Shape{3});
auto dummy2 = make_shared<op::Parameter>(element::f32, Shape{4});
auto param = make_shared<op::Parameter>(element::f32, Shape{4, 3, 2, 2});
auto delta = make_shared<op::Parameter>(element::f32, Shape{4, 3, 2, 3});
try
{
auto bc =
make_shared<op::BatchNormBackprop>(0.001, dummy, dummy, param, dummy, dummy, delta);
FAIL() << "Deduced type should disagree with c-tor arguments";
}
catch (const ngraph_error& error)
{
EXPECT_EQ(error.what(), std::string("delta shape is expected to be equal to input shape"));
}
catch (...)
{
FAIL() << "Deduced type check failed for unexpected reason";
}
}
TEST(type_prop, concat_deduce) TEST(type_prop, concat_deduce)
{ {
// Deduce type // Deduce type
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment