Commit 583dba23 authored by pthoreho's avatar pthoreho

Merge remote-tracking branch 'origin/master' into pruthvi/mkldnn_elementwise_add

parents d57ef7d3 59bdd6ee
...@@ -58,6 +58,7 @@ nervana_aeon.egg-info/ ...@@ -58,6 +58,7 @@ nervana_aeon.egg-info/
# vim # vim
*.swp *.swp
*.swo *.swo
tags
build/ build/
......
...@@ -18,3 +18,4 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIN_NGRAPH_LIBRARY") ...@@ -18,3 +18,4 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIN_NGRAPH_LIBRARY")
add_subdirectory(resource) add_subdirectory(resource)
add_subdirectory(ngraph) add_subdirectory(ngraph)
add_subdirectory(tools)
...@@ -63,6 +63,7 @@ set (SRC ...@@ -63,6 +63,7 @@ set (SRC
ops/power.cpp ops/power.cpp
ops/reduce.cpp ops/reduce.cpp
ops/reduce_window.cpp ops/reduce_window.cpp
ops/relu.cpp
ops/replace_slice.cpp ops/replace_slice.cpp
ops/reshape.cpp ops/reshape.cpp
ops/reverse.cpp ops/reverse.cpp
...@@ -95,6 +96,7 @@ set (SRC ...@@ -95,6 +96,7 @@ set (SRC
pass/reshape_elimination.cpp pass/reshape_elimination.cpp
pass/visualize_tree.cpp pass/visualize_tree.cpp
pattern/matcher.cpp pattern/matcher.cpp
pattern/core_fusion.cpp
runtime/aligned_buffer.cpp runtime/aligned_buffer.cpp
runtime/host_tensor_view.cpp runtime/host_tensor_view.cpp
runtime/interpreter/int_backend.cpp runtime/interpreter/int_backend.cpp
......
...@@ -70,6 +70,18 @@ public: ...@@ -70,6 +70,18 @@ public:
std::string generate_temporary_name(std::string prefix = "tempvar"); std::string generate_temporary_name(std::string prefix = "tempvar");
/// \brief Opens a brace-delimited block in the generated text.
///
/// Writes "{\n" to this writer and then increments `indent`, so the
/// opening brace is written before the indent level is raised.
void block_begin()
{
*this << "{\n";
indent++;
}
/// \brief Closes a brace-delimited block in the generated text.
///
/// Decrements `indent` first and then writes "}\n" to this writer, so the
/// closing brace is written after the indent level has been lowered.
void block_end()
{
indent--;
*this << "}\n";
}
private: private:
std::stringstream m_ss; std::stringstream m_ss;
bool m_pending_indent; bool m_pending_indent;
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "ngraph/graph_util.hpp" #include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp" #include "ngraph/log.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/ops/constant.hpp"
using namespace std; using namespace std;
...@@ -256,3 +257,25 @@ std::shared_ptr<ngraph::Function> ngraph::clone_function(std::shared_ptr<ngraph: ...@@ -256,3 +257,25 @@ std::shared_ptr<ngraph::Function> ngraph::clone_function(std::shared_ptr<ngraph:
// create and return cloned function // create and return cloned function
return std::make_shared<ngraph::Function>(cloned_results, cloned_params); return std::make_shared<ngraph::Function>(cloned_results, cloned_params);
} }
/// \brief Tests whether a node is a Constant whose data matches a constant filled
///        with \p const_value.
///
/// A reference op::Constant with the same element type and shape as the node is
/// built from \p const_value, and the two data buffers are compared byte-for-byte.
/// Because the comparison is bytewise, values with different bit patterns compare
/// unequal even if they are numerically equal (e.g. -0.0 vs 0.0 for floats).
///
/// \param const_value     The value every element is compared against, as a string
///                        (e.g. "0"); conversion to the element type is done by
///                        op::Constant.
/// \param reduce_constant The node to inspect.
/// \return true if \p reduce_constant is an op::Constant whose buffer is bytewise
///         identical to the reference constant; false otherwise, including when
///         the node is not an op::Constant.
bool ngraph::is_equal_to_const_value(std::string const_value, std::shared_ptr<Node> reduce_constant)
{
    auto rc = dynamic_pointer_cast<ngraph::op::Constant>(reduce_constant);
    if (!rc)
    {
        return false;
    }

    auto cshape = rc->get_shape();
    size_t n = shape_size(cshape);

    // n copies of const_value. Parentheses are deliberate: they select the
    // (count, value) constructor directly instead of relying on braces falling
    // back to it when the initializer_list constructor is not viable.
    std::vector<std::string> reference_values(n, const_value);
    auto constant_val_op =
        std::make_shared<ngraph::op::Constant>(rc->get_element_type(), cshape, reference_values);

    // Compare the raw element storage of both constants.
    size_t n_bytes = n * rc->get_element_type().size();
    NGRAPH_DEBUG << "Comparing " << n_bytes << " bytes";
    return !memcmp(constant_val_op->get_data_ptr(), rc->get_data_ptr(), n_bytes);
}
...@@ -53,6 +53,8 @@ namespace ngraph ...@@ -53,6 +53,8 @@ namespace ngraph
std::list<std::shared_ptr<Node>> std::list<std::shared_ptr<Node>>
topological_sort(const std::list<std::shared_ptr<Node>>& nodes); topological_sort(const std::list<std::shared_ptr<Node>>& nodes);
bool is_equal_to_const_value(std::string const_value, std::shared_ptr<Node> reduce_constant);
// maps original to replacement nodes e.g. for clone utilities // maps original to replacement nodes e.g. for clone utilities
// performs index checking on access // performs index checking on access
class NodeMap class NodeMap
......
...@@ -107,6 +107,7 @@ ...@@ -107,6 +107,7 @@
#include "ngraph/ops/product.hpp" #include "ngraph/ops/product.hpp"
#include "ngraph/ops/reduce.hpp" #include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reduce_window.hpp" #include "ngraph/ops/reduce_window.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/ops/remainder.hpp" #include "ngraph/ops/remainder.hpp"
#include "ngraph/ops/replace_slice.hpp" #include "ngraph/ops/replace_slice.hpp"
#include "ngraph/ops/reshape.hpp" #include "ngraph/ops/reshape.hpp"
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/ops/relu.hpp"
#include "ngraph/ops/multiply.hpp"
using namespace std;
using namespace ngraph;
// Builds a Relu node over `arg`; the result takes the element type and shape
// of the argument.
op::Relu::Relu(shared_ptr<Node> arg)
    : UnaryElementwiseArithmetic("Relu", {arg})
{
    const auto& input_element_type = arg->get_element_type();
    const auto& input_shape = arg->get_shape();
    set_value_type_checked(input_element_type, input_shape);
}
// Builds a ReluBackprop node from the forward input `arg` and the incoming
// `delta`. Both must agree in element type and in shape, otherwise an
// ngraph_error is thrown; the result takes delta's element type and shape.
op::ReluBackprop::ReluBackprop(shared_ptr<Node> arg, shared_ptr<Node> delta)
    : RequiresTensorViewArgs("ReluBackprop", {arg, delta})
{
    const auto& delta_element_type = delta->get_element_type();
    const auto& delta_shape = delta->get_shape();

    // Element types are validated first, then shapes.
    if (arg->get_element_type() != delta_element_type)
    {
        throw ngraph_error("Argument and delta element types for Relu backprop do not match");
    }
    if (arg->get_shape() != delta_shape)
    {
        throw ngraph_error("Argument and delta shape for Relu backprop do not match");
    }

    set_value_type_checked(delta_element_type, delta_shape);
}
// Registers the adjoint contribution for the Relu input: a ReluBackprop node
// built from the forward input (input 0) and the incoming delta.
void op::Relu::generate_adjoints(autodiff::Adjoints& adjoints, const std::shared_ptr<Node>& delta)
{
    const auto forward_input = get_input_op(0);
    adjoints.add_delta(forward_input, std::make_shared<op::ReluBackprop>(forward_input, delta));
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/ops/op.hpp"
#include "ngraph/ops/util/requires_tensor_view_args.hpp"
#include "ngraph/ops/util/unary_elementwise_arithmetic.hpp"
#include "ngraph/util.hpp"
#include <memory>
namespace ngraph
{
namespace op
{
/// \brief Elementwise Relu operation.
///
/// Takes a single argument node; the constructor and adjoint generation are
/// defined out of line.
class Relu : public ngraph::op::util::UnaryElementwiseArithmetic
{
public:
    /// \brief Constructs a Relu operation.
    ///
    /// \param arg Node that produces the input tensor.
    Relu(std::shared_ptr<ngraph::Node> arg);

    /// \brief Creates a new Relu node over a replacement argument.
    ///
    /// \param new_args Replacement arguments; must contain exactly one node.
    /// \return A newly constructed Relu over new_args[0].
    /// \throws ngraph_error if new_args does not contain exactly one node.
    virtual std::shared_ptr<Node>
        copy_with_new_args(const std::vector<std::shared_ptr<Node>>& new_args) const override
    {
        if (new_args.size() == 1)
        {
            return std::make_shared<Relu>(new_args.at(0));
        }
        throw ngraph_error("Incorrect number of new arguments");
    }

    /// \brief Adds this node's contribution to the adjoints for the given delta.
    ///
    /// \param adjoints Adjoint accumulator to update.
    /// \param delta    Node producing the incoming delta.
    virtual void generate_adjoints(autodiff::Adjoints& adjoints,
                                   const std::shared_ptr<Node>& delta) override;
};
/// \brief Elementwise ReluBackprop operation.
///
/// Takes two argument nodes: the Relu forward input and the delta.
class ReluBackprop : public ngraph::op::util::RequiresTensorViewArgs
{
public:
    /// \brief Constructs a ReluBackprop operation.
    ///
    /// \param arg   Node that produces the relu forward input tensor.
    /// \param delta Node that produces the delta tensor.
    ReluBackprop(std::shared_ptr<ngraph::Node> arg, std::shared_ptr<ngraph::Node> delta);

    /// \brief Creates a new ReluBackprop node over replacement arguments.
    ///
    /// \param new_args Replacement arguments; must contain exactly two nodes,
    ///                 in the order (arg, delta).
    /// \return A newly constructed ReluBackprop over new_args[0] and new_args[1].
    /// \throws ngraph_error if new_args does not contain exactly two nodes.
    virtual std::shared_ptr<Node>
        copy_with_new_args(const std::vector<std::shared_ptr<Node>>& new_args) const override
    {
        if (new_args.size() == 2)
        {
            return std::make_shared<ReluBackprop>(new_args.at(0), new_args.at(1));
        }
        throw ngraph_error("Incorrect number of new arguments");
    }
};
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <algorithm>
#include <iostream>
#include <unordered_set>
#include "ngraph/pattern/core_fusion.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/maximum.hpp"
#include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/pass/graph_rewrite.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pattern/matcher.hpp"
#include "ngraph/pattern/op/any.hpp"
#include "ngraph/pattern/op/label.hpp"
using namespace ngraph;
using namespace std;
// Returns whether `reduce_constant` matches the constant value "0", as decided
// by is_equal_to_const_value.
// NOTE(review): unlike construct_constant_node below, this helper is not
// `static`, so it has external linkage — confirm whether that is intended.
bool is_zero(shared_ptr<Node> reduce_constant)
{
    return is_equal_to_const_value("0", reduce_constant);
}
// Creates an f32 constant node with an empty Shape{} holding the single value `n`.
static shared_ptr<Node> construct_constant_node(int n)
{
    shared_ptr<Node> constant_node = op::Constant::create(element::f32, Shape{}, {n});
    return constant_node;
}
// Registers a matcher that rewrites Maximum(zero, val) into Relu(val).
//
// The pattern is a Maximum whose operands are two labels: `zero`, a label
// built over an f32 zero constant, and `val`, the value being rectified.
// When the pattern matches, the callback checks that the node bound to `zero`
// really is an all-zero constant and, if so, returns a Relu over the node
// bound to `val`. Otherwise it returns a null node (presumably meaning "no
// replacement" to the rewriter — confirm against GraphRewrite).
void pass::CoreFusion::construct_relu_pattern()
{
    auto iconst0 = construct_constant_node(0);
    auto val = make_shared<pattern::op::Label>(iconst0);
    auto zero = make_shared<pattern::op::Label>(iconst0, nullptr, Nodes{iconst0});
    auto max = make_shared<op::Maximum>(zero, val);

    pattern::gr_callback_fn callback = [val, zero](pattern::Matcher& m) -> shared_ptr<Node> {
        NGRAPH_DEBUG << "In a callback for construct_relu_pattern against "
                     << m.match_root()->get_name();

        // Fetch the label bindings once and reuse them for both lookups.
        auto pattern_map = m.get_pattern_map();

        auto mzero = pattern_map[zero];
        if (!is_zero(mzero))
        {
            NGRAPH_DEBUG << "zero constant = " << mzero->get_name() << " not equal to 0\n";
            return nullptr;
        }

        return make_shared<op::Relu>(pattern_map[val]);
    };

    auto m = make_shared<pattern::Matcher>(max, callback);
    this->add_matcher(m);
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/pass/graph_rewrite.hpp"
namespace ngraph
{
    namespace pass
    {
        /// \brief Graph-rewrite pass whose constructor registers the relu pattern.
        class CoreFusion : public ngraph::pass::GraphRewrite
        {
        public:
            /// \brief Constructs the pass and registers its relu matcher.
            CoreFusion()
                : GraphRewrite()
            {
                construct_relu_pattern();
            }

            /// \brief Builds the relu pattern and adds its matcher to this pass
            ///        (defined out of line).
            void construct_relu_pattern();
        };
    }
}
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <cassert> #include <cassert>
#include <memory.h> #include <memory.h>
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/pattern/op/any.hpp" #include "ngraph/pattern/op/any.hpp"
#include "ngraph/pattern/op/label.hpp" #include "ngraph/pattern/op/label.hpp"
......
...@@ -69,6 +69,7 @@ ...@@ -69,6 +69,7 @@
#include "ngraph/ops/product.hpp" #include "ngraph/ops/product.hpp"
#include "ngraph/ops/reduce.hpp" #include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reduce_window.hpp" #include "ngraph/ops/reduce_window.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/ops/remainder.hpp" #include "ngraph/ops/remainder.hpp"
#include "ngraph/ops/replace_slice.hpp" #include "ngraph/ops/replace_slice.hpp"
#include "ngraph/ops/reshape.hpp" #include "ngraph/ops/reshape.hpp"
...@@ -2219,10 +2220,9 @@ namespace ngraph ...@@ -2219,10 +2220,9 @@ namespace ngraph
writer << "memory::dims " << var << "{" << dims << "};\n"; writer << "memory::dims " << var << "{" << dims << "};\n";
}; };
writer << "{\n"; writer.block_begin();
writer.indent++; writer << "try\n";
writer << "try {\n"; writer.block_begin();
writer.indent++;
writer << "engine cpu_engine = engine(engine::cpu, 0);\n"; writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
emit_memory_desc("data_desc", join(arg0_shape), elem_type, "nchw"); emit_memory_desc("data_desc", join(arg0_shape), elem_type, "nchw");
emit_memory_desc("delta_desc", join(arg1_shape), elem_type, "nchw"); emit_memory_desc("delta_desc", join(arg1_shape), elem_type, "nchw");
...@@ -2252,15 +2252,13 @@ namespace ngraph ...@@ -2252,15 +2252,13 @@ namespace ngraph
"result);\n" "result);\n"
"stream s = stream(stream::kind::eager);\n" "stream s = stream(stream::kind::eager);\n"
"s.submit({bwd_weights}).wait();\n"; "s.submit({bwd_weights}).wait();\n";
writer.indent--; writer.block_end();
writer << "} catch (const mkldnn::error& e) {\n"; writer << "catch (const mkldnn::error& e)\n";
writer.indent++; writer.block_begin();
writer << "throw ngraph::ngraph_error(\"MKLDNN ERROR (\" + std::to_string(" writer << "throw ngraph::ngraph_error(\"MKLDNN ERROR (\" + std::to_string("
"e.status) + \"): \" + e.message);\n"; "e.status) + \"): \" + e.message);\n";
writer.indent--; writer.block_end();
writer << "}\n"; writer.block_end();
writer.indent--;
writer << "}\n";
} }
else else
{ {
...@@ -2333,10 +2331,9 @@ namespace ngraph ...@@ -2333,10 +2331,9 @@ namespace ngraph
writer << "memory::dims " << var << "{" << dims << "};\n"; writer << "memory::dims " << var << "{" << dims << "};\n";
}; };
writer << "{\n"; writer.block_begin();
writer.indent++; writer << "try\n";
writer << "try {\n"; writer.block_begin();
writer.indent++;
writer << "engine cpu_engine = engine(engine::cpu, 0);\n"; writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
emit_memory_desc("weight_desc", join(arg0_shape), elem_type, "oihw"); emit_memory_desc("weight_desc", join(arg0_shape), elem_type, "oihw");
emit_memory_desc("delta_desc", join(arg1_shape), elem_type, "nchw"); emit_memory_desc("delta_desc", join(arg1_shape), elem_type, "nchw");
...@@ -2365,15 +2362,13 @@ namespace ngraph ...@@ -2365,15 +2362,13 @@ namespace ngraph
"result);\n" "result);\n"
"stream s = stream(stream::kind::eager);\n" "stream s = stream(stream::kind::eager);\n"
"s.submit({bwd_data}).wait();\n"; "s.submit({bwd_data}).wait();\n";
writer.indent--; writer.block_end();
writer << "} catch (const mkldnn::error& e) {\n"; writer << "catch (const mkldnn::error& e)\n";
writer.indent++; writer.block_begin();
writer << "throw ngraph::ngraph_error(\"MKLDNN ERROR (\" + std::to_string(" writer << "throw ngraph::ngraph_error(\"MKLDNN ERROR (\" + std::to_string("
"e.status) + \"): \" + e.message);\n"; "e.status) + \"): \" + e.message);\n";
writer.indent--; writer.block_end();
writer << "}\n"; writer.block_end();
writer.indent--;
writer << "}\n";
} }
else else
{ {
...@@ -3073,6 +3068,123 @@ namespace ngraph ...@@ -3073,6 +3068,123 @@ namespace ngraph
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
} }
// Emits the source for a ReluBackprop node.
//
// For a rank-4 f32 argument the emitted code builds and runs an MKLDNN
// relu_backward primitive (nchw layout) inside a try/catch that converts
// mkldnn::error into ngraph::ngraph_error. For every other input the emitted
// code is a call to kernel::relu_backprop.
//
// args[0] is the relu forward input, args[1] the delta, out[0] the result.
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::ReluBackprop)
{
    const auto& arg_shape = args[0].get_shape();
    const size_t arg_rank = arg_shape.size();

    if (arg_rank == 4 && args[0].get_element_type() == element::f32)
    {
        // The result shape and the MKLDNN data type are only needed on this
        // path, so they are looked up here rather than for every element type
        // (the fallback below must not depend on an MKLDNN type mapping).
        const auto& result_shape = out[0].get_shape();
        const string& et = get_mkldnn_data_type(args[0].get_element_type().c_type_string());

        writer.block_begin();
        writer << "try {\n";
        writer.indent++;
        writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
        writer << "memory::desc input_data_desc = memory::desc({" << join(arg_shape)
               << "}, " << et << ", memory::format::nchw);\n";
        writer << "memory::desc delta_data_desc = memory::desc({"
               << join(args[1].get_shape()) << "}, " << et
               << ", memory::format::nchw);\n";
        writer << "memory::desc result_desc = memory::desc({" << join(result_shape)
               << "}, " << et << ", memory::format::nchw);\n";

        writer << "memory input_data = memory({input_data_desc, cpu_engine}, "
               << args[0].get_name() << ");\n";
        writer << "memory delta_data = memory({delta_data_desc, cpu_engine}, "
               << args[1].get_name() << ");\n";
        writer << "memory result = memory({result_desc, cpu_engine}, "
               << out[0].get_name() << ");\n";

        // The backward primitive descriptor is created from a forward one.
        writer << "relu_forward::desc relu_fwd_desc = "
                  "relu_forward::desc(prop_kind::forward, "
                  "algorithm::eltwise_relu, input_data_desc, 0, 0);\n";
        writer << "relu_forward::primitive_desc relu_fwd_prim_desc = "
                  "relu_forward::primitive_desc(relu_fwd_desc, cpu_engine);\n";
        writer << "relu_backward::desc relu_bwd_desc = "
                  "relu_backward::desc(algorithm::eltwise_relu, "
                  "delta_data_desc, input_data_desc, 0, 0);\n";
        writer << "relu_backward::primitive_desc relu_bdw_prim_desc = "
                  "relu_backward::primitive_desc(relu_bwd_desc, cpu_engine, "
                  "relu_fwd_prim_desc);\n";
        writer
            << "relu_backward relu_bwd= relu_backward(relu_bdw_prim_desc, input_data, "
               "delta_data, result);\n";
        writer << "stream s = stream(stream::kind::eager);\n"
                  "s.submit({relu_bwd}).wait();\n";
        writer.indent--;
        writer << "} catch (const mkldnn::error& e) {\n";
        writer.indent++;
        writer << "throw ngraph::ngraph_error(\"MKLDNN ERROR (\" + std::to_string("
                  "e.status) + \"): \" + e.message);\n";
        writer.block_end(); // closes the catch block
        writer.block_end(); // closes the enclosing scope
    }
    else
    {
        writer << "kernel::relu_backprop<" << out[0].get_type() << ">("
               << args[0].get_name() << ",\n";
        writer << " " << args[1].get_name() << ",\n";
        writer << " " << out[0].get_name() << ",\n";
        writer << " " << out[0].get_size() << ");\n";
    }
}
// Emits the source for a Relu node.
//
// For a rank-4 f32 argument the emitted code builds and runs an MKLDNN
// relu_forward primitive (nchw layout) inside a try/catch that converts
// mkldnn::error into ngraph::ngraph_error. For every other input the emitted
// code is a call to kernel::relu.
//
// args[0] is the input, out[0] the result.
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Relu)
{
    const auto& arg_shape = args[0].get_shape();
    const size_t arg_rank = arg_shape.size();

    if (arg_rank == 4 && args[0].get_element_type() == element::f32)
    {
        // The result shape and the MKLDNN data type are only needed on this
        // path, so they are looked up here rather than for every element type
        // (the fallback below must not depend on an MKLDNN type mapping).
        const auto& result_shape = out[0].get_shape();
        const string& et = get_mkldnn_data_type(args[0].get_element_type().c_type_string());

        writer.block_begin();
        writer << "try {\n";
        writer.indent++;
        writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
        writer << "memory::desc input_data_desc = memory::desc({" << join(arg_shape)
               << "}, " << et << ", memory::format::nchw);\n";
        writer << "memory::desc result_desc = memory::desc({" << join(result_shape)
               << "}, " << et << ", memory::format::nchw);\n";

        writer << "memory input_data = memory({input_data_desc, cpu_engine}, "
               << args[0].get_name() << ");\n";
        writer << "memory result = memory({result_desc, cpu_engine}, "
               << out[0].get_name() << ");\n";

        writer << "relu_forward::desc relu_fwd_desc = "
                  "relu_forward::desc(prop_kind::forward_training, "
                  "algorithm::eltwise_relu, input_data_desc, 0, 0);\n";
        writer << "relu_forward::primitive_desc relu_prim_desc = "
                  "relu_forward::primitive_desc(relu_fwd_desc, cpu_engine);\n";
        writer << "relu_forward relu_fwd= relu_forward(relu_prim_desc, input_data, "
                  "result);\n";
        writer << "stream s = stream(stream::kind::eager);\n"
                  "s.submit({relu_fwd}).wait();\n";
        writer.indent--;
        writer << "} catch (const mkldnn::error& e) {\n";
        writer.indent++;
        writer << "throw ngraph::ngraph_error(\"MKLDNN ERROR (\" + std::to_string("
                  "e.status) + \"): \" + e.message);\n";
        writer.block_end(); // closes the catch block
        writer.block_end(); // closes the enclosing scope
    }
    else
    {
        writer << "kernel::relu<" << out[0].get_type() << ">(" << args[0].get_name()
               << ",\n";
        writer << " " << out[0].get_name() << ",\n";
        writer << " " << out[0].get_size() << ");\n";
    }
}
} }
} }
} }
......
...@@ -78,6 +78,7 @@ ...@@ -78,6 +78,7 @@
#include "ngraph/ops/product.hpp" #include "ngraph/ops/product.hpp"
#include "ngraph/ops/reduce.hpp" #include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reduce_window.hpp" #include "ngraph/ops/reduce_window.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/ops/remainder.hpp" #include "ngraph/ops/remainder.hpp"
#include "ngraph/ops/replace_slice.hpp" #include "ngraph/ops/replace_slice.hpp"
#include "ngraph/ops/reshape.hpp" #include "ngraph/ops/reshape.hpp"
...@@ -97,6 +98,7 @@ ...@@ -97,6 +98,7 @@
#include "ngraph/pass/liveness.hpp" #include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp" #include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp" #include "ngraph/pass/memory_layout.hpp"
#include "ngraph/pattern/core_fusion.hpp"
#include "ngraph/runtime/cpu/cpu_backend.hpp" #include "ngraph/runtime/cpu/cpu_backend.hpp"
#include "ngraph/runtime/cpu/cpu_call_frame.hpp" #include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_emitter.hpp" #include "ngraph/runtime/cpu/cpu_emitter.hpp"
...@@ -229,6 +231,8 @@ static const runtime::cpu::OpMap dispatcher{ ...@@ -229,6 +231,8 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::Product), &runtime::cpu::CPU_Emitter::emit<op::Product>}, {TI(ngraph::op::Product), &runtime::cpu::CPU_Emitter::emit<op::Product>},
{TI(ngraph::op::Max), &runtime::cpu::CPU_Emitter::emit<op::Max>}, {TI(ngraph::op::Max), &runtime::cpu::CPU_Emitter::emit<op::Max>},
{TI(ngraph::op::Min), &runtime::cpu::CPU_Emitter::emit<op::Min>}, {TI(ngraph::op::Min), &runtime::cpu::CPU_Emitter::emit<op::Min>},
{TI(ngraph::op::Relu), &runtime::cpu::CPU_Emitter::emit<op::Relu>},
{TI(ngraph::op::ReluBackprop), &runtime::cpu::CPU_Emitter::emit<op::ReluBackprop>},
}; };
runtime::cpu::CPU_ExternalFunction::CPU_ExternalFunction( runtime::cpu::CPU_ExternalFunction::CPU_ExternalFunction(
...@@ -253,6 +257,7 @@ void runtime::cpu::CPU_ExternalFunction::compile() ...@@ -253,6 +257,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
ngraph::pass::Manager pass_manager; ngraph::pass::Manager pass_manager;
pass_manager.register_pass<ngraph::pass::CoreFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>(); pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPULayout>(); pass_manager.register_pass<runtime::cpu::pass::CPULayout>();
pass_manager.register_pass<ngraph::pass::Liveness>(); pass_manager.register_pass<ngraph::pass::Liveness>();
...@@ -301,6 +306,7 @@ void runtime::cpu::CPU_ExternalFunction::compile() ...@@ -301,6 +306,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
#include "ngraph/runtime/kernel/product.hpp" #include "ngraph/runtime/kernel/product.hpp"
#include "ngraph/runtime/kernel/reduce.hpp" #include "ngraph/runtime/kernel/reduce.hpp"
#include "ngraph/runtime/kernel/reduce_window.hpp" #include "ngraph/runtime/kernel/reduce_window.hpp"
#include "ngraph/runtime/kernel/relu.hpp"
#include "ngraph/runtime/kernel/replace_slice.hpp" #include "ngraph/runtime/kernel/replace_slice.hpp"
#include "ngraph/runtime/kernel/reshape.hpp" #include "ngraph/runtime/kernel/reshape.hpp"
#include "ngraph/runtime/kernel/reverse.hpp" #include "ngraph/runtime/kernel/reverse.hpp"
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "ngraph/ops/batch_norm.hpp" #include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/convolution.hpp" #include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/max_pool.hpp" #include "ngraph/ops/max_pool.hpp"
#include "ngraph/ops/relu.hpp"
#include "mkldnn_utils.hpp" #include "mkldnn_utils.hpp"
...@@ -48,7 +49,9 @@ namespace ngraph ...@@ -48,7 +49,9 @@ namespace ngraph
TI(ngraph::op::ConvolutionBackpropData), TI(ngraph::op::ConvolutionBackpropData),
TI(ngraph::op::ConvolutionBackpropFilters), TI(ngraph::op::ConvolutionBackpropFilters),
TI(ngraph::op::MaxPool), TI(ngraph::op::MaxPool),
TI(ngraph::op::MaxPoolBackprop)}; TI(ngraph::op::MaxPoolBackprop),
TI(ngraph::op::Relu),
TI(ngraph::op::ReluBackprop)};
static const std::unordered_map<std::string, const mkldnn::memory::data_type> static const std::unordered_map<std::string, const mkldnn::memory::data_type>
s_data_type_map{{"char", mkldnn::memory::data_type::s8}, s_data_type_map{{"char", mkldnn::memory::data_type::s8},
......
...@@ -85,6 +85,7 @@ ...@@ -85,6 +85,7 @@
#include "ngraph/runtime/kernel/product.hpp" #include "ngraph/runtime/kernel/product.hpp"
#include "ngraph/runtime/kernel/reduce.hpp" #include "ngraph/runtime/kernel/reduce.hpp"
#include "ngraph/runtime/kernel/reduce_window.hpp" #include "ngraph/runtime/kernel/reduce_window.hpp"
#include "ngraph/runtime/kernel/relu.hpp"
#include "ngraph/runtime/kernel/replace_slice.hpp" #include "ngraph/runtime/kernel/replace_slice.hpp"
#include "ngraph/runtime/kernel/reshape.hpp" #include "ngraph/runtime/kernel/reshape.hpp"
#include "ngraph/runtime/kernel/reverse.hpp" #include "ngraph/runtime/kernel/reverse.hpp"
...@@ -677,6 +678,19 @@ private: ...@@ -677,6 +678,19 @@ private:
reduce_window->get_window_shape(), reduce_window->get_window_shape(),
reduce_window->get_window_movement_strides()); reduce_window->get_window_movement_strides());
} }
else if (node_op == "Relu")
{
kernel::relu<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
reinterpret_cast<T*>(out[0]->get_data_ptr()),
out[0]->get_element_count());
}
else if (node_op == "ReluBackprop")
{
kernel::relu_backprop<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
reinterpret_cast<T*>(args[1]->get_data_ptr()),
reinterpret_cast<T*>(out[0]->get_data_ptr()),
out[0]->get_element_count());
}
// else if (node_op == "Remainder") // else if (node_op == "Remainder")
// { // {
// // node = make_shared<op::Remainder>(args[0], args[1]); // // node = make_shared<op::Remainder>(args[0], args[1]);
......
...@@ -57,6 +57,7 @@ ...@@ -57,6 +57,7 @@
#include "ngraph/ops/not_equal.hpp" #include "ngraph/ops/not_equal.hpp"
#include "ngraph/ops/power.hpp" #include "ngraph/ops/power.hpp"
#include "ngraph/ops/reduce.hpp" #include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/ops/reshape.hpp" #include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/select.hpp" #include "ngraph/ops/select.hpp"
#include "ngraph/ops/sign.hpp" #include "ngraph/ops/sign.hpp"
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
namespace ngraph
{
namespace runtime
{
namespace kernel
{
/// \brief Elementwise relu: out[i] is arg[i] when arg[i] is greater than zero,
///        and zero otherwise.
///
/// \param arg   Input buffer with at least `count` elements.
/// \param out   Output buffer with at least `count` elements.
/// \param count Number of elements to process.
template <typename T>
void relu(T* arg, T* out, size_t count)
{
    const T floor_value = 0;
    size_t idx = 0;
    while (idx < count)
    {
        if (arg[idx] > floor_value)
        {
            out[idx] = arg[idx];
        }
        else
        {
            out[idx] = floor_value;
        }
        ++idx;
    }
}
/// \brief Elementwise relu backprop: out[i] is delta_arg[i] when arg[i] is
///        greater than zero, and zero otherwise.
///
/// \param arg       Forward input buffer with at least `count` elements.
/// \param delta_arg Delta buffer with at least `count` elements.
/// \param out       Output buffer with at least `count` elements.
/// \param count     Number of elements to process.
template <typename T>
void relu_backprop(T* arg, T* delta_arg, T* out, size_t count)
{
    const T blocked = 0;
    size_t idx = 0;
    while (idx < count)
    {
        if (arg[idx] > blocked)
        {
            out[idx] = delta_arg[idx];
        }
        else
        {
            out[idx] = blocked;
        }
        ++idx;
    }
}
}
}
}
...@@ -58,6 +58,7 @@ ...@@ -58,6 +58,7 @@
#include "ngraph/ops/product.hpp" #include "ngraph/ops/product.hpp"
#include "ngraph/ops/reduce.hpp" #include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reduce_window.hpp" #include "ngraph/ops/reduce_window.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/ops/remainder.hpp" #include "ngraph/ops/remainder.hpp"
#include "ngraph/ops/replace_slice.hpp" #include "ngraph/ops/replace_slice.hpp"
#include "ngraph/ops/reshape.hpp" #include "ngraph/ops/reshape.hpp"
...@@ -686,6 +687,14 @@ static shared_ptr<ngraph::Function> ...@@ -686,6 +687,14 @@ static shared_ptr<ngraph::Function>
{ {
node = make_shared<op::Remainder>(args[0], args[1]); node = make_shared<op::Remainder>(args[0], args[1]);
} }
else if (node_op == "Relu")
{
node = make_shared<op::Relu>(args[0]);
}
else if (node_op == "ReluBackprop")
{
node = make_shared<op::ReluBackprop>(args[0], args[1]);
}
else if (node_op == "ReplaceSlice") else if (node_op == "ReplaceSlice")
{ {
auto lower_bounds = node_js.at("lower_bounds").get<vector<size_t>>(); auto lower_bounds = node_js.at("lower_bounds").get<vector<size_t>>();
...@@ -1053,6 +1062,12 @@ static json write(const Node& n) ...@@ -1053,6 +1062,12 @@ static json write(const Node& n)
node["window_shape"] = tmp->get_window_shape(); node["window_shape"] = tmp->get_window_shape();
node["window_movement_strides"] = tmp->get_window_movement_strides(); node["window_movement_strides"] = tmp->get_window_movement_strides();
} }
else if (node_op == "Relu")
{
}
else if (node_op == "ReluBackprop")
{
}
else if (node_op == "Remainder") else if (node_op == "Remainder")
{ {
} }
......
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
# Build script for the nbench benchmarking tool.

# When MKLDNN_INCLUDE_DIR is set, add MKLDNN_LIB_DIR to the linker search path.
if(MKLDNN_INCLUDE_DIR)
link_directories(${MKLDNN_LIB_DIR})
endif()
# nbench is only defined when NGRAPH_CPU_ENABLE is set.
if (NGRAPH_CPU_ENABLE)
# Sources: the tool itself plus the benchmark helper from the test tree.
set (SRC
nbench.cpp
${PROJECT_SOURCE_DIR}/test/util/benchmark.cpp
)
add_executable(nbench ${SRC})
# Make sure the ngraph target is built before nbench.
add_dependencies(nbench ngraph)
# Compile definition handing NGRAPH_INCLUDE_PATH to nbench.cpp as a quoted
# string under the name NGRAPH_HEADERS_PATH.
set(HEADER_SEARCH_DEFINES
"NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\""
)
target_link_libraries(nbench ngraph)
# The definition is applied to nbench.cpp only, not to the whole target.
set_source_files_properties(nbench.cpp PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
endif()
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
// tool to benchmark any ngraph json model with given backend.
// compile and run with:
// g++ ./nbench.cpp -std=c++11 -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -o nbench
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib env NGRAPH_INTERPRETER_EMIT_TIMING=1 ./nbench
// sample models are under ../../test/models
#include <fstream>
#include <ngraph/runtime/backend.hpp>
#include <ngraph/runtime/call_frame.hpp>
#include <ngraph/runtime/manager.hpp>
#include "../../test/util/benchmark.hpp"
#include "../../test/util/test_tools.hpp"
using namespace std;
int main(int argc, char** argv)
{
string model = "model.json";
string backend = "INTERPRETER";
int iter = 10;
bool failed = false;
for (size_t i = 1; i < argc; i++)
{
if (string(argv[i]) == "-f")
{
model = argv[++i];
}
else if (string(argv[i]) == "-b")
{
backend = argv[++i];
}
else if (string(argv[i]) == "-i")
{
try
{
iter = stoi(argv[++i]);
}
catch (...)
{
cout << "Invalid Argument\n";
failed = true;
}
}
}
if (!static_cast<bool>(ifstream(model)))
{
cout << "File " << model << " not found\n";
failed = true;
}
if (failed)
{
cout << R"###(
DESCRIPTION
Benchmark ngraph json model with given backend.
SYNOPSIS
nbench [-f <filename>] [-b <backend>] [-i <iterations>]
OPTIONS
-f model json file to use (default: model.json)
-b Backend to use (default: INTERPRETER)
-i Iterations (default: 10)
)###";
return 1;
}
cout << "Benchmarking " << model << ", " << backend << " backend, " << iter << " iterations.\n";
run_benchmark(model, backend, iter);
}
...@@ -31,6 +31,7 @@ set (SRC ...@@ -31,6 +31,7 @@ set (SRC
builder_xla.cpp builder_xla.cpp
build_graph.cpp build_graph.cpp
copy.cpp copy.cpp
core_fusion.cpp
eigen.cpp eigen.cpp
element_type.cpp element_type.cpp
file_util.cpp file_util.cpp
...@@ -50,6 +51,7 @@ set (SRC ...@@ -50,6 +51,7 @@ set (SRC
type_prop.cpp type_prop.cpp
util/autodiff/backprop_function.cpp util/autodiff/backprop_function.cpp
util/test_tools.cpp util/test_tools.cpp
util/benchmark.cpp
util.cpp util.cpp
uuid.cpp uuid.cpp
) )
......
...@@ -27,13 +27,13 @@ ...@@ -27,13 +27,13 @@
#include "ngraph/json.hpp" #include "ngraph/json.hpp"
#include "ngraph/log.hpp" #include "ngraph/log.hpp"
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/ops/sum.hpp" #include "ngraph/ops/sum.hpp"
#include "ngraph/pass/graph_rewrite.hpp" #include "ngraph/pass/graph_rewrite.hpp"
#include "ngraph/pass/manager.hpp" #include "ngraph/pass/manager.hpp"
#include "ngraph/pattern/matcher.hpp" #include "ngraph/pattern/matcher.hpp"
#include "ngraph/pattern/op/any.hpp" #include "ngraph/pattern/op/any.hpp"
#include "ngraph/pattern/op/label.hpp" #include "ngraph/pattern/op/label.hpp"
#include "ngraph/runtime/argon/ops/relu.hpp"
#include "ngraph/runtime/argon/pass/argon_fusion.hpp" #include "ngraph/runtime/argon/pass/argon_fusion.hpp"
#include "ngraph/serializer.hpp" #include "ngraph/serializer.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
......
...@@ -1000,6 +1000,37 @@ TEST(${BACKEND_NAME}, backwards_power) ...@@ -1000,6 +1000,37 @@ TEST(${BACKEND_NAME}, backwards_power)
autodiff_numeric_compare<float>(manager, backend, make_graph, {x0, x1}, .01f, .01f)); autodiff_numeric_compare<float>(manager, backend, make_graph, {x0, x1}, .01f, .01f));
} }
// Compares the autodiff gradient of op::Relu with a numerically estimated one.
// Inputs are drawn from [-1, -0.01] and [0.01, 1] separately, presumably so the
// numeric estimate never straddles the kink of ReLU at zero.
TEST(${BACKEND_NAME}, backwards_relu)
{
    auto manager = runtime::Manager::get("${BACKEND_NAME}");
    auto backend = manager->allocate_backend();

    test::Uniform<float> rng_neg(-1.0f, -0.01f);
    test::Uniform<float> rng_pos(0.01f, 1.0f);
    Shape shape{2, 3};

    // Builds a fresh single-parameter function computing Relu(X).
    auto make_graph = [shape]() {
        auto X = make_shared<op::Parameter>(element::f32, shape);
        return make_shared<Function>(make_shared<op::Relu>(X),
                                     std::vector<std::shared_ptr<op::Parameter>>{X});
    };

    for (auto i = 0; i < ${TEST_LOOPS}; i++)
    {
        // Strictly negative inputs.
        auto x_neg = rng_neg.initialize(backend->make_primary_tensor_view<float>(shape));
        EXPECT_TRUE(
            autodiff_numeric_compare<float>(manager, backend, make_graph, {x_neg}, .01f, .01f));

        // Strictly positive inputs.
        auto x_pos = rng_pos.initialize(backend->make_primary_tensor_view<float>(shape));
        EXPECT_TRUE(
            autodiff_numeric_compare<float>(manager, backend, make_graph, {x_pos}, .01f, .01f));
    }
}
TEST(${BACKEND_NAME}, backwards_replace_slice) TEST(${BACKEND_NAME}, backwards_replace_slice)
{ {
auto manager = runtime::Manager::get("${BACKEND_NAME}"); auto manager = runtime::Manager::get("${BACKEND_NAME}");
......
...@@ -31,92 +31,13 @@ ...@@ -31,92 +31,13 @@
#include "ngraph/runtime/manager.hpp" #include "ngraph/runtime/manager.hpp"
#include "ngraph/serializer.hpp" #include "ngraph/serializer.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
#include "util/benchmark.hpp"
#include "util/random.hpp" #include "util/random.hpp"
#include "util/test_tools.hpp" #include "util/test_tools.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
// Sums the microseconds of all performance counters that share an op type and
// returns the totals keyed by time (ascending), so callers can walk the result
// in reverse to list the most expensive op types first.
//
// The op type of a counter is the part of its name before the first '_'
// (the whole name when it contains no '_').
static multimap<size_t, string>
    agregate_timing(const vector<runtime::PerformanceCounter>& perf_data)
{
    unordered_map<string, size_t> timing;
    for (const runtime::PerformanceCounter& p : perf_data)
    {
        const string& name = p.name();
        timing[name.substr(0, name.find('_'))] += p.microseconds();
    }

    multimap<size_t, string> rc;
    // const auto& binds to the map's real value_type (pair<const string, size_t>);
    // spelling it pair<string, size_t> forces a temporary copy per element.
    for (const auto& t : timing)
    {
        rc.emplace(t.second, t.first);
    }
    return rc;
}
void run_benchmark(const string& json_path, const string& backend_name, size_t iterations)
{
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
}
test::Uniform<float> rng{-1, 1, 0};
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor =
backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < static_cast<size_t>(iterations); i++)
{
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = agregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
}
TEST(benchmark, mxnet_mnist_mlp_forward) TEST(benchmark, mxnet_mnist_mlp_forward)
{ {
const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json"); const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json");
......
...@@ -7677,3 +7677,120 @@ TEST(${BACKEND_NAME}, min_3d_eliminate_zero_dim) ...@@ -7677,3 +7677,120 @@ TEST(${BACKEND_NAME}, min_3d_eliminate_zero_dim)
cf->call({a}, {result}); cf->call({a}, {result});
EXPECT_EQ((vector<float>{inf, inf, inf, inf, inf, inf}), read_vector<float>(result)); EXPECT_EQ((vector<float>{inf, inf, inf, inf, inf, inf}), read_vector<float>(result));
} }
// Forward op::Relu on a 2x5 f32 tensor: negative entries are expected to come
// out as 0, all other entries unchanged.
TEST(${BACKEND_NAME}, relu_2Dfprop)
{
    Shape shape{2, 5};
    const vector<float> input{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5};
    const vector<float> expected{1, 8, 0, 17, 0, 1, 8, 0, 17, 0};

    // Graph: Relu(param)
    auto param = make_shared<op::Parameter>(element::f32, shape);
    auto relu_node = make_shared<op::Relu>(param);
    auto func = make_shared<Function>(relu_node, op::Parameters{param});

    // Compile for the backend under test and obtain a call frame.
    auto manager = runtime::Manager::get("${BACKEND_NAME}");
    auto external = manager->compile(func);
    auto backend = manager->allocate_backend();
    auto call_frame = backend->make_call_frame(external);

    // Input and output tensors share the same shape.
    auto in_tensor = backend->make_primary_tensor_view(element::f32, shape);
    copy_data(in_tensor, input);
    auto out_tensor = backend->make_primary_tensor_view(element::f32, shape);

    call_frame->call({in_tensor}, {out_tensor});
    EXPECT_EQ(read_vector<float>(out_tensor), expected);
}
// Forward op::Relu on a 2x2x2x2 f32 tensor: negative entries are expected to
// come out as 0, all other entries unchanged.
TEST(${BACKEND_NAME}, relu_4Dfprop)
{
    Shape shape{2, 2, 2, 2};
    const vector<float> input{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1};
    const vector<float> expected{1, 8, 0, 17, 0, 1, 8, 0, 17, 0, 1, 8, 0, 17, 0, 1};

    // Graph: Relu(param)
    auto param = make_shared<op::Parameter>(element::f32, shape);
    auto relu_node = make_shared<op::Relu>(param);
    auto func = make_shared<Function>(relu_node, op::Parameters{param});

    // Compile for the backend under test and obtain a call frame.
    auto manager = runtime::Manager::get("${BACKEND_NAME}");
    auto external = manager->compile(func);
    auto backend = manager->allocate_backend();
    auto call_frame = backend->make_call_frame(external);

    // Input and output tensors share the same shape.
    auto in_tensor = backend->make_primary_tensor_view(element::f32, shape);
    copy_data(in_tensor, input);
    auto out_tensor = backend->make_primary_tensor_view(element::f32, shape);

    call_frame->call({in_tensor}, {out_tensor});
    EXPECT_EQ(read_vector<float>(out_tensor), expected);
}
// Maximum(all-zero constant, param) on a 2x5 f32 tensor must produce the same
// values as ReLU of the parameter: negatives become 0, the rest is unchanged.
TEST(${BACKEND_NAME}, fuse_max_with_constant_zero_input_as_relu)
{
    Shape shape{2, 5};
    const vector<float> input{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5};
    const vector<float> expected{1, 8, 0, 17, 0, 1, 8, 0, 17, 0};

    // Graph: Maximum(zeros, param); only the parameter is a function input.
    auto zeros = op::Constant::create(element::f32, shape, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0});
    auto param = make_shared<op::Parameter>(element::f32, shape);
    auto maximum = make_shared<op::Maximum>(zeros, param);
    auto func = make_shared<Function>(maximum, op::Parameters{param});

    // Compile for the backend under test and obtain a call frame.
    auto manager = runtime::Manager::get("${BACKEND_NAME}");
    auto external = manager->compile(func);
    auto backend = manager->allocate_backend();
    auto call_frame = backend->make_call_frame(external);

    // Input and output tensors share the same shape.
    auto in_tensor = backend->make_primary_tensor_view(element::f32, shape);
    copy_data(in_tensor, input);
    auto out_tensor = backend->make_primary_tensor_view(element::f32, shape);

    call_frame->call({in_tensor}, {out_tensor});
    EXPECT_EQ(read_vector<float>(out_tensor), expected);
}
// op::ReluBackprop on 2x5 f32 tensors: the delta is expected to pass through
// where the forward input is positive and to be 0 where it is negative.
TEST(${BACKEND_NAME}, relu_2Dbackprop)
{
    Shape shape{2, 5};
    const vector<float> forward_input{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5};
    const vector<float> delta_input{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
    const vector<float> expected{1, 2, 0, 4, 0, 6, 7, 0, 9, 0};

    // Graph: ReluBackprop(arg, delta)
    auto arg_param = make_shared<op::Parameter>(element::f32, shape);
    auto delta_param = make_shared<op::Parameter>(element::f32, shape);
    auto relu_bprop = make_shared<op::ReluBackprop>(arg_param, delta_param);
    auto func = make_shared<Function>(relu_bprop, op::Parameters{arg_param, delta_param});

    // Compile for the backend under test and obtain a call frame.
    auto manager = runtime::Manager::get("${BACKEND_NAME}");
    auto external = manager->compile(func);
    auto backend = manager->allocate_backend();
    auto call_frame = backend->make_call_frame(external);

    // All tensors share the same shape.
    auto arg_tensor = backend->make_primary_tensor_view(element::f32, shape);
    copy_data(arg_tensor, forward_input);
    auto delta_tensor = backend->make_primary_tensor_view(element::f32, shape);
    copy_data(delta_tensor, delta_input);
    auto out_tensor = backend->make_primary_tensor_view(element::f32, shape);

    call_frame->call({arg_tensor, delta_tensor}, {out_tensor});
    EXPECT_EQ(read_vector<float>(out_tensor), expected);
}
// op::ReluBackprop on 2x2x2x2 f32 tensors, feeding the same values as forward
// input and as delta: the delta is expected to pass through where the forward
// input is positive and to be 0 where it is negative.
TEST(${BACKEND_NAME}, relu_4Dbackprop)
{
    Shape shape{2, 2, 2, 2};
    const vector<float> forward_input{
        1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1};
    const vector<float> delta_input{
        1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1};
    const vector<float> expected{1, 8, 0, 17, 0, 1, 8, 0, 17, 0, 1, 8, 0, 17, 0, 1};

    // Graph: ReluBackprop(arg, delta)
    auto arg_param = make_shared<op::Parameter>(element::f32, shape);
    auto delta_param = make_shared<op::Parameter>(element::f32, shape);
    auto relu_bprop = make_shared<op::ReluBackprop>(arg_param, delta_param);
    auto func = make_shared<Function>(relu_bprop, op::Parameters{arg_param, delta_param});

    // Compile for the backend under test and obtain a call frame.
    auto manager = runtime::Manager::get("${BACKEND_NAME}");
    auto external = manager->compile(func);
    auto backend = manager->allocate_backend();
    auto call_frame = backend->make_call_frame(external);

    // All tensors share the same shape.
    auto arg_tensor = backend->make_primary_tensor_view(element::f32, shape);
    copy_data(arg_tensor, forward_input);
    auto delta_tensor = backend->make_primary_tensor_view(element::f32, shape);
    copy_data(delta_tensor, delta_input);
    auto out_tensor = backend->make_primary_tensor_view(element::f32, shape);

    call_frame->call({arg_tensor, delta_tensor}, {out_tensor});
    EXPECT_EQ(read_vector<float>(out_tensor), expected);
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <algorithm>
#include <cstdio>
#include <iostream>
#include <list>
#include <memory>
#include "gtest/gtest.h"
#include "ngraph/file_util.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/json.hpp"
#include "ngraph/log.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/pass/graph_rewrite.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pattern/core_fusion.hpp"
#include "ngraph/pattern/matcher.hpp"
#include "ngraph/pattern/op/any.hpp"
#include "ngraph/pattern/op/label.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
#include "util/matcher.hpp"
#include "util/test_tools.hpp"
using namespace ngraph;
using namespace std;
// Runs pass::CoreFusion over Abs(Maximum(all-zero constant, param)) and checks
// that afterwards the first input of the Abs node is an op::Relu.
TEST(core_fusion, core_fusion_pass_basic)
{
    Shape shape{1, 5};

    // Graph: Abs(Maximum(zeros, param))
    auto zeros = op::Constant::create(element::f32, shape, {0, 0, 0, 0, 0});
    auto param = make_shared<op::Parameter>(element::f32, shape);
    auto maximum = make_shared<op::Maximum>(zeros, param);
    auto abs_node = make_shared<op::Abs>(maximum);
    auto func = make_shared<Function>(abs_node, op::Parameters{param});

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>();
    pass_manager.run_passes(func);

    // The cast yields nullptr unless the Abs input has become a Relu.
    auto abs_input_as_relu = std::dynamic_pointer_cast<op::Relu>(abs_node->get_input_op(0));
    ASSERT_NE(abs_input_as_relu, nullptr);
}
...@@ -70,27 +70,6 @@ static std::shared_ptr<Node> construct_constant_node(int n) ...@@ -70,27 +70,6 @@ static std::shared_ptr<Node> construct_constant_node(int n)
return op::Constant::create(element::i32, Shape{}, {n}); return op::Constant::create(element::i32, Shape{}, {n});
} }
// Returns true when reduce_constant is an op::Constant whose raw data is
// byte-for-byte identical to a constant of the same element type and shape in
// which every element was built from the string const_value.
// Returns false for any node that is not an op::Constant.
bool is_equal_to_const_value(std::string const_value, std::shared_ptr<Node> reduce_constant)
{
    auto constant = std::dynamic_pointer_cast<op::Constant>(reduce_constant);
    if (!constant)
    {
        return false;
    }

    auto shape = constant->get_shape();
    size_t element_count = shape_size(shape);

    // Awkward (but generic) way to build a reference constant of a given type,
    // shape and value: one copy of the value string per element.
    std::vector<std::string> element_strings(element_count, const_value);
    auto reference =
        std::make_shared<op::Constant>(constant->get_element_type(), shape, element_strings);

    // Equally awkward way to compare the elements: compare the raw buffers.
    size_t n_bytes = element_count * constant->get_element_type().size();
    NGRAPH_DEBUG << "Comparing " << n_bytes << " bytes";
    return memcmp(reference->get_data_ptr(), constant->get_data_ptr(), n_bytes) == 0;
}
bool is_zero(std::shared_ptr<Node> reduce_constant) bool is_zero(std::shared_ptr<Node> reduce_constant)
{ {
return is_equal_to_const_value("0", reduce_constant); return is_equal_to_const_value("0", reduce_constant);
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "benchmark.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/manager.hpp"
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/serializer.hpp"
#include "random.hpp"
/// Aggregates per-node performance counters into per-op-type totals.
///
/// The op type of a counter is the part of its name before the first '_'
/// (the whole name when it contains no '_').
///
/// \param perf_data counters to aggregate
/// \returns total microseconds -> op type; being a multimap it is ordered by
///          ascending total, so iterate in reverse for "most expensive first"
std::multimap<size_t, std::string>
    aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data)
{
    std::unordered_map<std::string, size_t> timing;
    for (const ngraph::runtime::PerformanceCounter& p : perf_data)
    {
        const std::string& name = p.name();
        timing[name.substr(0, name.find('_'))] += p.microseconds();
    }

    std::multimap<size_t, std::string> rc;
    // const auto& binds to the map's real value_type
    // (std::pair<const std::string, size_t>); spelling it
    // std::pair<std::string, size_t> forces a temporary copy per element.
    for (const auto& t : timing)
    {
        rc.emplace(t.second, t.first);
    }
    return rc;
}
/// Loads the serialized function at json_path, compiles it for backend_name,
/// runs it `iterations` times on random inputs and prints to stdout:
///   - the build (compile + call frame creation) time,
///   - the average wall time per iteration,
///   - the accumulated time per op type, most expensive first.
///
/// NOTE: the average is computed as time / iterations, so callers are expected
/// to pass a positive iteration count.
void run_benchmark(const std::string& json_path, const std::string& backend_name, size_t iterations)
{
    using namespace std;
    using namespace ngraph;

    // Only tell the user how to enable per-op timing when it is not already on.
    const string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
    if (std::getenv(env_var_name.c_str()) == nullptr)
    {
        cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
    }

    // Third argument is presumably the seed -- TODO confirm against test::Uniform.
    ngraph::test::Uniform<float> rng{-1, 1, 0};
    const string json_string = file_util::read_file_to_string(json_path);
    stringstream ss(json_string);
    shared_ptr<Function> f = deserialize(ss);

    // Build phase: everything between start() and stop() is reported as build time.
    stopwatch build_time;
    build_time.start();
    auto manager = runtime::Manager::get(backend_name);
    auto external = manager->compile(f);
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(external);
    build_time.stop();
    cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;

    // One randomly initialized input tensor per function parameter.
    vector<shared_ptr<runtime::TensorView>> args;
    for (shared_ptr<op::Parameter> param : f->get_parameters())
    {
        auto tensor =
            backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
        rng.initialize(tensor);
        args.push_back(tensor);
    }

    // One output tensor per function result.
    vector<shared_ptr<runtime::TensorView>> results;
    for (shared_ptr<Node> out : f->get_results())
    {
        auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
        results.push_back(result);
    }

    // Timed phase: only the calls themselves are measured.
    stopwatch t1;
    t1.start();
    for (size_t i = 0; i < iterations; i++)
    {
        cf->tensor_call(args, results);
    }
    t1.stop();
    float time = t1.get_milliseconds();
    cout << time / iterations << "ms per iteration" << endl;

    vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
    sort(perf_data.begin(),
         perf_data.end(),
         [](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
             return p1.total_microseconds() > p2.total_microseconds();
         });

    multimap<size_t, string> timing = aggregate_timing(perf_data);
    // The locale only has to be installed once, and (as before) only when there
    // is at least one row to print.
    if (!timing.empty())
    {
        cout.imbue(locale(""));
    }
    // The multimap is ordered by ascending time; walk it backwards to print the
    // most expensive op type first.
    for (auto it = timing.rbegin(); it != timing.rend(); ++it)
    {
        cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
    }
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
// Guard against multiple inclusion; this header previously had none.
#pragma once

// Standard headers for the types named in the declarations below.
#include <cstddef>
#include <map>
#include <string>
#include <vector>

#include <ngraph/runtime/call_frame.hpp>
#include "test_tools.hpp"

/// performance test utilities

/// Aggregates performance counters into per-op-type totals.
///
/// \param perf_data counters to aggregate
/// \returns multimap from accumulated microseconds to op type
std::multimap<size_t, std::string>
    aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data);

/// Benchmarks a serialized model and prints the timings to stdout.
///
/// \param json_path    path of the serialized (json) model to load
/// \param backend_name name of the backend to compile and run the model on
/// \param iterations   number of times the compiled model is called
void run_benchmark(const std::string& json_path,
                   const std::string& backend_name,
                   size_t iterations);
...@@ -21,7 +21,9 @@ ...@@ -21,7 +21,9 @@
#include <memory> #include <memory>
#include "ngraph/descriptor/layout/tensor_view_layout.hpp" #include "ngraph/descriptor/layout/tensor_view_layout.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/tensor_view.hpp" #include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/serializer.hpp"
namespace ngraph namespace ngraph
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment