Unverified Commit eee71968 authored by Chris Sullivan, committed by GitHub

Merge branch 'master' into tfl/gpu_fix_constant_bug

parents 181be216 9db548c6
...@@ -36,7 +36,7 @@ General Instructions
These instructions assume that your system has been prepared in accordance
with the above prerequisites.
$ cd private-ngraph-cpp $ cd ngraph-cpp
$ mkdir build
$ cd build
$ cmake .. \
......
...@@ -79,10 +79,11 @@ information about how to change or customize this location.
$ cd build && cmake ../ [-DNGRAPH_USE_PREBUILT_LLVM=TRUE]
#. (Optional) Run ``$ make [-jN]`` where ``-jN`` specifies the number of physical
   cores to use for the build. The example here uses a configuration of ``j8``,
   which is good for a system install using an 8-core Intel® Xeon® CPU. This step
   is **not recommended** for machines with too little RAM available, such as
   those whose RAM is largely consumed by Docker or VM tasks.
.. code-block:: console
......
...@@ -67,6 +67,7 @@ set (SRC ...@@ -67,6 +67,7 @@ set (SRC
ops/replace_slice.cpp ops/replace_slice.cpp
ops/reshape.cpp ops/reshape.cpp
ops/reverse.cpp ops/reverse.cpp
ops/result.cpp
ops/select.cpp ops/select.cpp
ops/select_and_scatter.cpp ops/select_and_scatter.cpp
ops/sin.cpp ops/sin.cpp
...@@ -189,6 +190,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND ...@@ -189,6 +190,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/pass/cpu_assignment.cpp runtime/cpu/pass/cpu_assignment.cpp
runtime/cpu/pass/cpu_fusion.cpp runtime/cpu/pass/cpu_fusion.cpp
runtime/cpu/pass/cpu_layout.cpp runtime/cpu/pass/cpu_layout.cpp
runtime/cpu/pass/cpu_nop_elimination.cpp
) )
# LLVM binary builds are typically built without RTTI # LLVM binary builds are typically built without RTTI
# The built-in headers are in a version-specific directory # The built-in headers are in a version-specific directory
......
...@@ -75,10 +75,5 @@ namespace ngraph ...@@ -75,10 +75,5 @@ namespace ngraph
protected: protected:
std::unordered_map<Node*, std::shared_ptr<Node>> m_adjoint_map; std::unordered_map<Node*, std::shared_ptr<Node>> m_adjoint_map;
}; };
/// @brief Returns a FunctionSpec for the backprop derivative of its argument.
/// @param f is f(X_i...)
/// @returns f'(X_i..., c) where f'(x_i, ..., c)_j is backprop for X_j
std::shared_ptr<Function> backprop_function(const std::shared_ptr<Function>& f);
} }
} }
...@@ -27,7 +27,7 @@ using namespace ngraph; ...@@ -27,7 +27,7 @@ using namespace ngraph;
atomic<size_t> Function::m_next_instance_id(0); atomic<size_t> Function::m_next_instance_id(0);
Function::Function(const NodeVector& results, Function::Function(const ResultVector& results,
const op::ParameterVector& parameters, const op::ParameterVector& parameters,
const std::string& name) const std::string& name)
: m_results(results) : m_results(results)
...@@ -37,14 +37,50 @@ Function::Function(const NodeVector& results, ...@@ -37,14 +37,50 @@ Function::Function(const NodeVector& results,
, m_name(name) , m_name(name)
, m_unique_name("Function_" + to_string(m_instance_id)) , m_unique_name("Function_" + to_string(m_instance_id))
{ {
init();
}
Function::Function(const NodeVector& results,
const op::ParameterVector& parameters,
const std::string& name)
: m_results(results.size())
, m_parameters(parameters)
, m_temporary_pool_size(0)
, m_instance_id(m_next_instance_id.fetch_add(1))
, m_name(name)
, m_unique_name("Function_" + to_string(m_instance_id))
{
std::transform(results.begin(), results.end(), m_results.begin(), [](std::shared_ptr<Node> n) {
return std::make_shared<op::Result>(n);
});
init();
}
Function::Function(const std::shared_ptr<Node>& result,
const op::ParameterVector& parameters,
const std::string& name)
: Function(NodeVector{result}, parameters, name)
{
}
void Function::init()
{
for (auto r : m_results)
{
for (descriptor::Output& output : r->get_outputs())
{
output.get_tensor().set_is_output();
}
}
traverse_nodes(this, [&](shared_ptr<Node> node) { traverse_nodes(this, [&](shared_ptr<Node> node) {
std::shared_ptr<op::Parameter> p = std::dynamic_pointer_cast<op::Parameter>(node); std::shared_ptr<op::Parameter> p = std::dynamic_pointer_cast<op::Parameter>(node);
if (nullptr != p) if (nullptr != p)
{ {
auto it = std::find_if(parameters.begin(), auto it = std::find_if(m_parameters.begin(),
parameters.end(), m_parameters.end(),
[p](std::shared_ptr<op::Parameter> q) { return (p == q); }); [p](std::shared_ptr<op::Parameter> q) { return (p == q); });
if (it == parameters.end()) if (it == m_parameters.end())
{ {
throw ngraph_error("Function references undeclared parameter"); throw ngraph_error("Function references undeclared parameter");
} }
...@@ -52,13 +88,6 @@ Function::Function(const NodeVector& results, ...@@ -52,13 +88,6 @@ Function::Function(const NodeVector& results,
}); });
} }
Function::Function(const std::shared_ptr<Node>& result,
const op::ParameterVector& parameters,
const std::string& name)
: Function(NodeVector{result}, parameters, name)
{
}
std::list<shared_ptr<Node>> Function::get_ordered_ops() std::list<shared_ptr<Node>> Function::get_ordered_ops()
{ {
return topological_sort(get_ops()); return topological_sort(get_ops());
...@@ -156,18 +185,7 @@ std::list<shared_ptr<Node>> Function::get_ops() const ...@@ -156,18 +185,7 @@ std::list<shared_ptr<Node>> Function::get_ops() const
return ops; return ops;
} }
void Function::replace_output_op(std::shared_ptr<Node> old, std::shared_ptr<Node> repl)
{
auto it = std::find(begin(m_results), end(m_results), old);
if (it != end(m_results))
{
NGRAPH_DEBUG << "Replacing output " << old->get_name() << " w/ " << repl->get_name();
*it = repl;
}
}
void Function::replace_node(std::shared_ptr<Node> old, std::shared_ptr<Node> repl) void Function::replace_node(std::shared_ptr<Node> old, std::shared_ptr<Node> repl)
{ {
replace_output_op(old, repl); ngraph::replace_node(old, repl);
ngraph::replace_node(old, repl, true);
} }
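For context, the NodeVector-based constructor above now delegates by wrapping every requested output in an op::Result before handing off to the ResultVector overload. A minimal by-hand sketch of the same construction, using only the interfaces introduced in this diff (the helper name is illustrative):

#include "ngraph/function.hpp"
#include "ngraph/ops/result.hpp"

// Hedged sketch: wrap each requested output node in an op::Result, then build
// the Function from the resulting ResultVector (what the delegating ctor does).
std::shared_ptr<ngraph::Function> make_function_sketch(const ngraph::NodeVector& outputs,
                                                       const ngraph::op::ParameterVector& params)
{
    ngraph::ResultVector results;
    for (const auto& n : outputs)
    {
        results.push_back(std::make_shared<ngraph::op::Result>(n));
    }
    return std::make_shared<ngraph::Function>(results, params);
}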
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/ops/parameter_vector.hpp" #include "ngraph/ops/parameter_vector.hpp"
#include "ngraph/ops/result_vector.hpp"
#include "ngraph/types/type.hpp" #include "ngraph/types/type.hpp"
namespace ngraph namespace ngraph
...@@ -41,6 +42,12 @@ namespace ngraph ...@@ -41,6 +42,12 @@ namespace ngraph
const op::ParameterVector& parameters, const op::ParameterVector& parameters,
const std::string& name = ""); const std::string& name = "");
Function(const ResultVector& results,
const op::ParameterVector& parameters,
const std::string& name = "");
void init();
virtual ~Function() {} virtual ~Function() {}
public: public:
/// Return the number of outputs for this function. /// Return the number of outputs for this function.
...@@ -57,8 +64,8 @@ namespace ngraph ...@@ -57,8 +64,8 @@ namespace ngraph
/// Return the function parameters /// Return the function parameters
const op::ParameterVector& get_parameters() const { return m_parameters; } const op::ParameterVector& get_parameters() const { return m_parameters; }
/// Return a list of the function's outputs
const ResultVector& get_results() const { return m_results; }
/// Check that there is a single result and return it. /// Check that there is a single result and return it.
std::shared_ptr<Node> get_result() const; std::shared_ptr<Node> get_result() const;
...@@ -73,13 +80,11 @@ namespace ngraph ...@@ -73,13 +80,11 @@ namespace ngraph
size_t get_instance_id() { return m_instance_id; } size_t get_instance_id() { return m_instance_id; }
size_t get_temporary_pool_size(); size_t get_temporary_pool_size();
void set_temporary_pool_size(size_t); void set_temporary_pool_size(size_t);
// updates old w/ repl in m_results list
void replace_output_op(std::shared_ptr<Node> old, std::shared_ptr<Node> repl);
// updates graph and m_results list // updates graph and m_results list
void replace_node(std::shared_ptr<Node> old, std::shared_ptr<Node> repl); void replace_node(std::shared_ptr<Node> old, std::shared_ptr<Node> repl);
protected: protected:
NodeVector m_results; ResultVector m_results;
op::ParameterVector m_parameters; op::ParameterVector m_parameters;
size_t m_temporary_pool_size; size_t m_temporary_pool_size;
......
...@@ -29,6 +29,8 @@ ...@@ -29,6 +29,8 @@
#include "ngraph/node_vector.hpp" #include "ngraph/node_vector.hpp"
#include "ngraph/ops/constant.hpp" #include "ngraph/ops/constant.hpp"
#include "ngraph/ops/parameter.hpp" #include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/ops/result_vector.hpp"
#include "ngraph/placement.hpp" #include "ngraph/placement.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
...@@ -114,13 +116,11 @@ void ngraph::free_nodes(shared_ptr<Function> p) ...@@ -114,13 +116,11 @@ void ngraph::free_nodes(shared_ptr<Function> p)
} }
} }
void ngraph::replace_node(std::shared_ptr<Node> target, void ngraph::replace_node(std::shared_ptr<Node> target, std::shared_ptr<Node> replacement)
std::shared_ptr<Node> replacement,
bool replace_output)
{ {
if (target->is_output() && !replace_output) if (target->is_output())
{ {
return; throw ngraph_error("Result nodes cannot be replaced.");
} }
// Fix input/output descriptors // Fix input/output descriptors
...@@ -197,6 +197,15 @@ std::list<std::shared_ptr<ngraph::Node>> ...@@ -197,6 +197,15 @@ std::list<std::shared_ptr<ngraph::Node>>
return result_list; return result_list;
} }
void ngraph::NodeMap::update(std::shared_ptr<ngraph::Node> orig, std::shared_ptr<ngraph::Node> val)
{
if (!exists(orig))
{
throw ngraph_error("Node doesn't exist!");
}
m_node_map[orig] = val;
}
void ngraph::NodeMap::add(std::shared_ptr<ngraph::Node> orig, void ngraph::NodeMap::add(std::shared_ptr<ngraph::Node> orig,
std::shared_ptr<ngraph::Node> replacement) std::shared_ptr<ngraph::Node> replacement)
{ {
...@@ -252,10 +261,15 @@ std::shared_ptr<ngraph::Function> ngraph::clone_function(std::shared_ptr<ngraph: ...@@ -252,10 +261,15 @@ std::shared_ptr<ngraph::Function> ngraph::clone_function(std::shared_ptr<ngraph:
clone_nodes(func->get_ops(), node_map); clone_nodes(func->get_ops(), node_map);
// get cloned function results and parameters // get cloned function results and parameters
NodeVector cloned_results; ResultVector cloned_results;
for (shared_ptr<Node> node : func->get_results()) for (shared_ptr<Node> node : func->get_results())
{ {
cloned_results.push_back(node_map.get(node)); auto result = std::dynamic_pointer_cast<op::Result>(node_map.get(node));
if (!result)
{
throw ngraph_error("Results should be of type op::Result");
}
cloned_results.push_back(result);
} }
std::vector<std::shared_ptr<op::Parameter>> cloned_params; std::vector<std::shared_ptr<op::Parameter>> cloned_params;
for (auto param : func->get_parameters()) for (auto param : func->get_parameters())
...@@ -435,8 +449,8 @@ static shared_ptr<Function> build_largest_colocated_function( ...@@ -435,8 +449,8 @@ static shared_ptr<Function> build_largest_colocated_function(
} }
} }
} }
auto func = make_shared<Function>(outputs, collected_parameters);
return make_shared<Function>(outputs, collected_parameters); return func;
} }
// The returned nodes contains the node N with highest order. If N is placed at P, the returned // The returned nodes contains the node N with highest order. If N is placed at P, the returned
...@@ -528,7 +542,7 @@ vector<shared_ptr<Function>> ngraph::split_function_by_placement( ...@@ -528,7 +542,7 @@ vector<shared_ptr<Function>> ngraph::split_function_by_placement(
// Remove input-output and constant-output aliasing // Remove input-output and constant-output aliasing
if (f_parameters.count(node) == 0 && node->description() != "Constant") if (f_parameters.count(node) == 0 && node->description() != "Constant")
{ {
unvisited_outputs.insert(node); unvisited_outputs.insert(node->get_input_op(0));
} }
} }
...@@ -571,6 +585,24 @@ vector<shared_ptr<Function>> ngraph::split_function_by_placement( ...@@ -571,6 +585,24 @@ vector<shared_ptr<Function>> ngraph::split_function_by_placement(
unvisited_outputs = updated_unvisited_outputs; unvisited_outputs = updated_unvisited_outputs;
} }
unordered_map<shared_ptr<Node>, shared_ptr<Node>> map_source_node_to_result;
for (auto cf : colocated_functions)
{
for (auto r : cf->get_results())
{
map_source_node_to_result[r->get_input_op(0)] = r;
}
}
for (auto it = map_parameter_to_source_node.begin(); it != map_parameter_to_source_node.end();
++it)
{
if (map_source_node_to_result.count(it->second) != 0)
{
it->second = map_source_node_to_result[it->second];
}
}
// The colocated_functions should be called in reversed order // The colocated_functions should be called in reversed order
reverse(colocated_functions.begin(), colocated_functions.end()); reverse(colocated_functions.begin(), colocated_functions.end());
return colocated_functions; return colocated_functions;
......
...@@ -48,9 +48,8 @@ namespace ngraph ...@@ -48,9 +48,8 @@ namespace ngraph
void free_nodes(std::shared_ptr<Function>); void free_nodes(std::shared_ptr<Function>);
void replace_node(std::shared_ptr<Node> target, void replace_node(std::shared_ptr<Node> target, std::shared_ptr<Node> replacement);
std::shared_ptr<Node> replacement,
bool replace_output = false);
void replace_node_users_arguments(std::shared_ptr<Node> target, void replace_node_users_arguments(std::shared_ptr<Node> target,
std::shared_ptr<Node> replacement); std::shared_ptr<Node> replacement);
...@@ -78,6 +77,8 @@ namespace ngraph ...@@ -78,6 +77,8 @@ namespace ngraph
return (m_node_map.count(orig) != 0); return (m_node_map.count(orig) != 0);
} }
void update(std::shared_ptr<ngraph::Node> orig, std::shared_ptr<ngraph::Node> val);
const std::unordered_map<std::shared_ptr<ngraph::Node>, std::shared_ptr<ngraph::Node>>& const std::unordered_map<std::shared_ptr<ngraph::Node>, std::shared_ptr<ngraph::Node>>&
get_node_map() const get_node_map() const
{ {
......
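A short usage sketch of the add/update contract on NodeMap introduced above: add() registers the first mapping for a node, while the new update() rebinds an existing entry and throws if the node was never added. The helper name and header path below are assumptions for illustration.

#include "ngraph/graph_util.hpp" // header path assumed for NodeMap

void remap_sketch(ngraph::NodeMap& node_map,
                  const std::shared_ptr<ngraph::Node>& original,
                  const std::shared_ptr<ngraph::Node>& first_clone,
                  const std::shared_ptr<ngraph::Node>& better_clone)
{
    node_map.add(original, first_clone); // first mapping; add() is for new entries
    if (node_map.exists(original))
    {
        node_map.update(original, better_clone); // rebind; throws if 'original' is unknown
    }
}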
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "ngraph/descriptor/layout/tensor_view_layout.hpp" #include "ngraph/descriptor/layout/tensor_view_layout.hpp"
#include "ngraph/descriptor/primary_tensor_view.hpp" #include "ngraph/descriptor/primary_tensor_view.hpp"
#include "ngraph/ops/parameter.hpp" #include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/placement.hpp" #include "ngraph/placement.hpp"
using namespace std; using namespace std;
...@@ -34,7 +35,6 @@ Node::Node(const std::string& node_type, const NodeVector& arguments) ...@@ -34,7 +35,6 @@ Node::Node(const std::string& node_type, const NodeVector& arguments)
: m_node_type(node_type) : m_node_type(node_type)
, m_instance_id(m_next_instance_id.fetch_add(1)) , m_instance_id(m_next_instance_id.fetch_add(1))
, m_unique_name(description() + "_" + to_string(m_instance_id)) , m_unique_name(description() + "_" + to_string(m_instance_id))
, m_is_output(false)
, m_arguments(arguments) , m_arguments(arguments)
{ {
// Add this node as a user of each argument. // Add this node as a user of each argument.
...@@ -68,7 +68,7 @@ void Node::add_output(const element::Type& element_type, const Shape& shape) ...@@ -68,7 +68,7 @@ void Node::add_output(const element::Type& element_type, const Shape& shape)
auto tensor_view_descriptor = make_shared<descriptor::PrimaryTensorView>( auto tensor_view_descriptor = make_shared<descriptor::PrimaryTensorView>(
tensor_view_type, tensor_view_type,
ngraph::descriptor::Tensor::make_tensor_name(this, i), ngraph::descriptor::Tensor::make_tensor_name(this, i),
is_output(), false,
is_parameter(), is_parameter(),
is_constant()); is_constant());
m_outputs.emplace_back(this, i, tensor_view_descriptor); m_outputs.emplace_back(this, i, tensor_view_descriptor);
...@@ -96,16 +96,7 @@ bool Node::is_parameter() const ...@@ -96,16 +96,7 @@ bool Node::is_parameter() const
bool Node::is_output() const bool Node::is_output() const
{ {
return m_is_output; return false;
}
void Node::set_is_output()
{
m_is_output = true;
for (descriptor::Output& output : get_outputs())
{
output.get_tensor().set_is_output();
}
} }
bool Node::is_constant() const bool Node::is_constant() const
......
...@@ -102,8 +102,7 @@ namespace ngraph ...@@ -102,8 +102,7 @@ namespace ngraph
void set_value_type_checked(const element::Type& element_type, const Shape& shape); void set_value_type_checked(const element::Type& element_type, const Shape& shape);
bool is_parameter() const; bool is_parameter() const;
bool is_output() const; virtual bool is_output() const;
void set_is_output();
virtual bool is_constant() const; virtual bool is_constant() const;
virtual bool is_commutative() { return false; } virtual bool is_commutative() { return false; }
size_t get_instance_id() const { return m_instance_id; } size_t get_instance_id() const { return m_instance_id; }
...@@ -200,7 +199,6 @@ namespace ngraph ...@@ -200,7 +199,6 @@ namespace ngraph
static std::atomic<size_t> m_next_instance_id; static std::atomic<size_t> m_next_instance_id;
std::deque<descriptor::Input> m_inputs; std::deque<descriptor::Input> m_inputs;
std::deque<descriptor::Output> m_outputs; std::deque<descriptor::Output> m_outputs;
bool m_is_output;
std::unordered_map<Node*, autodiff::Adjoints> m_adjoint_map; std::unordered_map<Node*, autodiff::Adjoints> m_adjoint_map;
Placement m_placement = Placement::DEFAULT; Placement m_placement = Placement::DEFAULT;
......
...@@ -23,6 +23,11 @@ namespace ngraph ...@@ -23,6 +23,11 @@ namespace ngraph
{ {
class Node; class Node;
namespace op
{
class Result;
}
/// \brief Zero or more nodes. /// \brief Zero or more nodes.
class NodeVector : public std::vector<std::shared_ptr<Node>> class NodeVector : public std::vector<std::shared_ptr<Node>>
{ {
......
...@@ -21,21 +21,20 @@ ...@@ -21,21 +21,20 @@
ngraph::op::BatchNorm::BatchNorm(double eps, ngraph::op::BatchNorm::BatchNorm(double eps,
std::shared_ptr<ngraph::Node> gamma, std::shared_ptr<ngraph::Node> gamma,
std::shared_ptr<ngraph::Node> beta, std::shared_ptr<ngraph::Node> beta,
std::shared_ptr<ngraph::Node> input, std::shared_ptr<ngraph::Node> input)
std::shared_ptr<ngraph::Node> mean, : RequiresTensorViewArgs("BatchNorm", {gamma, beta, input})
std::shared_ptr<ngraph::Node> variance)
: RequiresTensorViewArgs("BatchNorm", {gamma, beta, input, mean, variance})
, m_bn_input_shape(input->get_shape()) , m_bn_input_shape(input->get_shape())
, m_bn_variance_shape(variance->get_shape())
, m_bn_mean_shape(mean->get_shape())
, m_epsilon(eps) , m_epsilon(eps)
{ {
add_output(input->get_element_type(), m_bn_input_shape);
if (m_bn_input_shape.size() < 2) if (m_bn_input_shape.size() < 2)
{ {
throw ngraph_error("input tensor to batchnorm much have tensor of atleast rank 2"); throw ngraph_error("input tensor to batchnorm much have tensor of atleast rank 2");
} }
else
{
this->m_bn_variance_shape.push_back(input->get_shape()[1]);
this->m_bn_mean_shape.push_back(input->get_shape()[1]);
}
if (m_bn_input_shape[1] == 0) if (m_bn_input_shape[1] == 0)
{ {
...@@ -49,51 +48,27 @@ ngraph::op::BatchNorm::BatchNorm(double eps, ...@@ -49,51 +48,27 @@ ngraph::op::BatchNorm::BatchNorm(double eps,
throw ngraph_error("gamma, beta, mean, variance shoud have all rank 1"); throw ngraph_error("gamma, beta, mean, variance shoud have all rank 1");
} }
// assuming input shape (N, C, H, W), check if the size of mean and
// variance are equal to channel axis
if (mean->get_shape()[0] != m_bn_input_shape[1])
{
throw ngraph_error("mean size is not equal to input channel size");
}
if (variance->get_shape()[0] != m_bn_input_shape[1])
{
throw ngraph_error("variance size is not equal to input channel size");
}
if (variance->get_shape().size() != mean->get_shape().size())
{
throw ngraph_error("mean and variance rank does not match");
}
if (gamma->get_shape().size() != beta->get_shape().size()) if (gamma->get_shape().size() != beta->get_shape().size())
{ {
throw ngraph_error("gamma and beta rank does not match"); throw ngraph_error("gamma and beta rank does not match");
} }
if (input->get_element_type() != mean->get_element_type())
{
throw ngraph_error("input tensor and mean element type does not match");
}
if (input->get_element_type() != variance->get_element_type())
{
throw ngraph_error("input tensor and variance element type does not match");
}
if (gamma->get_element_type() != beta->get_element_type()) if (gamma->get_element_type() != beta->get_element_type())
{ {
throw ngraph_error("gamma and beta element type does not match"); throw ngraph_error("gamma and beta element type does not match");
} }
add_output(input->get_element_type(), m_bn_input_shape);
add_output(input->get_element_type(), m_bn_mean_shape);
add_output(input->get_element_type(), m_bn_variance_shape);
} }
std::shared_ptr<ngraph::Node> std::shared_ptr<ngraph::Node>
ngraph::op::BatchNorm::copy_with_new_args(const NodeVector& new_args) const ngraph::op::BatchNorm::copy_with_new_args(const NodeVector& new_args) const
{ {
if (new_args.size() != 5) if (new_args.size() != 3)
throw ngraph_error("Incorrect number of new arguments"); throw ngraph_error("Incorrect number of new arguments");
return std::make_shared<BatchNorm>( return std::make_shared<BatchNorm>(m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2));
m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), new_args.at(4));
} }
ngraph::op::BatchNormBackprop::BatchNormBackprop(double eps, ngraph::op::BatchNormBackprop::BatchNormBackprop(double eps,
...@@ -174,10 +149,10 @@ void ngraph::op::BatchNorm::generate_adjoints(autodiff::Adjoints& adjoints, ...@@ -174,10 +149,10 @@ void ngraph::op::BatchNorm::generate_adjoints(autodiff::Adjoints& adjoints,
auto gamma = get_input_op(0); auto gamma = get_input_op(0);
auto beta = get_input_op(1); auto beta = get_input_op(1);
auto input = get_input_op(2); auto input = get_input_op(2);
auto mean = get_input_op(3); auto mean = std::make_shared<op::GetOutputElement>(shared_from_this(), 1);
auto variance = get_input_op(4); auto var = std::make_shared<op::GetOutputElement>(shared_from_this(), 2);
auto bbn = std::make_shared<op::BatchNormBackprop>( auto bbn = std::make_shared<op::BatchNormBackprop>(
get_eps_value(), gamma, beta, input, mean, variance, delta); get_eps_value(), gamma, beta, input, mean, var, delta);
auto dinput = std::make_shared<op::GetOutputElement>(bbn, 0); auto dinput = std::make_shared<op::GetOutputElement>(bbn, 0);
auto dgamma = std::make_shared<op::GetOutputElement>(bbn, 1); auto dgamma = std::make_shared<op::GetOutputElement>(bbn, 1);
auto dbeta = std::make_shared<op::GetOutputElement>(bbn, 2); auto dbeta = std::make_shared<op::GetOutputElement>(bbn, 2);
......
...@@ -33,9 +33,7 @@ namespace ngraph ...@@ -33,9 +33,7 @@ namespace ngraph
BatchNorm(double eps, BatchNorm(double eps,
std::shared_ptr<Node> gamma, std::shared_ptr<Node> gamma,
std::shared_ptr<Node> beta, std::shared_ptr<Node> beta,
std::shared_ptr<Node> input, std::shared_ptr<Node> input);
std::shared_ptr<Node> mean,
std::shared_ptr<Node> variance);
const Shape& get_inputs_shape() const { return m_bn_input_shape; } const Shape& get_inputs_shape() const { return m_bn_input_shape; }
const Shape& get_variance_shape() const { return m_bn_variance_shape; } const Shape& get_variance_shape() const { return m_bn_variance_shape; }
......
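With the signature change above, BatchNorm now takes only gamma, beta, and the input, and exposes three outputs (normalized result, batch mean, batch variance) that consumers read through op::GetOutputElement, as the backprop code earlier in this diff already does. A hedged construction sketch; the batch_norm.hpp include path is an assumption:

#include "ngraph/ops/batch_norm.hpp"         // path assumed
#include "ngraph/ops/get_output_element.hpp"

std::shared_ptr<ngraph::Node> batchnorm_outputs_sketch(double eps,
                                                       const std::shared_ptr<ngraph::Node>& gamma,
                                                       const std::shared_ptr<ngraph::Node>& beta,
                                                       const std::shared_ptr<ngraph::Node>& input)
{
    auto bn = std::make_shared<ngraph::op::BatchNorm>(eps, gamma, beta, input);
    auto normalized = std::make_shared<ngraph::op::GetOutputElement>(bn, 0); // same shape as input
    auto mean = std::make_shared<ngraph::op::GetOutputElement>(bn, 1);       // per-channel mean
    auto variance = std::make_shared<ngraph::op::GetOutputElement>(bn, 2);   // per-channel variance
    (void)mean;
    (void)variance;
    return normalized;
}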
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <memory>
#include <typeindex>
#include <typeinfo>
#include "ngraph/node.hpp"
#include "ngraph/ops/result.hpp"
using namespace std;
using namespace ngraph;
op::Result::Result(const std::shared_ptr<Node>& arg)
: RequiresTensorViewArgs("Result", {arg})
{
if (arg->get_outputs().size() != 1)
{
throw ngraph_error("Expected a single-output argument");
}
// always borrow the placement configuration, even if it is the default one
set_placement(arg->get_placement());
set_value_type_checked(arg->get_element_type(), arg->get_shape());
}
std::shared_ptr<Node> op::Result::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 1)
{
throw ngraph_error("Incorrect number of new arguments");
}
if (new_args.at(0)->get_outputs().size() != 1)
{
throw ngraph_error("Expected a single-output argument");
}
return std::make_shared<Result>(new_args.at(0));
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <memory>
#include "ngraph/ops/util/requires_tensor_view_args.hpp"
namespace ngraph
{
namespace op
{
class Result : public util::RequiresTensorViewArgs
{
public:
/// \brief Constructs a Result operation.
///
/// \param arg Node that produces the input tensor.
Result(const std::shared_ptr<Node>& arg);
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
virtual bool is_output() const override { return true; }
protected:
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta) override
{
adjoints.add_delta(get_input_op(0), delta);
}
};
}
}
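Two properties of the new Result op worth noting from the header above: it reports is_output() as true (the base Node now returns false), and its adjoint simply forwards the incoming delta to the wrapped value. A small hedged sketch:

#include "ngraph/ops/result.hpp"

void result_properties_sketch(const std::shared_ptr<ngraph::Node>& value)
{
    // 'value' must have exactly one output, or the constructor throws.
    auto r = std::make_shared<ngraph::op::Result>(value);
    bool flagged = r->is_output();     // true: Result nodes mark the graph outputs
    auto wrapped = r->get_input_op(0); // the node whose value this Result forwards
    (void)flagged;
    (void)wrapped;
}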
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <memory>
#include <vector>
#include "ngraph/ops/result.hpp"
namespace ngraph
{
/// \brief Zero or more result nodes.
class ResultVector : public std::vector<std::shared_ptr<op::Result>>
{
public:
ResultVector(size_t size)
: std::vector<std::shared_ptr<op::Result>>(size)
{
}
ResultVector(const std::initializer_list<std::shared_ptr<op::Result>>& nodes)
: std::vector<std::shared_ptr<op::Result>>(nodes)
{
}
ResultVector(const std::vector<std::shared_ptr<op::Result>>& nodes)
: std::vector<std::shared_ptr<op::Result>>(nodes)
{
}
ResultVector(const ResultVector& nodes)
: std::vector<std::shared_ptr<op::Result>>(nodes)
{
}
ResultVector() {}
};
}
...@@ -30,12 +30,10 @@ using namespace std; ...@@ -30,12 +30,10 @@ using namespace std;
using namespace ngraph; using namespace ngraph;
ngraph::pass::Manager::Manager() ngraph::pass::Manager::Manager()
: m_to_set_is_output(true)
{ {
} }
ngraph::pass::Manager::Manager(bool to_set_is_output) ngraph::pass::Manager::Manager(bool to_set_is_output)
: m_to_set_is_output(to_set_is_output)
{ {
} }
...@@ -56,17 +54,6 @@ void ngraph::pass::Manager::run_passes(shared_ptr<Function> func) ...@@ -56,17 +54,6 @@ void ngraph::pass::Manager::run_passes(shared_ptr<Function> func)
set<shared_ptr<Function>> tfs(begin(fs), end(fs)); set<shared_ptr<Function>> tfs(begin(fs), end(fs));
get_state().set_functions(tfs); get_state().set_functions(tfs);
if (m_to_set_is_output)
{
for (shared_ptr<Function> f : get_state().get_functions())
{
for (size_t i = 0; i < f->get_output_size(); ++i)
{
f->get_output_op(i)->set_is_output();
}
}
}
for (shared_ptr<PassBase> pass : m_pass_list) for (shared_ptr<PassBase> pass : m_pass_list)
{ {
pass->set_state(get_state()); pass->set_state(get_state());
......
...@@ -57,5 +57,4 @@ public: ...@@ -57,5 +57,4 @@ public:
private: private:
std::vector<std::shared_ptr<PassBase>> m_pass_list; std::vector<std::shared_ptr<PassBase>> m_pass_list;
ManagerState m_state; ManagerState m_state;
bool m_to_set_is_output;
}; };
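Since the manager no longer flags output ops itself (Function marks output tensors in its init() at construction time), running passes is just register-and-run. A hedged usage sketch; the header paths are assumptions:

#include "ngraph/pass/manager.hpp"             // path assumed
#include "ngraph/pass/reshape_elimination.hpp" // path assumed

void run_reshape_elimination_sketch(const std::shared_ptr<ngraph::Function>& f)
{
    ngraph::pass::Manager pass_manager;
    pass_manager.register_pass<ngraph::pass::ReshapeElimination>();
    pass_manager.run_passes(f); // no separate "set is_output" step required anymore
}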
...@@ -150,3 +150,50 @@ void ngraph::pass::ReshapeElimination::construct_reshapex2_pattern() ...@@ -150,3 +150,50 @@ void ngraph::pass::ReshapeElimination::construct_reshapex2_pattern()
auto m = std::make_shared<ngraph::pattern::Matcher>(reshape2, callback); auto m = std::make_shared<ngraph::pattern::Matcher>(reshape2, callback);
this->add_matcher(m); this->add_matcher(m);
} }
void ngraph::pass::ReshapeElimination::construct_dot_transpose_pattern()
{
//dot(A,B).T = dot (B.T, A.T)
auto dot_pred = [](std::shared_ptr<Node> n) {
return static_cast<bool>(std::dynamic_pointer_cast<op::Dot>(n));
};
auto pdot = std::make_shared<pattern::op::Label>(element::f32, Shape{2, 1}, dot_pred);
auto preshape = std::make_shared<op::Reshape>(pdot, AxisVector{1, 0}, Shape{1, 2});
ngraph::pattern::gr_callback_fn callback = [](pattern::Matcher& m) {
NGRAPH_DEBUG << "In callback for construct_dot_transpose_pattern against node = "
<< m.match_root()->get_name();
std::shared_ptr<Node> nn;
auto mtranspose = std::dynamic_pointer_cast<op::Reshape>(m.match_root());
//this also checks the rank
if (mtranspose->get_input_order() != AxisVector{1, 0})
{
NGRAPH_DEBUG << "Reshape isn't transpose. "
<< vector_to_string(mtranspose->get_input_order());
return nn;
}
auto mdot = mtranspose->get_input_op(0);
if (mdot->get_shape().size() != 2)
{
NGRAPH_DEBUG << "Dot has the wrong shape. " << vector_to_string(mdot->get_shape());
return nn;
}
auto arg0 = mdot->get_input_op(0);
auto reshape0_shape = Shape{arg0->get_shape().at(1), arg0->get_shape().at(0)};
auto reshape0 = std::make_shared<op::Reshape>(arg0, AxisVector{1, 0}, reshape0_shape);
auto arg1 = mdot->get_input_op(1);
auto reshape1_shape = Shape{arg1->get_shape().at(1), arg1->get_shape().at(0)};
auto reshape1 = std::make_shared<op::Reshape>(arg1, AxisVector{1, 0}, reshape1_shape);
auto tdot = std::shared_ptr<Node>(new op::Dot(reshape1, reshape0));
return tdot;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(preshape, callback);
this->add_matcher(m);
}
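For reference, the identity the callback above exploits, dot(A,B).T = dot(B.T, A.T), follows directly from the index form of matrix multiplication:

\[
    \big((AB)^{\mathsf{T}}\big)_{ij}
      = (AB)_{ji}
      = \sum_{p} A_{jp} B_{pi}
      = \sum_{p} \big(B^{\mathsf{T}}\big)_{ip} \big(A^{\mathsf{T}}\big)_{pj}
      = \big(B^{\mathsf{T}} A^{\mathsf{T}}\big)_{ij},
\]

so swapping and transposing the two dot arguments (the reshape0/reshape1 nodes above) reproduces the transposed product without a trailing transpose.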
...@@ -32,11 +32,13 @@ public: ...@@ -32,11 +32,13 @@ public:
ReshapeElimination() ReshapeElimination()
: GraphRewrite() : GraphRewrite()
{ {
construct_dot_transpose_pattern();
construct_identity_reshape_pattern(); construct_identity_reshape_pattern();
construct_reshapex2_pattern(); construct_reshapex2_pattern();
} }
private: private:
void construct_dot_transpose_pattern();
void construct_identity_reshape_pattern(); void construct_identity_reshape_pattern();
void construct_reshapex2_pattern(); void construct_reshapex2_pattern();
}; };
...@@ -72,6 +72,7 @@ ...@@ -72,6 +72,7 @@
#include "ngraph/ops/remainder.hpp" #include "ngraph/ops/remainder.hpp"
#include "ngraph/ops/replace_slice.hpp" #include "ngraph/ops/replace_slice.hpp"
#include "ngraph/ops/reshape.hpp" #include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/ops/reverse.hpp" #include "ngraph/ops/reverse.hpp"
#include "ngraph/ops/select.hpp" #include "ngraph/ops/select.hpp"
#include "ngraph/ops/select_and_scatter.hpp" #include "ngraph/ops/select_and_scatter.hpp"
...@@ -239,7 +240,7 @@ namespace ngraph ...@@ -239,7 +240,7 @@ namespace ngraph
const Shape& arg0_shape = cg->get_arg0_shape(); //W const Shape& arg0_shape = cg->get_arg0_shape(); //W
const Shape& arg1_shape = cg->get_arg1_shape(); //x const Shape& arg1_shape = cg->get_arg1_shape(); //x
const Shape& arg2_shape = args[2].get_shape(); //bias (C) const Shape& arg2_shape = node->get_shape(); //bias (C)
static const char* ctranspose = "cblas::Transpose::Transpose, "; static const char* ctranspose = "cblas::Transpose::Transpose, ";
static const char* cnotranspose = "cblas::Transpose::None, "; static const char* cnotranspose = "cblas::Transpose::None, ";
...@@ -269,16 +270,23 @@ namespace ngraph ...@@ -269,16 +270,23 @@ namespace ngraph
writer << "{ // " << node->get_name() << "\n"; writer << "{ // " << node->get_name() << "\n";
writer.indent++; writer.indent++;
const char* cbeta = "0.0f";
if (args.size() > 2)
{
writer << "memcpy(" << out[0].get_name() << ", " << args[2].get_name() << ", " writer << "memcpy(" << out[0].get_name() << ", " << args[2].get_name() << ", "
<< out[0].get_size() * out[0].get_element_type().size() << ");\n"; << out[0].get_size() * out[0].get_element_type().size() << ");\n";
cbeta = "1.0f";
}
writer << "cblas::cblas_sgemm(" writer << "cblas::cblas_sgemm("
<< "cblas::Layout::RowMajor, " << tranpose_a << tranpose_b << m << ", " << n << "cblas::Layout::RowMajor, " << tranpose_a << tranpose_b << m << ", " << n
<< ", " << k << ",\n" << ", " << k << ",\n"
<< " 1.0f, " << args[0].get_name() << ", " << max(1UL, lda) << ", " << " 1.0f, " << args[0].get_name() << ", " << max(1UL, lda) << ", "
<< args[1].get_name() << ", " << max(1UL, ldb) << ", 1.0f,\n" << args[1].get_name() << ", " << max(1UL, ldb) << ", " << cbeta << ",\n"
<< " " << out[0].get_name() << ", " << max(1UL, arg2_shape[1]) << " " << out[0].get_name() << ", " << max(1UL, arg2_shape[1])
<< ");\n"; << ");\n";
writer.indent--; writer.indent--;
writer << "}\n"; writer << "}\n";
} }
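The emitted code above implements the bias by copying it into the output buffer first and then letting the GEMM accumulate on top of it (beta = 1.0f); without a bias argument beta stays 0.0f. A hedged standalone sketch of the same trick using the standard cblas.h API (not the project's cblas:: wrapper):

#include <cblas.h>
#include <cstring>

// out is m x n, W is m x k, x is k x n, bias (optional) is m x n, all row-major.
void sgemm_with_bias_sketch(const float* W, const float* x, const float* bias,
                            float* out, int m, int n, int k)
{
    float beta = 0.0f;
    if (bias != nullptr)
    {
        std::memcpy(out, bias, sizeof(float) * m * n); // pre-load the bias into the result
        beta = 1.0f;                                   // ...and accumulate the product onto it
    }
    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                m, n, k,
                1.0f, W, k, // lda = k (row-major, no transpose)
                x, n,       // ldb = n
                beta, out, n);
}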
...@@ -293,14 +301,26 @@ namespace ngraph ...@@ -293,14 +301,26 @@ namespace ngraph
auto gamma_shape = args[0].get_shape(); auto gamma_shape = args[0].get_shape();
auto beta_shape = args[1].get_shape(); auto beta_shape = args[1].get_shape();
auto input_shape = args[2].get_shape(); auto input_shape = args[2].get_shape();
auto mean_shape = args[3].get_shape();
auto variance_shape = args[4].get_shape();
auto result_shape = out[0].get_shape(); auto result_shape = out[0].get_shape();
auto mean_shape = out[1].get_shape();
auto variance_shape = out[2].get_shape();
// get input element type // get input element type
const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string( const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
args[2].get_element_type()); args[2].get_element_type());
const string& gamma_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 0));
const string& beta_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 1));
if (gamma_format.compare("memory::format::x") != 0 &&
beta_format.compare("memory::format::x") != 0)
{
throw std::runtime_error(
"gamma layout->" + gamma_format + ", beta layout->" + beta_format +
" should match and both should have memory::format::x format");
}
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
...@@ -321,16 +341,20 @@ namespace ngraph ...@@ -321,16 +341,20 @@ namespace ngraph
// get the eps value from the bn node // get the eps value from the bn node
writer << "auto epsilon = " << batchnorm->get_eps_value() << ";\n"; writer << "auto epsilon = " << batchnorm->get_eps_value() << ";\n";
const string& input_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 2));
const string& result_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_output_mkldnn_format(node, 0));
// Bind to CPU engine // Bind to CPU engine
writer << "engine cpu_engine = engine(engine::cpu, 0);\n"; writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
// create memory descriptors // create memory descriptors
writer << "memory::desc input_data_desc = memory::desc({" << join(input_shape) writer << "memory::desc input_data_desc = memory::desc({" << join(input_shape)
<< "}, " << et << ", memory::format::nchw);\n"; << "}, " << et << ", " << input_format << ");\n";
// TODO define weights by stacking gamma and beta values // TODO define weights by stacking gamma and beta values
writer << "memory::desc weights_desc = memory::desc({" << join(weights_shape) writer << "memory::desc weights_desc = memory::desc({" << join(weights_shape)
<< "}, " << et << ", memory::format::nc);\n"; << "}, " << et << ", memory::format::nc);\n";
writer << "memory::desc result_desc = memory::desc({" << join(result_shape) << "}, " writer << "memory::desc result_desc = memory::desc({" << join(result_shape) << "}, "
<< et << ", memory::format::nchw);\n"; << et << ", " << result_format << ");\n";
writer << "memory::desc mean_desc = memory::desc({" << join(mean_shape) << "}, " writer << "memory::desc mean_desc = memory::desc({" << join(mean_shape) << "}, "
<< et << ", memory::format::x);\n"; << et << ", memory::format::x);\n";
writer << "memory::desc variance_desc = memory::desc({" << join(variance_shape) writer << "memory::desc variance_desc = memory::desc({" << join(variance_shape)
...@@ -341,17 +365,17 @@ namespace ngraph ...@@ -341,17 +365,17 @@ namespace ngraph
<< args[2].get_name() << ");\n"; << args[2].get_name() << ");\n";
writer << "memory weights = memory({weights_desc, cpu_engine}, bn_weights.data()" writer << "memory weights = memory({weights_desc, cpu_engine}, bn_weights.data()"
<< ");\n"; << ");\n";
writer << "memory mean = memory({mean_desc, cpu_engine}, " << args[3].get_name()
<< ");\n";
writer << "memory variance = memory({variance_desc, cpu_engine}, "
<< args[4].get_name() << ");\n";
writer << "memory result = memory({result_desc, cpu_engine}, " << out[0].get_name() writer << "memory result = memory({result_desc, cpu_engine}, " << out[0].get_name()
<< ");\n"; << ");\n";
writer << "memory mean = memory({mean_desc, cpu_engine}, " << out[1].get_name()
<< ");\n";
writer << "memory variance = memory({variance_desc, cpu_engine}, "
<< out[2].get_name() << ");\n";
// create batchnorm descriptor // create batchnorm descriptor
writer << "batch_normalization_forward::desc bn_fprop_desc = " writer << "batch_normalization_forward::desc bn_fprop_desc = "
"batch_normalization_forward::desc(forward_training," "batch_normalization_forward::desc(forward_training,"
<< "input_data_desc, epsilon, use_global_stats|use_scale_shift);\n"; << "input_data_desc, epsilon, use_scale_shift);\n";
// bn fprop primitive descriptor // bn fprop primitive descriptor
writer writer
<< "batch_normalization_forward::primitive_desc bn_fprop_prim_desc = " << "batch_normalization_forward::primitive_desc bn_fprop_prim_desc = "
...@@ -360,8 +384,8 @@ namespace ngraph ...@@ -360,8 +384,8 @@ namespace ngraph
// create a batchnorm fprop primitive // create a batchnorm fprop primitive
writer << "batch_normalization_forward bn_fprop = " writer << "batch_normalization_forward bn_fprop = "
"batch_normalization_forward(bn_fprop_prim_desc, " "batch_normalization_forward(bn_fprop_prim_desc, "
"primitive::at(input_data),primitive::at(mean), primitive::at(variance)," "primitive::at(input_data),"
<< "primitive::at(weights), result); \n"; << "primitive::at(weights), result, mean, variance); \n";
// create stream and execute // create stream and execute
writer << "stream s = stream(stream::kind::eager);\n" writer << "stream s = stream(stream::kind::eager);\n"
...@@ -3389,6 +3413,15 @@ namespace ngraph ...@@ -3389,6 +3413,15 @@ namespace ngraph
} }
} }
} }
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Result)
{
writer << "kernel::result<" << out[0].get_type() << ">(" << args[0].get_name()
<< ",\n";
writer << " " << out[0].get_name() << ",\n";
writer << " " << shape_size(node->get_shape()) << ");\n";
}
} }
} }
} }
......
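The Result emitter above calls a kernel::result<T> helper from the newly included ngraph/runtime/kernel/result.hpp; its definition is not part of this diff, but from the arguments emitted (input name, output name, element count) it presumably reduces to a plain element copy, roughly:

// Hedged sketch only: signature and body inferred from the emitter above,
// not taken from the real ngraph/runtime/kernel/result.hpp.
#include <cstddef>

namespace kernel_sketch
{
    template <typename T>
    void result(const T* arg, T* out, size_t count)
    {
        for (size_t i = 0; i < count; ++i)
        {
            out[i] = arg[i]; // a Result node just forwards its input value
        }
    }
}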
...@@ -82,6 +82,7 @@ ...@@ -82,6 +82,7 @@
#include "ngraph/ops/remainder.hpp" #include "ngraph/ops/remainder.hpp"
#include "ngraph/ops/replace_slice.hpp" #include "ngraph/ops/replace_slice.hpp"
#include "ngraph/ops/reshape.hpp" #include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/ops/reverse.hpp" #include "ngraph/ops/reverse.hpp"
#include "ngraph/ops/select.hpp" #include "ngraph/ops/select.hpp"
#include "ngraph/ops/select_and_scatter.hpp" #include "ngraph/ops/select_and_scatter.hpp"
...@@ -112,6 +113,7 @@ ...@@ -112,6 +113,7 @@
#include "ngraph/runtime/cpu/pass/cpu_assignment.hpp" #include "ngraph/runtime/cpu/pass/cpu_assignment.hpp"
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp" #include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_layout.hpp" #include "ngraph/runtime/cpu/pass/cpu_layout.hpp"
#include "ngraph/runtime/cpu/pass/cpu_nop_elimination.hpp"
#ifdef NGRAPH_DISTRIBUTED #ifdef NGRAPH_DISTRIBUTED
#include "ngraph/ops/allreduce.hpp" #include "ngraph/ops/allreduce.hpp"
...@@ -227,6 +229,7 @@ static const runtime::cpu::OpMap dispatcher{ ...@@ -227,6 +229,7 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::Not), &runtime::cpu::CPU_Emitter::emit<op::Not>}, {TI(ngraph::op::Not), &runtime::cpu::CPU_Emitter::emit<op::Not>},
{TI(ngraph::op::MaxPool), &runtime::cpu::CPU_Emitter::emit<op::MaxPool>}, {TI(ngraph::op::MaxPool), &runtime::cpu::CPU_Emitter::emit<op::MaxPool>},
{TI(ngraph::op::Reverse), &runtime::cpu::CPU_Emitter::emit<op::Reverse>}, {TI(ngraph::op::Reverse), &runtime::cpu::CPU_Emitter::emit<op::Reverse>},
{TI(ngraph::op::Result), &runtime::cpu::CPU_Emitter::emit<op::Result>},
{TI(ngraph::op::ReduceWindow), &runtime::cpu::CPU_Emitter::emit<op::ReduceWindow>}, {TI(ngraph::op::ReduceWindow), &runtime::cpu::CPU_Emitter::emit<op::ReduceWindow>},
{TI(ngraph::op::SelectAndScatter), &runtime::cpu::CPU_Emitter::emit<op::SelectAndScatter>}, {TI(ngraph::op::SelectAndScatter), &runtime::cpu::CPU_Emitter::emit<op::SelectAndScatter>},
{TI(ngraph::op::AvgPool), &runtime::cpu::CPU_Emitter::emit<op::AvgPool>}, {TI(ngraph::op::AvgPool), &runtime::cpu::CPU_Emitter::emit<op::AvgPool>},
...@@ -268,6 +271,7 @@ void runtime::cpu::CPU_ExternalFunction::compile() ...@@ -268,6 +271,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
ngraph::pass::Manager pass_manager; ngraph::pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUNopElimination>();
pass_manager.register_pass<ngraph::pass::CoreFusion>(); pass_manager.register_pass<ngraph::pass::CoreFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>(); pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this); pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this);
...@@ -316,6 +320,7 @@ void runtime::cpu::CPU_ExternalFunction::compile() ...@@ -316,6 +320,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
#include "ngraph/runtime/kernel/relu.hpp" #include "ngraph/runtime/kernel/relu.hpp"
#include "ngraph/runtime/kernel/replace_slice.hpp" #include "ngraph/runtime/kernel/replace_slice.hpp"
#include "ngraph/runtime/kernel/reshape.hpp" #include "ngraph/runtime/kernel/reshape.hpp"
#include "ngraph/runtime/kernel/result.hpp"
#include "ngraph/runtime/kernel/reverse.hpp" #include "ngraph/runtime/kernel/reverse.hpp"
#include "ngraph/runtime/kernel/select_and_scatter.hpp" #include "ngraph/runtime/kernel/select_and_scatter.hpp"
#include "ngraph/runtime/kernel/slice.hpp" #include "ngraph/runtime/kernel/slice.hpp"
...@@ -604,6 +609,7 @@ using namespace ngraph::runtime; ...@@ -604,6 +609,7 @@ using namespace ngraph::runtime;
} }
// create output alias map // create output alias map
/*
size_t output_index = 0; size_t output_index = 0;
unordered_map<descriptor::TensorView*, vector<size_t>> output_alias_map; unordered_map<descriptor::TensorView*, vector<size_t>> output_alias_map;
vector<size_t> aliases; vector<size_t> aliases;
...@@ -619,49 +625,18 @@ using namespace ngraph::runtime; ...@@ -619,49 +625,18 @@ using namespace ngraph::runtime;
} }
output_index++; output_index++;
} }
*/
// Add outputs to the variable name map // Add outputs to the variable name map
output_index = 0;
for (size_t i = 0; i < current_function->get_output_size(); ++i) for (size_t i = 0; i < current_function->get_output_size(); ++i)
{ {
shared_ptr<Node> op = current_function->get_output_op(i); shared_ptr<Node> op = current_function->get_output_op(i);
shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view(); shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view();
const element::Type& et = tv->get_tensor_view_type()->get_element_type(); string type = tv->get_tensor_view_type()->get_element_type().c_type_string();
bool parameter_as_output = false;
for (shared_ptr<ngraph::op::Parameter> param : current_function->get_parameters())
{
for (const descriptor::Output& pout : param->get_outputs())
{
shared_ptr<descriptor::TensorView> ptv = pout.get_tensor_view();
if (tv == ptv)
{
parameter_as_output = true;
writer << "memcpy(static_cast<" << et.c_type_string() << "*>(outputs["
<< output_index << "]), "
<< m_variable_name_map[ptv->get_tensor().get_name()] << ", "
<< ptv->get_tensor().size() << ");\n";
break;
}
}
}
if (!parameter_as_output && !contains(aliases, output_index))
{
if (contains(constants, tv.get()))
{
writer << "memcpy(outputs[" << output_index << "], "
<< tv->get_tensor().get_name() << ", " << tv->get_tensor().size()
<< ");\n";
}
else
{
string type = et.c_type_string();
stringstream ss; stringstream ss;
ss << "((" << type << "*)(outputs[" << output_index << "]))"; ss << "((" << type << "*)(outputs[" << i << "]))";
m_variable_name_map[tv->get_tensor().get_name()] = ss.str(); m_variable_name_map[tv->get_tensor().get_name()] = ss.str();
} }
}
output_index++;
}
for (shared_ptr<Node> node : current_function->get_ordered_ops()) for (shared_ptr<Node> node : current_function->get_ordered_ops())
{ {
...@@ -751,7 +726,6 @@ using namespace ngraph::runtime; ...@@ -751,7 +726,6 @@ using namespace ngraph::runtime;
// Emit operation epilogue // Emit operation epilogue
if (!node->is_parameter() && !node->is_constant()) if (!node->is_parameter() && !node->is_constant())
{ {
handle_output_alias(writer, *node, output_alias_map);
if (m_emit_timing) if (m_emit_timing)
{ {
emit_debug_function_exit(writer, node.get(), in, out); emit_debug_function_exit(writer, node.get(), in, out);
...@@ -888,35 +862,6 @@ using namespace ngraph::runtime; ...@@ -888,35 +862,6 @@ using namespace ngraph::runtime;
} }
} }
void runtime::cpu::CPU_ExternalFunction::handle_output_alias(
codegen::CodeWriter& writer,
const Node& node,
const unordered_map<descriptor::TensorView*, vector<size_t>>& output_alias_map)
{
for (const descriptor::Output& output : node.get_outputs())
{
shared_ptr<descriptor::TensorView> otv = output.get_tensor_view();
auto it = output_alias_map.find(otv.get());
if (it != output_alias_map.end())
{
const vector<size_t>& outputs = it->second;
if (outputs.size() > 1)
{
writer << "{ // handle output alias for previous op\n";
writer.indent++;
for (size_t i = 1; i < outputs.size(); i++)
{
writer << "memcpy(static_cast<void*>(outputs[" << outputs[i]
<< "]), static_cast<void*>(outputs[" << outputs[0] << "]), "
<< otv->get_tensor().size() << ");\n";
}
writer.indent--;
writer << "}\n";
}
}
}
}
shared_ptr<ngraph::runtime::CallFrame> runtime::cpu::CPU_ExternalFunction::make_call_frame() shared_ptr<ngraph::runtime::CallFrame> runtime::cpu::CPU_ExternalFunction::make_call_frame()
{ {
if (!m_is_compiled) if (!m_is_compiled)
......
...@@ -21,13 +21,14 @@ ...@@ -21,13 +21,14 @@
std::shared_ptr<ngraph::Node> std::shared_ptr<ngraph::Node>
ngraph::op::MatmulBias::copy_with_new_args(const NodeVector& new_args) const ngraph::op::MatmulBias::copy_with_new_args(const NodeVector& new_args) const
{ {
if (new_args.size() != 2) if (new_args.size() != 2 && new_args.size() != 3)
{ {
throw ngraph_error("Incorrect number of new arguments"); throw ngraph_error("Incorrect number of new arguments");
} }
return std::make_shared<MatmulBias>(new_args.at(0), return std::make_shared<MatmulBias>(new_args.at(0),
new_args.at(1), new_args.at(1),
new_args.at(1), new_args.size() == 3 ? new_args.at(2) : nullptr,
m_shape_w, m_shape_w,
m_shape_x, m_shape_x,
m_transpose_w, m_transpose_w,
...@@ -41,7 +42,9 @@ ngraph::op::MatmulBias::MatmulBias(std::shared_ptr<ngraph::Node> W, ...@@ -41,7 +42,9 @@ ngraph::op::MatmulBias::MatmulBias(std::shared_ptr<ngraph::Node> W,
Shape shape_x, Shape shape_x,
bool transpose_w, bool transpose_w,
bool transpose_x) bool transpose_x)
: RequiresTensorViewArgs("MatMulBias", {W, x, b}) : RequiresTensorViewArgs("MatMulBias",
b == nullptr ? std::vector<std::shared_ptr<Node>>{W, x}
: std::vector<std::shared_ptr<Node>>{W, x, b})
, m_shape_w(shape_w) , m_shape_w(shape_w)
, m_shape_x(shape_x) , m_shape_x(shape_x)
, m_transpose_w(transpose_w) , m_transpose_w(transpose_w)
...@@ -74,8 +77,12 @@ ngraph::op::MatmulBias::MatmulBias(std::shared_ptr<ngraph::Node> W, ...@@ -74,8 +77,12 @@ ngraph::op::MatmulBias::MatmulBias(std::shared_ptr<ngraph::Node> W,
} }
Shape dot_shape{shape_w.at(1 - dot_dimension_w), shape_x.at(1 - dot_dimension_x)}; Shape dot_shape{shape_w.at(1 - dot_dimension_w), shape_x.at(1 - dot_dimension_x)};
NGRAPH_DEBUG << "dot_shape shape = " << vector_to_string(dot_shape) NGRAPH_DEBUG << "dot_shape shape = " << vector_to_string(dot_shape);
<< " , b shape = " << vector_to_string(b->get_shape());
if (b)
{
NGRAPH_DEBUG << "b shape = " << vector_to_string(b->get_shape());
}
add_output(W->get_element_type(), dot_shape); add_output(W->get_element_type(), dot_shape);
} }
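With the change above, MatmulBias can be built with or without a bias term: passing nullptr for b registers only {W, x} as op inputs, which is how the fusion pass below constructs the bias-free variant before a later pattern attaches the bias. A hedged construction sketch (the matmul_bias.hpp include path is an assumption):

#include "ngraph/runtime/cpu/ops/matmul_bias.hpp" // path assumed

std::shared_ptr<ngraph::Node> matmul_bias_sketch(const std::shared_ptr<ngraph::Node>& W,
                                                 const std::shared_ptr<ngraph::Node>& x,
                                                 const std::shared_ptr<ngraph::Node>& bias)
{
    if (bias == nullptr)
    {
        // Bias-free form: only W and x become inputs of the op.
        return std::make_shared<ngraph::op::MatmulBias>(
            W, x, nullptr, W->get_shape(), x->get_shape(), false, false);
    }
    // With bias: W, x, and b are all inputs; the emitter adds the bias via the
    // memcpy + beta = 1.0f GEMM shown earlier in this diff.
    return std::make_shared<ngraph::op::MatmulBias>(
        W, x, bias, W->get_shape(), x->get_shape(), false, false);
}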
...@@ -27,9 +27,12 @@ ...@@ -27,9 +27,12 @@
#include "ngraph/ops/broadcast.hpp" #include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/broadcast.hpp" #include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/constant.hpp" #include "ngraph/ops/constant.hpp"
#include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/divide.hpp" #include "ngraph/ops/divide.hpp"
#include "ngraph/ops/dot.hpp" #include "ngraph/ops/dot.hpp"
#include "ngraph/ops/get_output_element.hpp"
#include "ngraph/ops/multiply.hpp" #include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/pad.hpp"
#include "ngraph/ops/parameter.hpp" #include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/reshape.hpp" #include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/sqrt.hpp" #include "ngraph/ops/sqrt.hpp"
...@@ -49,6 +52,12 @@ static bool init_cblas_arg(std::shared_ptr<ngraph::Node> reshape, ...@@ -49,6 +52,12 @@ static bool init_cblas_arg(std::shared_ptr<ngraph::Node> reshape,
if (!r_w) if (!r_w)
{ {
if (arg->get_shape().size() != 2)
{
NGRAPH_DEBUG << arg->get_name() << " 's rank != 2 "
<< ngraph::vector_to_string(arg->get_shape());
return false;
}
return true; // nothing to do; the "reshape" is not actually a reshape
} }
...@@ -106,7 +115,38 @@ static std::vector<T> apply_permutation(std::vector<T> input, ngraph::AxisVector ...@@ -106,7 +115,38 @@ static std::vector<T> apply_permutation(std::vector<T> input, ngraph::AxisVector
return output; return output;
} }
void ngraph::runtime::cpu::pass::CPUFusion::construct_gemm_pattern() void ngraph::runtime::cpu::pass::CPUFusion::construct_matmulbias_pattern()
{
Shape shape_w{2, 4};
Shape shape_x{4, 1};
Shape shape_b{1};
auto W = std::make_shared<pattern::op::Label>(element::f32, shape_w);
auto x = std::make_shared<pattern::op::Label>(element::f32, shape_x);
auto b = std::make_shared<pattern::op::Label>(element::f32, shape_b);
auto pmmb = std::make_shared<op::MatmulBias>(
W, x, nullptr, W->get_shape(), x->get_shape(), false, false);
auto pbroadcast = std::make_shared<op::Broadcast>(b, pmmb->get_shape(), AxisSet{0});
auto padd = pmmb + pbroadcast;
ngraph::pattern::gr_callback_fn callback = [W, x](pattern::Matcher& m) {
NGRAPH_DEBUG << "In callback for construct_matmulbias_pattern against node = "
<< m.match_root()->get_name();
auto mpattern = m.match_root(); //add
auto m_matmul = mpattern->get_input_op(0);
auto m_broadcast = mpattern->get_input_op(1);
auto pattern_map = m.get_pattern_map();
return m_matmul->copy_with_new_args(
NodeVector{pattern_map[W], pattern_map[x], m_broadcast});
};
auto m = std::make_shared<ngraph::pattern::Matcher>(padd, callback);
this->add_matcher(m);
}
void ngraph::runtime::cpu::pass::CPUFusion::construct_matmul_pattern()
{ {
Shape shape_w{2, 4}; Shape shape_w{2, 4};
Shape shape_x{4, 1}; Shape shape_x{4, 1};
...@@ -124,30 +164,34 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_gemm_pattern() ...@@ -124,30 +164,34 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_gemm_pattern()
auto skip_x = std::make_shared<pattern::op::Any>(x, reshape_pred); auto skip_x = std::make_shared<pattern::op::Any>(x, reshape_pred);
auto pdot = std::make_shared<op::Dot>(skip_w, skip_x); auto pdot = std::make_shared<op::Dot>(skip_w, skip_x);
auto b = std::make_shared<pattern::op::Label>(element::f32, shape_b);
auto pbroadcast = std::make_shared<op::Broadcast>(b, shape_dot, AxisSet{0});
auto padd = pdot + pbroadcast;
ngraph::pattern::gr_callback_fn callback = [W, x, b](pattern::Matcher& m) { ngraph::pattern::gr_callback_fn callback = [W, x](pattern::Matcher& m) {
NGRAPH_DEBUG << "In callback for construct_gemm_pattern against node = " NGRAPH_DEBUG << "In callback for construct_matmul_pattern against node = "
<< m.match_root()->get_name(); << m.match_root()->get_name();
auto pattern_map = m.get_pattern_map(); auto pattern_map = m.get_pattern_map();
std::shared_ptr<Node> nn = nullptr; std::shared_ptr<Node> nn;
auto mpattern = m.match_root(); auto mpattern = m.match_root();
auto dot = m.match_root();
if (mpattern->get_element_type() != element::f32) if (mpattern->get_element_type() != element::f32)
{ {
NGRAPH_DEBUG << "mpattern = " << mpattern->get_name() << " type is not float!"; NGRAPH_DEBUG << "mpattern = " << mpattern->get_name() << " type is not float!";
return nn; return nn;
} }
auto dot = mpattern->get_input_op(0);
if (dot->get_shape().size() != 2) if (dot->get_shape().size() != 2)
{ {
NGRAPH_DEBUG << "dot = " << dot->get_name() << " shape is not equal to 2!"; NGRAPH_DEBUG << "dot = " << dot->get_name() << " shape is not equal to 2!";
return nn; return nn;
} }
if (shape_size(dot->get_shape()) == 0)
{
NGRAPH_DEBUG << "dot has a zero dimension";
return nn;
}
bool transpose_w = false; bool transpose_w = false;
Shape shape_arg0{pattern_map[W]->get_shape()}; Shape shape_arg0{pattern_map[W]->get_shape()};
if (!init_cblas_arg(dot->get_input_op(0), pattern_map[W], transpose_w, shape_arg0)) if (!init_cblas_arg(dot->get_input_op(0), pattern_map[W], transpose_w, shape_arg0))
...@@ -164,7 +208,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_gemm_pattern() ...@@ -164,7 +208,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_gemm_pattern()
auto cg = std::shared_ptr<Node>(new op::MatmulBias(pattern_map[W], auto cg = std::shared_ptr<Node>(new op::MatmulBias(pattern_map[W],
pattern_map[x], pattern_map[x],
mpattern->get_input_op(1), nullptr,
shape_arg0, shape_arg0,
shape_arg1, shape_arg1,
transpose_w, transpose_w,
...@@ -172,7 +216,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_gemm_pattern() ...@@ -172,7 +216,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_gemm_pattern()
return cg; return cg;
}; };
auto m = std::make_shared<ngraph::pattern::Matcher>(padd, callback); auto m = std::make_shared<ngraph::pattern::Matcher>(pdot, callback);
this->add_matcher(m); this->add_matcher(m);
} }
...@@ -258,16 +302,213 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_fprop_bn() ...@@ -258,16 +302,213 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_fprop_bn()
// get epsilon value // get epsilon value
auto eps_ptr = std::dynamic_pointer_cast<op::Constant>(pattern_map[eps_label]); auto eps_ptr = std::dynamic_pointer_cast<op::Constant>(pattern_map[eps_label]);
double epsilon = *(reinterpret_cast<const double*>(eps_ptr->get_data_ptr())); double epsilon = *(reinterpret_cast<const double*>(eps_ptr->get_data_ptr()));
auto bn_node = std::shared_ptr<Node>(new op::BatchNorm(epsilon, auto bn_node = std::make_shared<op::BatchNorm>(
pattern_map[gamma_label], epsilon, pattern_map[gamma_label], pattern_map[beta_label], pattern_map[input]);
pattern_map[beta_label],
pattern_map[input], auto normalized_output = std::shared_ptr<Node>(new op::GetOutputElement(bn_node, 0));
pattern_map[mean_label],
pattern_map[variance_label])); return normalized_output;
return bn_node;
}; };
auto m = std::make_shared<ngraph::pattern::Matcher>(add_beta, callback); auto m = std::make_shared<ngraph::pattern::Matcher>(add_beta, callback);
this->add_matcher(m); this->add_matcher(m);
} }
static bool
zero_padded_conv_consistency_check(const std::shared_ptr<ngraph::Node>& match_root,
const std::shared_ptr<ngraph::op::Constant>& pad_value_op,
const std::shared_ptr<ngraph::Node>& pad_input,
const std::shared_ptr<ngraph::op::Pad>& matched_pad,
const std::shared_ptr<ngraph::op::Convolution>& matched_conv,
size_t batch_index,
size_t channel_index)
{
// Only match float32 convolutions
if (match_root->get_element_type() != ngraph::element::f32)
{
return false;
}
// Only match zero padding
if (pad_value_op->get_vector<float>().at(0) != 0.0f)
{
return false;
}
// Only match 4D tensors
if (pad_input->get_shape().size() != 4)
{
return false;
}
// Only match no interior padding
if (matched_pad->get_padding_interior() != ngraph::Shape(pad_input->get_shape().size()))
{
return false;
}
// Only match convolutions with no padding specification
if (matched_conv->get_padding_below() != ngraph::CoordinateDiff(2) ||
matched_conv->get_padding_above() != ngraph::CoordinateDiff(2))
{
return false;
}
// Only match no padding in the batch dimension
if (matched_pad->get_padding_above().at(batch_index) != 0 ||
matched_pad->get_padding_below().at(batch_index) != 0)
{
return false;
}
// Only match no padding in the channel dimension
if (matched_pad->get_padding_above().at(channel_index) != 0 ||
matched_pad->get_padding_below().at(channel_index) != 0)
{
return false;
}
return true;
}
void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_reshaped_conv()
{
auto pad_input = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto pad_value = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto pad = std::make_shared<op::Pad>(pad_input, pad_value, Shape{}, Shape{}, Shape{});
auto pad_label = std::make_shared<pattern::op::Label>(pad, nullptr, NodeVector{pad});
auto reshape = std::make_shared<op::Reshape>(pad_label, AxisVector{}, Shape{1, 1, 1, 1});
auto reshape_label =
std::make_shared<pattern::op::Label>(reshape, nullptr, NodeVector{reshape});
auto conv_filter = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 1, 1, 1});
auto conv = std::make_shared<op::Convolution>(reshape_label,
conv_filter,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{1, 1},
CoordinateDiff{1, 1},
Strides{1, 1});
auto conv_label = std::make_shared<pattern::op::Label>(conv, nullptr, NodeVector{conv});
ngraph::pattern::gr_callback_fn callback =
[pad_input, pad_value, pad_label, reshape_label, conv_filter, conv_label](
pattern::Matcher& m) -> std::shared_ptr<Node> {
auto pattern_map = m.get_pattern_map();
auto pad_value_op = std::dynamic_pointer_cast<op::Constant>(pattern_map[pad_value]);
const auto& matched_conv =
std::dynamic_pointer_cast<op::Convolution>(pattern_map[conv_label]);
const auto& matched_pad = std::dynamic_pointer_cast<op::Pad>(pattern_map[pad_label]);
const auto& matched_reshape =
std::dynamic_pointer_cast<op::Reshape>(pattern_map[reshape_label]);
const auto& input_order = matched_reshape->get_input_order();
auto hoisted_reshape_output_shape =
apply_permutation<Shape::value_type>(pattern_map[pad_input]->get_shape(), input_order);
auto hoisted_reshape = std::make_shared<op::Reshape>(
pattern_map[pad_input],
input_order,
Shape(hoisted_reshape_output_shape.begin(), hoisted_reshape_output_shape.end()));
if (!zero_padded_conv_consistency_check(m.match_root(),
pad_value_op,
pattern_map[pad_input],
matched_pad,
matched_conv,
input_order[0],
input_order[1]))
{
return nullptr;
}
CoordinateDiff padding_below{static_cast<CoordinateDiff::value_type>(
matched_pad->get_padding_below().at(input_order[2])),
static_cast<CoordinateDiff::value_type>(
matched_pad->get_padding_below().at(input_order[3]))};
CoordinateDiff padding_above{static_cast<CoordinateDiff::value_type>(
matched_pad->get_padding_above().at(input_order[2])),
static_cast<CoordinateDiff::value_type>(
matched_pad->get_padding_above().at(input_order[3]))};
auto zero_padded_conv =
std::make_shared<op::Convolution>(hoisted_reshape,
pattern_map[conv_filter],
matched_conv->get_window_movement_strides(),
matched_conv->get_window_dilation_strides(),
padding_below,
padding_above,
matched_conv->get_data_dilation_strides());
return zero_padded_conv;
};
this->add_matcher(std::make_shared<ngraph::pattern::Matcher>(conv_label, callback));
}
void ngraph::runtime::cpu::pass::CPUFusion::construct_zero_padded_conv()
{
auto pad_input = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 1, 1, 1});
auto pad_value = std::make_shared<pattern::op::Label>(element::f32, Shape{});
auto pad = std::make_shared<op::Pad>(
pad_input, pad_value, Shape{0, 0, 0, 0}, Shape{0, 0, 0, 0}, Shape{0, 0, 0, 0});
auto pad_label = std::make_shared<pattern::op::Label>(pad, nullptr, NodeVector{pad});
auto conv_filter = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 1, 1, 1});
auto conv = std::make_shared<op::Convolution>(pad_label,
conv_filter,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{1, 1},
CoordinateDiff{1, 1},
Strides{1, 1});
auto conv_label = std::make_shared<pattern::op::Label>(conv, nullptr, NodeVector{conv});
ngraph::pattern::gr_callback_fn callback =
[pad_input, pad_value, pad_label, conv_filter, conv_label](
pattern::Matcher& m) -> std::shared_ptr<Node> {
auto pattern_map = m.get_pattern_map();
auto pad_value_op = std::dynamic_pointer_cast<op::Constant>(pattern_map[pad_value]);
const auto& matched_conv =
std::dynamic_pointer_cast<op::Convolution>(pattern_map[conv_label]);
const auto& matched_pad = std::dynamic_pointer_cast<op::Pad>(pattern_map[pad_label]);
if (!zero_padded_conv_consistency_check(m.match_root(),
pad_value_op,
pattern_map[pad_input],
matched_pad,
matched_conv,
0,
1))
{
return nullptr;
}
CoordinateDiff padding_below{
static_cast<CoordinateDiff::value_type>(matched_pad->get_padding_below().at(2)),
static_cast<CoordinateDiff::value_type>(matched_pad->get_padding_below().at(3))};
CoordinateDiff padding_above{
static_cast<CoordinateDiff::value_type>(matched_pad->get_padding_above().at(2)),
static_cast<CoordinateDiff::value_type>(matched_pad->get_padding_above().at(3))};
auto zero_padded_conv =
std::make_shared<op::Convolution>(pattern_map[pad_input],
pattern_map[conv_filter],
matched_conv->get_window_movement_strides(),
matched_conv->get_window_dilation_strides(),
padding_below,
padding_above,
matched_conv->get_data_dilation_strides());
return zero_padded_conv;
};
this->add_matcher(std::make_shared<ngraph::pattern::Matcher>(conv_label, callback));
}
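
Taken together, these two matchers fold an explicit zero Pad (optionally followed by a Reshape) into the Convolution's own padding attributes, so the CPU backend never materializes the padded tensor. A minimal before/after sketch, assuming the same 1x1x2x2 input and 1x1x1x1 filter used in the zero_padded_conv test later in this change:

// Before fusion: an explicit zero Pad feeds a Convolution that carries no padding of its own.
auto X = std::make_shared<op::Parameter>(element::f32, Shape{1, 1, 2, 2});
auto F = std::make_shared<op::Parameter>(element::f32, Shape{1, 1, 1, 1});
auto zero = op::Constant::create<float>(element::f32, Shape{}, std::vector<float>{0.0f});
auto pad = std::make_shared<op::Pad>(
    X, zero, Shape{0, 0, 0, 1}, Shape{0, 0, 1, 0}, Shape{0, 0, 0, 0});
auto conv = std::make_shared<op::Convolution>(pad,
                                              F,
                                              Strides{1, 1},
                                              Strides{1, 1},
                                              CoordinateDiff{0, 0},
                                              CoordinateDiff{0, 0},
                                              Strides{1, 1});

// After fusion: the Pad is gone and its spatial amounts become the convolution's padding.
auto fused = std::make_shared<op::Convolution>(X,
                                               F,
                                               Strides{1, 1},
                                               Strides{1, 1},
                                               CoordinateDiff{0, 1},
                                               CoordinateDiff{1, 0},
                                               Strides{1, 1});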
...@@ -38,11 +38,17 @@ public: ...@@ -38,11 +38,17 @@ public:
CPUFusion() CPUFusion()
: GraphRewrite() : GraphRewrite()
{ {
construct_gemm_pattern(); construct_matmul_pattern();
construct_matmulbias_pattern();
construct_fprop_bn(); construct_fprop_bn();
construct_zero_padded_reshaped_conv();
construct_zero_padded_conv();
} }
private: private:
void construct_gemm_pattern(); void construct_matmul_pattern();
void construct_matmulbias_pattern();
void construct_fprop_bn(); void construct_fprop_bn();
void construct_zero_padded_reshaped_conv();
void construct_zero_padded_conv();
}; };
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "ngraph/ops/convolution.hpp" #include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/op.hpp" #include "ngraph/ops/op.hpp"
#include "ngraph/ops/relu.hpp" #include "ngraph/ops/relu.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp" #include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp" #include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
...@@ -629,6 +630,16 @@ namespace ngraph ...@@ -629,6 +630,16 @@ namespace ngraph
} }
} }
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::Result)
{
auto input_layout =
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node.get(), 0);
vector<memory::format> prim_output_formats;
prim_output_formats.push_back(input_layout);
set_output_layouts(node, prim_output_formats);
}
template <> template <>
void CPULayout::LAYOUT_DECL(ngraph::op::Relu) void CPULayout::LAYOUT_DECL(ngraph::op::Relu)
{ {
...@@ -699,6 +710,7 @@ namespace ngraph ...@@ -699,6 +710,7 @@ namespace ngraph
#define TI(x) type_index(typeid(x)) #define TI(x) type_index(typeid(x))
static const runtime::cpu::pass::LayoutOpMap s_dispatcher{ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
{TI(ngraph::op::Add), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Add>},
{TI(ngraph::op::Convolution), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Convolution>}, {TI(ngraph::op::Convolution), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Convolution>},
{TI(ngraph::op::ConvolutionBackpropData), {TI(ngraph::op::ConvolutionBackpropData),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::ConvolutionBackpropData>}, &runtime::cpu::pass::CPULayout::layout<ngraph::op::ConvolutionBackpropData>},
...@@ -708,6 +720,7 @@ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{ ...@@ -708,6 +720,7 @@ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
{TI(ngraph::op::AvgPoolBackprop), {TI(ngraph::op::AvgPoolBackprop),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPoolBackprop>}, &runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPoolBackprop>},
{TI(ngraph::op::Relu), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Relu>}, {TI(ngraph::op::Relu), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Relu>},
{TI(ngraph::op::Result), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Result>},
{TI(ngraph::op::ReluBackprop), {TI(ngraph::op::ReluBackprop),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::ReluBackprop>}, &runtime::cpu::pass::CPULayout::layout<ngraph::op::ReluBackprop>},
}; };
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <memory>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
#include "cpu_nop_elimination.hpp"
#include "ngraph/ops/pad.hpp"
#define TI(x) std::type_index(typeid(x))
#define HANDLER_DECL(x) \
static bool x(const std::shared_ptr<ngraph::Function>& function, \
const std::shared_ptr<ngraph::Node>& node)
HANDLER_DECL(eliminate_pad)
{
auto pad = std::dynamic_pointer_cast<ngraph::op::Pad>(node);
if (pad->get_input_shape(0) == pad->get_output_shape(0))
{
function->replace_node(node, node->get_input_op(0));
return true;
}
return false;
}
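
The handler only inspects shapes: when every below/above/interior amount of a Pad is zero, its output shape equals its input shape and the node contributes nothing. A small sketch of a node this handler removes, constructed here purely for illustration:

// All padding amounts are zero, so this Pad's output shape equals its input shape
// and eliminate_pad replaces the node with its input.
auto x = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{2, 3});
auto zero = ngraph::op::Constant::create<float>(
    ngraph::element::f32, ngraph::Shape{}, std::vector<float>{0.0f});
auto noop_pad = std::make_shared<ngraph::op::Pad>(
    x, zero, ngraph::Shape{0, 0}, ngraph::Shape{0, 0}, ngraph::Shape{0, 0});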
static const std::unordered_map<std::type_index,
std::function<bool(const std::shared_ptr<ngraph::Function>&,
const std::shared_ptr<ngraph::Node>&)>>
dispatcher{{TI(ngraph::op::Pad), &eliminate_pad}};
bool ngraph::runtime::cpu::pass::CPUNopElimination::run_on_function(
std::shared_ptr<ngraph::Function> function)
{
bool clobbered = false;
for (const auto& n : function->get_ops())
{
// Work around a warning [-Wpotentially-evaluated-expression]
const Node& node = *n;
auto handler = dispatcher.find(TI(node));
if (handler != dispatcher.end())
{
clobbered = handler->second(function, n) || clobbered;
}
}
return clobbered;
}
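
Like CPUFusion, the new pass is meant to be registered with the pass manager before compilation; a minimal usage sketch, assuming the header is installed as ngraph/runtime/cpu/pass/cpu_nop_elimination.hpp alongside cpu_fusion.hpp:

#include "ngraph/pass/manager.hpp"
#include "ngraph/runtime/cpu/pass/cpu_nop_elimination.hpp"

void run_nop_elimination(std::shared_ptr<ngraph::Function> func)
{
    ngraph::pass::Manager pass_manager;
    pass_manager.register_pass<ngraph::runtime::cpu::pass::CPUNopElimination>();
    // Drops Pad nodes whose output shape matches their input shape.
    pass_manager.run_passes(func);
}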
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/pass/pass.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace pass
{
class CPUNopElimination : public ngraph::pass::FunctionPass
{
public:
bool run_on_function(std::shared_ptr<ngraph::Function> function) override;
};
}
}
}
}
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <cstdlib> #include <cstdlib>
#include <iomanip> #include <iomanip>
#include "ngraph/ops/result.hpp"
#include "ngraph/runtime/host_tensor_view.hpp" #include "ngraph/runtime/host_tensor_view.hpp"
#include "ngraph/runtime/interpreter/int_call_frame.hpp" #include "ngraph/runtime/interpreter/int_call_frame.hpp"
...@@ -52,32 +53,17 @@ void runtime::interpreter::INT_CallFrame::call( ...@@ -52,32 +53,17 @@ void runtime::interpreter::INT_CallFrame::call(
tensor_map.insert({tv, input_tvs[arg_index++]}); tensor_map.insert({tv, input_tvs[arg_index++]});
} }
} }
std::vector<size_t> aliased_outputs;
for (size_t i = 0; i < output_tvs.size(); i++) for (size_t i = 0; i < function->get_output_size(); i++)
{
shared_ptr<Node> op = function->get_output_op(i);
descriptor::TensorView* tv = op->get_output_tensor_view(0).get();
string name = tv->get_tensor().get_name();
if (contains_key(tensor_map, tv))
{ {
if (op->description() == "Parameter") auto output_op = function->get_output_op(i);
if (!std::dynamic_pointer_cast<op::Result>(output_op))
{ {
// Here we handle the special case where an output is just a copy of an input throw ngraph_error("One of function's outputs isn't op::Result");
memcpy(output_tvs[i]->get_data_ptr(),
tensor_map.at(tv)->get_data_ptr(),
tv->get_tensor().size());
} }
else descriptor::TensorView* tv = function->get_output_op(i)->get_output_tensor_view(0).get();
{
// This is a computed value returned more than once and will need to be copied at the end
aliased_outputs.push_back(i);
}
}
else
{
tensor_map.insert({tv, output_tvs[i]}); tensor_map.insert({tv, output_tvs[i]});
} }
}
// Invoke computation // Invoke computation
for (shared_ptr<Node> op : function->get_ordered_ops()) for (shared_ptr<Node> op : function->get_ordered_ops())
...@@ -163,29 +149,6 @@ void runtime::interpreter::INT_CallFrame::call( ...@@ -163,29 +149,6 @@ void runtime::interpreter::INT_CallFrame::call(
} }
} }
} }
for (size_t i : aliased_outputs)
{
shared_ptr<Node> op = function->get_output_op(i);
size_t first_output;
for (first_output = 0; first_output <= i; ++first_output)
{
if (function->get_output_op(first_output) == op)
{
break;
}
}
if (first_output == i)
{
throw ngraph_error("Internal error: duplicate output missing");
}
descriptor::TensorView* tv = op->get_output_tensor_view(0).get();
string name = tv->get_tensor().get_name();
// Here we handle the special case where an output is just a copy of an input
memcpy(output_tvs[i]->get_data_ptr(),
output_tvs[first_output]->get_data_ptr(),
tv->get_tensor().size());
}
} }
void runtime::interpreter::INT_CallFrame::generate_calls( void runtime::interpreter::INT_CallFrame::generate_calls(
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "ngraph/ops/reduce_window.hpp" #include "ngraph/ops/reduce_window.hpp"
#include "ngraph/ops/replace_slice.hpp" #include "ngraph/ops/replace_slice.hpp"
#include "ngraph/ops/reshape.hpp" #include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/ops/reverse.hpp" #include "ngraph/ops/reverse.hpp"
#include "ngraph/ops/select_and_scatter.hpp" #include "ngraph/ops/select_and_scatter.hpp"
#include "ngraph/ops/slice.hpp" #include "ngraph/ops/slice.hpp"
...@@ -89,6 +90,7 @@ ...@@ -89,6 +90,7 @@
#include "ngraph/runtime/kernel/relu.hpp" #include "ngraph/runtime/kernel/relu.hpp"
#include "ngraph/runtime/kernel/replace_slice.hpp" #include "ngraph/runtime/kernel/replace_slice.hpp"
#include "ngraph/runtime/kernel/reshape.hpp" #include "ngraph/runtime/kernel/reshape.hpp"
#include "ngraph/runtime/kernel/result.hpp"
#include "ngraph/runtime/kernel/reverse.hpp" #include "ngraph/runtime/kernel/reverse.hpp"
#include "ngraph/runtime/kernel/select.hpp" #include "ngraph/runtime/kernel/select.hpp"
#include "ngraph/runtime/kernel/select_and_scatter.hpp" #include "ngraph/runtime/kernel/select_and_scatter.hpp"
...@@ -720,6 +722,13 @@ private: ...@@ -720,6 +722,13 @@ private:
reshape->get_input_order(), reshape->get_input_order(),
out[0]->get_shape()); out[0]->get_shape());
} }
else if (node_op == "Result")
{
ngraph::op::Result* res = dynamic_cast<ngraph::op::Result*>(&node);
kernel::result(reinterpret_cast<T*>(args[0]->get_data_ptr()),
reinterpret_cast<T*>(out[0]->get_data_ptr()),
shape_size(res->get_shape()));
}
else if (node_op == "Reverse") else if (node_op == "Reverse")
{ {
ngraph::op::Reverse* reverse = dynamic_cast<ngraph::op::Reverse*>(&node); ngraph::op::Reverse* reverse = dynamic_cast<ngraph::op::Reverse*>(&node);
......
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#pragma once
#include <algorithm>
#include <cmath>
#include <cstring>
#include <numeric>
#include <vector>
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace kernel
{
template <typename T>
void result(T* arg, T* out, size_t count)
{
memcpy(out, arg, sizeof(T) * count);
}
}
}
}
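
The Result kernel is a straight element-wise copy from its argument buffer to its output buffer. A small usage sketch with a hypothetical 2x3 float tensor:

// Copy all six elements; the count is derived from the tensor shape.
std::vector<float> arg(6, 1.0f);
std::vector<float> out(6);
ngraph::runtime::kernel::result(arg.data(), out.data(),
                                ngraph::shape_size(ngraph::Shape{2, 3}));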
...@@ -64,6 +64,7 @@ ...@@ -64,6 +64,7 @@
#include "ngraph/ops/remainder.hpp" #include "ngraph/ops/remainder.hpp"
#include "ngraph/ops/replace_slice.hpp" #include "ngraph/ops/replace_slice.hpp"
#include "ngraph/ops/reshape.hpp" #include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/ops/reverse.hpp" #include "ngraph/ops/reverse.hpp"
#include "ngraph/ops/select.hpp" #include "ngraph/ops/select.hpp"
#include "ngraph/ops/select_and_scatter.hpp" #include "ngraph/ops/select_and_scatter.hpp"
...@@ -327,7 +328,7 @@ static shared_ptr<ngraph::Function> ...@@ -327,7 +328,7 @@ static shared_ptr<ngraph::Function>
else if (node_op == "BatchNorm") else if (node_op == "BatchNorm")
{ {
auto epsilon = node_js.at("eps").get<double>(); auto epsilon = node_js.at("eps").get<double>();
node = make_shared<op::BatchNorm>(epsilon, args[0], args[1], args[2], args[3], args[4]); node = make_shared<op::BatchNorm>(epsilon, args[0], args[1], args[2]);
} }
else if (node_op == "BatchNormBackprop") else if (node_op == "BatchNormBackprop")
{ {
...@@ -667,6 +668,10 @@ static shared_ptr<ngraph::Function> ...@@ -667,6 +668,10 @@ static shared_ptr<ngraph::Function>
auto output_shape = node_js.at("output_shape").get<vector<size_t>>(); auto output_shape = node_js.at("output_shape").get<vector<size_t>>();
node = make_shared<op::Reshape>(args[0], input_order, output_shape); node = make_shared<op::Reshape>(args[0], input_order, output_shape);
} }
else if (node_op == "Result")
{
node = make_shared<op::Result>(args[0]);
}
else if (node_op == "Reverse") else if (node_op == "Reverse")
{ {
auto reversed_axes = node_js.at("reversed_axes").get<set<size_t>>(); auto reversed_axes = node_js.at("reversed_axes").get<set<size_t>>();
...@@ -1061,6 +1066,9 @@ static json write(const Node& n) ...@@ -1061,6 +1066,9 @@ static json write(const Node& n)
node["input_order"] = tmp->get_input_order(); node["input_order"] = tmp->get_input_order();
node["output_shape"] = tmp->get_output_shape(); node["output_shape"] = tmp->get_output_shape();
} }
else if (node_op == "Result")
{
}
else if (node_op == "Reverse") else if (node_op == "Reverse")
{ {
auto tmp = dynamic_cast<const op::Reverse*>(&n); auto tmp = dynamic_cast<const op::Reverse*>(&n);
......
...@@ -25,9 +25,12 @@ ...@@ -25,9 +25,12 @@
#include "ngraph/graph_util.hpp" #include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp" #include "ngraph/log.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/ops/result_vector.hpp"
#include "ngraph/runtime/backend.hpp" #include "ngraph/runtime/backend.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
#include <iostream>
using namespace std; using namespace std;
std::string ngraph::to_cplusplus_sourcecode_literal(bool val) std::string ngraph::to_cplusplus_sourcecode_literal(bool val)
...@@ -239,10 +242,21 @@ ngraph::FpropCache ngraph::cache_fprop(std::shared_ptr<ngraph::Function> fprop, ...@@ -239,10 +242,21 @@ ngraph::FpropCache ngraph::cache_fprop(std::shared_ptr<ngraph::Function> fprop,
} }
// create the new outputs for fprop and the new fprop function // create the new outputs for fprop and the new fprop function
NodeVector fprop_outputs{fprop->get_results()}; ResultVector fprop_outputs;
fprop_outputs.insert(fprop_outputs.end(),
fprop_cache.fprop_output_nodes.begin(), for (auto fpr : fprop->get_results())
fprop_cache.fprop_output_nodes.end()); {
fprop_outputs.push_back(fpr);
}
for (auto fpir : fprop_cache.fprop_output_nodes)
{
if (std::dynamic_pointer_cast<op::Result>(fpir))
{
throw ngraph_error("Expected op::Result in fprop->get_results()");
}
fprop_outputs.push_back(std::make_shared<op::Result>(fpir));
}
fprop_cache.fprop = std::make_shared<Function>(fprop_outputs, fprop->get_parameters()); fprop_cache.fprop = std::make_shared<Function>(fprop_outputs, fprop->get_parameters());
...@@ -251,10 +265,15 @@ ngraph::FpropCache ngraph::cache_fprop(std::shared_ptr<ngraph::Function> fprop, ...@@ -251,10 +265,15 @@ ngraph::FpropCache ngraph::cache_fprop(std::shared_ptr<ngraph::Function> fprop,
ngraph::clone_nodes(bprop->get_ops(), node_param_map); ngraph::clone_nodes(bprop->get_ops(), node_param_map);
// get cloned bprop results // get cloned bprop results
NodeVector cloned_results; ResultVector cloned_results;
for (auto node : bprop->get_results()) for (auto node : bprop->get_results())
{ {
cloned_results.push_back(node_param_map.get(node)); auto result = std::dynamic_pointer_cast<op::Result>(node_param_map.get(node));
if (!result)
{
throw ngraph_error("Expected op::Result values for op::Result keys in node_param_map");
}
cloned_results.push_back(result);
} }
// get clone bprop parameters // get clone bprop parameters
......
...@@ -40,7 +40,7 @@ TEST(build_graph, build_simple) ...@@ -40,7 +40,7 @@ TEST(build_graph, build_simple)
auto cluster_0 = make_shared<Function>(dot, op::ParameterVector{arg0, arg1, arg2, arg3}); auto cluster_0 = make_shared<Function>(dot, op::ParameterVector{arg0, arg1, arg2, arg3});
ASSERT_EQ(cluster_0->get_output_op(0), dot); ASSERT_EQ(cluster_0->get_output_op(0)->get_input_op(0), dot);
} }
// Check node comparisons // Check node comparisons
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "ngraph/log.hpp" #include "ngraph/log.hpp"
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "ngraph/ops/batch_norm.hpp" #include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/get_output_element.hpp"
#include "ngraph/ops/sum.hpp" #include "ngraph/ops/sum.hpp"
#include "ngraph/pass/graph_rewrite.hpp" #include "ngraph/pass/graph_rewrite.hpp"
#include "ngraph/pass/manager.hpp" #include "ngraph/pass/manager.hpp"
...@@ -133,6 +134,42 @@ TEST(cpu_fusion, gemm_cpu) ...@@ -133,6 +134,42 @@ TEST(cpu_fusion, gemm_cpu)
ASSERT_TRUE(read_vector<float>(result) == expected); ASSERT_TRUE(read_vector<float>(result) == expected);
} }
TEST(cpu_fusion, gemm_cpu_no_bias)
{
auto shapeA = Shape{3, 2};
auto shapeB = Shape{2, 3};
auto shapeC = Shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shapeA);
auto B = make_shared<op::Parameter>(element::f32, shapeB);
auto reshape_w = make_shared<op::Reshape>(A, AxisVector{1, 0}, Shape{2, 3});
auto reshape_x = make_shared<op::Reshape>(B, AxisVector{1, 0}, Shape{3, 2});
auto cg =
make_shared<op::MatmulBias>(A, B, nullptr, A->get_shape(), B->get_shape(), true, true);
auto f = make_shared<Function>(cg, op::ParameterVector{A, B});
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
shared_ptr<runtime::TensorView> a = backend->make_primary_tensor_view(element::f32, shapeA);
shared_ptr<runtime::TensorView> b = backend->make_primary_tensor_view(element::f32, shapeB);
shared_ptr<runtime::TensorView> result =
backend->make_primary_tensor_view(element::f32, shapeC);
vector<float> dataA{1.0f, 4.0f, 1.0f, 4.0f, 1.0f, 4.0f};
vector<float> dataB{3.0f, 3.0f, 3.0f, 9.0f, 9.0f, 9.0f};
copy_data(a, dataA);
copy_data(b, dataB);
cf->call({a, b}, {result});
vector<float> expected{9, 27, 36, 108};
ASSERT_TRUE(read_vector<float>(result) == expected);
}
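
For reference, the expected values follow from the fused op applying both transpose flags, i.e. computing A^T * B^T (a worked check, not part of the test itself):

// A (3x2) = [[1,4],[1,4],[1,4]]      =>  A^T (2x3) = [[1,1,1],[4,4,4]]
// B (2x3) = [[3,3,3],[9,9,9]]        =>  B^T (3x2) = [[3,9],[3,9],[3,9]]
// A^T * B^T (2x2) = [[9,27],[36,108]]  =>  row-major {9, 27, 36, 108}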
TEST(cpu_fusion, cpu_fusion_pass_basic) TEST(cpu_fusion, cpu_fusion_pass_basic)
{ {
Shape shape{}; Shape shape{};
...@@ -154,6 +191,50 @@ TEST(cpu_fusion, cpu_fusion_pass_basic) ...@@ -154,6 +191,50 @@ TEST(cpu_fusion, cpu_fusion_pass_basic)
ASSERT_NE(std::dynamic_pointer_cast<op::MatmulBias>(graph->get_input_op(0)), nullptr); ASSERT_NE(std::dynamic_pointer_cast<op::MatmulBias>(graph->get_input_op(0)), nullptr);
} }
TEST(cpu_fusion, cpu_fusion_pass_matmul_bias)
{
Shape shape_w{2, 4};
Shape shape_x{4, 1};
Shape shape_b{1};
auto W = make_shared<op::Parameter>(element::f32, shape_w);
auto x = make_shared<op::Parameter>(element::f32, shape_x);
auto b = make_shared<op::Parameter>(element::f32, shape_b);
auto mmb = std::make_shared<op::MatmulBias>(
W, x, nullptr, W->get_shape(), x->get_shape(), false, false);
auto broadcast = std::make_shared<op::Broadcast>(b, mmb->get_shape(), AxisSet{0});
auto add = mmb + broadcast;
auto graph = make_shared<op::Abs>(add);
pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
auto func = make_shared<Function>(graph, op::ParameterVector{W, x, b});
pass_manager.run_passes(func);
auto gmm = graph->get_input_op(0);
ASSERT_TRUE(std::dynamic_pointer_cast<op::MatmulBias>(gmm));
ASSERT_EQ(gmm->get_input_op(2), broadcast);
}
TEST(cpu_fusion, cpu_fusion_pass_matmul_no_bias)
{
Shape shape_w{4, 2};
Shape shape_x{1, 4};
auto W = make_shared<op::Parameter>(element::f32, shape_w);
auto x = make_shared<op::Parameter>(element::f32, shape_x);
auto reshape_w = std::make_shared<op::Reshape>(W, AxisVector{1, 0}, Shape{2, 4});
auto reshape_x = std::make_shared<op::Reshape>(x, AxisVector{1, 0}, Shape{4, 1});
auto re_dot = make_shared<op::Dot>(reshape_w, reshape_x);
auto graph = make_shared<op::Abs>(re_dot);
pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
auto func = make_shared<Function>(graph, op::ParameterVector{W, x});
pass_manager.run_passes(func);
size_t mmb = count_ops_of_type<op::MatmulBias>(func);
ASSERT_EQ(mmb, 1);
}
TEST(cpu_fusion, gemm_mlp) TEST(cpu_fusion, gemm_mlp)
{ {
const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json"); const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json");
...@@ -163,8 +244,8 @@ TEST(cpu_fusion, gemm_mlp) ...@@ -163,8 +244,8 @@ TEST(cpu_fusion, gemm_mlp)
pass::Manager pass_manager; pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>(); pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
pass_manager.run_passes(func); pass_manager.run_passes(func);
size_t ccg = count_ops_of_type<op::MatmulBias>(func); size_t mmb = count_ops_of_type<op::MatmulBias>(func);
ASSERT_EQ(ccg, 3); ASSERT_EQ(mmb, 3);
} }
//TODO: Move this test to backend_test.in.cpp once we have the INTERPRETER //TODO: Move this test to backend_test.in.cpp once we have the INTERPRETER
...@@ -174,18 +255,21 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2) ...@@ -174,18 +255,21 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2)
auto input_shape = Shape{1, 2, 2, 2}; auto input_shape = Shape{1, 2, 2, 2};
auto input = make_shared<op::Parameter>(element::f32, input_shape); auto input = make_shared<op::Parameter>(element::f32, input_shape);
auto mean_shape = Shape{2}; auto mean_shape = Shape{2};
auto mean = make_shared<op::Parameter>(element::f32, mean_shape);
auto var_shape = Shape{2}; auto var_shape = Shape{2};
auto var = make_shared<op::Parameter>(element::f32, var_shape);
auto gamma_shape = Shape{2}; auto gamma_shape = Shape{2};
auto gamma = make_shared<op::Parameter>(element::f32, gamma_shape); auto gamma = make_shared<op::Parameter>(element::f32, gamma_shape);
auto beta_shape = Shape{2}; auto beta_shape = Shape{2};
auto beta = make_shared<op::Parameter>(element::f32, beta_shape); auto beta = make_shared<op::Parameter>(element::f32, beta_shape);
double eps = 0.001; double eps = 0.001;
auto shape_r = Shape{1, 2, 2, 2}; auto shape_r = Shape{1, 2, 2, 2};
auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input, mean, var); auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input);
auto f = make_shared<Function>(bn, op::ParameterVector{mean, var, input, gamma, beta}); auto output_rt = std::make_shared<op::GetOutputElement>(bn, 0);
auto mean_rt = std::make_shared<op::GetOutputElement>(bn, 1);
auto variance_rt = std::make_shared<op::GetOutputElement>(bn, 2);
auto f = make_shared<Function>(NodeVector{output_rt, mean_rt, variance_rt},
op::ParameterVector{input, gamma, beta});
auto manager = runtime::Manager::get("CPU"); auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(f); auto external = manager->compile(f);
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
...@@ -203,15 +287,13 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2) ...@@ -203,15 +287,13 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2)
0.64589411f, 0.64589411f,
0.4375872f, 0.4375872f,
0.89177299f}); 0.89177299f});
auto _mean = backend->make_primary_tensor_view(element::f32, mean_shape);
copy_data(_mean, vector<float>{0.60291237f, 0.59972727f});
auto _var = backend->make_primary_tensor_view(element::f32, var_shape);
copy_data(_var, vector<float>{0.00472505f, 0.03617825f});
auto _gamma = backend->make_primary_tensor_view(element::f32, gamma_shape); auto _gamma = backend->make_primary_tensor_view(element::f32, gamma_shape);
copy_data(_gamma, vector<float>{1.0f, 1.0f}); copy_data(_gamma, vector<float>{1.0f, 1.0f});
auto _beta = backend->make_primary_tensor_view(element::f32, beta_shape); auto _beta = backend->make_primary_tensor_view(element::f32, beta_shape);
copy_data(_beta, vector<float>{0.0f, 0.0f}); copy_data(_beta, vector<float>{0.0f, 0.0f});
auto result = backend->make_primary_tensor_view(element::f32, shape_r); auto bn_output = backend->make_primary_tensor_view(element::f32, shape_r);
auto result_mean = backend->make_primary_tensor_view(element::f32, mean_shape);
auto result_variance = backend->make_primary_tensor_view(element::f32, var_shape);
vector<float> expected_result{-0.71498716f, vector<float> expected_result{-0.71498716f,
1.48388731f, 1.48388731f,
...@@ -221,8 +303,14 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2) ...@@ -221,8 +303,14 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2)
0.23943391f, 0.23943391f,
-0.84090298f, -0.84090298f,
1.51462936f}; 1.51462936f};
cf->call({_mean, _var, _input, _gamma, _beta}, {result}); vector<float> expected_mean{0.602912f, 0.599727f};
EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(result))); vector<float> expected_variance{0.00472505f, 0.0361782f};
cf->call({_input, _gamma, _beta}, {bn_output, result_mean, result_variance});
EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(bn_output)));
EXPECT_TRUE(test::all_close(expected_mean, read_vector<float>(result_mean)));
EXPECT_TRUE(test::all_close(expected_variance, read_vector<float>(result_variance)));
} }
TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1) TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1)
...@@ -230,18 +318,21 @@ TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1) ...@@ -230,18 +318,21 @@ TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1)
auto input_shape = Shape{2, 2, 2, 1}; auto input_shape = Shape{2, 2, 2, 1};
auto input = make_shared<op::Parameter>(element::f32, input_shape); auto input = make_shared<op::Parameter>(element::f32, input_shape);
auto mean_shape = Shape{2}; auto mean_shape = Shape{2};
auto mean = make_shared<op::Parameter>(element::f32, mean_shape);
auto var_shape = Shape{2}; auto var_shape = Shape{2};
auto var = make_shared<op::Parameter>(element::f32, var_shape);
auto gamma_shape = Shape{2}; auto gamma_shape = Shape{2};
auto gamma = make_shared<op::Parameter>(element::f32, gamma_shape); auto gamma = make_shared<op::Parameter>(element::f32, gamma_shape);
auto beta_shape = Shape{2}; auto beta_shape = Shape{2};
auto beta = make_shared<op::Parameter>(element::f32, beta_shape); auto beta = make_shared<op::Parameter>(element::f32, beta_shape);
double eps = 0.001; double eps = 0.001;
auto shape_r = Shape{2, 2, 2, 1}; auto shape_r = Shape{2, 2, 2, 1};
auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input, mean, var); auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input);
auto output_rt = std::make_shared<op::GetOutputElement>(bn, 0);
auto mean_rt = std::make_shared<op::GetOutputElement>(bn, 1);
auto variance_rt = std::make_shared<op::GetOutputElement>(bn, 2);
auto f = make_shared<Function>(bn, op::ParameterVector{mean, var, input, gamma, beta}); auto f = make_shared<Function>(NodeVector{output_rt, mean_rt, variance_rt},
op::ParameterVector{input, gamma, beta});
auto manager = runtime::Manager::get("CPU"); auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(f); auto external = manager->compile(f);
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
...@@ -257,20 +348,24 @@ TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1) ...@@ -257,20 +348,24 @@ TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1)
0.64589411f, 0.64589411f,
0.4375872f, 0.4375872f,
0.89177299f}); 0.89177299f});
auto _mean = backend->make_primary_tensor_view(element::f32, mean_shape);
copy_data(_mean, vector<float>{0.60291237f, 0.59972727f});
auto _var = backend->make_primary_tensor_view(element::f32, var_shape);
copy_data(_var, vector<float>{0.00472505f, 0.03617825f});
auto _gamma = backend->make_primary_tensor_view(element::f32, gamma_shape); auto _gamma = backend->make_primary_tensor_view(element::f32, gamma_shape);
copy_data(_gamma, vector<float>{1.0f, 1.0f}); copy_data(_gamma, vector<float>{1.0f, 1.0f});
auto _beta = backend->make_primary_tensor_view(element::f32, beta_shape); auto _beta = backend->make_primary_tensor_view(element::f32, beta_shape);
copy_data(_beta, vector<float>{0.0f, 0.0f}); copy_data(_beta, vector<float>{0.0f, 0.0f});
auto result = backend->make_primary_tensor_view(element::f32, shape_r); auto bn_output = backend->make_primary_tensor_view(element::f32, shape_r);
auto result_mean = backend->make_primary_tensor_view(element::f32, mean_shape);
auto result_variance = backend->make_primary_tensor_view(element::f32, var_shape);
vector<float> expected_result{ vector<float> expected_result{
-0.714987f, 1.48389f, 0.015746f, -0.284436f, -2.36912f, 0.56806f, -0.840903f, 1.51463f}; -0.30327f, 1.1561f, -0.0963782f, -0.434702f, -1.4011f, 0.548275f, -1.06187f, 1.59295f};
cf->call({_mean, _var, _input, _gamma, _beta}, {result}); vector<float> expected_mean{0.583388f, 0.619252f};
EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(result))); vector<float> expected_variance{0.0119972f, 0.0282681f};
cf->call({_input, _gamma, _beta}, {bn_output, result_mean, result_variance});
EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(bn_output)));
EXPECT_TRUE(test::all_close(expected_mean, read_vector<float>(result_mean)));
EXPECT_TRUE(test::all_close(expected_variance, read_vector<float>(result_variance)));
} }
TEST(cpu_fusion, fuse_fprop_bn) TEST(cpu_fusion, fuse_fprop_bn)
...@@ -324,7 +419,10 @@ TEST(cpu_fusion, bn_bprop_n4c3h2w2) ...@@ -324,7 +419,10 @@ TEST(cpu_fusion, bn_bprop_n4c3h2w2)
auto beta = make_shared<op::Parameter>(element::f32, beta_shape); auto beta = make_shared<op::Parameter>(element::f32, beta_shape);
double eps = 0.001; double eps = 0.001;
auto shape_r = Shape{4, 3, 2, 2}; auto shape_r = Shape{4, 3, 2, 2};
auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input, mean, var); auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input);
auto bn_dx = make_shared<op::GetOutputElement>(bn, 0);
auto bn_dgamma = make_shared<op::GetOutputElement>(bn, 1);
auto bn_dbeta = make_shared<op::GetOutputElement>(bn, 2);
auto manager = runtime::Manager::get("CPU"); auto manager = runtime::Manager::get("CPU");
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
...@@ -356,7 +454,8 @@ TEST(cpu_fusion, bn_bprop_n4c3h2w2) ...@@ -356,7 +454,8 @@ TEST(cpu_fusion, bn_bprop_n4c3h2w2)
vector<float> deltaData(shape_size(shape_r), 20.0f); vector<float> deltaData(shape_size(shape_r), 20.0f);
copy_data(_delta, deltaData); copy_data(_delta, deltaData);
auto f = make_shared<Function>(bn, op::ParameterVector{mean, var, input, gamma, beta}); auto f = make_shared<Function>(NodeVector{bn_dx, bn_dgamma, bn_dbeta},
op::ParameterVector{mean, var, input, gamma, beta});
auto C = std::make_shared<op::Parameter>(element::f32, shape_r); auto C = std::make_shared<op::Parameter>(element::f32, shape_r);
auto dinput = bn->backprop_node(input, C); auto dinput = bn->backprop_node(input, C);
...@@ -402,3 +501,95 @@ TEST(cpu_fusion, bn_bprop_n4c3h2w2) ...@@ -402,3 +501,95 @@ TEST(cpu_fusion, bn_bprop_n4c3h2w2)
vector<float> expected_dbeta{320.f, 320.f, 320.f}; vector<float> expected_dbeta{320.f, 320.f, 320.f};
ASSERT_TRUE(ngraph::test::all_close(read_vector<float>(_dbeta), expected_dbeta, 1e-4f, 1e-8f)); ASSERT_TRUE(ngraph::test::all_close(read_vector<float>(_dbeta), expected_dbeta, 1e-4f, 1e-8f));
} }
TEST(cpu_fusion, zero_padded_reshaped_conv)
{
auto X = make_shared<op::Parameter>(element::f32, Shape{1, 2, 2, 1});
auto F = make_shared<op::Parameter>(element::f32, Shape{1, 1, 1, 1});
auto pad_value = op::Constant::create<float>(element::f32, Shape{}, std::vector<float>{0.0f});
auto pad =
make_shared<op::Pad>(X, pad_value, Shape{0, 1, 0, 0}, Shape{0, 0, 1, 0}, Shape{0, 0, 0, 0});
auto reshape = make_shared<op::Reshape>(pad, AxisVector{0, 3, 1, 2}, Shape{1, 1, 3, 3});
auto conv = make_shared<op::Convolution>(reshape,
F,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
auto func = make_shared<Function>(conv, op::ParameterVector{X, F});
ASSERT_EQ(count_ops_of_type<op::Pad>(func), 1);
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(func);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
ASSERT_EQ(count_ops_of_type<op::Pad>(func), 0);
}
TEST(cpu_fusion, zero_padded_conv)
{
auto X = make_shared<op::Parameter>(element::f32, Shape{1, 1, 2, 2});
auto F = make_shared<op::Parameter>(element::f32, Shape{1, 1, 1, 1});
auto pad_value = op::Constant::create<float>(element::f32, Shape{}, std::vector<float>{0.0f});
auto pad =
make_shared<op::Pad>(X, pad_value, Shape{0, 0, 0, 1}, Shape{0, 0, 1, 0}, Shape{0, 0, 0, 0});
auto conv = make_shared<op::Convolution>(pad,
F,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
auto func = make_shared<Function>(conv, op::ParameterVector{X, F});
ASSERT_EQ(count_ops_of_type<op::Pad>(func), 1);
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(func);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
ASSERT_EQ(count_ops_of_type<op::Pad>(func), 0);
}
TEST(cpu_fusion, non_zero_padded_conv)
{
auto X = make_shared<op::Parameter>(element::f32, Shape{1, 1, 2, 2});
auto F = make_shared<op::Parameter>(element::f32, Shape{1, 1, 1, 1});
auto pad_value = op::Constant::create<float>(element::f32, Shape{}, std::vector<float>{1.0f});
auto pad =
make_shared<op::Pad>(X, pad_value, Shape{0, 0, 0, 1}, Shape{0, 0, 1, 0}, Shape{0, 0, 0, 0});
auto conv = make_shared<op::Convolution>(pad,
F,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
auto func = make_shared<Function>(conv, op::ParameterVector{X, F});
ASSERT_EQ(count_ops_of_type<op::Pad>(func), 1);
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(func);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
ASSERT_EQ(count_ops_of_type<op::Pad>(func), 1);
}
...@@ -218,11 +218,6 @@ public: ...@@ -218,11 +218,6 @@ public:
{ {
map_parameter_to_index[f->get_parameters().at(i)] = i; map_parameter_to_index[f->get_parameters().at(i)] = i;
} }
unordered_map<shared_ptr<Node>, size_t> map_result_to_index;
for (size_t i = 0; i < f->get_results().size(); ++i)
{
map_result_to_index[f->get_results().at(i)] = i;
}
// Parameter's source is either itself, or the output node of the upstream function // Parameter's source is either itself, or the output node of the upstream function
unordered_map<shared_ptr<op::Parameter>, shared_ptr<Node>> map_parameter_to_source_node; unordered_map<shared_ptr<op::Parameter>, shared_ptr<Node>> map_parameter_to_source_node;
...@@ -231,6 +226,13 @@ public: ...@@ -231,6 +226,13 @@ public:
vector<shared_ptr<Function>> funcs = vector<shared_ptr<Function>> funcs =
split_function_by_placement(f, map_parameter_to_source_node); split_function_by_placement(f, map_parameter_to_source_node);
auto main_func = funcs.back();
unordered_map<shared_ptr<Node>, size_t> map_result_to_index;
for (size_t i = 0; i < main_func->get_results().size(); ++i)
{
map_result_to_index[main_func->get_results().at(i)] = i;
}
// Make call frames // Make call frames
vector<shared_ptr<runtime::CallFrame>> call_frames; vector<shared_ptr<runtime::CallFrame>> call_frames;
for (auto func : funcs) for (auto func : funcs)
......
...@@ -47,14 +47,22 @@ TEST(liveness, constant) ...@@ -47,14 +47,22 @@ TEST(liveness, constant)
auto tmp = f->get_ordered_ops(); auto tmp = f->get_ordered_ops();
vector<shared_ptr<Node>> sorted{tmp.begin(), tmp.end()}; vector<shared_ptr<Node>> sorted{tmp.begin(), tmp.end()};
ASSERT_EQ(2, sorted.size()); ASSERT_EQ(3, sorted.size());
EXPECT_EQ(0, sorted[0]->liveness_live_list.size()); EXPECT_EQ(0, sorted[0]->liveness_live_list.size());
EXPECT_EQ(0, sorted[0]->liveness_new_list.size()); EXPECT_EQ(0, sorted[0]->liveness_new_list.size());
EXPECT_EQ(0, sorted[0]->liveness_free_list.size()); EXPECT_EQ(0, sorted[0]->liveness_free_list.size());
EXPECT_EQ(0, sorted[1]->liveness_live_list.size()); //op::Negative is live on output to op::Result
EXPECT_EQ(0, sorted[1]->liveness_new_list.size()); EXPECT_EQ(1, sorted[1]->liveness_live_list.size());
//op::Negative is new
EXPECT_EQ(1, sorted[1]->liveness_new_list.size());
EXPECT_EQ(0, sorted[1]->liveness_free_list.size()); EXPECT_EQ(0, sorted[1]->liveness_free_list.size());
//op::Negative is live on input to op::Result
EXPECT_EQ(1, sorted[2]->liveness_live_list.size());
EXPECT_EQ(0, sorted[2]->liveness_new_list.size());
//op::Negative is freed
EXPECT_EQ(1, sorted[2]->liveness_free_list.size());
} }
TEST(liveness, liveness) TEST(liveness, liveness)
......
...@@ -234,5 +234,5 @@ TEST(memory_layout, constant) ...@@ -234,5 +234,5 @@ TEST(memory_layout, constant)
pass_manager.run_passes(f); pass_manager.run_passes(f);
auto sorted = f->get_ordered_ops(); auto sorted = f->get_ordered_ops();
size_t temporary_pool_size = f->get_temporary_pool_size(); size_t temporary_pool_size = f->get_temporary_pool_size();
EXPECT_EQ(0, temporary_pool_size); EXPECT_EQ(4, temporary_pool_size);
} }
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp" #include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/serializer.hpp" #include "ngraph/serializer.hpp"
#include "util/matcher.hpp" #include "util/matcher.hpp"
#include "util/test_tools.hpp"
using namespace ngraph; using namespace ngraph;
using namespace std; using namespace std;
...@@ -89,9 +90,9 @@ bool sum_predicate(std::shared_ptr<Node> gn) ...@@ -89,9 +90,9 @@ bool sum_predicate(std::shared_ptr<Node> gn)
return false; return false;
} }
NGRAPH_DEBUG << "looking at function's result " auto result = r->get_functions()[0]->get_result()->get_input_op(0);
<< r->get_functions()[0]->get_result()->get_name(); NGRAPH_DEBUG << "looking at function's result " << result->get_name();
if (auto sum = std::dynamic_pointer_cast<op::Add>(r->get_functions()[0]->get_result())) if (auto sum = std::dynamic_pointer_cast<op::Add>(result))
{ {
auto parm1 = std::dynamic_pointer_cast<op::Parameter>(sum->get_input_op(0)); auto parm1 = std::dynamic_pointer_cast<op::Parameter>(sum->get_input_op(0));
auto parm2 = std::dynamic_pointer_cast<op::Parameter>(sum->get_input_op(1)); auto parm2 = std::dynamic_pointer_cast<op::Parameter>(sum->get_input_op(1));
...@@ -297,7 +298,7 @@ TEST(pattern, graph_rewrite) ...@@ -297,7 +298,7 @@ TEST(pattern, graph_rewrite)
ASSERT_TRUE(graph_b->get_output_inputs(0).empty()); ASSERT_TRUE(graph_b->get_output_inputs(0).empty());
auto expected = ngraph::NodeVector{a, b, a, c, b}; auto expected = ngraph::NodeVector{a, b, a, c, b};
ASSERT_TRUE(f->get_results() == expected); ASSERT_TRUE(count_ops_of_type<op::Add>(f) == 0);
} }
{ {
......
...@@ -82,3 +82,27 @@ TEST(reshape_elimination, bn_bprop_rewrite) ...@@ -82,3 +82,27 @@ TEST(reshape_elimination, bn_bprop_rewrite)
size_t count_after = count_ops_of_type<op::Reshape>(func); size_t count_after = count_ops_of_type<op::Reshape>(func);
ASSERT_TRUE(count_after < count_before); ASSERT_TRUE(count_after < count_before);
} }
TEST(reshape_elimination, dot_transpose_to_dot_w_transpose_args)
{
Shape shape_w{2, 4};
Shape shape_x{4, 1};
auto W = make_shared<op::Parameter>(element::f32, shape_w);
auto x = make_shared<op::Parameter>(element::f32, shape_x);
auto dot = make_shared<op::Dot>(W, x);
auto reshape_dot = std::make_shared<op::Reshape>(dot, AxisVector{1, 0}, Shape{1, 2});
auto graph = make_shared<op::Abs>(reshape_dot);
pass::Manager pass_manager;
pass_manager.register_pass<pass::ReshapeElimination>();
auto func = make_shared<Function>(graph, op::ParameterVector{W, x});
pass_manager.run_passes(func);
auto gdot = graph->get_input_op(0);
ASSERT_TRUE(std::dynamic_pointer_cast<op::Dot>(gdot));
ASSERT_TRUE(std::dynamic_pointer_cast<op::Reshape>(gdot->get_input_op(0)));
ASSERT_TRUE(std::dynamic_pointer_cast<op::Reshape>(gdot->get_input_op(1)));
ASSERT_EQ(gdot->get_input_op(0)->get_input_op(0), x);
ASSERT_EQ(gdot->get_input_op(1)->get_input_op(0), W);
ASSERT_EQ(gdot->get_shape(), (Shape{1, 2}));
}
...@@ -21,15 +21,8 @@ ...@@ -21,15 +21,8 @@
namespace ngraph namespace ngraph
{ {
class Node;
class Function; class Function;
namespace runtime
{
class Backend;
class Manager;
}
namespace autodiff namespace autodiff
{ {
/// @brief Returns a FunctionSpec for the backprop derivative of its argument. /// @brief Returns a FunctionSpec for the backprop derivative of its argument.
......
...@@ -33,12 +33,13 @@ bool validate_list(const list<shared_ptr<Node>>& nodes) ...@@ -33,12 +33,13 @@ bool validate_list(const list<shared_ptr<Node>>& nodes)
auto node_tmp = *it; auto node_tmp = *it;
auto dependencies_tmp = node_tmp->get_input_ops(); auto dependencies_tmp = node_tmp->get_input_ops();
vector<Node*> dependencies; vector<Node*> dependencies;
for (shared_ptr<Node> n : dependencies_tmp) for (shared_ptr<Node> n : dependencies_tmp)
{ {
dependencies.push_back(n.get()); dependencies.push_back(n.get());
} }
auto tmp = it++; auto tmp = it;
for (; tmp != nodes.rend(); tmp++) for (tmp++; tmp != nodes.rend(); tmp++)
{ {
auto dep_tmp = *tmp; auto dep_tmp = *tmp;
auto found = find(dependencies.begin(), dependencies.end(), dep_tmp.get()); auto found = find(dependencies.begin(), dependencies.end(), dep_tmp.get());
......