Unverified Commit eee71968 authored by Chris Sullivan's avatar Chris Sullivan Committed by GitHub

Merge branch 'master' into tfl/gpu_fix_constant_bug

parents 181be216 9db548c6
......@@ -36,7 +36,7 @@ General Instructions
These instructions assume that your system has been prepared in accordance
with the above prerequisites.
$ cd private-ngraph-cpp
$ cd ngraph-cpp
$ mkdir build
$ cd build
$ cmake .. \
......
......@@ -79,10 +79,11 @@ information about how to change or customize this location.
$ cd build && cmake ../ [-DNGRAPH_USE_PREBUILT_LLVM=TRUE]
#. (Optional) Run ``$ make [-jN]`` where ``-jN`` specifies the number of
cores. The example here uses a configuration of ``j8``, which is
good for a system install using an Intel® Xeon® (CPU processor). This step
is **not recommended** with Docker / VM installs.
#. (Optional) Run ``$ make [-jN]`` where ``-jN`` specifies the number of physical
cores to use to build. The example here uses a configuration of ``j8``,
which is good for a system install using an 8-core Intel® Xeon® CPU processor.
This step is **not recommended** for machines with too little RAM available,
such as those whose RAM is superseded by Docker or VM tasks.
.. code-block:: console
......
......@@ -67,6 +67,7 @@ set (SRC
ops/replace_slice.cpp
ops/reshape.cpp
ops/reverse.cpp
ops/result.cpp
ops/select.cpp
ops/select_and_scatter.cpp
ops/sin.cpp
......@@ -189,6 +190,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/pass/cpu_assignment.cpp
runtime/cpu/pass/cpu_fusion.cpp
runtime/cpu/pass/cpu_layout.cpp
runtime/cpu/pass/cpu_nop_elimination.cpp
)
# LLVM binary builds are typically built without RTTI
# The built-in headers are in a version-specific directory
......
......@@ -75,10 +75,5 @@ namespace ngraph
protected:
std::unordered_map<Node*, std::shared_ptr<Node>> m_adjoint_map;
};
/// @brief Returns a FunctionSpec for the backprop derivative of its argument.
/// @param f is f(X_i...)
/// @returns f'(X_i..., c) where f'(x_i, ..., c)_j is backprop for X_j
std::shared_ptr<Function> backprop_function(const std::shared_ptr<Function>& f);
}
}
......@@ -27,7 +27,7 @@ using namespace ngraph;
atomic<size_t> Function::m_next_instance_id(0);
Function::Function(const NodeVector& results,
Function::Function(const ResultVector& results,
const op::ParameterVector& parameters,
const std::string& name)
: m_results(results)
......@@ -37,14 +37,50 @@ Function::Function(const NodeVector& results,
, m_name(name)
, m_unique_name("Function_" + to_string(m_instance_id))
{
init();
}
/// Builds a function from plain graph nodes. Each entry of \p results is wrapped in a
/// newly created op::Result, and those wrappers are stored as the function's results.
Function::Function(const NodeVector& results,
                   const op::ParameterVector& parameters,
                   const std::string& name)
    : m_results(results.size())
    , m_parameters(parameters)
    , m_temporary_pool_size(0)
    , m_instance_id(m_next_instance_id.fetch_add(1))
    , m_name(name)
    , m_unique_name("Function_" + to_string(m_instance_id))
{
    // m_results was pre-sized in the initializer list; fill slot i with the wrapper
    // around results[i].
    for (size_t i = 0; i < results.size(); ++i)
    {
        m_results[i] = std::make_shared<op::Result>(results[i]);
    }
    init();
}
// Convenience overload for a function with exactly one result node: it delegates to the
// NodeVector constructor with a one-element vector holding `result`.
Function::Function(const std::shared_ptr<Node>& result,
const op::ParameterVector& parameters,
const std::string& name)
: Function(NodeVector{result}, parameters, name)
{
}
void Function::init()
{
for (auto r : m_results)
{
for (descriptor::Output& output : r->get_outputs())
{
output.get_tensor().set_is_output();
}
}
traverse_nodes(this, [&](shared_ptr<Node> node) {
std::shared_ptr<op::Parameter> p = std::dynamic_pointer_cast<op::Parameter>(node);
if (nullptr != p)
{
auto it = std::find_if(parameters.begin(),
parameters.end(),
auto it = std::find_if(m_parameters.begin(),
m_parameters.end(),
[p](std::shared_ptr<op::Parameter> q) { return (p == q); });
if (it == parameters.end())
if (it == m_parameters.end())
{
throw ngraph_error("Function references undeclared parameter");
}
......@@ -52,13 +88,6 @@ Function::Function(const NodeVector& results,
});
}
Function::Function(const std::shared_ptr<Node>& result,
const op::ParameterVector& parameters,
const std::string& name)
: Function(NodeVector{result}, parameters, name)
{
}
std::list<shared_ptr<Node>> Function::get_ordered_ops()
{
return topological_sort(get_ops());
......@@ -156,18 +185,7 @@ std::list<shared_ptr<Node>> Function::get_ops() const
return ops;
}
// Searches m_results for `old`; when it is found, that entry is overwritten with `repl`
// (and the substitution is logged). When `old` is not among the results, nothing happens.
void Function::replace_output_op(std::shared_ptr<Node> old, std::shared_ptr<Node> repl)
{
auto it = std::find(begin(m_results), end(m_results), old);
if (it != end(m_results))
{
NGRAPH_DEBUG << "Replacing output " << old->get_name() << " w/ " << repl->get_name();
*it = repl;
}
}
void Function::replace_node(std::shared_ptr<Node> old, std::shared_ptr<Node> repl)
{
replace_output_op(old, repl);
ngraph::replace_node(old, repl, true);
ngraph::replace_node(old, repl);
}
......@@ -25,6 +25,7 @@
#include "ngraph/node.hpp"
#include "ngraph/ops/parameter_vector.hpp"
#include "ngraph/ops/result_vector.hpp"
#include "ngraph/types/type.hpp"
namespace ngraph
......@@ -41,6 +42,12 @@ namespace ngraph
const op::ParameterVector& parameters,
const std::string& name = "");
Function(const ResultVector& results,
const op::ParameterVector& parameters,
const std::string& name = "");
void init();
virtual ~Function() {}
public:
/// Return the number of outputs for this function.
......@@ -57,8 +64,8 @@ namespace ngraph
/// Return the function parameters
const op::ParameterVector& get_parameters() const { return m_parameters; }
/// Return the ops that generate the results
const NodeVector get_results() const { return m_results; }
/// Return a list of function's outputs
const ResultVector& get_results() const { return m_results; }
/// Check that there is a single result and return it.
std::shared_ptr<Node> get_result() const;
......@@ -73,13 +80,11 @@ namespace ngraph
size_t get_instance_id() { return m_instance_id; }
size_t get_temporary_pool_size();
void set_temporary_pool_size(size_t);
// updates old w/ repl in m_results list
void replace_output_op(std::shared_ptr<Node> old, std::shared_ptr<Node> repl);
// updates graph and m_results list
void replace_node(std::shared_ptr<Node> old, std::shared_ptr<Node> repl);
protected:
NodeVector m_results;
ResultVector m_results;
op::ParameterVector m_parameters;
size_t m_temporary_pool_size;
......
......@@ -29,6 +29,8 @@
#include "ngraph/node_vector.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/ops/result_vector.hpp"
#include "ngraph/placement.hpp"
#include "ngraph/util.hpp"
......@@ -114,13 +116,11 @@ void ngraph::free_nodes(shared_ptr<Function> p)
}
}
void ngraph::replace_node(std::shared_ptr<Node> target,
std::shared_ptr<Node> replacement,
bool replace_output)
void ngraph::replace_node(std::shared_ptr<Node> target, std::shared_ptr<Node> replacement)
{
if (target->is_output() && !replace_output)
if (target->is_output())
{
return;
throw ngraph_error("Result nodes cannot be replaced.");
}
// Fix input/output descriptors
......@@ -197,6 +197,15 @@ std::list<std::shared_ptr<ngraph::Node>>
return result_list;
}
/// Re-points an existing mapping: the entry keyed by \p orig is overwritten with \p val.
/// Throws ngraph_error when \p orig has no entry in the map.
void ngraph::NodeMap::update(std::shared_ptr<ngraph::Node> orig, std::shared_ptr<ngraph::Node> val)
{
    auto entry = m_node_map.find(orig);
    if (entry == m_node_map.end())
    {
        throw ngraph_error("Node doesn't exist!");
    }
    entry->second = val;
}
void ngraph::NodeMap::add(std::shared_ptr<ngraph::Node> orig,
std::shared_ptr<ngraph::Node> replacement)
{
......@@ -252,10 +261,15 @@ std::shared_ptr<ngraph::Function> ngraph::clone_function(std::shared_ptr<ngraph:
clone_nodes(func->get_ops(), node_map);
// get cloned function results and parameters
NodeVector cloned_results;
ResultVector cloned_results;
for (shared_ptr<Node> node : func->get_results())
{
cloned_results.push_back(node_map.get(node));
auto result = std::dynamic_pointer_cast<op::Result>(node_map.get(node));
if (!result)
{
throw ngraph_error("Results should be of type op::Result");
}
cloned_results.push_back(result);
}
std::vector<std::shared_ptr<op::Parameter>> cloned_params;
for (auto param : func->get_parameters())
......@@ -435,8 +449,8 @@ static shared_ptr<Function> build_largest_colocated_function(
}
}
}
return make_shared<Function>(outputs, collected_parameters);
auto func = make_shared<Function>(outputs, collected_parameters);
return func;
}
// The returned nodes contains the node N with highest order. If N is placed at P, the returned
......@@ -528,7 +542,7 @@ vector<shared_ptr<Function>> ngraph::split_function_by_placement(
// Remove input-output and constant-output aliasing
if (f_parameters.count(node) == 0 && node->description() != "Constant")
{
unvisited_outputs.insert(node);
unvisited_outputs.insert(node->get_input_op(0));
}
}
......@@ -571,6 +585,24 @@ vector<shared_ptr<Function>> ngraph::split_function_by_placement(
unvisited_outputs = updated_unvisited_outputs;
}
unordered_map<shared_ptr<Node>, shared_ptr<Node>> map_source_node_to_result;
for (auto cf : colocated_functions)
{
for (auto r : cf->get_results())
{
map_source_node_to_result[r->get_input_op(0)] = r;
}
}
for (auto it = map_parameter_to_source_node.begin(); it != map_parameter_to_source_node.end();
++it)
{
if (map_source_node_to_result.count(it->second) != 0)
{
it->second = map_source_node_to_result[it->second];
}
}
// The colocated_functions should be called in reversed order
reverse(colocated_functions.begin(), colocated_functions.end());
return colocated_functions;
......
......@@ -48,9 +48,8 @@ namespace ngraph
void free_nodes(std::shared_ptr<Function>);
void replace_node(std::shared_ptr<Node> target,
std::shared_ptr<Node> replacement,
bool replace_output = false);
void replace_node(std::shared_ptr<Node> target, std::shared_ptr<Node> replacement);
void replace_node_users_arguments(std::shared_ptr<Node> target,
std::shared_ptr<Node> replacement);
......@@ -78,6 +77,8 @@ namespace ngraph
return (m_node_map.count(orig) != 0);
}
void update(std::shared_ptr<ngraph::Node> orig, std::shared_ptr<ngraph::Node> val);
const std::unordered_map<std::shared_ptr<ngraph::Node>, std::shared_ptr<ngraph::Node>>&
get_node_map() const
{
......
......@@ -23,6 +23,7 @@
#include "ngraph/descriptor/layout/tensor_view_layout.hpp"
#include "ngraph/descriptor/primary_tensor_view.hpp"
#include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/placement.hpp"
using namespace std;
......@@ -34,7 +35,6 @@ Node::Node(const std::string& node_type, const NodeVector& arguments)
: m_node_type(node_type)
, m_instance_id(m_next_instance_id.fetch_add(1))
, m_unique_name(description() + "_" + to_string(m_instance_id))
, m_is_output(false)
, m_arguments(arguments)
{
// Add this node as a user of each argument.
......@@ -68,7 +68,7 @@ void Node::add_output(const element::Type& element_type, const Shape& shape)
auto tensor_view_descriptor = make_shared<descriptor::PrimaryTensorView>(
tensor_view_type,
ngraph::descriptor::Tensor::make_tensor_name(this, i),
is_output(),
false,
is_parameter(),
is_constant());
m_outputs.emplace_back(this, i, tensor_view_descriptor);
......@@ -96,16 +96,7 @@ bool Node::is_parameter() const
bool Node::is_output() const
{
return m_is_output;
}
void Node::set_is_output()
{
m_is_output = true;
for (descriptor::Output& output : get_outputs())
{
output.get_tensor().set_is_output();
}
return false;
}
bool Node::is_constant() const
......
......@@ -102,8 +102,7 @@ namespace ngraph
void set_value_type_checked(const element::Type& element_type, const Shape& shape);
bool is_parameter() const;
bool is_output() const;
void set_is_output();
virtual bool is_output() const;
virtual bool is_constant() const;
virtual bool is_commutative() { return false; }
size_t get_instance_id() const { return m_instance_id; }
......@@ -200,7 +199,6 @@ namespace ngraph
static std::atomic<size_t> m_next_instance_id;
std::deque<descriptor::Input> m_inputs;
std::deque<descriptor::Output> m_outputs;
bool m_is_output;
std::unordered_map<Node*, autodiff::Adjoints> m_adjoint_map;
Placement m_placement = Placement::DEFAULT;
......
......@@ -23,6 +23,11 @@ namespace ngraph
{
class Node;
namespace op
{
class Result;
}
/// \brief Zero or more nodes.
class NodeVector : public std::vector<std::shared_ptr<Node>>
{
......
......@@ -21,21 +21,20 @@
ngraph::op::BatchNorm::BatchNorm(double eps,
std::shared_ptr<ngraph::Node> gamma,
std::shared_ptr<ngraph::Node> beta,
std::shared_ptr<ngraph::Node> input,
std::shared_ptr<ngraph::Node> mean,
std::shared_ptr<ngraph::Node> variance)
: RequiresTensorViewArgs("BatchNorm", {gamma, beta, input, mean, variance})
std::shared_ptr<ngraph::Node> input)
: RequiresTensorViewArgs("BatchNorm", {gamma, beta, input})
, m_bn_input_shape(input->get_shape())
, m_bn_variance_shape(variance->get_shape())
, m_bn_mean_shape(mean->get_shape())
, m_epsilon(eps)
{
add_output(input->get_element_type(), m_bn_input_shape);
if (m_bn_input_shape.size() < 2)
{
throw ngraph_error("input tensor to batchnorm much have tensor of atleast rank 2");
}
else
{
this->m_bn_variance_shape.push_back(input->get_shape()[1]);
this->m_bn_mean_shape.push_back(input->get_shape()[1]);
}
if (m_bn_input_shape[1] == 0)
{
......@@ -49,51 +48,27 @@ ngraph::op::BatchNorm::BatchNorm(double eps,
throw ngraph_error("gamma, beta, mean, variance shoud have all rank 1");
}
// assuming input shape (N, C, H, W), check if the size of mean and
// variance are equal to channel axis
if (mean->get_shape()[0] != m_bn_input_shape[1])
{
throw ngraph_error("mean size is not equal to input channel size");
}
if (variance->get_shape()[0] != m_bn_input_shape[1])
{
throw ngraph_error("variance size is not equal to input channel size");
}
if (variance->get_shape().size() != mean->get_shape().size())
{
throw ngraph_error("mean and variance rank does not match");
}
if (gamma->get_shape().size() != beta->get_shape().size())
{
throw ngraph_error("gamma and beta rank does not match");
}
if (input->get_element_type() != mean->get_element_type())
{
throw ngraph_error("input tensor and mean element type does not match");
}
if (input->get_element_type() != variance->get_element_type())
{
throw ngraph_error("input tensor and variance element type does not match");
}
if (gamma->get_element_type() != beta->get_element_type())
{
throw ngraph_error("gamma and beta element type does not match");
}
add_output(input->get_element_type(), m_bn_input_shape);
add_output(input->get_element_type(), m_bn_mean_shape);
add_output(input->get_element_type(), m_bn_variance_shape);
}
std::shared_ptr<ngraph::Node>
ngraph::op::BatchNorm::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 5)
if (new_args.size() != 3)
throw ngraph_error("Incorrect number of new arguments");
return std::make_shared<BatchNorm>(
m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), new_args.at(4));
return std::make_shared<BatchNorm>(m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2));
}
ngraph::op::BatchNormBackprop::BatchNormBackprop(double eps,
......@@ -174,10 +149,10 @@ void ngraph::op::BatchNorm::generate_adjoints(autodiff::Adjoints& adjoints,
auto gamma = get_input_op(0);
auto beta = get_input_op(1);
auto input = get_input_op(2);
auto mean = get_input_op(3);
auto variance = get_input_op(4);
auto mean = std::make_shared<op::GetOutputElement>(shared_from_this(), 1);
auto var = std::make_shared<op::GetOutputElement>(shared_from_this(), 2);
auto bbn = std::make_shared<op::BatchNormBackprop>(
get_eps_value(), gamma, beta, input, mean, variance, delta);
get_eps_value(), gamma, beta, input, mean, var, delta);
auto dinput = std::make_shared<op::GetOutputElement>(bbn, 0);
auto dgamma = std::make_shared<op::GetOutputElement>(bbn, 1);
auto dbeta = std::make_shared<op::GetOutputElement>(bbn, 2);
......
......@@ -33,9 +33,7 @@ namespace ngraph
BatchNorm(double eps,
std::shared_ptr<Node> gamma,
std::shared_ptr<Node> beta,
std::shared_ptr<Node> input,
std::shared_ptr<Node> mean,
std::shared_ptr<Node> variance);
std::shared_ptr<Node> input);
const Shape& get_inputs_shape() const { return m_bn_input_shape; }
const Shape& get_variance_shape() const { return m_bn_variance_shape; }
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <memory>
#include <typeindex>
#include <typeinfo>
#include "ngraph/node.hpp"
#include "ngraph/ops/result.hpp"
using namespace std;
using namespace ngraph;
/// Wraps \p arg in a Result node. The argument must expose exactly one output;
/// otherwise an ngraph_error is thrown.
op::Result::Result(const std::shared_ptr<Node>& arg)
    : RequiresTensorViewArgs("Result", {arg})
{
    const bool has_single_output = (arg->get_outputs().size() == 1);
    if (!has_single_output)
    {
        throw ngraph_error("Expected a single-output argument");
    }

    // The placement is always taken from the argument, even if it is only the default.
    set_placement(arg->get_placement());

    // The result carries the same element type and shape as the wrapped argument.
    set_value_type_checked(arg->get_element_type(), arg->get_shape());
}
std::shared_ptr<Node> op::Result::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 1)
{
throw ngraph_error("Incorrect number of new arguments");
}
if (new_args.at(0)->get_outputs().size() != 1)
{
throw ngraph_error("Expected a single-output argument");
}
return std::make_shared<Result>(new_args.at(0));
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <memory>
#include "ngraph/ops/util/requires_tensor_view_args.hpp"
namespace ngraph
{
namespace op
{
/// \brief Node that wraps a single argument and reports itself as an output
/// (is_output() returns true).
class Result : public util::RequiresTensorViewArgs
{
public:
/// \brief Constructs a Result node around a single argument.
///
/// \param arg Node that produces the input tensor.
Result(const std::shared_ptr<Node>& arg);
/// \brief Returns a copy of this node built from \p new_args.
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
/// \brief Always true for Result nodes.
virtual bool is_output() const override { return true; }
protected:
/// \brief Forwards \p delta unchanged to the node feeding input 0.
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta) override
{
adjoints.add_delta(get_input_op(0), delta);
}
};
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <memory>
#include <vector>
#include "ngraph/ops/result.hpp"
namespace ngraph
{
    /// \brief Zero or more op::Result nodes.
    class ResultVector : public std::vector<std::shared_ptr<op::Result>>
    {
    public:
        /// \brief Creates a vector holding \p size null entries.
        ResultVector(size_t size)
            : std::vector<std::shared_ptr<op::Result>>(size)
        {
        }

        /// \brief Creates a vector from a braced list of result nodes.
        ResultVector(const std::initializer_list<std::shared_ptr<op::Result>>& nodes)
            : std::vector<std::shared_ptr<op::Result>>(nodes)
        {
        }

        /// \brief Creates a vector by copying a plain std::vector of result nodes.
        ResultVector(const std::vector<std::shared_ptr<op::Result>>& nodes)
            : std::vector<std::shared_ptr<op::Result>>(nodes)
        {
        }

        // A user-provided copy constructor would suppress the implicit move operations
        // and force every rvalue ResultVector to be deep-copied. Defaulting all four
        // keeps copy semantics unchanged while restoring cheap moves.
        ResultVector(const ResultVector&) = default;
        ResultVector(ResultVector&&) = default;
        ResultVector& operator=(const ResultVector&) = default;
        ResultVector& operator=(ResultVector&&) = default;

        /// \brief Creates an empty vector.
        ResultVector() = default;
    };
}
......@@ -30,12 +30,10 @@ using namespace std;
using namespace ngraph;
ngraph::pass::Manager::Manager()
: m_to_set_is_output(true)
{
}
ngraph::pass::Manager::Manager(bool to_set_is_output)
: m_to_set_is_output(to_set_is_output)
{
}
......@@ -56,17 +54,6 @@ void ngraph::pass::Manager::run_passes(shared_ptr<Function> func)
set<shared_ptr<Function>> tfs(begin(fs), end(fs));
get_state().set_functions(tfs);
if (m_to_set_is_output)
{
for (shared_ptr<Function> f : get_state().get_functions())
{
for (size_t i = 0; i < f->get_output_size(); ++i)
{
f->get_output_op(i)->set_is_output();
}
}
}
for (shared_ptr<PassBase> pass : m_pass_list)
{
pass->set_state(get_state());
......
......@@ -57,5 +57,4 @@ public:
private:
std::vector<std::shared_ptr<PassBase>> m_pass_list;
ManagerState m_state;
bool m_to_set_is_output;
};
......@@ -150,3 +150,50 @@ void ngraph::pass::ReshapeElimination::construct_reshapex2_pattern()
auto m = std::make_shared<ngraph::pattern::Matcher>(reshape2, callback);
this->add_matcher(m);
}
/// Registers a matcher that rewrites the transpose of a matrix product into a product of
/// transposed arguments, using dot(A, B).T == dot(B.T, A.T).
///
/// The callback declines the rewrite (returns a null node) when:
///  - the matched root is not a Reshape, or its input order is not {1, 0};
///  - the Dot result is not rank 2;
///  - either Dot argument is not rank 2 (the rewrite reads dimensions 0 and 1 of both).
void ngraph::pass::ReshapeElimination::construct_dot_transpose_pattern()
{
    // dot(A,B).T = dot (B.T, A.T)
    auto dot_pred = [](std::shared_ptr<Node> n) {
        return static_cast<bool>(std::dynamic_pointer_cast<op::Dot>(n));
    };

    auto pdot = std::make_shared<pattern::op::Label>(element::f32, Shape{2, 1}, dot_pred);
    auto preshape = std::make_shared<op::Reshape>(pdot, AxisVector{1, 0}, Shape{1, 2});

    ngraph::pattern::gr_callback_fn callback = [](pattern::Matcher& m) {
        NGRAPH_DEBUG << "In callback for construct_dot_transpose_pattern against node = "
                     << m.match_root()->get_name();

        // Null node returned on every "do not rewrite" path.
        std::shared_ptr<Node> nn;

        auto mtranspose = std::dynamic_pointer_cast<op::Reshape>(m.match_root());
        if (!mtranspose)
        {
            NGRAPH_DEBUG << "Match root isn't a Reshape.";
            return nn;
        }

        // this also checks the rank
        if (mtranspose->get_input_order() != AxisVector{1, 0})
        {
            NGRAPH_DEBUG << "Reshape isn't transpose. "
                         << vector_to_string(mtranspose->get_input_order());
            return nn;
        }

        auto mdot = mtranspose->get_input_op(0);
        if (mdot->get_shape().size() != 2)
        {
            NGRAPH_DEBUG << "Dot has the wrong shape. " << vector_to_string(mdot->get_shape());
            return nn;
        }

        // A rank-2 Dot result does not imply rank-2 arguments, and the transposes built
        // below index dimensions 0 and 1 of each argument, so check both ranks first.
        auto arg0 = mdot->get_input_op(0);
        if (arg0->get_shape().size() != 2)
        {
            NGRAPH_DEBUG << "Arg0 has the wrong shape. " << vector_to_string(arg0->get_shape());
            return nn;
        }

        auto arg1 = mdot->get_input_op(1);
        if (arg1->get_shape().size() != 2)
        {
            NGRAPH_DEBUG << "Arg1 has the wrong shape. " << vector_to_string(arg1->get_shape());
            return nn;
        }

        auto reshape0_shape = Shape{arg0->get_shape().at(1), arg0->get_shape().at(0)};
        auto reshape0 = std::make_shared<op::Reshape>(arg0, AxisVector{1, 0}, reshape0_shape);

        auto reshape1_shape = Shape{arg1->get_shape().at(1), arg1->get_shape().at(0)};
        auto reshape1 = std::make_shared<op::Reshape>(arg1, AxisVector{1, 0}, reshape1_shape);

        // Arguments are swapped: the replacement is dot(B.T, A.T).
        std::shared_ptr<Node> tdot = std::make_shared<op::Dot>(reshape1, reshape0);
        return tdot;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(preshape, callback);
    this->add_matcher(m);
}
......@@ -32,11 +32,13 @@ public:
ReshapeElimination()
: GraphRewrite()
{
construct_dot_transpose_pattern();
construct_identity_reshape_pattern();
construct_reshapex2_pattern();
}
private:
void construct_dot_transpose_pattern();
void construct_identity_reshape_pattern();
void construct_reshapex2_pattern();
};
......@@ -72,6 +72,7 @@
#include "ngraph/ops/remainder.hpp"
#include "ngraph/ops/replace_slice.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/ops/reverse.hpp"
#include "ngraph/ops/select.hpp"
#include "ngraph/ops/select_and_scatter.hpp"
......@@ -239,7 +240,7 @@ namespace ngraph
const Shape& arg0_shape = cg->get_arg0_shape(); //W
const Shape& arg1_shape = cg->get_arg1_shape(); //x
const Shape& arg2_shape = args[2].get_shape(); //bias (C)
const Shape& arg2_shape = node->get_shape(); //bias (C)
static const char* ctranspose = "cblas::Transpose::Transpose, ";
static const char* cnotranspose = "cblas::Transpose::None, ";
......@@ -269,16 +270,23 @@ namespace ngraph
writer << "{ // " << node->get_name() << "\n";
writer.indent++;
const char* cbeta = "0.0f";
if (args.size() > 2)
{
writer << "memcpy(" << out[0].get_name() << ", " << args[2].get_name() << ", "
<< out[0].get_size() * out[0].get_element_type().size() << ");\n";
cbeta = "1.0f";
}
writer << "cblas::cblas_sgemm("
<< "cblas::Layout::RowMajor, " << tranpose_a << tranpose_b << m << ", " << n
<< ", " << k << ",\n"
<< " 1.0f, " << args[0].get_name() << ", " << max(1UL, lda) << ", "
<< args[1].get_name() << ", " << max(1UL, ldb) << ", 1.0f,\n"
<< args[1].get_name() << ", " << max(1UL, ldb) << ", " << cbeta << ",\n"
<< " " << out[0].get_name() << ", " << max(1UL, arg2_shape[1])
<< ");\n";
writer.indent--;
writer << "}\n";
}
......@@ -293,14 +301,26 @@ namespace ngraph
auto gamma_shape = args[0].get_shape();
auto beta_shape = args[1].get_shape();
auto input_shape = args[2].get_shape();
auto mean_shape = args[3].get_shape();
auto variance_shape = args[4].get_shape();
auto result_shape = out[0].get_shape();
auto mean_shape = out[1].get_shape();
auto variance_shape = out[2].get_shape();
// get input element type
const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
args[2].get_element_type());
const string& gamma_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 0));
const string& beta_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 1));
if (gamma_format.compare("memory::format::x") != 0 &&
beta_format.compare("memory::format::x") != 0)
{
throw std::runtime_error(
"gamma layout->" + gamma_format + ", beta layout->" + beta_format +
" should match and both should have memory::format::x format");
}
writer << "{\n";
writer.indent++;
......@@ -321,16 +341,20 @@ namespace ngraph
// get the eps value from the bn node
writer << "auto epsilon = " << batchnorm->get_eps_value() << ";\n";
const string& input_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 2));
const string& result_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_output_mkldnn_format(node, 0));
// Bind to CPU engine
writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
// create memory descriptors
writer << "memory::desc input_data_desc = memory::desc({" << join(input_shape)
<< "}, " << et << ", memory::format::nchw);\n";
<< "}, " << et << ", " << input_format << ");\n";
// TODO define weights by stacking gamma and beta values
writer << "memory::desc weights_desc = memory::desc({" << join(weights_shape)
<< "}, " << et << ", memory::format::nc);\n";
writer << "memory::desc result_desc = memory::desc({" << join(result_shape) << "}, "
<< et << ", memory::format::nchw);\n";
<< et << ", " << result_format << ");\n";
writer << "memory::desc mean_desc = memory::desc({" << join(mean_shape) << "}, "
<< et << ", memory::format::x);\n";
writer << "memory::desc variance_desc = memory::desc({" << join(variance_shape)
......@@ -341,17 +365,17 @@ namespace ngraph
<< args[2].get_name() << ");\n";
writer << "memory weights = memory({weights_desc, cpu_engine}, bn_weights.data()"
<< ");\n";
writer << "memory mean = memory({mean_desc, cpu_engine}, " << args[3].get_name()
<< ");\n";
writer << "memory variance = memory({variance_desc, cpu_engine}, "
<< args[4].get_name() << ");\n";
writer << "memory result = memory({result_desc, cpu_engine}, " << out[0].get_name()
<< ");\n";
writer << "memory mean = memory({mean_desc, cpu_engine}, " << out[1].get_name()
<< ");\n";
writer << "memory variance = memory({variance_desc, cpu_engine}, "
<< out[2].get_name() << ");\n";
// create batchnorm descriptor
writer << "batch_normalization_forward::desc bn_fprop_desc = "
"batch_normalization_forward::desc(forward_training,"
<< "input_data_desc, epsilon, use_global_stats|use_scale_shift);\n";
<< "input_data_desc, epsilon, use_scale_shift);\n";
// bn fprop primitive descriptor
writer
<< "batch_normalization_forward::primitive_desc bn_fprop_prim_desc = "
......@@ -360,8 +384,8 @@ namespace ngraph
// create a batchnorm fprop primitive
writer << "batch_normalization_forward bn_fprop = "
"batch_normalization_forward(bn_fprop_prim_desc, "
"primitive::at(input_data),primitive::at(mean), primitive::at(variance),"
<< "primitive::at(weights), result); \n";
"primitive::at(input_data),"
<< "primitive::at(weights), result, mean, variance); \n";
// create stream and execute
writer << "stream s = stream(stream::kind::eager);\n"
......@@ -3389,6 +3413,15 @@ namespace ngraph
}
}
}
// Writes the generated-code call to kernel::result<...> for a Result node: the first
// argument's name, the first output's name, and the element count of the node's shape.
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Result)
{
    writer << "kernel::result<" << out[0].get_type() << ">(" << args[0].get_name() << ",\n"
           << " " << out[0].get_name() << ",\n"
           << " " << shape_size(node->get_shape()) << ");\n";
}
}
}
}
......
......@@ -82,6 +82,7 @@
#include "ngraph/ops/remainder.hpp"
#include "ngraph/ops/replace_slice.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/ops/reverse.hpp"
#include "ngraph/ops/select.hpp"
#include "ngraph/ops/select_and_scatter.hpp"
......@@ -112,6 +113,7 @@
#include "ngraph/runtime/cpu/pass/cpu_assignment.hpp"
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_layout.hpp"
#include "ngraph/runtime/cpu/pass/cpu_nop_elimination.hpp"
#ifdef NGRAPH_DISTRIBUTED
#include "ngraph/ops/allreduce.hpp"
......@@ -227,6 +229,7 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::Not), &runtime::cpu::CPU_Emitter::emit<op::Not>},
{TI(ngraph::op::MaxPool), &runtime::cpu::CPU_Emitter::emit<op::MaxPool>},
{TI(ngraph::op::Reverse), &runtime::cpu::CPU_Emitter::emit<op::Reverse>},
{TI(ngraph::op::Result), &runtime::cpu::CPU_Emitter::emit<op::Result>},
{TI(ngraph::op::ReduceWindow), &runtime::cpu::CPU_Emitter::emit<op::ReduceWindow>},
{TI(ngraph::op::SelectAndScatter), &runtime::cpu::CPU_Emitter::emit<op::SelectAndScatter>},
{TI(ngraph::op::AvgPool), &runtime::cpu::CPU_Emitter::emit<op::AvgPool>},
......@@ -268,6 +271,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
ngraph::pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUNopElimination>();
pass_manager.register_pass<ngraph::pass::CoreFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this);
......@@ -316,6 +320,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
#include "ngraph/runtime/kernel/relu.hpp"
#include "ngraph/runtime/kernel/replace_slice.hpp"
#include "ngraph/runtime/kernel/reshape.hpp"
#include "ngraph/runtime/kernel/result.hpp"
#include "ngraph/runtime/kernel/reverse.hpp"
#include "ngraph/runtime/kernel/select_and_scatter.hpp"
#include "ngraph/runtime/kernel/slice.hpp"
......@@ -604,6 +609,7 @@ using namespace ngraph::runtime;
}
// create output alias map
/*
size_t output_index = 0;
unordered_map<descriptor::TensorView*, vector<size_t>> output_alias_map;
vector<size_t> aliases;
......@@ -619,49 +625,18 @@ using namespace ngraph::runtime;
}
output_index++;
}
*/
// Add outputs to the variable name map
output_index = 0;
for (size_t i = 0; i < current_function->get_output_size(); ++i)
{
shared_ptr<Node> op = current_function->get_output_op(i);
shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view();
const element::Type& et = tv->get_tensor_view_type()->get_element_type();
bool parameter_as_output = false;
for (shared_ptr<ngraph::op::Parameter> param : current_function->get_parameters())
{
for (const descriptor::Output& pout : param->get_outputs())
{
shared_ptr<descriptor::TensorView> ptv = pout.get_tensor_view();
if (tv == ptv)
{
parameter_as_output = true;
writer << "memcpy(static_cast<" << et.c_type_string() << "*>(outputs["
<< output_index << "]), "
<< m_variable_name_map[ptv->get_tensor().get_name()] << ", "
<< ptv->get_tensor().size() << ");\n";
break;
}
}
}
if (!parameter_as_output && !contains(aliases, output_index))
{
if (contains(constants, tv.get()))
{
writer << "memcpy(outputs[" << output_index << "], "
<< tv->get_tensor().get_name() << ", " << tv->get_tensor().size()
<< ");\n";
}
else
{
string type = et.c_type_string();
string type = tv->get_tensor_view_type()->get_element_type().c_type_string();
stringstream ss;
ss << "((" << type << "*)(outputs[" << output_index << "]))";
ss << "((" << type << "*)(outputs[" << i << "]))";
m_variable_name_map[tv->get_tensor().get_name()] = ss.str();
}
}
output_index++;
}
for (shared_ptr<Node> node : current_function->get_ordered_ops())
{
......@@ -751,7 +726,6 @@ using namespace ngraph::runtime;
// Emit operation epilogue
if (!node->is_parameter() && !node->is_constant())
{
handle_output_alias(writer, *node, output_alias_map);
if (m_emit_timing)
{
emit_debug_function_exit(writer, node.get(), in, out);
......@@ -888,35 +862,6 @@ using namespace ngraph::runtime;
}
}
// Emits, into the generated source, the copies needed when one tensor view
// backs several function outputs.
//
// For every output of `node`, the tensor view is looked up in
// `output_alias_map`. When the view maps to more than one output index, a
// braced block of memcpy statements is written that copies the buffer of the
// first listed index into each of the remaining indices.
//
// writer           - code writer receiving the generated statements
// node             - op whose outputs are inspected
// output_alias_map - tensor view -> list of function output indices
void runtime::cpu::CPU_ExternalFunction::handle_output_alias(
    codegen::CodeWriter& writer,
    const Node& node,
    const unordered_map<descriptor::TensorView*, vector<size_t>>& output_alias_map)
{
    for (const descriptor::Output& node_output : node.get_outputs())
    {
        shared_ptr<descriptor::TensorView> tensor_view = node_output.get_tensor_view();
        auto alias_entry = output_alias_map.find(tensor_view.get());
        if (alias_entry == output_alias_map.end())
        {
            // This tensor view is not tracked in the alias map.
            continue;
        }

        const vector<size_t>& alias_indices = alias_entry->second;
        if (alias_indices.size() <= 1)
        {
            // A single index means there is nothing to duplicate.
            continue;
        }

        writer << "{ // handle output alias for previous op\n";
        writer.indent++;
        // Index 0 is the source buffer; every later index receives a copy.
        for (size_t k = 1; k < alias_indices.size(); k++)
        {
            writer << "memcpy(static_cast<void*>(outputs[" << alias_indices[k]
                   << "]), static_cast<void*>(outputs[" << alias_indices[0] << "]), "
                   << tensor_view->get_tensor().size() << ");\n";
        }
        writer.indent--;
        writer << "}\n";
    }
}
shared_ptr<ngraph::runtime::CallFrame> runtime::cpu::CPU_ExternalFunction::make_call_frame()
{
if (!m_is_compiled)
......
......@@ -21,13 +21,14 @@
std::shared_ptr<ngraph::Node>
ngraph::op::MatmulBias::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 2)
if (new_args.size() != 2 && new_args.size() != 3)
{
throw ngraph_error("Incorrect number of new arguments");
}
return std::make_shared<MatmulBias>(new_args.at(0),
new_args.at(1),
new_args.at(1),
new_args.size() == 3 ? new_args.at(2) : nullptr,
m_shape_w,
m_shape_x,
m_transpose_w,
......@@ -41,7 +42,9 @@ ngraph::op::MatmulBias::MatmulBias(std::shared_ptr<ngraph::Node> W,
Shape shape_x,
bool transpose_w,
bool transpose_x)
: RequiresTensorViewArgs("MatMulBias", {W, x, b})
: RequiresTensorViewArgs("MatMulBias",
b == nullptr ? std::vector<std::shared_ptr<Node>>{W, x}
: std::vector<std::shared_ptr<Node>>{W, x, b})
, m_shape_w(shape_w)
, m_shape_x(shape_x)
, m_transpose_w(transpose_w)
......@@ -74,8 +77,12 @@ ngraph::op::MatmulBias::MatmulBias(std::shared_ptr<ngraph::Node> W,
}
Shape dot_shape{shape_w.at(1 - dot_dimension_w), shape_x.at(1 - dot_dimension_x)};
NGRAPH_DEBUG << "dot_shape shape = " << vector_to_string(dot_shape)
<< " , b shape = " << vector_to_string(b->get_shape());
NGRAPH_DEBUG << "dot_shape shape = " << vector_to_string(dot_shape);
if (b)
{
NGRAPH_DEBUG << "b shape = " << vector_to_string(b->get_shape());
}
add_output(W->get_element_type(), dot_shape);
}
This diff is collapsed.
......@@ -38,11 +38,17 @@ public:
// Constructs the fusion pass on top of GraphRewrite and then calls every
// construct_* helper declared in the private section, in the order listed.
// NOTE(review): each helper presumably registers one fusion pattern with the
// GraphRewrite base, so the call order may matter -- confirm against the
// helper definitions before reordering.
CPUFusion()
: GraphRewrite()
{
construct_gemm_pattern();
construct_matmul_pattern();
construct_matmulbias_pattern();
construct_fprop_bn();
construct_zero_padded_reshaped_conv();
construct_zero_padded_conv();
}
private:
void construct_gemm_pattern();
void construct_matmul_pattern();
void construct_matmulbias_pattern();
void construct_fprop_bn();
void construct_zero_padded_reshaped_conv();
void construct_zero_padded_conv();
};
......@@ -31,6 +31,7 @@
#include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/op.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
......@@ -629,6 +630,16 @@ namespace ngraph
}
}
// Layout handler for op::Result: the single output is assigned the same
// MKLDNN memory format that input 0 already carries.
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::Result)
{
    vector<memory::format> output_formats;
    output_formats.push_back(
        runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node.get(), 0));
    set_output_layouts(node, output_formats);
}
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::Relu)
{
......@@ -699,6 +710,7 @@ namespace ngraph
#define TI(x) type_index(typeid(x))
static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
{TI(ngraph::op::Add), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Add>},
{TI(ngraph::op::Convolution), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Convolution>},
{TI(ngraph::op::ConvolutionBackpropData),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::ConvolutionBackpropData>},
......@@ -708,6 +720,7 @@ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
{TI(ngraph::op::AvgPoolBackprop),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPoolBackprop>},
{TI(ngraph::op::Relu), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Relu>},
{TI(ngraph::op::Result), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Result>},
{TI(ngraph::op::ReluBackprop),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::ReluBackprop>},
};
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <memory>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
#include "cpu_nop_elimination.hpp"
#include "ngraph/ops/pad.hpp"
#define TI(x) std::type_index(typeid(x))
#define HANDLER_DECL(x) \
static bool x(const std::shared_ptr<ngraph::Function>& function, \
const std::shared_ptr<ngraph::Node>& node)
// Handler for op::Pad nodes.
//
// A Pad whose input shape equals its output shape is replaced in `function`
// by the op feeding its input 0. Returns true when that replacement was made
// and false when the node was left in place.
HANDLER_DECL(eliminate_pad)
{
    auto pad_op = std::dynamic_pointer_cast<ngraph::op::Pad>(node);
    const bool shape_unchanged = pad_op->get_input_shape(0) == pad_op->get_output_shape(0);
    if (!shape_unchanged)
    {
        return false;
    }

    function->replace_node(node, node->get_input_op(0));
    return true;
}
static const std::unordered_map<std::type_index,
std::function<bool(const std::shared_ptr<ngraph::Function>&,
const std::shared_ptr<ngraph::Node>&)>>
dispatcher{{TI(ngraph::op::Pad), &eliminate_pad}};
// Visits every op returned by function->get_ops() and, when the op's dynamic
// type has an entry in `dispatcher`, runs that handler on it.
//
// Returns true if at least one handler returned true, false otherwise.
bool ngraph::runtime::cpu::pass::CPUNopElimination::run_on_function(
    std::shared_ptr<ngraph::Function> function)
{
    bool any_change = false;

    for (const auto& op : function->get_ops())
    {
        // Bind a reference first: applying typeid directly to `*op` triggers
        // the warning [-Wpotentially-evaluated-expression].
        const Node& op_ref = *op;
        auto entry = dispatcher.find(TI(op_ref));
        if (entry == dispatcher.end())
        {
            continue;
        }

        // The handler is always invoked; its result only ever raises the flag.
        if (entry->second(function, op))
        {
            any_change = true;
        }
    }

    return any_change;
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/pass/pass.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace pass
{
// Function-level pass for the CPU backend. Only the interface is declared
// here; run_on_function is defined out of line.
class CPUNopElimination : public ngraph::pass::FunctionPass
{
public:
// Runs the pass over `function`. The out-of-line definition dispatches each
// op to a per-type handler and returns true when at least one handler
// reported a change.
bool run_on_function(std::shared_ptr<ngraph::Function> function) override;
};
} // namespace pass
} // namespace cpu
} // namespace runtime
} // namespace ngraph
......@@ -18,6 +18,7 @@
#include <cstdlib>
#include <iomanip>
#include "ngraph/ops/result.hpp"
#include "ngraph/runtime/host_tensor_view.hpp"
#include "ngraph/runtime/interpreter/int_call_frame.hpp"
......@@ -52,32 +53,17 @@ void runtime::interpreter::INT_CallFrame::call(
tensor_map.insert({tv, input_tvs[arg_index++]});
}
}
std::vector<size_t> aliased_outputs;
for (size_t i = 0; i < output_tvs.size(); i++)
{
shared_ptr<Node> op = function->get_output_op(i);
descriptor::TensorView* tv = op->get_output_tensor_view(0).get();
string name = tv->get_tensor().get_name();
if (contains_key(tensor_map, tv))
for (size_t i = 0; i < function->get_output_size(); i++)
{
if (op->description() == "Parameter")
auto output_op = function->get_output_op(i);
if (!std::dynamic_pointer_cast<op::Result>(output_op))
{
// Here we handle the special case where an output is just a copy of an input
memcpy(output_tvs[i]->get_data_ptr(),
tensor_map.at(tv)->get_data_ptr(),
tv->get_tensor().size());
throw ngraph_error("One of function's outputs isn't op::Result");
}
else
{
// This is a computed value returned more than once and will need to be copied at the end
aliased_outputs.push_back(i);
}
}
else
{
descriptor::TensorView* tv = function->get_output_op(i)->get_output_tensor_view(0).get();
tensor_map.insert({tv, output_tvs[i]});
}
}
// Invoke computation
for (shared_ptr<Node> op : function->get_ordered_ops())
......@@ -163,29 +149,6 @@ void runtime::interpreter::INT_CallFrame::call(
}
}
}
for (size_t i : aliased_outputs)
{
shared_ptr<Node> op = function->get_output_op(i);
size_t first_output;
for (first_output = 0; first_output <= i; ++first_output)
{
if (function->get_output_op(first_output) == op)
{
break;
}
}
if (first_output == i)
{
throw ngraph_error("Internal error: duplicate output missing");
}
descriptor::TensorView* tv = op->get_output_tensor_view(0).get();
string name = tv->get_tensor().get_name();
// Here we handle the special case where an output is just a copy of an input
memcpy(output_tvs[i]->get_data_ptr(),
output_tvs[first_output]->get_data_ptr(),
tv->get_tensor().size());
}
}
void runtime::interpreter::INT_CallFrame::generate_calls(
......
......@@ -39,6 +39,7 @@
#include "ngraph/ops/reduce_window.hpp"
#include "ngraph/ops/replace_slice.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/ops/reverse.hpp"
#include "ngraph/ops/select_and_scatter.hpp"
#include "ngraph/ops/slice.hpp"
......@@ -89,6 +90,7 @@
#include "ngraph/runtime/kernel/relu.hpp"
#include "ngraph/runtime/kernel/replace_slice.hpp"
#include "ngraph/runtime/kernel/reshape.hpp"
#include "ngraph/runtime/kernel/result.hpp"
#include "ngraph/runtime/kernel/reverse.hpp"
#include "ngraph/runtime/kernel/select.hpp"
#include "ngraph/runtime/kernel/select_and_scatter.hpp"
......@@ -720,6 +722,13 @@ private:
reshape->get_input_order(),
out[0]->get_shape());
}
else if (node_op == "Result")
{
ngraph::op::Result* res = dynamic_cast<ngraph::op::Result*>(&node);
kernel::result(reinterpret_cast<T*>(args[0]->get_data_ptr()),
reinterpret_cast<T*>(out[0]->get_data_ptr()),
shape_size(res->get_shape()));
}
else if (node_op == "Reverse")
{
ngraph::op::Reverse* reverse = dynamic_cast<ngraph::op::Reverse*>(&node);
......
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#pragma once
#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>
#include "ngraph/shape.hpp"
namespace ngraph
{
    namespace runtime
    {
        namespace kernel
        {
            /// @brief Reference kernel for op::Result: copies the argument buffer
            ///        into the output buffer unchanged.
            ///
            /// Implemented with std::copy_n from <algorithm>, which this header
            /// already includes, rather than memcpy, whose header <cstring> is
            /// not included here. The copy is element-wise and typed, and a
            /// zero `count` is a no-op.
            ///
            /// @param arg   Pointer to the first of `count` source elements.
            /// @param out   Pointer to the first of `count` destination elements.
            /// @param count Number of elements (not bytes) to copy.
            template <typename T>
            void result(T* arg, T* out, size_t count)
            {
                std::copy_n(arg, count, out);
            }
        }
    }
}
......@@ -64,6 +64,7 @@
#include "ngraph/ops/remainder.hpp"
#include "ngraph/ops/replace_slice.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/result.hpp"
#include "ngraph/ops/reverse.hpp"
#include "ngraph/ops/select.hpp"
#include "ngraph/ops/select_and_scatter.hpp"
......@@ -327,7 +328,7 @@ static shared_ptr<ngraph::Function>
else if (node_op == "BatchNorm")
{
auto epsilon = node_js.at("eps").get<double>();
node = make_shared<op::BatchNorm>(epsilon, args[0], args[1], args[2], args[3], args[4]);
node = make_shared<op::BatchNorm>(epsilon, args[0], args[1], args[2]);
}
else if (node_op == "BatchNormBackprop")
{
......@@ -667,6 +668,10 @@ static shared_ptr<ngraph::Function>
auto output_shape = node_js.at("output_shape").get<vector<size_t>>();
node = make_shared<op::Reshape>(args[0], input_order, output_shape);
}
else if (node_op == "Result")
{
node = make_shared<op::Result>(args[0]);
}
else if (node_op == "Reverse")
{
auto reversed_axes = node_js.at("reversed_axes").get<set<size_t>>();
......@@ -1061,6 +1066,9 @@ static json write(const Node& n)
node["input_order"] = tmp->get_input_order();
node["output_shape"] = tmp->get_output_shape();
}
else if (node_op == "Result")
{
}
else if (node_op == "Reverse")
{
auto tmp = dynamic_cast<const op::Reverse*>(&n);
......
......@@ -25,9 +25,12 @@
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/node.hpp"
#include "ngraph/ops/result_vector.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/util.hpp"
#include <iostream>
using namespace std;
std::string ngraph::to_cplusplus_sourcecode_literal(bool val)
......@@ -239,10 +242,21 @@ ngraph::FpropCache ngraph::cache_fprop(std::shared_ptr<ngraph::Function> fprop,
}
// create the new outputs for fprop and the new fprop function
NodeVector fprop_outputs{fprop->get_results()};
fprop_outputs.insert(fprop_outputs.end(),
fprop_cache.fprop_output_nodes.begin(),
fprop_cache.fprop_output_nodes.end());
ResultVector fprop_outputs;
for (auto fpr : fprop->get_results())
{
fprop_outputs.push_back(fpr);
}
for (auto fpir : fprop_cache.fprop_output_nodes)
{
if (std::dynamic_pointer_cast<op::Result>(fpir))
{
throw ngraph_error("Expected op::Result in fprop->get_results()");
}
fprop_outputs.push_back(std::make_shared<op::Result>(fpir));
}
fprop_cache.fprop = std::make_shared<Function>(fprop_outputs, fprop->get_parameters());
......@@ -251,10 +265,15 @@ ngraph::FpropCache ngraph::cache_fprop(std::shared_ptr<ngraph::Function> fprop,
ngraph::clone_nodes(bprop->get_ops(), node_param_map);
// get cloned bprop results
NodeVector cloned_results;
ResultVector cloned_results;
for (auto node : bprop->get_results())
{
cloned_results.push_back(node_param_map.get(node));
auto result = std::dynamic_pointer_cast<op::Result>(node_param_map.get(node));
if (!result)
{
throw ngraph_error("Expected op::Result values for op::Result keys in node_param_map");
}
cloned_results.push_back(result);
}
// get clone bprop parameters
......
......@@ -40,7 +40,7 @@ TEST(build_graph, build_simple)
auto cluster_0 = make_shared<Function>(dot, op::ParameterVector{arg0, arg1, arg2, arg3});
ASSERT_EQ(cluster_0->get_output_op(0), dot);
ASSERT_EQ(cluster_0->get_output_op(0)->get_input_op(0), dot);
}
// Check node comparisons
......
This diff is collapsed.
......@@ -218,11 +218,6 @@ public:
{
map_parameter_to_index[f->get_parameters().at(i)] = i;
}
unordered_map<shared_ptr<Node>, size_t> map_result_to_index;
for (size_t i = 0; i < f->get_results().size(); ++i)
{
map_result_to_index[f->get_results().at(i)] = i;
}
// Parameter's source is either itself, or the output node of the upstream function
unordered_map<shared_ptr<op::Parameter>, shared_ptr<Node>> map_parameter_to_source_node;
......@@ -231,6 +226,13 @@ public:
vector<shared_ptr<Function>> funcs =
split_function_by_placement(f, map_parameter_to_source_node);
auto main_func = funcs.back();
unordered_map<shared_ptr<Node>, size_t> map_result_to_index;
for (size_t i = 0; i < main_func->get_results().size(); ++i)
{
map_result_to_index[main_func->get_results().at(i)] = i;
}
// Make call frames
vector<shared_ptr<runtime::CallFrame>> call_frames;
for (auto func : funcs)
......
......@@ -47,14 +47,22 @@ TEST(liveness, constant)
auto tmp = f->get_ordered_ops();
vector<shared_ptr<Node>> sorted{tmp.begin(), tmp.end()};
ASSERT_EQ(2, sorted.size());
ASSERT_EQ(3, sorted.size());
EXPECT_EQ(0, sorted[0]->liveness_live_list.size());
EXPECT_EQ(0, sorted[0]->liveness_new_list.size());
EXPECT_EQ(0, sorted[0]->liveness_free_list.size());
EXPECT_EQ(0, sorted[1]->liveness_live_list.size());
EXPECT_EQ(0, sorted[1]->liveness_new_list.size());
//op::Negative is live on output to op::Result
EXPECT_EQ(1, sorted[1]->liveness_live_list.size());
//op::Negative is new
EXPECT_EQ(1, sorted[1]->liveness_new_list.size());
EXPECT_EQ(0, sorted[1]->liveness_free_list.size());
//op::Negative is live on input to op::Result
EXPECT_EQ(1, sorted[2]->liveness_live_list.size());
EXPECT_EQ(0, sorted[2]->liveness_new_list.size());
//op::Negative is freed
EXPECT_EQ(1, sorted[2]->liveness_free_list.size());
}
TEST(liveness, liveness)
......
......@@ -234,5 +234,5 @@ TEST(memory_layout, constant)
pass_manager.run_passes(f);
auto sorted = f->get_ordered_ops();
size_t temporary_pool_size = f->get_temporary_pool_size();
EXPECT_EQ(0, temporary_pool_size);
EXPECT_EQ(4, temporary_pool_size);
}
......@@ -42,6 +42,7 @@
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/serializer.hpp"
#include "util/matcher.hpp"
#include "util/test_tools.hpp"
using namespace ngraph;
using namespace std;
......@@ -89,9 +90,9 @@ bool sum_predicate(std::shared_ptr<Node> gn)
return false;
}
NGRAPH_DEBUG << "looking at function's result "
<< r->get_functions()[0]->get_result()->get_name();
if (auto sum = std::dynamic_pointer_cast<op::Add>(r->get_functions()[0]->get_result()))
auto result = r->get_functions()[0]->get_result()->get_input_op(0);
NGRAPH_DEBUG << "looking at function's result " << result->get_name();
if (auto sum = std::dynamic_pointer_cast<op::Add>(result))
{
auto parm1 = std::dynamic_pointer_cast<op::Parameter>(sum->get_input_op(0));
auto parm2 = std::dynamic_pointer_cast<op::Parameter>(sum->get_input_op(1));
......@@ -297,7 +298,7 @@ TEST(pattern, graph_rewrite)
ASSERT_TRUE(graph_b->get_output_inputs(0).empty());
auto expected = ngraph::NodeVector{a, b, a, c, b};
ASSERT_TRUE(f->get_results() == expected);
ASSERT_TRUE(count_ops_of_type<op::Add>(f) == 0);
}
{
......
......@@ -82,3 +82,27 @@ TEST(reshape_elimination, bn_bprop_rewrite)
size_t count_after = count_ops_of_type<op::Reshape>(func);
ASSERT_TRUE(count_after < count_before);
}
// Builds Abs(Reshape(Dot(W, x))), where the reshape swaps the two axes, runs
// ReshapeElimination, and checks that Abs is now fed directly by a Dot whose
// two arguments are reshapes of x and W (in that order) and whose shape is
// {1, 2}.
TEST(reshape_elimination, dot_transpose_to_dot_w_transpose_args)
{
    auto weights = make_shared<op::Parameter>(element::f32, Shape{2, 4});
    auto input = make_shared<op::Parameter>(element::f32, Shape{4, 1});

    auto product = make_shared<op::Dot>(weights, input);
    auto transposed = std::make_shared<op::Reshape>(product, AxisVector{1, 0}, Shape{1, 2});
    auto root = make_shared<op::Abs>(transposed);

    pass::Manager manager;
    manager.register_pass<pass::ReshapeElimination>();
    auto fn = make_shared<Function>(root, op::ParameterVector{weights, input});
    manager.run_passes(fn);

    // After the pass, the op feeding Abs must be the rewritten Dot.
    auto rewritten = root->get_input_op(0);
    ASSERT_TRUE(std::dynamic_pointer_cast<op::Dot>(rewritten));

    auto lhs = rewritten->get_input_op(0);
    auto rhs = rewritten->get_input_op(1);
    ASSERT_TRUE(std::dynamic_pointer_cast<op::Reshape>(lhs));
    ASSERT_TRUE(std::dynamic_pointer_cast<op::Reshape>(rhs));
    ASSERT_EQ(lhs->get_input_op(0), input);
    ASSERT_EQ(rhs->get_input_op(0), weights);
    ASSERT_EQ(rewritten->get_shape(), (Shape{1, 2}));
}
......@@ -21,15 +21,8 @@
namespace ngraph
{
class Node;
class Function;
namespace runtime
{
class Backend;
class Manager;
}
namespace autodiff
{
/// @brief Returns a FunctionSpec for the backprop derivative of its argument.
......
......@@ -33,12 +33,13 @@ bool validate_list(const list<shared_ptr<Node>>& nodes)
auto node_tmp = *it;
auto dependencies_tmp = node_tmp->get_input_ops();
vector<Node*> dependencies;
for (shared_ptr<Node> n : dependencies_tmp)
{
dependencies.push_back(n.get());
}
auto tmp = it++;
for (; tmp != nodes.rend(); tmp++)
auto tmp = it;
for (tmp++; tmp != nodes.rend(); tmp++)
{
auto dep_tmp = *tmp;
auto found = find(dependencies.begin(), dependencies.end(), dep_tmp.get());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment