Commit af7c81a3 authored by Scott Cyphers, committed by Robert Kimball

Remove internal functions (#3079)

parent 52c0827d
@@ -385,7 +385,6 @@ set (SRC
    pass/liveness.hpp
    pass/manager.cpp
    pass/manager.hpp
-   pass/manager_state.cpp
    pass/manager_state.hpp
    pass/memory_layout.cpp
    pass/memory_layout.hpp
......
@@ -108,33 +108,6 @@ void ngraph::traverse_nodes(const NodeVector& subgraph_results,
        }
    }

-void ngraph::traverse_functions(std::shared_ptr<ngraph::Function> p,
-                                std::function<void(shared_ptr<Function>)> f)
-{
-    std::unordered_set<shared_ptr<Function>> instances_seen;
-    deque<shared_ptr<Function>> stack;
-    stack.push_front(p);
-
-    while (stack.size() > 0)
-    {
-        shared_ptr<Function> func = stack.front();
-        if (instances_seen.find(func) == instances_seen.end())
-        {
-            instances_seen.insert(func);
-            f(func);
-        }
-        stack.pop_front();
-        for (shared_ptr<Node> op : func->get_ops())
-        {
-            for (shared_ptr<Function> fp : op->get_functions())
-            {
-                stack.push_front(fp);
-            }
-        }
-    }
-}
-
 NodeVector ngraph::find_common_args(std::shared_ptr<Node> target, std::shared_ptr<Node> replacement)
 {
     std::unordered_set<std::shared_ptr<Node>> target_args;
......
@@ -70,8 +70,12 @@ namespace ngraph
                         bool include_control_deps,
                         const NodeVector& subgraph_params = {});

-    void traverse_functions(std::shared_ptr<Function> p,
-                            std::function<void(std::shared_ptr<Function>)> f);
+    inline void traverse_functions(std::shared_ptr<Function> p,
+                                   std::function<void(std::shared_ptr<Function>)> f)
+        NGRAPH_DEPRECATED("Replace with f(p)")
+    {
+        f(p);
+    };

     void replace_node(std::shared_ptr<Node> target, std::shared_ptr<Node> replacement);
......
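Note: the header keeps traverse_functions only as an inline, NGRAPH_DEPRECATED shim that invokes the callback on the root function, since functions no longer nest. A minimal sketch (not part of the commit) of how an existing caller behaves now and how it should migrate:

    #include <iostream>
    #include <memory>
    #include "ngraph/function.hpp"
    #include "ngraph/graph_util.hpp"

    using namespace ngraph;

    void legacy_caller(std::shared_ptr<Function> f)
    {
        // Still compiles, but the compiler emits the NGRAPH_DEPRECATED message
        // "Replace with f(p)", and the lambda now runs exactly once, on f itself.
        traverse_functions(f, [](std::shared_ptr<Function> fn) {
            std::cout << fn->get_name() << "\n";
        });

        // Preferred form after this change: operate on f directly.
        std::cout << f->get_name() << "\n";
    }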
@@ -339,11 +339,6 @@ void Node::add_control_dependency(std::shared_ptr<Node> node)
     m_control_dependencies.insert(node);
 }

-std::vector<std::shared_ptr<Function>> Node::get_functions() const
-{
-    return std::vector<std::shared_ptr<Function>>{};
-}
-
 namespace ngraph
 {
     ostream& operator<<(ostream& out, const Node& node)
......
@@ -323,8 +323,6 @@ namespace ngraph
        // Will be replaced with an OutputVector version
        virtual std::shared_ptr<Node> copy_with_new_args(const NodeVector& new_args) const = 0;

-       virtual std::vector<std::shared_ptr<Function>> get_functions() const;
-
        /// True if this and node have one output with same element type and shape
        bool has_same_type(std::shared_ptr<const Node> node) const;
......
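For context: the base-class implementation removed above always returned an empty vector, and only ops that wrapped a nested Function overrode it. A hedged sketch of the kind of helper that can be deleted along with it (count_nested is hypothetical, not an nGraph API):

    #include <cstddef>
    #include <memory>
    #include "ngraph/node.hpp"

    using namespace ngraph;

    // Before this commit a pass could ask any node for nested functions:
    //     return op->get_functions().size();
    // After it, Node has no such accessor, so graph-wide logic can assume a
    // single flat function and the query reduces to a constant.
    size_t count_nested(const std::shared_ptr<Node>& /*op*/)
    {
        return 0;
    }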
@@ -61,27 +61,9 @@ void pass::Manager::run_passes(shared_ptr<Function> func, bool transitive)
 {
     bool profile_enabled = getenv("NGRAPH_PROFILE_PASS_ENABLE") != nullptr;

-    vector<std::pair<shared_ptr<Function>, bool>> fs;
-    if (transitive)
-    {
-        // find all functions
-        traverse_functions(func, [&](shared_ptr<Function> f) {
-            fs.push_back(std::make_pair(f, f->is_dynamic()));
-        });
-    }
-    else
-    {
-        fs = {std::make_pair(func, func->is_dynamic())};
-    }
-    set<shared_ptr<Function>> tfs;
-    std::vector<shared_ptr<Function>> f_array;
-    for (auto f_pair : fs)
-    {
-        shared_ptr<Function> f = f_pair.first;
-        tfs.insert(f);
-        f_array.push_back(f);
-    }
-    get_state().set_functions(tfs);
+    get_state().set_function(func);
+    vector<std::pair<shared_ptr<Function>, bool>> fs{std::make_pair(func, func->is_dynamic())};
+    vector<shared_ptr<Function>> f_array{func};

     size_t index = 0;
     stopwatch pass_timer;
......
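A minimal caller-side sketch (the pass selection is assumed, not from this commit): run_passes keeps its signature, but pass state now tracks exactly one function, so the transitive flag no longer discovers additional functions to run on.

    #include <memory>
    #include "ngraph/function.hpp"
    #include "ngraph/pass/liveness.hpp"
    #include "ngraph/pass/manager.hpp"

    using namespace ngraph;

    void run_example(std::shared_ptr<Function> f)
    {
        pass::Manager pm;
        pm.register_pass<pass::Liveness>(); // any registered pass works the same way
        pm.run_passes(f);                   // every pass runs on f and only on f
    }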
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <iostream>
#include <memory>
#include "ngraph/function.hpp"
#include "ngraph/log.hpp"
#include "ngraph/node.hpp"
#include "ngraph/pass/manager_state.hpp"
using namespace std;
using namespace ngraph;
const vector<shared_ptr<Function>>& pass::ManagerState::get_functions()
{
return m_function_list;
}
@@ -41,15 +41,6 @@ namespace ngraph
 class ngraph::pass::ManagerState
 {
 public:
-    const std::vector<std::shared_ptr<Function>>& get_functions();
-
-    template <typename T>
-    void set_functions(const T& collection)
-    {
-        m_function_list.clear();
-        m_function_list.insert(m_function_list.begin(), collection.begin(), collection.end());
-    }
-
     void set_visualize_tree_ops_map(const visualize_tree_ops_map_t& ops_map)
     {
         m_visualize_tree_ops_map = ops_map;
@@ -60,7 +51,15 @@ public:
         return m_visualize_tree_ops_map;
     }

+    void set_function(const std::shared_ptr<Function> function) { m_function = function; }
+    std::shared_ptr<Function> get_function() const { return m_function; }
+    std::vector<std::shared_ptr<Function>> get_functions() const
+        NGRAPH_DEPRECATED("Use get_function()")
+    {
+        return {m_function};
+    }
 private:
-    std::vector<std::shared_ptr<Function>> m_function_list;
     visualize_tree_ops_map_t m_visualize_tree_ops_map;
+    std::shared_ptr<Function> m_function;
 };
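A short sketch of reading the new state from a backend's point of view (the surrounding setup is assumed): get_function() is the preferred accessor, while the deprecated get_functions() now just wraps the same pointer in a one-element vector.

    #include <memory>
    #include "ngraph/function.hpp"
    #include "ngraph/pass/manager.hpp"

    using namespace ngraph;

    void inspect_state(pass::Manager& pm)
    {
        // Preferred: the single function the manager ran on.
        std::shared_ptr<Function> f = pm.get_state().get_function();

        // Deprecated: warns "Use get_function()" and returns {f}.
        // auto fs = pm.get_state().get_functions();

        if (f)
        {
            auto ops = f->get_ordered_ops(); // same list the backends consume below
            (void)ops;
        }
    }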
@@ -497,14 +497,7 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_
                                        femitter, node_function_map, common_function_string);
    pass_manager.run_passes(m_function);

-   unordered_map<shared_ptr<Function>, list<shared_ptr<Node>>> function_ordered_ops;
-   // only one function is allowed
-   NGRAPH_CHECK(pass_manager.get_state().get_functions().size() == 1,
-                "only one function is allowed");
-   for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
-   {
-       function_ordered_ops.insert({current_function, current_function->get_ordered_ops()});
-   }
+   list<shared_ptr<Node>> ordered_ops = m_function->get_ordered_ops();

    CodeWriter writer;
@@ -594,15 +587,12 @@ using namespace ngraph::runtime;
    writer << "// Declare debug timers\n";
    vector<string> names;
    size_t index = 0;
-   for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
-   {
-       for (shared_ptr<Node> node : function_ordered_ops.at(current_function))
+   for (shared_ptr<Node> node : ordered_ops)
    {
        if (!node->is_parameter() && !node->is_constant())
        {
            names.push_back(node->get_name());
            m_name_index_map.insert({node->get_name(), index++});
        }
    }
-   }
    writer << "ngraph::stopwatch timers[" << names.size() << "];\n";
@@ -644,28 +634,25 @@ using namespace ngraph::runtime;
    }

    writer << "// Declare all constants\n";
-   for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
-   {
-       for (shared_ptr<Node> node : function_ordered_ops.at(current_function))
+   for (shared_ptr<Node> node : ordered_ops)
    {
        ngraph::op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
        if (c)
        {
            m_active_constants.push_back(node);
            shared_ptr<descriptor::Tensor> tv = node->get_outputs()[0].get_tensor_ptr();
            string type = tv->get_element_type().c_type_string();
            writer << "static " << type << "* " << tv->get_name() << " = ((" << type << "*)("
                   << c->get_data_ptr() << "));\n";
            auto output_tensor = &node->get_output_tensor();
            auto tensor_set = get_tensor_set(output_tensor);
            // process all tensors in the set containing the output tensor of the constant
            for (auto& ele_t : tensor_set)
            {
                NGRAPH_CHECK(ele_t->get_pool_offset() == 0, "no offset set for constants");
                m_tensor_roles[ele_t->get_name()] = TensorRole::CONSTANT;
                m_variable_name_map[ele_t->get_name()] = output_tensor->get_name();
            }
        }
    }
-   }
@@ -694,379 +681,370 @@ using namespace ngraph::runtime;
    writer.block_end();
    writer << "\n";

-   for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
-   {
-       auto ordered_ops = function_ordered_ops.at(current_function);
    set<string> output_names;
-   for (shared_ptr<Node> op : current_function->get_results())
+   for (shared_ptr<Node> op : m_function->get_results())
    {
        shared_ptr<descriptor::Tensor> tv = op->get_output_tensor_ptr();
        output_names.insert(tv->get_name());
    }
    set<descriptor::Tensor*> constants;
    for (shared_ptr<Node> node : ordered_ops)
    {
        if (dynamic_cast<ngraph::op::Constant*>(node.get()))
        {
            shared_ptr<descriptor::Tensor> tv = node->get_outputs()[0].get_tensor_ptr();
            constants.insert(tv.get());
        }
    }

    bool temporaries_used = false;
    for (shared_ptr<Node> node : ordered_ops)
    {
        if (node->liveness_new_list.size() > 0)
        {
            temporaries_used = true;
        }
    }
    if (temporaries_used)
    {
-       m_memory_buffer_sizes.push_back(current_function->get_temporary_pool_size());
+       m_memory_buffer_sizes.push_back(m_function->get_temporary_pool_size());
    }

    // Indexing for Control Flags
    std::map<std::string, size_t> tensor_index_map;
    std::map<std::string, size_t> param_index_map;
    size_t tensor_index = 0;
    for (shared_ptr<Node> node : ordered_ops)
    {
        if (!node->is_parameter() && !node->is_constant())
        {
            for (const descriptor::Input& input : node->get_inputs())
            {
                const descriptor::Output& output = input.get_output();
                shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
                tensor_index_map.insert({tv->get_name(), tensor_index++});
            }
        }
    }

-   writer << "bool " << current_function->get_name() << "_t_en[" << tensor_index << "];\n";
+   writer << "bool " << m_function->get_name() << "_t_en[" << tensor_index << "];\n";

-   writer << "extern \"C\" void " << current_function->get_name() << func_params << "\n";
+   writer << "extern \"C\" void " << m_function->get_name() << func_params << "\n";
    writer << "{\n";
    writer.indent++;

    //deserialize and build mkldnn primitives
    if (m_mkldnn_emitter->get_mkldnn_descriptors_size() > 0)
    {
        writer << "if (ctx->first_iteration)\n";
        writer.block_begin();
        writer << "// read in memory descriptors and build mkldnn primitives\n";
        writer << "std::ifstream desc_file (\"" << m_desc_filename << "\", std::ios::binary);\n";
        writer << "deserialize_memory_descs_and_build_memory_primitives(" << m_desc_filename
               << ", cg_ctx, " << to_string(m_mkldnn_emitter->get_mkldnn_descriptors_size())
               << ");\n";
        writer.block_end();
    }

    // Execution tracing support
-   if (runtime::cpu::IsTracingEnabled() && current_function->get_name() == m_function_name)
+   if (runtime::cpu::IsTracingEnabled() && m_function->get_name() == m_function_name)
    {
        writer << "cpu::Timestamp start_ts;\n"
               << "int profiler_count = 0;\n\n";
    }

    if (temporaries_used)
    {
        writer << "size_t pool_base_ptr = (size_t) ctx->memory_buffers["
               << m_memory_buffer_sizes.size() - 1 << "]->get_ptr();\n";
        writer << "\n";
    }

-   writer << "bool* t_en = (bool*)" << current_function->get_name() << "_t_en;\n";
+   writer << "bool* t_en = (bool*)" << m_function->get_name() << "_t_en;\n";

    if (m_use_tbb)
    {
        writer << "\n";
        writer << "if (ctx->first_iteration) {\n";
        writer.indent++;
        writer << "tbb::flow::continue_node<tbb::flow::continue_msg>* "
                  "flowgraph_node_start"
               << " = new tbb::flow::continue_node<tbb::flow::continue_msg> "
                  "(*(cg_ctx->tbb_graph), [&](const tbb::flow::continue_msg &msg)\n{});\n";
    }

    // Add inputs to the variable name map
    size_t arg_index = 0;
-   for (shared_ptr<ngraph::op::Parameter> param : current_function->get_parameters())
+   for (shared_ptr<ngraph::op::Parameter> param : m_function->get_parameters())
    {
        for (size_t i = 0; i < param->get_output_size(); ++i)
        {
            auto output_tensor = &param->get_outputs().at(i).get_tensor();
            param_index_map[output_tensor->get_name()] = arg_index;
            auto tensor_set = get_tensor_set(output_tensor);
            // process all tensors in the set containing the output tensor of the parameter
            for (auto& ele_t : tensor_set)
            {
                const element::Type& et = ele_t->get_element_type();
                string type = et.c_type_string();
                stringstream ss;
                ss << "(((" << type << "*)(inputs[" << arg_index << "])) + "
                   << ele_t->get_pool_offset() / et.size() << ")";
                m_variable_name_map[ele_t->get_name()] = ss.str();
                m_tensor_roles[ele_t->get_name()] = TensorRole::INPUT;
            }
            arg_index++;
        }
    }

    // Add temporaries to the variable name map
    if (temporaries_used)
    {
        for (auto& ele : bufferID_to_tensorSets)
        {
            if (ele.second.first == TensorRole::INTERMEDIATE)
            {
                for (auto& ele_t : ele.second.second)
                {
                    stringstream ss;
                    ss << "((" << ele_t->get_element_type().c_type_string()
                       << "*)(pool_base_ptr + " << ele_t->get_pool_offset() << "))";
                    m_variable_name_map[ele_t->get_name()] = ss.str();
                    m_tensor_roles[ele_t->get_name()] = TensorRole::INTERMEDIATE;
                }
            }
        }
    }

    // Add outputs to the variable name map
-   for (size_t i = 0; i < current_function->get_output_size(); ++i)
+   for (size_t i = 0; i < m_function->get_output_size(); ++i)
    {
-       shared_ptr<Node> op = current_function->get_output_op(i);
+       shared_ptr<Node> op = m_function->get_output_op(i);
        auto output_tensor = &op->get_output_tensor();
        auto tensor_set = get_tensor_set(output_tensor);
        // process all tensors in the set containing the output tensor of the result
        for (auto& ele_t : tensor_set)
        {
            const element::Type& et = ele_t->get_element_type();
            string type = et.c_type_string();
            stringstream ss;
            ss << "(((" << type << "*)(outputs[" << i << "])) + "
               << ele_t->get_pool_offset() / et.size() << ")";
            m_variable_name_map[ele_t->get_name()] = ss.str();
            m_tensor_roles[ele_t->get_name()] = TensorRole::OUTPUT;
        }
    }

    for (shared_ptr<Node> node : ordered_ops)
    {
        auto& n = *node; // Work around a compiler warning (*node inside typeid may have effects
                         // with shared pointers, which is fine here but clang doesn't like it.)
        auto handler = dispatcher.find(type_index(typeid(n)));
        if (handler == dispatcher.end())
        {
            throw unsupported_op(node->description());
        }
        vector<TensorViewWrapper> in;
        vector<string> node_input_names;
        vector<string> node_output_names;
        for (const descriptor::Input& input : node->get_inputs())
        {
            const descriptor::Output& output = input.get_output();
            shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
            in.push_back(TensorViewWrapper(tv, m_variable_name_map[tv->get_name()]));
            node_input_names.emplace_back(tv->get_name());
        }
        vector<TensorViewWrapper> out;
        for (const descriptor::Output& output : node->get_outputs())
        {
            shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
            out.push_back(TensorViewWrapper(tv, m_variable_name_map[tv->get_name()]));
            node_output_names.emplace_back(tv->get_name());
        }

        // Emit operation prologue
        if (!node->is_parameter() && !node->is_constant())
        {
-           if (current_function->get_name() == m_function_name)
+           if (m_function->get_name() == m_function_name)
            {
                m_op_attrs.emplace_back(node->description(), node_output_names, node_input_names);
            }
            if (m_use_tbb)
            {
                writer << "tbb::flow::continue_node<tbb::flow::continue_msg>* "
                          "flowgraph_node_"
                       << node->get_name()
                       << " = new tbb::flow::continue_node<tbb::flow::continue_msg> "
                          "(*(cg_ctx->tbb_graph), [&](const tbb::flow::continue_msg &msg)\n{\n";
                writer.indent++;
            }
-           if (runtime::cpu::IsTracingEnabled() && current_function->get_name() == m_function_name)
+           if (runtime::cpu::IsTracingEnabled() && m_function->get_name() == m_function_name)
            {
                writer << "start_ts = cpu::Clock::now();\n";
            }
        }

        if (!node->is_parameter() && !node->is_constant())
        {
            writer << "\n// " << node->get_name() << "(";
            vector<string> parameter_nodes = node_input_names;
            parameter_nodes.insert(
                parameter_nodes.end(), node_output_names.begin(), node_output_names.end());
            writer << join(parameter_nodes);
            writer << ")\n";
        }

        // Emit operation body
        if (!node->is_parameter() && !node->is_constant())
        {
            emit_debug_function_entry(writer, node.get(), in, out);
        }

        // Op Control
        if (!node->is_parameter() && !node->is_constant())
        {
            writer << "if (ctx->first_iteration ";
            for (const descriptor::Input& input : node->get_inputs())
            {
                const descriptor::Output& output = input.get_output();
                shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
                auto input_name = tv->get_name();

                if (output.get_node()->is_parameter())
                {
                    writer << " || ctx->p_en[" << param_index_map[input_name] << "]";
                }
                else if (!output.get_node()->is_constant())
                {
                    writer << " || t_en[" << tensor_index_map[input_name] << "]";
                }
            }
            // Always enable nodes computing output tensors or nodes whose outputs might get
            // overwritten due to inplace kernels
            // TODO (jbobba) - Do we need to handle cacheability
            if (computes_result(node.get()) || possibly_overwritten(node.get()))
            {
                writer << " || 1";
            }
            writer << ") {\n";
            writer.indent++;
        }

        auto it = node_function_map.find(node.get());
        if (it == node_function_map.end())
        {
            handler->second(this, writer, node.get(), in, out);
        }
        else
        {
            string func_name =
                ngraph::pass::CommonFunctionCollection::create_function_name(*it->second);
            vector<string> names;
            for (const TensorViewWrapper& tv : in)
            {
                names.push_back(tv.get_name());
            }
            for (const TensorViewWrapper& tv : out)
            {
                names.push_back(tv.get_name());
            }
            writer << func_name << "(" << join(names) << ", ctx, cg_ctx);\n";
        }

        // skip multi-output nodes since they would be covered by GetOutputElement
        if (node->get_output_size() == 1 &&
            // skip non-FP nodes
            (node->get_element_type() == element::f32 || node->get_element_type() == element::f64))
        {
            // check inputs and constants?
            if ((!node->is_parameter() && !node->is_constant()) ||
                std::getenv("NGRAPH_CPU_CHECK_PARMS_AND_CONSTS"))
            {
                if (std::getenv("NGRAPH_CPU_NAN_CHECK"))
                {
                    generate_isnan_isinf_check(writer, node, out, "isnan");
                }
                if (std::getenv("NGRAPH_CPU_INF_CHECK"))
                {
                    generate_isnan_isinf_check(writer, node, out, "isinf");
                }
            }
        }

        // Emit operation epilogue
        if (!node->is_parameter() && !node->is_constant())
        {
            for (auto output_name : node_output_names)
            {
                writer << "t_en[" << tensor_index_map[output_name] << "] = true;\n";
            }
            writer.indent--;
            writer << "} else {\n";
            writer.indent++;
            for (auto output_name : node_output_names)
            {
                writer << "t_en[" << tensor_index_map[output_name] << "] = false;\n";
            }
            writer.indent--;
            writer << "}\n";
            emit_debug_function_exit(writer, node.get(), in, out);
-           if (runtime::cpu::IsTracingEnabled() && current_function->get_name() == m_function_name)
+           if (runtime::cpu::IsTracingEnabled() && m_function->get_name() == m_function_name)
            {
                writer << "ctx->op_durations[profiler_count++] = "
                       << "(std::chrono::duration_cast<cpu::Timescale>(cpu::Clock::now() - "
                          "start_ts)).count();\n";
            }
            if (m_use_tbb)
            {
                writer.indent--;
                writer << "});\n";
            }
        }
    }

    if (m_use_tbb)
    {
        writer << "\n";
        // Build the flow graph
-       traverse_nodes(current_function, [&writer](shared_ptr<Node> n) {
+       traverse_nodes(m_function, [&writer](shared_ptr<Node> n) {
            if (!n->is_parameter() && !n->is_constant())
            {
                bool is_head = true;
                for (auto arg : n->get_arguments())
                {
                    if (!arg->is_parameter() && !arg->is_constant())
                    {
                        is_head = false;
                        writer << "tbb::flow::make_edge(*flowgraph_node_" << arg->get_name()
                               << ", *flowgraph_node_" << n->get_name() << ");\n";
                    }
                }
                if (is_head)
                {
                    writer << "tbb::flow::make_edge(*flowgraph_node_start"
                           << ", *flowgraph_node_" << n->get_name() << ");\n";
                }
            }
        });

        writer.indent--;
        writer << "}\n";

        // Execute the flow graph
        writer << "(static_cast<tbb::flow::continue_node<tbb::flow::continue_msg>*>"
                  "(&(*(cg_ctx->tbb_graph->begin()))))"
               << "->try_put(tbb::flow::continue_msg());\n";
        writer << "try { cg_ctx->tbb_graph->wait_for_all(); } catch(...) { throw; }\n";
    }
    writer << "ctx->first_iteration = false;\n";
    writer.indent--;
    // End generated function
    writer += "}\n\n";
-   }

    // TODO: Cleanup and make this a utility function
    string filename = file_util::path_join(s_output_dir, m_function_name + "_codegen.cpp");
......
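The long hunk above applies one pattern throughout the CPU codegen path: drop the loop over pass_manager.get_state().get_functions(), take the ordered op list straight from m_function, and replace current_function with m_function. A condensed, hedged sketch of that pattern (emit_node stands in for the real per-op emitters):

    #include <list>
    #include <memory>
    #include "ngraph/function.hpp"
    #include "ngraph/node.hpp"
    #include "ngraph/pass/manager.hpp"

    using namespace ngraph;
    using namespace std;

    // Hypothetical stand-in for the CPU backend's per-node code generation.
    static void emit_node(const shared_ptr<Node>& node) { (void)node; }

    void compile_example(pass::Manager& pass_manager, shared_ptr<Function> m_function)
    {
        pass_manager.run_passes(m_function);

        // Previously: a map from every function in get_state().get_functions()
        // to its ordered ops. Now: exactly one function, one op list.
        list<shared_ptr<Node>> ordered_ops = m_function->get_ordered_ops();
        for (const shared_ptr<Node>& node : ordered_ops)
        {
            if (!node->is_parameter() && !node->is_constant())
            {
                emit_node(node);
            }
        }
    }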
@@ -184,11 +184,7 @@ void runtime::gpu::GPUCompiledFunction::compile()
    string dump_filename = file_util::path_join(get_output_dir(), m_function_name + "_ops.txt");
    pass_manager.register_pass<ngraph::pass::DumpSorted>(dump_filename);

    pass_manager.run_passes(m_function);
-
-   for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
-   {
-       m_function_ordered_ops.emplace(current_function, current_function->get_ordered_ops());
-   }
+   m_function_ordered_ops.emplace(m_function, m_function->get_ordered_ops());

    add_passes(pass_manager);
    emit();
......
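The GPU backend keeps its function-to-ops map, but after this change it only ever holds one entry, keyed by the compiled function. A hedged sketch with a local map standing in for the backend's member (its real type may differ):

    #include <list>
    #include <memory>
    #include <unordered_map>
    #include "ngraph/function.hpp"
    #include "ngraph/node.hpp"

    using namespace ngraph;
    using namespace std;

    void build_ordered_ops_map(const shared_ptr<Function>& m_function)
    {
        unordered_map<shared_ptr<Function>, list<shared_ptr<Node>>> function_ordered_ops;

        // Single entry; lookups that once iterated nested functions hit only this.
        function_ordered_ops.emplace(m_function, m_function->get_ordered_ops());
        auto& ops = function_ordered_ops.at(m_function);
        (void)ops;
    }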
@@ -340,33 +340,24 @@ static void serialize_to_cpio(ostream& out, shared_ptr<ngraph::Function> func, s
    cpio::Writer writer(out);
    writer.write(func->get_name(), j.c_str(), static_cast<uint32_t>(j.size()));

-   traverse_functions(func, [&](shared_ptr<ngraph::Function> f) {
-       traverse_nodes(const_cast<Function*>(f.get()),
-                      [&](shared_ptr<Node> node) {
-                          if (auto c = dynamic_pointer_cast<op::Constant>(node))
-                          {
-                              uint32_t size =
-                                  static_cast<uint32_t>(shape_size(c->get_output_shape(0)) *
-                                                        c->get_output_element_type(0).size());
-                              writer.write(c->get_name(), c->get_data_ptr(), size);
-                          }
-                      },
-                      true);
-   });
+   traverse_nodes(const_cast<Function*>(func.get()),
+                  [&](shared_ptr<Node> node) {
+                      if (auto c = dynamic_pointer_cast<op::Constant>(node))
+                      {
+                          uint32_t size =
+                              static_cast<uint32_t>(shape_size(c->get_output_shape(0)) *
+                                                    c->get_output_element_type(0).size());
+                          writer.write(c->get_name(), c->get_data_ptr(), size);
+                      }
+                  },
+                  true);
 }
 #endif

 static string serialize(shared_ptr<ngraph::Function> func, size_t indent, bool binary_constant_data)
 {
     json j;
-    vector<json> functions;
-    traverse_functions(func, [&](shared_ptr<ngraph::Function> f) {
-        functions.push_back(write(*f, binary_constant_data));
-    });
-    for (auto it = functions.rbegin(); it != functions.rend(); it++)
-    {
-        j.push_back(*it);
-    }
+    j.push_back(write(*func, binary_constant_data));

     string rc;
     if (indent == 0)
......
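A small usage sketch (the public serialize() entry point and the indent value are assumed from the serializer header): with nested functions gone, the emitted JSON array holds a single function object instead of one object per nested function in reverse traversal order.

    #include <iostream>
    #include <memory>
    #include "ngraph/function.hpp"
    #include "ngraph/serializer.hpp"

    using namespace ngraph;

    void dump_json(std::shared_ptr<Function> f)
    {
        std::string js = serialize(f, 4); // JSON array with exactly one entry: f
        std::cout << js << std::endl;
    }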