Commit af7c81a3 authored by Scott Cyphers, committed by Robert Kimball

Remove internal functions (#3079)

parent 52c0827d
@@ -385,7 +385,6 @@ set (SRC
pass/liveness.hpp
pass/manager.cpp
pass/manager.hpp
pass/manager_state.cpp
pass/manager_state.hpp
pass/memory_layout.cpp
pass/memory_layout.hpp
......
@@ -108,33 +108,6 @@ void ngraph::traverse_nodes(const NodeVector& subgraph_results,
}
}
void ngraph::traverse_functions(std::shared_ptr<ngraph::Function> p,
std::function<void(shared_ptr<Function>)> f)
{
std::unordered_set<shared_ptr<Function>> instances_seen;
deque<shared_ptr<Function>> stack;
stack.push_front(p);
while (stack.size() > 0)
{
shared_ptr<Function> func = stack.front();
if (instances_seen.find(func) == instances_seen.end())
{
instances_seen.insert(func);
f(func);
}
stack.pop_front();
for (shared_ptr<Node> op : func->get_ops())
{
for (shared_ptr<Function> fp : op->get_functions())
{
stack.push_front(fp);
}
}
}
}
NodeVector ngraph::find_common_args(std::shared_ptr<Node> target, std::shared_ptr<Node> replacement)
{
std::unordered_set<std::shared_ptr<Node>> target_args;
......
@@ -70,8 +70,12 @@ namespace ngraph
bool include_control_deps,
const NodeVector& subgraph_params = {});
void traverse_functions(std::shared_ptr<Function> p,
std::function<void(std::shared_ptr<Function>)> f);
inline void traverse_functions(std::shared_ptr<Function> p,
std::function<void(std::shared_ptr<Function>)> f)
NGRAPH_DEPRECATED("Replace with f(p)")
{
f(p);
};
void replace_node(std::shared_ptr<Node> target, std::shared_ptr<Node> replacement);
......
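With nested functions gone, the deprecated `traverse_functions` shim simply applies the callback to the one function, as its NGRAPH_DEPRECATED message says. A minimal migration sketch follows; `collect_ops` and `migrate` are hypothetical names standing in for whatever the old callback did:

```cpp
#include <memory>

#include "ngraph/function.hpp"

// Hypothetical per-function work; stands in for the body of the old callback.
static void collect_ops(const std::shared_ptr<ngraph::Function>& f)
{
    for (const auto& op : f->get_ops())
    {
        (void)op; // e.g. record op names, count nodes, ...
    }
}

void migrate(std::shared_ptr<ngraph::Function> func)
{
    // Old (deprecated, warns "Replace with f(p)"):
    //   ngraph::traverse_functions(func,
    //       [](std::shared_ptr<ngraph::Function> f) { collect_ops(f); });
    // New: a graph is a single Function, so apply the work to it directly.
    collect_ops(func);
}
```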
@@ -339,11 +339,6 @@ void Node::add_control_dependency(std::shared_ptr<Node> node)
m_control_dependencies.insert(node);
}
std::vector<std::shared_ptr<Function>> Node::get_functions() const
{
return std::vector<std::shared_ptr<Function>>{};
}
namespace ngraph
{
ostream& operator<<(ostream& out, const Node& node)
......
@@ -323,8 +323,6 @@ namespace ngraph
// Will be replaced with an OutputVector version
virtual std::shared_ptr<Node> copy_with_new_args(const NodeVector& new_args) const = 0;
virtual std::vector<std::shared_ptr<Function>> get_functions() const;
/// True if this and node have one output with same element type and shape
bool has_same_type(std::shared_ptr<const Node> node) const;
......
@@ -61,27 +61,9 @@ void pass::Manager::run_passes(shared_ptr<Function> func, bool transitive)
{
bool profile_enabled = getenv("NGRAPH_PROFILE_PASS_ENABLE") != nullptr;
vector<std::pair<shared_ptr<Function>, bool>> fs;
if (transitive)
{
// find all functions
traverse_functions(func, [&](shared_ptr<Function> f) {
fs.push_back(std::make_pair(f, f->is_dynamic()));
});
}
else
{
fs = {std::make_pair(func, func->is_dynamic())};
}
set<shared_ptr<Function>> tfs;
std::vector<shared_ptr<Function>> f_array;
for (auto f_pair : fs)
{
shared_ptr<Function> f = f_pair.first;
tfs.insert(f);
f_array.push_back(f);
}
get_state().set_functions(tfs);
get_state().set_function(func);
vector<std::pair<shared_ptr<Function>, bool>> fs{std::make_pair(func, func->is_dynamic())};
vector<shared_ptr<Function>> f_array{func};
size_t index = 0;
stopwatch pass_timer;
......
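After this simplification `run_passes` assumes a single entry function, so the transitive traversal no longer discovers anything extra. A minimal sketch of driving the pass manager under that assumption; the pass choice and the `run_liveness` name are illustrative:

```cpp
#include <memory>

#include "ngraph/function.hpp"
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp"

// Minimal sketch: register one pass and run it over the single entry Function.
void run_liveness(const std::shared_ptr<ngraph::Function>& func)
{
    ngraph::pass::Manager pass_manager;
    pass_manager.register_pass<ngraph::pass::Liveness>();
    // With this change the manager's state tracks exactly this function.
    pass_manager.run_passes(func);
}
```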
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <iostream>
#include <memory>
#include "ngraph/function.hpp"
#include "ngraph/log.hpp"
#include "ngraph/node.hpp"
#include "ngraph/pass/manager_state.hpp"
using namespace std;
using namespace ngraph;
const vector<shared_ptr<Function>>& pass::ManagerState::get_functions()
{
return m_function_list;
}
@@ -41,15 +41,6 @@ namespace ngraph
class ngraph::pass::ManagerState
{
public:
const std::vector<std::shared_ptr<Function>>& get_functions();
template <typename T>
void set_functions(const T& collection)
{
m_function_list.clear();
m_function_list.insert(m_function_list.begin(), collection.begin(), collection.end());
}
void set_visualize_tree_ops_map(const visualize_tree_ops_map_t& ops_map)
{
m_visualize_tree_ops_map = ops_map;
@@ -60,7 +51,15 @@ public:
return m_visualize_tree_ops_map;
}
void set_function(const std::shared_ptr<Function> function) { m_function = function; }
std::shared_ptr<Function> get_function() const { return m_function; }
std::vector<std::shared_ptr<Function>> get_functions() const
NGRAPH_DEPRECATED("Use get_function()")
{
return {m_function};
}
private:
std::vector<std::shared_ptr<Function>> m_function_list;
visualize_tree_ops_map_t m_visualize_tree_ops_map;
std::shared_ptr<Function> m_function;
};
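Code that previously iterated `ManagerState::get_functions()` can ask for the single function instead; the deprecated plural accessor is now a thin wrapper. A small sketch, assuming a Manager whose passes have already run (the `current_function` helper name is illustrative):

```cpp
#include <memory>

#include "ngraph/function.hpp"
#include "ngraph/pass/manager.hpp"

std::shared_ptr<ngraph::Function> current_function(ngraph::pass::Manager& pass_manager)
{
    // New accessor: ManagerState now tracks exactly one Function.
    return pass_manager.get_state().get_function();
    // Old accessor (still compiles, but warns NGRAPH_DEPRECATED("Use get_function()")):
    //   return pass_manager.get_state().get_functions().front();
}
```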
@@ -497,14 +497,7 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_
femitter, node_function_map, common_function_string);
pass_manager.run_passes(m_function);
unordered_map<shared_ptr<Function>, list<shared_ptr<Node>>> function_ordered_ops;
// only one function is allowed
NGRAPH_CHECK(pass_manager.get_state().get_functions().size() == 1,
"only one function is allowed");
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
{
function_ordered_ops.insert({current_function, current_function->get_ordered_ops()});
}
list<shared_ptr<Node>> ordered_ops = m_function->get_ordered_ops();
CodeWriter writer;
@@ -594,15 +587,12 @@ using namespace ngraph::runtime;
writer << "// Declare debug timers\n";
vector<string> names;
size_t index = 0;
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
for (shared_ptr<Node> node : ordered_ops)
{
for (shared_ptr<Node> node : function_ordered_ops.at(current_function))
if (!node->is_parameter() && !node->is_constant())
{
if (!node->is_parameter() && !node->is_constant())
{
names.push_back(node->get_name());
m_name_index_map.insert({node->get_name(), index++});
}
names.push_back(node->get_name());
m_name_index_map.insert({node->get_name(), index++});
}
}
writer << "ngraph::stopwatch timers[" << names.size() << "];\n";
@@ -644,28 +634,25 @@ using namespace ngraph::runtime;
}
writer << "// Declare all constants\n";
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
for (shared_ptr<Node> node : ordered_ops)
{
for (shared_ptr<Node> node : function_ordered_ops.at(current_function))
ngraph::op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
if (c)
{
ngraph::op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
if (c)
m_active_constants.push_back(node);
shared_ptr<descriptor::Tensor> tv = node->get_outputs()[0].get_tensor_ptr();
string type = tv->get_element_type().c_type_string();
writer << "static " << type << "* " << tv->get_name() << " = ((" << type << "*)("
<< c->get_data_ptr() << "));\n";
auto output_tensor = &node->get_output_tensor();
auto tensor_set = get_tensor_set(output_tensor);
// process all tensors in the set containing the output tensor of the constant
for (auto& ele_t : tensor_set)
{
m_active_constants.push_back(node);
shared_ptr<descriptor::Tensor> tv = node->get_outputs()[0].get_tensor_ptr();
string type = tv->get_element_type().c_type_string();
writer << "static " << type << "* " << tv->get_name() << " = ((" << type << "*)("
<< c->get_data_ptr() << "));\n";
auto output_tensor = &node->get_output_tensor();
auto tensor_set = get_tensor_set(output_tensor);
// process all tensors in the set containing the output tensor of the constant
for (auto& ele_t : tensor_set)
{
NGRAPH_CHECK(ele_t->get_pool_offset() == 0, "no offset set for constants");
m_tensor_roles[ele_t->get_name()] = TensorRole::CONSTANT;
m_variable_name_map[ele_t->get_name()] = output_tensor->get_name();
}
NGRAPH_CHECK(ele_t->get_pool_offset() == 0, "no offset set for constants");
m_tensor_roles[ele_t->get_name()] = TensorRole::CONSTANT;
m_variable_name_map[ele_t->get_name()] = output_tensor->get_name();
}
}
}
@@ -694,379 +681,370 @@ using namespace ngraph::runtime;
writer.block_end();
writer << "\n";
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
set<string> output_names;
for (shared_ptr<Node> op : m_function->get_results())
{
auto ordered_ops = function_ordered_ops.at(current_function);
set<string> output_names;
for (shared_ptr<Node> op : current_function->get_results())
shared_ptr<descriptor::Tensor> tv = op->get_output_tensor_ptr();
output_names.insert(tv->get_name());
}
set<descriptor::Tensor*> constants;
for (shared_ptr<Node> node : ordered_ops)
{
if (dynamic_cast<ngraph::op::Constant*>(node.get()))
{
shared_ptr<descriptor::Tensor> tv = op->get_output_tensor_ptr();
output_names.insert(tv->get_name());
shared_ptr<descriptor::Tensor> tv = node->get_outputs()[0].get_tensor_ptr();
constants.insert(tv.get());
}
set<descriptor::Tensor*> constants;
for (shared_ptr<Node> node : ordered_ops)
}
bool temporaries_used = false;
for (shared_ptr<Node> node : ordered_ops)
{
if (node->liveness_new_list.size() > 0)
{
if (dynamic_cast<ngraph::op::Constant*>(node.get()))
{
shared_ptr<descriptor::Tensor> tv = node->get_outputs()[0].get_tensor_ptr();
constants.insert(tv.get());
}
temporaries_used = true;
}
}
if (temporaries_used)
{
m_memory_buffer_sizes.push_back(m_function->get_temporary_pool_size());
}
bool temporaries_used = false;
for (shared_ptr<Node> node : ordered_ops)
// Indexing for Control Flags
std::map<std::string, size_t> tensor_index_map;
std::map<std::string, size_t> param_index_map;
size_t tensor_index = 0;
for (shared_ptr<Node> node : ordered_ops)
{
if (!node->is_parameter() && !node->is_constant())
{
if (node->liveness_new_list.size() > 0)
for (const descriptor::Input& input : node->get_inputs())
{
temporaries_used = true;
const descriptor::Output& output = input.get_output();
shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
tensor_index_map.insert({tv->get_name(), tensor_index++});
}
}
if (temporaries_used)
}
writer << "bool " << m_function->get_name() << "_t_en[" << tensor_index << "];\n";
writer << "extern \"C\" void " << m_function->get_name() << func_params << "\n";
writer << "{\n";
writer.indent++;
//deserialize and build mkldnn primitives
if (m_mkldnn_emitter->get_mkldnn_descriptors_size() > 0)
{
writer << "if (ctx->first_iteration)\n";
writer.block_begin();
writer << "// read in memory descriptors and build mkldnn primitives\n";
writer << "std::ifstream desc_file (\"" << m_desc_filename << "\", std::ios::binary);\n";
writer << "deserialize_memory_descs_and_build_memory_primitives(" << m_desc_filename
<< ", cg_ctx, " << to_string(m_mkldnn_emitter->get_mkldnn_descriptors_size())
<< ");\n";
writer.block_end();
}
// Execution tracing support
if (runtime::cpu::IsTracingEnabled() && m_function->get_name() == m_function_name)
{
writer << "cpu::Timestamp start_ts;\n"
<< "int profiler_count = 0;\n\n";
}
if (temporaries_used)
{
writer << "size_t pool_base_ptr = (size_t) ctx->memory_buffers["
<< m_memory_buffer_sizes.size() - 1 << "]->get_ptr();\n";
writer << "\n";
}
writer << "bool* t_en = (bool*)" << m_function->get_name() << "_t_en;\n";
if (m_use_tbb)
{
writer << "\n";
writer << "if (ctx->first_iteration) {\n";
writer.indent++;
writer << "tbb::flow::continue_node<tbb::flow::continue_msg>* "
"flowgraph_node_start"
<< " = new tbb::flow::continue_node<tbb::flow::continue_msg> "
"(*(cg_ctx->tbb_graph), [&](const tbb::flow::continue_msg &msg)\n{});\n";
}
// Add inputs to the variable name map
size_t arg_index = 0;
for (shared_ptr<ngraph::op::Parameter> param : m_function->get_parameters())
{
for (size_t i = 0; i < param->get_output_size(); ++i)
{
m_memory_buffer_sizes.push_back(current_function->get_temporary_pool_size());
auto output_tensor = &param->get_outputs().at(i).get_tensor();
param_index_map[output_tensor->get_name()] = arg_index;
auto tensor_set = get_tensor_set(output_tensor);
// process all tensors in the set containing the output tensor of the parameter
for (auto& ele_t : tensor_set)
{
const element::Type& et = ele_t->get_element_type();
string type = et.c_type_string();
stringstream ss;
ss << "(((" << type << "*)(inputs[" << arg_index << "])) + "
<< ele_t->get_pool_offset() / et.size() << ")";
m_variable_name_map[ele_t->get_name()] = ss.str();
m_tensor_roles[ele_t->get_name()] = TensorRole::INPUT;
}
arg_index++;
}
}
// Indexing for Control Flags
std::map<std::string, size_t> tensor_index_map;
std::map<std::string, size_t> param_index_map;
size_t tensor_index = 0;
for (shared_ptr<Node> node : ordered_ops)
// Add temporaries to the variable name map
if (temporaries_used)
{
for (auto& ele : bufferID_to_tensorSets)
{
if (!node->is_parameter() && !node->is_constant())
if (ele.second.first == TensorRole::INTERMEDIATE)
{
for (const descriptor::Input& input : node->get_inputs())
for (auto& ele_t : ele.second.second)
{
const descriptor::Output& output = input.get_output();
shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
tensor_index_map.insert({tv->get_name(), tensor_index++});
stringstream ss;
ss << "((" << ele_t->get_element_type().c_type_string() << "*)(pool_base_ptr + "
<< ele_t->get_pool_offset() << "))";
m_variable_name_map[ele_t->get_name()] = ss.str();
m_tensor_roles[ele_t->get_name()] = TensorRole::INTERMEDIATE;
}
}
}
}
writer << "bool " << current_function->get_name() << "_t_en[" << tensor_index << "];\n";
writer << "extern \"C\" void " << current_function->get_name() << func_params << "\n";
writer << "{\n";
writer.indent++;
//deserialize and build mkldnn primitives
if (m_mkldnn_emitter->get_mkldnn_descriptors_size() > 0)
// Add outputs to the variable name map
for (size_t i = 0; i < m_function->get_output_size(); ++i)
{
shared_ptr<Node> op = m_function->get_output_op(i);
auto output_tensor = &op->get_output_tensor();
auto tensor_set = get_tensor_set(output_tensor);
// process all tensors in the set containing the output tensor of the result
for (auto& ele_t : tensor_set)
{
writer << "if (ctx->first_iteration)\n";
writer.block_begin();
writer << "// read in memory descriptors and build mkldnn primitives\n";
writer << "std::ifstream desc_file (\"" << m_desc_filename
<< "\", std::ios::binary);\n";
writer << "deserialize_memory_descs_and_build_memory_primitives(" << m_desc_filename
<< ", cg_ctx, " << to_string(m_mkldnn_emitter->get_mkldnn_descriptors_size())
<< ");\n";
writer.block_end();
const element::Type& et = ele_t->get_element_type();
string type = et.c_type_string();
stringstream ss;
ss << "(((" << type << "*)(outputs[" << i << "])) + "
<< ele_t->get_pool_offset() / et.size() << ")";
m_variable_name_map[ele_t->get_name()] = ss.str();
m_tensor_roles[ele_t->get_name()] = TensorRole::OUTPUT;
}
}
// Execution tracing support
if (runtime::cpu::IsTracingEnabled() && current_function->get_name() == m_function_name)
for (shared_ptr<Node> node : ordered_ops)
{
auto& n = *node; // Work around a compiler warning (*node inside typeid may have effects
// with shared pointers, which is fine here but clang doesn't like it.)
auto handler = dispatcher.find(type_index(typeid(n)));
if (handler == dispatcher.end())
{
writer << "cpu::Timestamp start_ts;\n"
<< "int profiler_count = 0;\n\n";
throw unsupported_op(node->description());
}
if (temporaries_used)
vector<TensorViewWrapper> in;
vector<string> node_input_names;
vector<string> node_output_names;
for (const descriptor::Input& input : node->get_inputs())
{
writer << "size_t pool_base_ptr = (size_t) ctx->memory_buffers["
<< m_memory_buffer_sizes.size() - 1 << "]->get_ptr();\n";
writer << "\n";
const descriptor::Output& output = input.get_output();
shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
in.push_back(TensorViewWrapper(tv, m_variable_name_map[tv->get_name()]));
node_input_names.emplace_back(tv->get_name());
}
writer << "bool* t_en = (bool*)" << current_function->get_name() << "_t_en;\n";
if (m_use_tbb)
vector<TensorViewWrapper> out;
for (const descriptor::Output& output : node->get_outputs())
{
writer << "\n";
writer << "if (ctx->first_iteration) {\n";
writer.indent++;
writer << "tbb::flow::continue_node<tbb::flow::continue_msg>* "
"flowgraph_node_start"
<< " = new tbb::flow::continue_node<tbb::flow::continue_msg> "
"(*(cg_ctx->tbb_graph), [&](const tbb::flow::continue_msg &msg)\n{});\n";
shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
out.push_back(TensorViewWrapper(tv, m_variable_name_map[tv->get_name()]));
node_output_names.emplace_back(tv->get_name());
}
// Add inputs to the variable name map
size_t arg_index = 0;
for (shared_ptr<ngraph::op::Parameter> param : current_function->get_parameters())
// Emit operation prologue
if (!node->is_parameter() && !node->is_constant())
{
for (size_t i = 0; i < param->get_output_size(); ++i)
if (m_function->get_name() == m_function_name)
{
auto output_tensor = &param->get_outputs().at(i).get_tensor();
param_index_map[output_tensor->get_name()] = arg_index;
auto tensor_set = get_tensor_set(output_tensor);
// process all tensors in the set containing the output tensor of the parameter
for (auto& ele_t : tensor_set)
{
const element::Type& et = ele_t->get_element_type();
string type = et.c_type_string();
stringstream ss;
ss << "(((" << type << "*)(inputs[" << arg_index << "])) + "
<< ele_t->get_pool_offset() / et.size() << ")";
m_variable_name_map[ele_t->get_name()] = ss.str();
m_tensor_roles[ele_t->get_name()] = TensorRole::INPUT;
}
arg_index++;
m_op_attrs.emplace_back(node->description(), node_output_names, node_input_names);
}
if (m_use_tbb)
{
writer << "tbb::flow::continue_node<tbb::flow::continue_msg>* "
"flowgraph_node_"
<< node->get_name()
<< " = new tbb::flow::continue_node<tbb::flow::continue_msg> "
"(*(cg_ctx->tbb_graph), [&](const tbb::flow::continue_msg &msg)\n{\n";
writer.indent++;
}
if (runtime::cpu::IsTracingEnabled() && m_function->get_name() == m_function_name)
{
writer << "start_ts = cpu::Clock::now();\n";
}
}
// Add temporaries to the variable name map
if (temporaries_used)
if (!node->is_parameter() && !node->is_constant())
{
for (auto& ele : bufferID_to_tensorSets)
{
if (ele.second.first == TensorRole::INTERMEDIATE)
{
for (auto& ele_t : ele.second.second)
{
stringstream ss;
ss << "((" << ele_t->get_element_type().c_type_string()
<< "*)(pool_base_ptr + " << ele_t->get_pool_offset() << "))";
m_variable_name_map[ele_t->get_name()] = ss.str();
m_tensor_roles[ele_t->get_name()] = TensorRole::INTERMEDIATE;
}
}
}
writer << "\n// " << node->get_name() << "(";
vector<string> parameter_nodes = node_input_names;
parameter_nodes.insert(
parameter_nodes.end(), node_output_names.begin(), node_output_names.end());
writer << join(parameter_nodes);
writer << ")\n";
}
// Add outputs to the variable name map
for (size_t i = 0; i < current_function->get_output_size(); ++i)
// Emit operation body
if (!node->is_parameter() && !node->is_constant())
{
shared_ptr<Node> op = current_function->get_output_op(i);
auto output_tensor = &op->get_output_tensor();
auto tensor_set = get_tensor_set(output_tensor);
// process all tensors in the set containing the output tensor of the result
for (auto& ele_t : tensor_set)
{
const element::Type& et = ele_t->get_element_type();
string type = et.c_type_string();
stringstream ss;
ss << "(((" << type << "*)(outputs[" << i << "])) + "
<< ele_t->get_pool_offset() / et.size() << ")";
m_variable_name_map[ele_t->get_name()] = ss.str();
m_tensor_roles[ele_t->get_name()] = TensorRole::OUTPUT;
}
emit_debug_function_entry(writer, node.get(), in, out);
}
for (shared_ptr<Node> node : ordered_ops)
// Op Control
if (!node->is_parameter() && !node->is_constant())
{
auto& n = *node; // Work around a compiler warning (*node inside typeid may have effects
// with shared pointers, which is fine here but clang doesn't like it.)
auto handler = dispatcher.find(type_index(typeid(n)));
if (handler == dispatcher.end())
{
throw unsupported_op(node->description());
}
vector<TensorViewWrapper> in;
vector<string> node_input_names;
vector<string> node_output_names;
writer << "if (ctx->first_iteration ";
for (const descriptor::Input& input : node->get_inputs())
{
const descriptor::Output& output = input.get_output();
shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
in.push_back(TensorViewWrapper(tv, m_variable_name_map[tv->get_name()]));
node_input_names.emplace_back(tv->get_name());
}
vector<TensorViewWrapper> out;
for (const descriptor::Output& output : node->get_outputs())
{
shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
out.push_back(TensorViewWrapper(tv, m_variable_name_map[tv->get_name()]));
node_output_names.emplace_back(tv->get_name());
}
auto input_name = tv->get_name();
// Emit operation prologue
if (!node->is_parameter() && !node->is_constant())
{
if (current_function->get_name() == m_function_name)
{
m_op_attrs.emplace_back(
node->description(), node_output_names, node_input_names);
}
if (m_use_tbb)
if (output.get_node()->is_parameter())
{
writer << "tbb::flow::continue_node<tbb::flow::continue_msg>* "
"flowgraph_node_"
<< node->get_name()
<< " = new tbb::flow::continue_node<tbb::flow::continue_msg> "
"(*(cg_ctx->tbb_graph), [&](const tbb::flow::continue_msg &msg)\n{\n";
writer.indent++;
writer << " || ctx->p_en[" << param_index_map[input_name] << "]";
}
if (runtime::cpu::IsTracingEnabled() &&
current_function->get_name() == m_function_name)
else if (!output.get_node()->is_constant())
{
writer << "start_ts = cpu::Clock::now();\n";
writer << " || t_en[" << tensor_index_map[input_name] << "]";
}
}
if (!node->is_parameter() && !node->is_constant())
// Always enable nodes computing output tensors or nodes whose outputs might get
// overwritten due to inplace kernels
// TODO (jbobba) - Do we need to handle cacheability
if (computes_result(node.get()) || possibly_overwritten(node.get()))
{
writer << "\n// " << node->get_name() << "(";
vector<string> parameter_nodes = node_input_names;
parameter_nodes.insert(
parameter_nodes.end(), node_output_names.begin(), node_output_names.end());
writer << join(parameter_nodes);
writer << ")\n";
writer << " || 1";
}
writer << ") {\n";
writer.indent++;
}
// Emit operation body
if (!node->is_parameter() && !node->is_constant())
auto it = node_function_map.find(node.get());
if (it == node_function_map.end())
{
handler->second(this, writer, node.get(), in, out);
}
else
{
string func_name =
ngraph::pass::CommonFunctionCollection::create_function_name(*it->second);
vector<string> names;
for (const TensorViewWrapper& tv : in)
{
names.push_back(tv.get_name());
}
for (const TensorViewWrapper& tv : out)
{
emit_debug_function_entry(writer, node.get(), in, out);
names.push_back(tv.get_name());
}
writer << func_name << "(" << join(names) << ", ctx, cg_ctx);\n";
}
// Op Control
if (!node->is_parameter() && !node->is_constant())
// skip multi-output nodes since they would be covered by GetOutputElement
if (node->get_output_size() == 1 &&
// skip non-FP nodes
(node->get_element_type() == element::f32 || node->get_element_type() == element::f64))
{
// check inputs and constants?
if ((!node->is_parameter() && !node->is_constant()) ||
std::getenv("NGRAPH_CPU_CHECK_PARMS_AND_CONSTS"))
{
writer << "if (ctx->first_iteration ";
for (const descriptor::Input& input : node->get_inputs())
if (std::getenv("NGRAPH_CPU_NAN_CHECK"))
{
const descriptor::Output& output = input.get_output();
shared_ptr<descriptor::Tensor> tv = output.get_tensor_ptr();
auto input_name = tv->get_name();
if (output.get_node()->is_parameter())
{
writer << " || ctx->p_en[" << param_index_map[input_name] << "]";
}
else if (!output.get_node()->is_constant())
{
writer << " || t_en[" << tensor_index_map[input_name] << "]";
}
generate_isnan_isinf_check(writer, node, out, "isnan");
}
// Always enable nodes computing output tensors or nodes whose outputs might get
// overwritten due to inplace kernels
// TODO (jbobba) - Do we need to handle cacheability
if (computes_result(node.get()) || possibly_overwritten(node.get()))
if (std::getenv("NGRAPH_CPU_INF_CHECK"))
{
writer << " || 1";
generate_isnan_isinf_check(writer, node, out, "isinf");
}
writer << ") {\n";
writer.indent++;
}
}
auto it = node_function_map.find(node.get());
if (it == node_function_map.end())
// Emit operation epilogue
if (!node->is_parameter() && !node->is_constant())
{
for (auto output_name : node_output_names)
{
handler->second(this, writer, node.get(), in, out);
writer << "t_en[" << tensor_index_map[output_name] << "] = true;\n";
}
else
writer.indent--;
writer << "} else {\n";
writer.indent++;
for (auto output_name : node_output_names)
{
string func_name =
ngraph::pass::CommonFunctionCollection::create_function_name(*it->second);
vector<string> names;
for (const TensorViewWrapper& tv : in)
{
names.push_back(tv.get_name());
}
for (const TensorViewWrapper& tv : out)
{
names.push_back(tv.get_name());
}
writer << func_name << "(" << join(names) << ", ctx, cg_ctx);\n";
writer << "t_en[" << tensor_index_map[output_name] << "] = false;\n";
}
// skip multi-output nodes since they would be covered by GetOutputElement
if (node->get_output_size() == 1 &&
// skip non-FP nodes
(node->get_element_type() == element::f32 ||
node->get_element_type() == element::f64))
writer.indent--;
writer << "}\n";
emit_debug_function_exit(writer, node.get(), in, out);
if (runtime::cpu::IsTracingEnabled() && m_function->get_name() == m_function_name)
{
// check inputs and constants?
if ((!node->is_parameter() && !node->is_constant()) ||
std::getenv("NGRAPH_CPU_CHECK_PARMS_AND_CONSTS"))
{
if (std::getenv("NGRAPH_CPU_NAN_CHECK"))
{
generate_isnan_isinf_check(writer, node, out, "isnan");
}
if (std::getenv("NGRAPH_CPU_INF_CHECK"))
{
generate_isnan_isinf_check(writer, node, out, "isinf");
}
}
writer << "ctx->op_durations[profiler_count++] = "
<< "(std::chrono::duration_cast<cpu::Timescale>(cpu::Clock::now() - "
"start_ts)).count();\n";
}
// Emit operation epilogue
if (!node->is_parameter() && !node->is_constant())
if (m_use_tbb)
{
for (auto output_name : node_output_names)
{
writer << "t_en[" << tensor_index_map[output_name] << "] = true;\n";
}
writer.indent--;
writer << "} else {\n";
writer.indent++;
for (auto output_name : node_output_names)
{
writer << "t_en[" << tensor_index_map[output_name] << "] = false;\n";
}
writer.indent--;
writer << "}\n";
emit_debug_function_exit(writer, node.get(), in, out);
if (runtime::cpu::IsTracingEnabled() &&
current_function->get_name() == m_function_name)
{
writer << "ctx->op_durations[profiler_count++] = "
<< "(std::chrono::duration_cast<cpu::Timescale>(cpu::Clock::now() - "
"start_ts)).count();\n";
}
if (m_use_tbb)
{
writer.indent--;
writer << "});\n";
}
writer << "});\n";
}
}
}
if (m_use_tbb)
{
writer << "\n";
// Build the flow graph
if (m_use_tbb)
{
writer << "\n";
// Build the flow graph
traverse_nodes(current_function, [&writer](shared_ptr<Node> n) {
if (!n->is_parameter() && !n->is_constant())
traverse_nodes(m_function, [&writer](shared_ptr<Node> n) {
if (!n->is_parameter() && !n->is_constant())
{
bool is_head = true;
for (auto arg : n->get_arguments())
{
bool is_head = true;
for (auto arg : n->get_arguments())
{
if (!arg->is_parameter() && !arg->is_constant())
{
is_head = false;
writer << "tbb::flow::make_edge(*flowgraph_node_" << arg->get_name()
<< ", *flowgraph_node_" << n->get_name() << ");\n";
}
}
if (is_head)
if (!arg->is_parameter() && !arg->is_constant())
{
writer << "tbb::flow::make_edge(*flowgraph_node_start"
is_head = false;
writer << "tbb::flow::make_edge(*flowgraph_node_" << arg->get_name()
<< ", *flowgraph_node_" << n->get_name() << ");\n";
}
}
});
writer.indent--;
writer << "}\n";
// Execute the flow graph
writer << "(static_cast<tbb::flow::continue_node<tbb::flow::continue_msg>*>"
"(&(*(cg_ctx->tbb_graph->begin()))))"
<< "->try_put(tbb::flow::continue_msg());\n";
writer << "try { cg_ctx->tbb_graph->wait_for_all(); } catch(...) { throw; }\n";
}
writer << "ctx->first_iteration = false;\n";
if (is_head)
{
writer << "tbb::flow::make_edge(*flowgraph_node_start"
<< ", *flowgraph_node_" << n->get_name() << ");\n";
}
}
});
writer.indent--;
// End generated function
writer += "}\n\n";
writer << "}\n";
// Execute the flow graph
writer << "(static_cast<tbb::flow::continue_node<tbb::flow::continue_msg>*>"
"(&(*(cg_ctx->tbb_graph->begin()))))"
<< "->try_put(tbb::flow::continue_msg());\n";
writer << "try { cg_ctx->tbb_graph->wait_for_all(); } catch(...) { throw; }\n";
}
writer << "ctx->first_iteration = false;\n";
writer.indent--;
// End generated function
writer += "}\n\n";
// TODO: Cleanup and make this a utility function
string filename = file_util::path_join(s_output_dir, m_function_name + "_codegen.cpp");
......
@@ -184,11 +184,7 @@ void runtime::gpu::GPUCompiledFunction::compile()
string dump_filename = file_util::path_join(get_output_dir(), m_function_name + "_ops.txt");
pass_manager.register_pass<ngraph::pass::DumpSorted>(dump_filename);
pass_manager.run_passes(m_function);
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
{
m_function_ordered_ops.emplace(current_function, current_function->get_ordered_ops());
}
m_function_ordered_ops.emplace(m_function, m_function->get_ordered_ops());
add_passes(pass_manager);
emit();
......
@@ -340,33 +340,24 @@ static void serialize_to_cpio(ostream& out, shared_ptr<ngraph::Function> func, s
cpio::Writer writer(out);
writer.write(func->get_name(), j.c_str(), static_cast<uint32_t>(j.size()));
traverse_functions(func, [&](shared_ptr<ngraph::Function> f) {
traverse_nodes(const_cast<Function*>(f.get()),
[&](shared_ptr<Node> node) {
if (auto c = dynamic_pointer_cast<op::Constant>(node))
{
uint32_t size =
static_cast<uint32_t>(shape_size(c->get_output_shape(0)) *
c->get_output_element_type(0).size());
writer.write(c->get_name(), c->get_data_ptr(), size);
}
},
true);
});
traverse_nodes(const_cast<Function*>(func.get()),
[&](shared_ptr<Node> node) {
if (auto c = dynamic_pointer_cast<op::Constant>(node))
{
uint32_t size =
static_cast<uint32_t>(shape_size(c->get_output_shape(0)) *
c->get_output_element_type(0).size());
writer.write(c->get_name(), c->get_data_ptr(), size);
}
},
true);
}
#endif
static string serialize(shared_ptr<ngraph::Function> func, size_t indent, bool binary_constant_data)
{
json j;
vector<json> functions;
traverse_functions(func, [&](shared_ptr<ngraph::Function> f) {
functions.push_back(write(*f, binary_constant_data));
});
for (auto it = functions.rbegin(); it != functions.rend(); it++)
{
j.push_back(*it);
}
j.push_back(write(*func, binary_constant_data));
string rc;
if (indent == 0)
......