Commit dc78c12b authored by Jayaram Bobba's avatar Jayaram Bobba Committed by Scott Cyphers

Jbobba/tensor roles (#1420)

* Define and track tensor roles through graph optimizations

* style fix

* Address PR feedback
parent 70738769
...@@ -547,28 +547,6 @@ size_t ngraph::get_user_count(Node* node) ...@@ -547,28 +547,6 @@ size_t ngraph::get_user_count(Node* node)
return count; return count;
} }
// Returns true if `node` computes an output tensor of the graph: either the
// node itself is marked as an output, or it feeds an op::Result node whose
// copy has been eliminated (so the node writes the result tensor directly).
bool ngraph::computes_result(Node* node)
{
    if (node->is_output())
    {
        return true;
    }
    for (const descriptor::Output& out : node->get_outputs())
    {
        for (const descriptor::Input* in : out.get_inputs())
        {
            auto result_op = std::dynamic_pointer_cast<ngraph::op::Result>(in->get_node());
            // A null cast means the consumer is not a Result node at all.
            const bool feeds_eliminated_result = result_op && !result_op->needs_copy();
            if (feeds_eliminated_result)
            {
                return true;
            }
        }
    }
    return false;
}
bool ngraph::possibly_overwritten(Node* node) bool ngraph::possibly_overwritten(Node* node)
{ {
for (const descriptor::Output& output : node->get_outputs()) for (const descriptor::Output& output : node->get_outputs())
......
...@@ -148,9 +148,6 @@ namespace ngraph ...@@ -148,9 +148,6 @@ namespace ngraph
// Returns count of `node` users that are still live in the graph // Returns count of `node` users that are still live in the graph
size_t get_user_count(Node* node); size_t get_user_count(Node* node);
// Returns true if `node` computes an output tensor
bool computes_result(Node* node);
// Return true if a node's user could potentially overwrite // Return true if a node's user could potentially overwrite
// the output of this node with in-place kernels // the output of this node with in-place kernels
bool possibly_overwritten(Node* node); bool possibly_overwritten(Node* node);
......
...@@ -37,7 +37,6 @@ namespace ngraph ...@@ -37,7 +37,6 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node)) if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{ {
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& tensor_data = external_function->get_tensor_data();
vector<float> scale_vector(2, 1); vector<float> scale_vector(2, 1);
vector<mkldnn::memory::primitive_desc> inputs_pd; vector<mkldnn::memory::primitive_desc> inputs_pd;
...@@ -55,9 +54,9 @@ namespace ngraph ...@@ -55,9 +54,9 @@ namespace ngraph
input0_data_desc, input1_data_desc, result_desc, scale_vector, inputs_pd); input0_data_desc, input1_data_desc, result_desc, scale_vector, inputs_pd);
auto& deps = mkldnn_emitter->get_primitive_deps(add_index); auto& deps = mkldnn_emitter->get_primitive_deps(add_index);
auto& arg0_tensor = tensor_data[args[0].get_name()]; auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = tensor_data[args[1].get_name()]; auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& out_tensor = tensor_data[out[0].get_name()]; auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto functor = [&, add_index](CPURuntimeContext* ctx) { auto functor = [&, add_index](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
......
...@@ -33,13 +33,12 @@ namespace ngraph ...@@ -33,13 +33,12 @@ namespace ngraph
void Builder::BUILDER_DECL(ngraph::op::Broadcast) void Builder::BUILDER_DECL(ngraph::op::Broadcast)
{ {
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& tensor_data = external_function->get_tensor_data();
auto broadcast = static_cast<const ngraph::op::Broadcast*>(node); auto broadcast = static_cast<const ngraph::op::Broadcast*>(node);
auto broadcast_axes = broadcast->get_broadcast_axes(); auto broadcast_axes = broadcast->get_broadcast_axes();
auto& arg_tensor = tensor_data[args[0].get_name()]; auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = tensor_data[out[0].get_name()]; auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto arg_shape = args[0].get_shape(); auto arg_shape = args[0].get_shape();
auto out_shape = out[0].get_shape(); auto out_shape = out[0].get_shape();
......
...@@ -38,10 +38,9 @@ namespace ngraph ...@@ -38,10 +38,9 @@ namespace ngraph
auto arg_rank = arg_shape.size(); auto arg_rank = arg_shape.size();
auto& functors = external_function->get_functors(); auto& functors = external_function->get_functors();
auto& tensor_data = external_function->get_tensor_data();
auto& arg_tensor = tensor_data[args[0].get_name()]; auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = tensor_data[out[0].get_name()]; auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
if (arg_rank == 0) if (arg_rank == 0)
{ {
......
...@@ -526,6 +526,7 @@ using namespace ngraph::runtime; ...@@ -526,6 +526,7 @@ using namespace ngraph::runtime;
writer << "static " << type << "* " << tv->get_tensor().get_name() << " = ((" writer << "static " << type << "* " << tv->get_tensor().get_name() << " = (("
<< type << "*)(" << c->get_data_ptr() << "));\n"; << type << "*)(" << c->get_data_ptr() << "));\n";
m_variable_name_map[tv->get_tensor().get_name()] = tv->get_tensor().get_name(); m_variable_name_map[tv->get_tensor().get_name()] = tv->get_tensor().get_name();
m_tensor_roles[tv->get_tensor().get_name()] = CPUTensorRole::CONSTANT;
} }
} }
} }
...@@ -623,6 +624,7 @@ using namespace ngraph::runtime; ...@@ -623,6 +624,7 @@ using namespace ngraph::runtime;
ss << "((" << tensor->get_element_type().c_type_string() ss << "((" << tensor->get_element_type().c_type_string()
<< "*)(pool_base_ptr + " << tensor->get_pool_offset() << "))"; << "*)(pool_base_ptr + " << tensor->get_pool_offset() << "))";
m_variable_name_map[tensor->get_name()] = ss.str(); m_variable_name_map[tensor->get_name()] = ss.str();
m_tensor_roles[tensor->get_name()] = CPUTensorRole::INTERMEDIATE;
} }
} }
} }
...@@ -653,6 +655,7 @@ using namespace ngraph::runtime; ...@@ -653,6 +655,7 @@ using namespace ngraph::runtime;
stringstream ss; stringstream ss;
ss << "((" << type << "*)(inputs[" << arg_index << "]))"; ss << "((" << type << "*)(inputs[" << arg_index << "]))";
m_variable_name_map[tv->get_tensor().get_name()] = ss.str(); m_variable_name_map[tv->get_tensor().get_name()] = ss.str();
m_tensor_roles[tv->get_tensor().get_name()] = CPUTensorRole::INPUT;
param_index_map[tv->get_tensor().get_name()] = arg_index; param_index_map[tv->get_tensor().get_name()] = arg_index;
propagate_in_place_input(&param->get_outputs().at(i), ss.str(), false); propagate_in_place_input(&param->get_outputs().at(i), ss.str(), false);
arg_index++; arg_index++;
...@@ -668,6 +671,7 @@ using namespace ngraph::runtime; ...@@ -668,6 +671,7 @@ using namespace ngraph::runtime;
stringstream ss; stringstream ss;
ss << "((" << type << "*)(outputs[" << i << "]))"; ss << "((" << type << "*)(outputs[" << i << "]))";
m_variable_name_map[tv->get_tensor().get_name()] = ss.str(); m_variable_name_map[tv->get_tensor().get_name()] = ss.str();
m_tensor_roles[tv->get_tensor().get_name()] = CPUTensorRole::OUTPUT;
//it should be safe to assign both descriptors to one output* //it should be safe to assign both descriptors to one output*
//since needs_copy == false makes `op::Result` an nop //since needs_copy == false makes `op::Result` an nop
...@@ -679,6 +683,7 @@ using namespace ngraph::runtime; ...@@ -679,6 +683,7 @@ using namespace ngraph::runtime;
auto output_name = ss.str(); auto output_name = ss.str();
m_variable_name_map[itv->get_tensor().get_name()] = ss.str(); m_variable_name_map[itv->get_tensor().get_name()] = ss.str();
m_tensor_roles[itv->get_tensor().get_name()] = CPUTensorRole::OUTPUT;
propagate_in_place_output( propagate_in_place_output(
&(res->get_inputs().at(0).get_output()), output_name, false); &(res->get_inputs().at(0).get_output()), output_name, false);
} }
...@@ -973,6 +978,19 @@ using namespace ngraph::runtime; ...@@ -973,6 +978,19 @@ using namespace ngraph::runtime;
} }
} }
// Returns true if any output tensor of `node` has been assigned the OUTPUT
// role during tensor-role tracking (i.e. the node writes a function result).
bool runtime::cpu::CPU_ExternalFunction::computes_result(Node* node)
{
    for (size_t i = 0; i < node->get_output_size(); i++)
    {
        auto& output_tensor = node->get_output_tensor(i);
        // Use find() rather than operator[]: operator[] default-inserts a
        // role for any tensor name not yet in the map, and the
        // default-constructed enum value is CPUTensorRole::INPUT (0) —
        // silently polluting m_tensor_roles as a side effect of a query.
        auto it = m_tensor_roles.find(output_tensor.get_name());
        if (it != m_tensor_roles.end() && it->second == CPUTensorRole::OUTPUT)
        {
            return true;
        }
    }
    return false;
}
void runtime::cpu::CPU_ExternalFunction::propagate_in_place_input( void runtime::cpu::CPU_ExternalFunction::propagate_in_place_input(
ngraph::descriptor::Output* output, std::string input_name, bool dex) ngraph::descriptor::Output* output, std::string input_name, bool dex)
{ {
...@@ -1008,6 +1026,7 @@ void runtime::cpu::CPU_ExternalFunction::propagate_in_place_input( ...@@ -1008,6 +1026,7 @@ void runtime::cpu::CPU_ExternalFunction::propagate_in_place_input(
{ {
m_variable_name_map[output_tensor.get_name()] = input_name; m_variable_name_map[output_tensor.get_name()] = input_name;
} }
m_tensor_roles[output_tensor.get_name()] = CPUTensorRole::INPUT;
NGRAPH_DEBUG << "CPU codegen: Forwarding " << input_name << " through " NGRAPH_DEBUG << "CPU codegen: Forwarding " << input_name << " through "
<< output_tensor.get_name(); << output_tensor.get_name();
...@@ -1059,6 +1078,7 @@ void runtime::cpu::CPU_ExternalFunction::propagate_in_place_output( ...@@ -1059,6 +1078,7 @@ void runtime::cpu::CPU_ExternalFunction::propagate_in_place_output(
{ {
m_variable_name_map[input_tensor.get_name()] = output_name; m_variable_name_map[input_tensor.get_name()] = output_name;
} }
m_tensor_roles[input_tensor.get_name()] = CPUTensorRole::OUTPUT;
it = &arg->get_inputs().at(input_index).get_output(); it = &arg->get_inputs().at(input_index).get_output();
propagate_further = true; propagate_further = true;
...@@ -1141,6 +1161,34 @@ void runtime::cpu::CPU_ExternalFunction::build() ...@@ -1141,6 +1161,34 @@ void runtime::cpu::CPU_ExternalFunction::build()
} }
// Build executor // Build executor
// Intermediates
if (m_function->get_temporary_pool_size())
{
m_memory_buffer_sizes.push_back(m_function->get_temporary_pool_size());
for (auto& node : m_function->get_ordered_ops())
{
for (auto tensor : node->liveness_new_list)
{
intermediates_offsets.emplace_back(tensor_data[tensor->get_name()],
tensor->get_pool_offset());
m_tensor_roles[tensor->get_name()] = CPUTensorRole::INTERMEDIATE;
}
}
}
// Constants
for (auto& node : m_function->get_ordered_ops())
{
if (node->is_constant())
{
auto tv = node->get_outputs()[0].get_tensor_view();
tensor_data[tv->get_tensor().get_name()] =
const_cast<void*>(static_pointer_cast<ngraph::op::Constant>(node)->get_data_ptr());
m_tensor_roles[tv->get_tensor().get_name()] = CPUTensorRole::CONSTANT;
}
}
// Inputs // Inputs
size_t arg_index = 0; size_t arg_index = 0;
for (auto& param : m_function->get_parameters()) for (auto& param : m_function->get_parameters())
...@@ -1151,6 +1199,7 @@ void runtime::cpu::CPU_ExternalFunction::build() ...@@ -1151,6 +1199,7 @@ void runtime::cpu::CPU_ExternalFunction::build()
function_input_index.emplace_back(tensor_data[tv->get_tensor().get_name()], function_input_index.emplace_back(tensor_data[tv->get_tensor().get_name()],
arg_index, arg_index,
tensor_stale[tv->get_tensor().get_name()]); tensor_stale[tv->get_tensor().get_name()]);
m_tensor_roles[tv->get_tensor().get_name()] = CPUTensorRole::INPUT;
propagate_in_place_input( propagate_in_place_input(
&param->get_outputs().at(i), tv->get_tensor().get_name(), true); &param->get_outputs().at(i), tv->get_tensor().get_name(), true);
arg_index++; arg_index++;
...@@ -1163,6 +1212,7 @@ void runtime::cpu::CPU_ExternalFunction::build() ...@@ -1163,6 +1212,7 @@ void runtime::cpu::CPU_ExternalFunction::build()
shared_ptr<Node> op = m_function->get_output_op(i); shared_ptr<Node> op = m_function->get_output_op(i);
shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view(); shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view();
function_output_index.emplace_back(tensor_data[tv->get_tensor().get_name()], i); function_output_index.emplace_back(tensor_data[tv->get_tensor().get_name()], i);
m_tensor_roles[tv->get_tensor().get_name()] = CPUTensorRole::OUTPUT;
auto res = std::dynamic_pointer_cast<ngraph::op::Result>(op); auto res = std::dynamic_pointer_cast<ngraph::op::Result>(op);
if (!res->needs_copy()) if (!res->needs_copy())
...@@ -1170,37 +1220,13 @@ void runtime::cpu::CPU_ExternalFunction::build() ...@@ -1170,37 +1220,13 @@ void runtime::cpu::CPU_ExternalFunction::build()
shared_ptr<descriptor::TensorView> itv = shared_ptr<descriptor::TensorView> itv =
res->get_inputs().at(0).get_output().get_tensor_view(); res->get_inputs().at(0).get_output().get_tensor_view();
function_output_index.emplace_back(tensor_data[itv->get_tensor().get_name()], i); function_output_index.emplace_back(tensor_data[itv->get_tensor().get_name()], i);
m_tensor_roles[itv->get_tensor().get_name()] = CPUTensorRole::OUTPUT;
tensor_alias[itv->get_tensor().get_name()] = tv->get_tensor().get_name();
propagate_in_place_output( propagate_in_place_output(
&(res->get_inputs().at(0).get_output()), tv->get_tensor().get_name(), true); &(res->get_inputs().at(0).get_output()), tv->get_tensor().get_name(), true);
} }
} }
// Intermediates
if (m_function->get_temporary_pool_size())
{
m_memory_buffer_sizes.push_back(m_function->get_temporary_pool_size());
for (auto& node : m_function->get_ordered_ops())
{
for (auto tensor : node->liveness_new_list)
{
intermediates_offsets.emplace_back(tensor_data[tensor->get_name()],
tensor->get_pool_offset());
}
}
}
// Constants
for (auto& node : m_function->get_ordered_ops())
{
const auto c = dynamic_cast<ngraph::op::Constant*>(node.get());
if (c)
{
auto tv = node->get_outputs()[0].get_tensor_view();
tensor_data[tv->get_tensor().get_name()] = const_cast<void*>(c->get_data_ptr());
}
}
for (shared_ptr<Node> node : m_function->get_ordered_ops()) for (shared_ptr<Node> node : m_function->get_ordered_ops())
{ {
if (node->is_parameter() || node->is_constant()) if (node->is_parameter() || node->is_constant())
...@@ -1242,9 +1268,16 @@ void runtime::cpu::CPU_ExternalFunction::build() ...@@ -1242,9 +1268,16 @@ void runtime::cpu::CPU_ExternalFunction::build()
vector<reference_wrapper<bool>> in_stale, out_stale; vector<reference_wrapper<bool>> in_stale, out_stale;
for (const auto& name : in_names) for (const auto& name : in_names)
{
if (tensor_alias.count(name))
{
in_stale.emplace_back(tensor_stale[tensor_alias[name]]);
}
else
{ {
in_stale.emplace_back(tensor_stale[name]); in_stale.emplace_back(tensor_stale[name]);
} }
}
for (const auto& name : out_names) for (const auto& name : out_names)
{ {
out_stale.emplace_back(tensor_stale[name]); out_stale.emplace_back(tensor_stale[name]);
......
...@@ -74,6 +74,14 @@ namespace ngraph ...@@ -74,6 +74,14 @@ namespace ngraph
friend class CPU_Backend; friend class CPU_Backend;
public: public:
enum class CPUTensorRole
{
INPUT,
CONSTANT,
OUTPUT,
INTERMEDIATE
};
CPU_ExternalFunction(const std::shared_ptr<ngraph::Function>& function, CPU_ExternalFunction(const std::shared_ptr<ngraph::Function>& function,
bool release_function = true); bool release_function = true);
~CPU_ExternalFunction(); ~CPU_ExternalFunction();
...@@ -128,6 +136,9 @@ namespace ngraph ...@@ -128,6 +136,9 @@ namespace ngraph
void propagate_in_place_output(ngraph::descriptor::Output* res_src_output, void propagate_in_place_output(ngraph::descriptor::Output* res_src_output,
std::string output_name, std::string output_name,
bool dex); bool dex);
bool computes_result(Node* node);
void emit_debug_function_entry(codegen::CodeWriter& writer, void emit_debug_function_entry(codegen::CodeWriter& writer,
Node* node, Node* node,
const std::vector<TensorViewWrapper>& in, const std::vector<TensorViewWrapper>& in,
...@@ -165,6 +176,8 @@ namespace ngraph ...@@ -165,6 +176,8 @@ namespace ngraph
// so they don't get freed before we are done with them // so they don't get freed before we are done with them
std::vector<std::shared_ptr<Node>> m_active_constants; std::vector<std::shared_ptr<Node>> m_active_constants;
std::unordered_map<std::string, CPUTensorRole> m_tensor_roles;
LayoutDescriptorPtrs parameter_layout_descriptors; LayoutDescriptorPtrs parameter_layout_descriptors;
LayoutDescriptorPtrs result_layout_descriptors; LayoutDescriptorPtrs result_layout_descriptors;
std::vector<size_t> m_memory_buffer_sizes; std::vector<size_t> m_memory_buffer_sizes;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment