Commit dc78c12b authored by Jayaram Bobba's avatar Jayaram Bobba Committed by Scott Cyphers

Jbobba/tensor roles (#1420)

* Define and track tensor roles through graph optimizations

* style fix

* Address PR feedback
parent 70738769
......@@ -547,28 +547,6 @@ size_t ngraph::get_user_count(Node* node)
return count;
}
// Returns true if `node` produces a graph output tensor: either the node is
// itself flagged as an output, or it feeds an op::Result whose copy has been
// eliminated (a copy-eliminated Result is a no-op, so this node effectively
// writes the function output directly).
bool ngraph::computes_result(Node* node)
{
    bool feeds_output = node->is_output();
    for (const descriptor::Output& output : node->get_outputs())
    {
        if (feeds_output)
        {
            break;
        }
        for (const descriptor::Input* reader : output.get_inputs())
        {
            // Check whether this consumer is a Result node that has been
            // copy eliminated.
            auto result = std::dynamic_pointer_cast<ngraph::op::Result>(reader->get_node());
            if (result && !result->needs_copy())
            {
                feeds_output = true;
                break;
            }
        }
    }
    return feeds_output;
}
bool ngraph::possibly_overwritten(Node* node)
{
for (const descriptor::Output& output : node->get_outputs())
......
......@@ -148,9 +148,6 @@ namespace ngraph
// Returns count of `node` users that are still live in the graph
size_t get_user_count(Node* node);
// Returns true if `node` computes an output tensor
bool computes_result(Node* node);
// Return true if a node's user could potentially overwrite
// the output of this node with in-place kernels
bool possibly_overwritten(Node* node);
......
......@@ -37,7 +37,6 @@ namespace ngraph
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& functors = external_function->get_functors();
auto& tensor_data = external_function->get_tensor_data();
vector<float> scale_vector(2, 1);
vector<mkldnn::memory::primitive_desc> inputs_pd;
......@@ -55,9 +54,9 @@ namespace ngraph
input0_data_desc, input1_data_desc, result_desc, scale_vector, inputs_pd);
auto& deps = mkldnn_emitter->get_primitive_deps(add_index);
auto& arg0_tensor = tensor_data[args[0].get_name()];
auto& arg1_tensor = tensor_data[args[1].get_name()];
auto& out_tensor = tensor_data[out[0].get_name()];
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto functor = [&, add_index](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
......
......@@ -33,13 +33,12 @@ namespace ngraph
void Builder::BUILDER_DECL(ngraph::op::Broadcast)
{
auto& functors = external_function->get_functors();
auto& tensor_data = external_function->get_tensor_data();
auto broadcast = static_cast<const ngraph::op::Broadcast*>(node);
auto broadcast_axes = broadcast->get_broadcast_axes();
auto& arg_tensor = tensor_data[args[0].get_name()];
auto& out_tensor = tensor_data[out[0].get_name()];
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto arg_shape = args[0].get_shape();
auto out_shape = out[0].get_shape();
......
......@@ -38,10 +38,9 @@ namespace ngraph
auto arg_rank = arg_shape.size();
auto& functors = external_function->get_functors();
auto& tensor_data = external_function->get_tensor_data();
auto& arg_tensor = tensor_data[args[0].get_name()];
auto& out_tensor = tensor_data[out[0].get_name()];
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
if (arg_rank == 0)
{
......
......@@ -526,6 +526,7 @@ using namespace ngraph::runtime;
writer << "static " << type << "* " << tv->get_tensor().get_name() << " = (("
<< type << "*)(" << c->get_data_ptr() << "));\n";
m_variable_name_map[tv->get_tensor().get_name()] = tv->get_tensor().get_name();
m_tensor_roles[tv->get_tensor().get_name()] = CPUTensorRole::CONSTANT;
}
}
}
......@@ -623,6 +624,7 @@ using namespace ngraph::runtime;
ss << "((" << tensor->get_element_type().c_type_string()
<< "*)(pool_base_ptr + " << tensor->get_pool_offset() << "))";
m_variable_name_map[tensor->get_name()] = ss.str();
m_tensor_roles[tensor->get_name()] = CPUTensorRole::INTERMEDIATE;
}
}
}
......@@ -653,6 +655,7 @@ using namespace ngraph::runtime;
stringstream ss;
ss << "((" << type << "*)(inputs[" << arg_index << "]))";
m_variable_name_map[tv->get_tensor().get_name()] = ss.str();
m_tensor_roles[tv->get_tensor().get_name()] = CPUTensorRole::INPUT;
param_index_map[tv->get_tensor().get_name()] = arg_index;
propagate_in_place_input(&param->get_outputs().at(i), ss.str(), false);
arg_index++;
......@@ -668,6 +671,7 @@ using namespace ngraph::runtime;
stringstream ss;
ss << "((" << type << "*)(outputs[" << i << "]))";
m_variable_name_map[tv->get_tensor().get_name()] = ss.str();
m_tensor_roles[tv->get_tensor().get_name()] = CPUTensorRole::OUTPUT;
//it should be safe to assign both descriptors to one output*
//since needs_copy == false makes `op::Result` an nop
......@@ -679,6 +683,7 @@ using namespace ngraph::runtime;
auto output_name = ss.str();
m_variable_name_map[itv->get_tensor().get_name()] = ss.str();
m_tensor_roles[itv->get_tensor().get_name()] = CPUTensorRole::OUTPUT;
propagate_in_place_output(
&(res->get_inputs().at(0).get_output()), output_name, false);
}
......@@ -973,6 +978,19 @@ using namespace ngraph::runtime;
}
}
// Returns true if `node` writes one of the function's output tensors, i.e.
// any of its output tensors has been assigned the OUTPUT role.
bool runtime::cpu::CPU_ExternalFunction::computes_result(Node* node)
{
    for (size_t i = 0; i < node->get_output_size(); i++)
    {
        auto& output_tensor = node->get_output_tensor(i);
        // Use find() rather than operator[]: a pure query must not
        // default-insert a role (enum value 0 == CPUTensorRole::INPUT) for
        // tensors that were never assigned one, which would silently pollute
        // m_tensor_roles.
        auto it = m_tensor_roles.find(output_tensor.get_name());
        if (it != m_tensor_roles.end() && it->second == CPUTensorRole::OUTPUT)
        {
            return true;
        }
    }
    return false;
}
void runtime::cpu::CPU_ExternalFunction::propagate_in_place_input(
ngraph::descriptor::Output* output, std::string input_name, bool dex)
{
......@@ -1008,6 +1026,7 @@ void runtime::cpu::CPU_ExternalFunction::propagate_in_place_input(
{
m_variable_name_map[output_tensor.get_name()] = input_name;
}
m_tensor_roles[output_tensor.get_name()] = CPUTensorRole::INPUT;
NGRAPH_DEBUG << "CPU codegen: Forwarding " << input_name << " through "
<< output_tensor.get_name();
......@@ -1059,6 +1078,7 @@ void runtime::cpu::CPU_ExternalFunction::propagate_in_place_output(
{
m_variable_name_map[input_tensor.get_name()] = output_name;
}
m_tensor_roles[input_tensor.get_name()] = CPUTensorRole::OUTPUT;
it = &arg->get_inputs().at(input_index).get_output();
propagate_further = true;
......@@ -1141,6 +1161,34 @@ void runtime::cpu::CPU_ExternalFunction::build()
}
// Build executor
// Intermediates
if (m_function->get_temporary_pool_size())
{
m_memory_buffer_sizes.push_back(m_function->get_temporary_pool_size());
for (auto& node : m_function->get_ordered_ops())
{
for (auto tensor : node->liveness_new_list)
{
intermediates_offsets.emplace_back(tensor_data[tensor->get_name()],
tensor->get_pool_offset());
m_tensor_roles[tensor->get_name()] = CPUTensorRole::INTERMEDIATE;
}
}
}
// Constants
for (auto& node : m_function->get_ordered_ops())
{
if (node->is_constant())
{
auto tv = node->get_outputs()[0].get_tensor_view();
tensor_data[tv->get_tensor().get_name()] =
const_cast<void*>(static_pointer_cast<ngraph::op::Constant>(node)->get_data_ptr());
m_tensor_roles[tv->get_tensor().get_name()] = CPUTensorRole::CONSTANT;
}
}
// Inputs
size_t arg_index = 0;
for (auto& param : m_function->get_parameters())
......@@ -1151,6 +1199,7 @@ void runtime::cpu::CPU_ExternalFunction::build()
function_input_index.emplace_back(tensor_data[tv->get_tensor().get_name()],
arg_index,
tensor_stale[tv->get_tensor().get_name()]);
m_tensor_roles[tv->get_tensor().get_name()] = CPUTensorRole::INPUT;
propagate_in_place_input(
&param->get_outputs().at(i), tv->get_tensor().get_name(), true);
arg_index++;
......@@ -1163,6 +1212,7 @@ void runtime::cpu::CPU_ExternalFunction::build()
shared_ptr<Node> op = m_function->get_output_op(i);
shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view();
function_output_index.emplace_back(tensor_data[tv->get_tensor().get_name()], i);
m_tensor_roles[tv->get_tensor().get_name()] = CPUTensorRole::OUTPUT;
auto res = std::dynamic_pointer_cast<ngraph::op::Result>(op);
if (!res->needs_copy())
......@@ -1170,37 +1220,13 @@ void runtime::cpu::CPU_ExternalFunction::build()
shared_ptr<descriptor::TensorView> itv =
res->get_inputs().at(0).get_output().get_tensor_view();
function_output_index.emplace_back(tensor_data[itv->get_tensor().get_name()], i);
m_tensor_roles[itv->get_tensor().get_name()] = CPUTensorRole::OUTPUT;
tensor_alias[itv->get_tensor().get_name()] = tv->get_tensor().get_name();
propagate_in_place_output(
&(res->get_inputs().at(0).get_output()), tv->get_tensor().get_name(), true);
}
}
// Intermediates
if (m_function->get_temporary_pool_size())
{
m_memory_buffer_sizes.push_back(m_function->get_temporary_pool_size());
for (auto& node : m_function->get_ordered_ops())
{
for (auto tensor : node->liveness_new_list)
{
intermediates_offsets.emplace_back(tensor_data[tensor->get_name()],
tensor->get_pool_offset());
}
}
}
// Constants
for (auto& node : m_function->get_ordered_ops())
{
const auto c = dynamic_cast<ngraph::op::Constant*>(node.get());
if (c)
{
auto tv = node->get_outputs()[0].get_tensor_view();
tensor_data[tv->get_tensor().get_name()] = const_cast<void*>(c->get_data_ptr());
}
}
for (shared_ptr<Node> node : m_function->get_ordered_ops())
{
if (node->is_parameter() || node->is_constant())
......@@ -1243,7 +1269,14 @@ void runtime::cpu::CPU_ExternalFunction::build()
vector<reference_wrapper<bool>> in_stale, out_stale;
for (const auto& name : in_names)
{
in_stale.emplace_back(tensor_stale[name]);
if (tensor_alias.count(name))
{
in_stale.emplace_back(tensor_stale[tensor_alias[name]]);
}
else
{
in_stale.emplace_back(tensor_stale[name]);
}
}
for (const auto& name : out_names)
{
......
......@@ -74,6 +74,14 @@ namespace ngraph
friend class CPU_Backend;
public:
enum class CPUTensorRole
{
INPUT,
CONSTANT,
OUTPUT,
INTERMEDIATE
};
CPU_ExternalFunction(const std::shared_ptr<ngraph::Function>& function,
bool release_function = true);
~CPU_ExternalFunction();
......@@ -128,6 +136,9 @@ namespace ngraph
void propagate_in_place_output(ngraph::descriptor::Output* res_src_output,
std::string output_name,
bool dex);
bool computes_result(Node* node);
void emit_debug_function_entry(codegen::CodeWriter& writer,
Node* node,
const std::vector<TensorViewWrapper>& in,
......@@ -165,6 +176,8 @@ namespace ngraph
// so they don't get freed before we are done with them
std::vector<std::shared_ptr<Node>> m_active_constants;
std::unordered_map<std::string, CPUTensorRole> m_tensor_roles;
LayoutDescriptorPtrs parameter_layout_descriptors;
LayoutDescriptorPtrs result_layout_descriptors;
std::vector<size_t> m_memory_buffer_sizes;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment