Unverified commit 70e5973c, authored by Scott Cyphers, committed by GitHub

Modernize a pass (#4055)

Co-authored-by: Sang Ik Lee <sang.ik.lee@intel.com>
parent 6bd90ef4
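Note on the pattern: nearly every hunk below replaces a string compare on `description()` plus an unchecked `static_pointer_cast` with one typed query (`as_type_ptr`, or `is_type` when no pointer is needed), and replaces `(node, index)` pairs with `Output<Node>`/`Input<Node>` handles. Below is a minimal, self-contained sketch of the cast idiom; the `Node`/`Concat` stand-ins and the `dynamic_pointer_cast`-based `as_type_ptr` are illustrative only, since the real `ngraph::as_type_ptr` uses the library's own type-info tables:

    #include <iostream>
    #include <memory>
    #include <string>

    struct Node
    {
        virtual ~Node() = default;
        virtual std::string description() const { return "Node"; }
    };

    struct Concat : Node
    {
        std::string description() const override { return "Concat"; }
    };

    // Stand-in for ngraph::as_type_ptr: a checked downcast, nullptr on mismatch.
    template <typename T>
    std::shared_ptr<T> as_type_ptr(const std::shared_ptr<Node>& node)
    {
        return std::dynamic_pointer_cast<T>(node);
    }

    int main()
    {
        std::shared_ptr<Node> node = std::make_shared<Concat>();

        // Old style: a string compare, then an unchecked cast that silently
        // breaks if the string and the type ever disagree.
        if (node->description() == "Concat")
        {
            auto concat = std::static_pointer_cast<Concat>(node);
            std::cout << "old style sees " << concat->description() << "\n";
        }

        // New style: the check and the cast are one operation and cannot drift apart.
        if (auto concat = as_type_ptr<Concat>(node))
        {
            std::cout << "new style sees " << concat->description() << "\n";
        }
    }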
@@ -60,9 +60,8 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
 {
     for (shared_ptr<Node> node : nodes)
     {
-        if (node->description() == "Concat")
+        if (auto concat = as_type_ptr<op::Concat>(node))
         {
-            auto concat = std::static_pointer_cast<ngraph::op::Concat>(node);
             if (auto op_annotations = concat->get_op_annotations())
             {
                 auto in_place_oi_pairs = op_annotations->get_in_place_oi_pairs();
@@ -72,9 +71,8 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
                     bool found_last_concat = true;
                     for (auto user : concat->get_users())
                     {
-                        if (user->description() == "Concat")
+                        if (auto user_concat = as_type_ptr<op::Concat>(user))
                         {
-                            auto user_concat = std::static_pointer_cast<ngraph::op::Concat>(user);
                             if (auto user_op_annotations = user_concat->get_op_annotations())
                             {
                                 auto user_in_place_oi_pairs =
@@ -90,14 +88,14 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
                     // start from the last concat
                     if (found_last_concat)
                     {
-                        auto output_tensor = &concat->get_output_tensor();
+                        auto output_tensor = &concat->output(0).get_tensor();
                         auto output_bufferID = get_bufferID(output_tensor);
                         auto offset = output_tensor->get_pool_offset();
                         size_t arg_index = 0;
-                        for (auto arg : concat->get_arguments())
+                        for (auto arg : concat->input_values())
                         {
-                            auto input_tensor = &arg->get_output_tensor();
+                            auto input_tensor = &arg.get_tensor();
                             auto input_bufferID = get_bufferID(input_tensor);
                             // same set, in place concat allowed
                             if (input_bufferID == output_bufferID)
@@ -109,22 +107,17 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
                                     << old_offset << ", new offset is " << offset << std::endl;
                                 // check if need to propagate backward
-                                if (arg->is_op())
-                                {
-                                    auto arg_op = std::static_pointer_cast<ngraph::op::Op>(arg);
-                                    if (auto arg_op_annotations = arg_op->get_op_annotations())
-                                    {
-                                        auto arg_in_place_oi_pairs =
-                                            arg_op_annotations->get_in_place_oi_pairs();
-                                        if (arg_in_place_oi_pairs.size() > 0)
-                                        {
-                                            auto input = &arg_op->get_inputs().at(0);
-                                            auto output_index = input->get_output().get_index();
-                                            NGRAPH_DEBUG << "cpu_memory_assignment: call "
-                                                            "propagate_in_place_concat for "
-                                                         << arg->get_name();
-                                            propagate_in_place_concat(arg_op, output_index);
-                                        }
-                                    }
-                                }
+                                auto arg_op = arg.get_node_shared_ptr();
+                                if (auto arg_op_annotations = arg_op->get_op_annotations())
+                                {
+                                    auto arg_in_place_oi_pairs =
+                                        arg_op_annotations->get_in_place_oi_pairs();
+                                    if (arg_in_place_oi_pairs.size() > 0)
+                                    {
+                                        NGRAPH_DEBUG << "cpu_memory_assignment: call "
+                                                        "propagate_in_place_concat for "
+                                                     << *arg_op;
+                                        propagate_in_place_concat(arg);
+                                    }
+                                }
                             }
                         }
                     }
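Note: the new call site above passes `arg` (an `Output<Node>`) straight through, where the old code had to carry a `shared_ptr<op::Op>` plus a separate `output_index`. A small sketch of why one argument now suffices; this `Output` is a simplified assumed mirror of `ngraph::Output`, not the library class:

    #include <cstddef>
    #include <memory>
    #include <utility>

    struct Node
    {
    };

    // One value that carries both the producing node and the output index,
    // so the pair can never be split or mismatched on the way in.
    template <typename NodeType>
    class Output
    {
    public:
        Output(std::shared_ptr<NodeType> node, size_t index)
            : m_node(std::move(node))
            , m_index(index)
        {
        }
        std::shared_ptr<NodeType> get_node_shared_ptr() const { return m_node; }
        size_t get_index() const { return m_index; }

    private:
        std::shared_ptr<NodeType> m_node;
        size_t m_index;
    };

    // Old shape: propagate_in_place_concat(shared_ptr<op::Op> op, size_t output_index)
    // New shape: the handle is the whole argument.
    void propagate_in_place_concat(const Output<Node>& output)
    {
        auto op = output.get_node_shared_ptr(); // the producing node
        auto index = output.get_index();        // which of its outputs
        (void)op;
        (void)index;
    }

    int main()
    {
        propagate_in_place_concat(Output<Node>(std::make_shared<Node>(), 0));
    }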
@@ -138,19 +131,19 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
     }
 }
 
-void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(
-    shared_ptr<ngraph::op::Op> op, size_t output_index)
+void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(const Output<Node>& output)
 {
-    if (op->description() == "Concat")
+    auto op = output.get_node_shared_ptr();
+    if (is_type<op::Concat>(op))
     {
-        auto output_tensor = &op->get_output_tensor();
+        auto output_tensor = &op->output(0).get_tensor();
         auto output_bufferID = get_bufferID(output_tensor);
         auto offset = output_tensor->get_pool_offset();
         size_t arg_index = 0;
-        for (auto arg : op->get_arguments())
+        for (auto arg : op->input_values())
         {
-            auto input_tensor = &arg->get_output_tensor();
+            auto input_tensor = &arg.get_tensor();
             auto input_bufferID = get_bufferID(input_tensor);
             // same set, in place concat allowed
             if (input_bufferID == output_bufferID)
@@ -161,21 +154,15 @@ void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(
                              << ", new offset is " << offset;
                 // check if need to propagate backward
-                if (arg->is_op())
-                {
-                    auto arg_op = std::static_pointer_cast<ngraph::op::Op>(arg);
-                    if (auto arg_op_annotations = arg_op->get_op_annotations())
-                    {
-                        auto arg_in_place_oi_pairs = arg_op_annotations->get_in_place_oi_pairs();
-                        if (arg_in_place_oi_pairs.size() > 0)
-                        {
-                            NGRAPH_DEBUG
-                                << "cpu_memory_assignment: call propagate_in_place_concat for "
-                                << arg->get_name();
-                            auto input = &op->get_inputs().at(arg_index);
-                            auto arg_output_index = input->get_output().get_index();
-                            propagate_in_place_concat(arg_op, arg_output_index);
-                        }
-                    }
-                }
+                auto arg_op = arg.get_node_shared_ptr();
+                if (auto arg_op_annotations = arg_op->get_op_annotations())
+                {
+                    auto arg_in_place_oi_pairs = arg_op_annotations->get_in_place_oi_pairs();
+                    if (arg_in_place_oi_pairs.size() > 0)
+                    {
+                        NGRAPH_DEBUG << "cpu_memory_assignment: call propagate_in_place_concat for "
+                                     << *arg_op;
+                        propagate_in_place_concat(arg);
+                    }
+                }
             }
         }
     }
@@ -189,14 +176,14 @@ void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(
         auto op_annotations = op->get_op_annotations();
         for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
         {
-            if (oi_pair.output != output_index || oi_pair.destructive)
+            if (oi_pair.output != output.get_index() || oi_pair.destructive)
             {
                 continue;
             }
-            auto input_tensor = &op->get_inputs().at(oi_pair.input).get_tensor();
+            auto input_tensor = &op->input_value(oi_pair.input).get_tensor();
             auto input_bufferID = get_bufferID(input_tensor);
-            auto output_tensor = &op->get_outputs().at(oi_pair.output).get_tensor();
+            auto output_tensor = &op->output(oi_pair.output).get_tensor();
             auto output_bufferID = get_bufferID(output_tensor);
             // same set, in place op allowed
@@ -207,24 +194,18 @@ void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(
                 input_tensor->set_pool_offset(new_offset);
                 NGRAPH_DEBUG << "cpu_memory_assignment: change offset, old offset is " << old_offset
                              << ", new offset is " << new_offset;
-                auto input = &op->get_inputs().at(oi_pair.input);
-                auto arg = input->get_node();
+                auto input = op->input_value(oi_pair.input);
+                auto arg_op = input.get_node_shared_ptr();
                 // check if need to propagate backward
-                if (arg->is_op())
-                {
-                    auto arg_op = std::static_pointer_cast<ngraph::op::Op>(arg);
-                    if (auto arg_op_annotations = arg_op->get_op_annotations())
-                    {
-                        auto arg_in_place_oi_pairs = arg_op_annotations->get_in_place_oi_pairs();
-                        if (arg_in_place_oi_pairs.size() > 0)
-                        {
-                            auto arg_output_index = input->get_output().get_index();
-                            NGRAPH_DEBUG
-                                << "cpu_memory_assignment: call propagate_in_place_concat for "
-                                << arg->get_name();
-                            propagate_in_place_concat(arg_op, arg_output_index);
-                        }
-                    }
-                }
+                if (auto arg_op_annotations = arg_op->get_op_annotations())
+                {
+                    auto arg_in_place_oi_pairs = arg_op_annotations->get_in_place_oi_pairs();
+                    if (arg_in_place_oi_pairs.size() > 0)
+                    {
+                        NGRAPH_DEBUG << "cpu_memory_assignment: call propagate_in_place_concat for "
+                                     << *arg_op;
+                        propagate_in_place_concat(input);
+                    }
+                }
             }
         }
     }
@@ -238,20 +219,18 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_slice(
 {
     for (shared_ptr<Node>& node : nodes)
     {
-        if (node->description() == "Slice")
+        if (auto slice = as_type_ptr<op::Slice>(node))
         {
-            auto slice = std::static_pointer_cast<ngraph::op::Slice>(node);
             if (auto op_annotations = slice->get_op_annotations())
             {
                 auto in_place_oi_pairs = op_annotations->get_in_place_oi_pairs();
                 if (in_place_oi_pairs.size() > 0)
                 {
-                    auto input = &slice->get_inputs().at(0);
-                    auto arg = input->get_output().get_node();
-                    auto index = input->get_output().get_index();
-                    auto input_tensor = &arg->get_output_tensor(index);
+                    auto input = slice->input_value(0);
+                    auto arg = input.get_node_shared_ptr();
+                    auto input_tensor = &input.get_tensor();
                     auto input_bufferID = get_bufferID(input_tensor);
-                    auto output_tensor = &slice->get_output_tensor();
+                    auto output_tensor = &slice->output(0).get_tensor();
                     auto output_bufferID = get_bufferID(output_tensor);
                     // same set, in place slice allowed
@@ -277,17 +256,15 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_slice(
                                      << old_offset << ", new offset is " << offset;
                         // check if need to propagate forward
-                        for (size_t i = 0; i < slice->get_output_size(); ++i)
+                        for (auto slice_output : slice->outputs())
                         {
-                            auto slice_output = &slice->get_outputs().at(i);
-                            for (auto slice_output_input : slice_output->get_inputs())
+                            for (auto slice_output_input : slice_output.get_target_inputs())
                             {
                                 NGRAPH_DEBUG
                                     << "cpu_memory_assignment: call propagate_in_place_slice "
                                        "for output "
-                                    << i << " of " << slice->get_name();
-                                propagate_in_place_slice(slice_output_input,
-                                                         slice_output_input->get_index());
+                                    << slice_output.get_index() << " of " << *slice;
+                                propagate_in_place_slice(slice_output_input);
                             }
                         }
                     }
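Note: forward propagation now iterates `slice->outputs()` and each output's `get_target_inputs()` rather than indexing descriptor arrays. Both member functions appear in the new code above; the helper below only pulls the traversal shape out of context and assumes the usual `ngraph/node.hpp` include path, so it compiles inside an nGraph source tree rather than standalone:

    #include <memory>

    #include "ngraph/node.hpp"

    // Visit every consumer of every output of a node, as the slice propagation does.
    void visit_consumers(const std::shared_ptr<ngraph::Node>& n)
    {
        for (auto output : n->outputs()) // Output<Node> handles
        {
            for (auto consumer : output.get_target_inputs()) // Input<Node> handles
            {
                // consumer.get_node() is the downstream node;
                // consumer.get_index() is the input slot it reads through.
                (void)consumer;
            }
        }
    }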
@@ -297,48 +274,43 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_slice(
         }
     }
 }
 
-void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_slice(
-    ngraph::descriptor::Input* input, size_t input_index)
+void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_slice(const Input<Node>& input)
 {
-    std::deque<std::pair<ngraph::descriptor::Input*, size_t>> stack;
-    stack.push_front(std::pair<ngraph::descriptor::Input*, size_t>(input, input_index));
+    std::deque<Input<Node>> stack;
+    stack.push_front(input);
     while (stack.size() > 0)
     {
-        ngraph::descriptor::Input* in = stack.front().first;
-        auto index = stack.front().second;
+        Input<Node> in = stack.front();
         stack.pop_front();
-        auto node = in->get_node();
+        auto node = in.get_node();
         // let process_in_place_slice handle slice.
-        if (!node->is_op() || node->description() == "Slice")
+        if (is_type<op::Slice>(node))
         {
            continue;
        }
-        auto op = std::static_pointer_cast<ngraph::op::Op>(node);
-        if (auto op_annotations = op->get_op_annotations())
+        if (auto op_annotations = node->get_op_annotations())
        {
            for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
            {
-                if (oi_pair.input == index)
+                if (oi_pair.input == in.get_index())
                {
-                    auto input_tensor = &op->get_inputs().at(oi_pair.input).get_tensor();
+                    auto input_tensor = &node->input(oi_pair.input).get_tensor();
                    auto input_bufferID = get_bufferID(input_tensor);
                    size_t output_index = oi_pair.output;
-                    auto output_tensor = &op->get_outputs().at(output_index).get_tensor();
+                    auto output_tensor = &node->output(output_index).get_tensor();
                    auto output_bufferID = get_bufferID(output_tensor);
                    // same set, in place op allowed
                    if (input_bufferID == output_bufferID)
                    {
                        output_tensor->set_pool_offset(input_tensor->get_pool_offset());
-                        for (size_t i = 0; i < op->get_output_size(); ++i)
+                        for (auto op_output : node->outputs())
                        {
-                            auto op_output = &op->get_outputs().at(i);
-                            for (auto op_output_input : op_output->get_inputs())
+                            for (auto op_output_input : op_output.get_target_inputs())
                            {
-                                stack.push_front(std::pair<ngraph::descriptor::Input*, size_t>(
-                                    op_output_input, op_output_input->get_index()));
+                                stack.push_front(op_output_input);
                            }
                        }
                    }
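Note: `propagate_in_place_slice` keeps its explicit work list, so long slice chains cannot overflow the call stack; the modernization only changes the element type from a `(descriptor::Input*, size_t)` pair to a self-describing `Input<Node>` value. A runnable control-flow sketch with plain integers standing in for inputs (everything here is illustrative scaffolding, not nGraph code):

    #include <deque>
    #include <iostream>
    #include <vector>

    struct Item
    {
        std::vector<int> next; // stand-in for "target inputs of matching outputs"
    };

    void walk(const std::vector<Item>& graph, int start)
    {
        std::deque<int> stack;
        stack.push_front(start);
        while (!stack.empty())
        {
            int in = stack.front();
            stack.pop_front();
            std::cout << "visit " << in << "\n";
            // the real loop skips Slice nodes here and only expands oi_pairs
            // whose input index matches in.get_index()
            for (int next : graph[in].next)
            {
                stack.push_front(next);
            }
        }
    }

    int main()
    {
        std::vector<Item> graph = {{{1, 2}}, {{}}, {{1}}};
        walk(graph, 0); // prints 0, 2, 1, 1: no visited set, matching the pass
    }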
@@ -365,16 +337,16 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared_ptr<Node>>& ops)
         const shared_ptr<Node>& node = *it;
         if (node->is_parameter())
         {
-            auto output_tensor = &node->get_output_tensor();
+            auto output_tensor = &node->output(0).get_tensor();
             auto ele = std::pair<TensorRole, unordered_set<descriptor::Tensor*>>(
                 TensorRole::INPUT, unordered_set<descriptor::Tensor*>({output_tensor}));
             m_bufferID_to_tensorSets[count] = ele;
             m_tensor_to_bufferID[output_tensor] = count;
             count++;
         }
-        else if (node->is_constant())
+        else if (is_type<op::Constant>(node))
         {
-            auto output_tensor = &node->get_output_tensor();
+            auto output_tensor = &node->output(0).get_tensor();
             auto ele = std::pair<TensorRole, unordered_set<descriptor::Tensor*>>(
                 TensorRole::CONSTANT, unordered_set<descriptor::Tensor*>({output_tensor}));
             m_bufferID_to_tensorSets[count] = ele;
@@ -383,8 +355,8 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared_ptr<Node>>& ops)
         }
         else if (node->is_output())
         {
-            auto output_tensor = &node->get_output_tensor();
-            auto input_tensor = &node->get_inputs().at(0).get_tensor();
+            auto output_tensor = &node->output(0).get_tensor();
+            auto input_tensor = &node->input(0).get_tensor();
             auto bufferID = get_bufferID(input_tensor);
             NGRAPH_CHECK(bufferID <= count);
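Note: for reference while reading these hunks, the two indexes the function fills, with element types as they appear above. The exact member declarations live in the pass header; the container choices here (`map` keyed by buffer ID, `unordered_map` for the reverse index) are an assumption:

    #include <cstddef>
    #include <map>
    #include <unordered_map>
    #include <unordered_set>
    #include <utility>

    namespace descriptor
    {
        class Tensor;
    }

    // roles seen in this diff; the real enum may have more values
    enum class TensorRole
    {
        INPUT,
        CONSTANT,
        INTERMEDIATE
    };

    // buffer ID -> (role of the set, all tensors sharing that buffer)
    std::map<size_t, std::pair<TensorRole, std::unordered_set<descriptor::Tensor*>>>
        m_bufferID_to_tensorSets;

    // reverse index: tensor -> ID of the buffer set it belongs to
    std::unordered_map<descriptor::Tensor*, size_t> m_tensor_to_bufferID;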
@@ -423,18 +395,18 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared_ptr<Node>>& ops)
                 auto cacheable = op_annotations->is_cacheable();
                 // in place concat
-                if (node->description() == "Concat")
+                if (is_type<op::Concat>(node))
                 {
-                    auto output_tensor = &node->get_output_tensor();
+                    auto output_tensor = &node->output(0).get_tensor();
                     auto ele = std::pair<TensorRole, unordered_set<descriptor::Tensor*>>(
                         TensorRole::INTERMEDIATE,
                         unordered_set<descriptor::Tensor*>({output_tensor}));
-                    for (auto& arg : node->get_arguments())
+                    for (auto& arg : node->input_values())
                     {
                         // when reusing memory, check cacheability
-                        if (!m_disable_memory_sharing && arg->is_op())
+                        if (!m_disable_memory_sharing)
                         {
-                            auto arg_op = std::static_pointer_cast<op::Op>(arg);
+                            auto arg_op = arg.get_node_shared_ptr();
                             if (auto arg_op_annotations = arg_op->get_op_annotations())
                             {
                                 // when reusing memory, ops with different cacheabilities should
@@ -449,7 +421,7 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared_ptr<Node>>& ops)
                         // because in-place slice before in-place concat cannot use the memory
                         // buffer of concat. In-place slice after in-place concat can use the
                         // memory buffer of concat.
-                        auto input_tensor = &arg->get_output_tensor();
+                        auto input_tensor = &arg.get_tensor();
                         if (in_place_slice_chain.find(input_tensor) !=
                             in_place_slice_chain.end())
                         {
@@ -494,21 +466,19 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared_ptr<Node>>& ops)
                 // other in place ops
                 for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
                 {
-                    auto input_tensor = &node->get_inputs().at(oi_pair.input).get_tensor();
-                    auto output_tensor =
-                        &node->get_outputs().at(oi_pair.output).get_tensor();
+                    auto input_tensor = &node->input_value(oi_pair.input).get_tensor();
+                    auto output_tensor = &node->output(oi_pair.output).get_tensor();
                     // if destructive, do not put input tensor and output tensor into the
                     // same set.
                     if (!oi_pair.destructive)
                     {
                         bool no_in_place = false;
-                        auto input_node =
-                            node->get_inputs().at(oi_pair.input).get_output().get_node();
+                        auto input_op =
+                            node->input_value(oi_pair.input).get_node_shared_ptr();
                         // when reusing memory, check cacheability
-                        if (!m_disable_memory_sharing && input_node->is_op())
+                        if (!m_disable_memory_sharing)
                         {
-                            auto input_op = std::static_pointer_cast<op::Op>(input_node);
                             if (auto input_op_annotations = input_op->get_op_annotations())
                             {
                                 // when reusing memory, ops with different cacheabilities
@@ -527,7 +497,7 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared_ptr<Node>>& ops)
                         auto input_buffer_it = m_bufferID_to_tensorSets.find(bufferID);
                         NGRAPH_CHECK(input_buffer_it != m_bufferID_to_tensorSets.end());
-                        if (node->description() == "Slice")
+                        if (is_type<op::Slice>(node))
                         {
                             if (input_buffer_it->second.first != TensorRole::CONSTANT)
                             {
@@ -556,9 +526,9 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared_ptr<Node>>& ops)
                 }
             }
             // process output tensors
-            for (size_t i = 0; i < node->get_output_size(); i++)
+            for (auto node_output : node->outputs())
             {
-                auto output_tensor = &node->get_outputs().at(i).get_tensor();
+                auto output_tensor = &node_output.get_tensor();
                 // not in place, create a new set and insert into the map
                 if (m_tensor_to_bufferID.find(output_tensor) == m_tensor_to_bufferID.end())
                 {
@@ -617,9 +587,9 @@ void runtime::cpu::pass::CPUMemoryAssignment::liveness_analysis(
         const shared_ptr<Node>& node = *it;
         node->liveness_new_list.clear();
-        for (size_t i = 0; i < node->get_output_size(); ++i)
+        for (auto node_output : node->outputs())
         {
-            auto tensor = &node->get_output_tensor(i);
+            auto tensor = &node_output.get_tensor();
             auto bufferID = get_bufferID(tensor);
             if (allocated_sets.find(bufferID) == allocated_sets.end())
             {
@@ -635,9 +605,9 @@ void runtime::cpu::pass::CPUMemoryAssignment::liveness_analysis(
         const shared_ptr<Node>& node = *it;
         node->liveness_free_list.clear();
-        for (descriptor::Input& input_decl : node->get_inputs())
+        for (auto input_value : node->input_values())
        {
-            auto tensor = &input_decl.get_tensor();
+            auto tensor = &input_value.get_tensor();
             auto bufferID = get_bufferID(tensor);
             if (freed_sets.find(bufferID) == freed_sets.end())
             {
@@ -677,10 +647,9 @@ bool runtime::cpu::pass::CPUMemoryAssignment::run_on_function(shared_ptr<ngraph::Function> function)
             if (cacheable)
             {
-                for (size_t i = 0; i < node->get_output_size(); ++i)
+                for (auto output : node->outputs())
                 {
-                    shared_ptr<descriptor::Tensor> tv = node->get_output_tensor_ptr(i);
-                    m_tensor_caching.insert(tv.get());
+                    m_tensor_caching.insert(&output.get_tensor());
                 }
             }
         }
@@ -698,107 +667,99 @@ bool runtime::cpu::pass::CPUMemoryAssignment::run_on_function(shared_ptr<ngraph::Function> function)
             unordered_set<descriptor::Tensor*> no_free;
             unordered_set<descriptor::Tensor*> no_new;
-            if (node->is_op())
-            {
-                auto op = std::static_pointer_cast<op::Op>(node);
-                if (auto op_annotations = op->get_op_annotations())
-                {
-                    for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
-                    {
-                        auto output_tensor = &node->get_outputs().at(oi_pair.output).get_tensor();
-                        auto input_tensor = &node->get_inputs().at(oi_pair.input).get_tensor();
-                        auto input_node = node->get_inputs().at(oi_pair.input).get_output().get_node();
+            auto op = std::static_pointer_cast<op::Op>(node);
+            if (auto op_annotations = op->get_op_annotations())
+            {
+                for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
+                {
+                    auto output_tensor = &node->output(oi_pair.output).get_tensor();
+                    auto input_tensor = &node->input_value(oi_pair.input).get_tensor();
+                    auto input_op = node->input_value(oi_pair.input).get_node_shared_ptr();
                     if (oi_pair.destructive && node->liveness_free_list.count(input_tensor) != 0 &&
                         node->liveness_new_list.count(output_tensor) != 0)
                     {
-                        if (input_node->is_op())
-                        {
-                            auto input_op = std::static_pointer_cast<op::Op>(input_node);
-                            if (auto input_op_annotations = input_op->get_op_annotations())
-                            {
-                                // when reusing memory, ops with different cacheabilities are using
-                                // different memory manager
-                                // and should not share the same buffer.
-                                if (!m_disable_memory_sharing &&
-                                    input_op_annotations->is_cacheable() !=
-                                        op_annotations->is_cacheable())
-                                {
-                                    NGRAPH_DEBUG
-                                        << "cpu_memory_assignment: reusing memory with "
-                                           "input and output have different cacheabilities, no "
-                                           "destructive oi";
-                                    continue;
-                                }
-                            }
-                        }
+                        if (auto input_op_annotations = input_op->get_op_annotations())
+                        {
+                            // when reusing memory, ops with different cacheabilities are using
+                            // different memory manager
+                            // and should not share the same buffer.
+                            if (!m_disable_memory_sharing &&
+                                input_op_annotations->is_cacheable() != op_annotations->is_cacheable())
+                            {
+                                NGRAPH_DEBUG << "cpu_memory_assignment: reusing memory with "
+                                                "input and output have different cacheabilities, no "
+                                                "destructive oi";
+                                continue;
+                            }
+                        }
                         auto input_bufferID = get_bufferID(input_tensor);
                         auto output_bufferID = get_bufferID(output_tensor);
                         auto input_buffer_it = m_bufferID_to_tensorSets.find(input_bufferID);
                         NGRAPH_CHECK(input_buffer_it != m_bufferID_to_tensorSets.end());
                         // do not modify function inputs and constants, so no destructive oi
                         if (input_buffer_it->second.first == TensorRole::INPUT ||
                             input_buffer_it->second.first == TensorRole::CONSTANT)
                         {
                             NGRAPH_DEBUG << "cpu_memory_assignment: input is function input or "
                                             "constant, no destructive oi";
                             continue;
                         }
                         auto input_set = input_buffer_it->second.second;
                         // check buffer sizes, if required output buffer is larger than input
                         // buffer, do not reuse input buffer get the largest tensor size, which is
                         // the size of the memory buffer for the set
                         size_t input_size = input_tensor->size();
                         // get the smallest offset, which is the offset of the memory buffer for the
                         // set
                         size_t offset = input_tensor->get_pool_offset();
                         for (auto e : input_set)
                         {
                             if (e->size() > input_size)
                             {
                                 input_size = e->size();
                             }
                             if (e->get_pool_offset() < offset)
                             {
                                 offset = e->get_pool_offset();
                             }
                         }
                         auto output_buffer_it = m_bufferID_to_tensorSets.find(output_bufferID);
                         NGRAPH_CHECK(output_buffer_it != m_bufferID_to_tensorSets.end());
                         auto output_set = output_buffer_it->second.second;
                         size_t output_size = input_tensor->size();
                         // get the largest tensor size, which is the size of memory buffer for the
                         // set
                         for (auto e : output_set)
                         {
                             if (e->size() > output_size)
                             {
                                 output_size = e->size();
                             }
                         }
                         if (input_size < output_size)
                         {
                             continue;
                         }
                         NGRAPH_DEBUG << "cpu_memory_assignment: last use of input tensor, "
                                         "destructive oi allowed:";
                         NGRAPH_DEBUG << "input_tensor is " << input_tensor->get_name();
                         NGRAPH_DEBUG << "output_tensor is " << output_tensor->get_name();
                         no_free.insert(input_tensor);
                         no_new.insert(output_tensor);
                         // set the tensor offset for tensors in the set containing the output tensor
                         // to the starting offset
                         // of the set of input tensor.
                         // do not combine those two sets.
                         // change the label of output tensor set to that of input tensor set
                         output_buffer_it->second.first = input_buffer_it->second.first;
                         for (auto& ele_t : output_set)
                         {
                             ele_t->set_pool_offset(offset);
                         }
                     }
                 }
             }
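Note: the reuse test in this last hunk reduces each buffer set to two numbers. A set's backing buffer is as large as its largest member tensor and starts at its smallest member offset, and destructive reuse is rejected when the output set's buffer would not fit in the input set's. Distilled into a standalone sketch with a stand-in `Tensor` (the names and the helper split are illustrative, not the pass's own):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    struct Tensor
    {
        size_t size;
        size_t pool_offset;
    };

    struct BufferExtent
    {
        size_t size;   // largest member tensor = size of the backing buffer
        size_t offset; // smallest member offset = start of the backing buffer
    };

    BufferExtent extent_of(const std::vector<Tensor*>& set, const Tensor* seed)
    {
        BufferExtent ext{seed->size, seed->pool_offset};
        for (const Tensor* t : set)
        {
            ext.size = std::max(ext.size, t->size);
            ext.offset = std::min(ext.offset, t->pool_offset);
        }
        return ext;
    }

    // destructive in-place reuse is allowed only when the input buffer can hold
    // the output buffer; the pass then rebases the whole output set to ext.offset
    bool destructive_reuse_allowed(size_t input_buffer_size, size_t output_buffer_size)
    {
        return input_buffer_size >= output_buffer_size;
    }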
...
@@ -53,13 +53,13 @@ private:
     void process_in_place_concat(std::list<std::shared_ptr<Node>> nodes);
 
     // For a chain of concat ops, propagate memory pool offsets
-    void propagate_in_place_concat(std::shared_ptr<ngraph::op::Op> concat, size_t index);
+    void propagate_in_place_concat(const ngraph::Output<ngraph::Node>& concat);
 
     // Find in-place slice ops and set appropriate memory pool offset for its output
     void process_in_place_slice(std::list<std::shared_ptr<Node>> nodes);
 
     // propagate slice when its arg comes from function input
-    void propagate_in_place_slice(ngraph::descriptor::Input* input, size_t input_index);
+    void propagate_in_place_slice(const ngraph::Input<ngraph::Node>& input);
 
     // build buffer sets maps
     void build_buffer_sets_maps(std::list<std::shared_ptr<Node>>& ops);
...