Commit 2d0871f5 authored by amy.zhuang

Refactor code and add comments.

parent b6197967
@@ -66,6 +66,9 @@ namespace ngraph
                 for (size_t i = 0; i < nargs; i++)
                 {
                     auto arg_size = shape_size(arg_shapes[i]) * element_size;
+                    // If the argument pointer does not fall within the concat output buffer
+                    // (caused by propagate_in_place_output or propagate_in_place_input),
+                    // we need to copy the data; otherwise, we can skip the copy.
                     if (arg_tensors[i] < out_tensor ||
                         arg_tensors[i] >=
                             reinterpret_cast<char*>(out_tensor) + out_size)
...
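The new comment in this hunk documents a plain address-range test. The standalone sketch below restates that logic outside the codegen context; the helper name and signature are illustrative, not part of the commit.

```cpp
#include <cstddef>
#include <cstring>

// Copy an argument into the concat output only when its buffer lies outside
// the output range [out_tensor, out_tensor + out_size). In-place arguments
// already alias their slice of the output, so copying would be redundant.
static void copy_arg_if_needed(
    char* out_tensor, size_t out_size, char* arg_tensor, size_t arg_size, size_t offset)
{
    if (arg_tensor < out_tensor || arg_tensor >= out_tensor + out_size)
    {
        std::memcpy(out_tensor + offset, arg_tensor, arg_size);
    }
    // Otherwise arg_tensor already points at out_tensor + offset: skip the copy.
}
```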
@@ -620,41 +620,8 @@ using namespace ngraph::runtime;
             }
         }
-        // concat
-        for (shared_ptr<Node> node : ordered_ops)
-        {
-            if (auto concat = std::dynamic_pointer_cast<ngraph::op::Concat>(node))
-            {
-                if (auto op_annotations = concat->get_op_annotations())
-                {
-                    auto in_place_oi_pairs = op_annotations->get_in_place_oi_pairs();
-                    if (in_place_oi_pairs.size() > 0)
-                    {
-                        bool found_last_concat = true;
-                        for (auto user : concat->get_users())
-                        {
-                            if (dynamic_pointer_cast<ngraph::op::Concat>(user))
-                            {
-                                found_last_concat = false;
-                                break;
-                            }
-                        }
-                        if (found_last_concat)
-                        {
-                            for (auto arg : concat->get_arguments())
-                            {
-                                if (auto arg_concat = dynamic_pointer_cast<ngraph::op::Concat>(arg))
-                                {
-                                    NGRAPH_DEBUG << "call propagate_in_place_concat for "
-                                                 << arg->get_name() << std::endl;
-                                    propagate_in_place_concat(arg_concat);
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
+        // In place concatenation optimization
+        process_in_place_concat(ordered_ops);
         writer << "bool " << current_function->get_name() << "_t_en[" << tensor_index << "];\n";
@@ -1177,6 +1144,56 @@ void runtime::cpu::CPU_ExternalFunction::propagate_in_place_output(
     } while (propagate_further);
 }
+void runtime::cpu::CPU_ExternalFunction::process_in_place_concat(
+    std::list<std::shared_ptr<Node>> nodes)
+{
+    for (shared_ptr<Node> node : nodes)
+    {
+        if (auto concat = std::dynamic_pointer_cast<ngraph::op::Concat>(node))
+        {
+            if (auto op_annotations = concat->get_op_annotations())
+            {
+                auto in_place_oi_pairs = op_annotations->get_in_place_oi_pairs();
+                if (in_place_oi_pairs.size() > 0)
+                {
+                    bool found_last_concat = true;
+                    for (auto user : concat->get_users())
+                    {
+                        if (dynamic_pointer_cast<ngraph::op::Concat>(user))
+                        {
+                            found_last_concat = false;
+                            break;
+                        }
+                    }
+                    if (found_last_concat)
+                    {
+                        auto output_tensor = &concat->get_output_tensor();
+                        auto offset = output_tensor->get_pool_offset();
+                        for (auto arg : concat->get_arguments())
+                        {
+                            auto input_node = std::dynamic_pointer_cast<ngraph::op::Op>(arg);
+                            auto input_tensor = &input_node->get_output_tensor();
+                            auto old_offset = input_tensor->get_pool_offset();
+                            input_tensor->set_pool_offset(offset);
+                            NGRAPH_DEBUG
+                                << "cpu_external_function: change offset, old offset is "
+                                << old_offset << ", new offset is " << offset << std::endl;
+                            offset += input_tensor->size();
+                            if (auto arg_concat = dynamic_pointer_cast<ngraph::op::Concat>(arg))
+                            {
+                                NGRAPH_DEBUG
+                                    << "cpu_external_function: call propagate_in_place_concat for "
+                                    << arg->get_name() << std::endl;
+                                propagate_in_place_concat(arg_concat);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
 void runtime::cpu::CPU_ExternalFunction::propagate_in_place_concat(
     shared_ptr<ngraph::op::Concat> concat)
 {
@@ -1268,54 +1285,8 @@ void runtime::cpu::CPU_ExternalFunction::build()
     // Build executor
-    // concat
-    for (shared_ptr<Node> node : m_function->get_ordered_ops())
-    {
-        if (auto concat = std::dynamic_pointer_cast<ngraph::op::Concat>(node))
-        {
-            if (auto op_annotations = concat->get_op_annotations())
-            {
-                auto in_place_oi_pairs = op_annotations->get_in_place_oi_pairs();
-                if (in_place_oi_pairs.size() > 0)
-                {
-                    bool found_last_concat = true;
-                    auto output_tensor = &concat->get_output_tensor();
-                    auto offset = output_tensor->get_pool_offset();
-                    for (auto arg : concat->get_arguments())
-                    {
-                        auto input_node = std::dynamic_pointer_cast<ngraph::op::Op>(arg);
-                        auto input_tensor = &input_node->get_output_tensor();
-                        auto old_offset = input_tensor->get_pool_offset();
-                        input_tensor->set_pool_offset(offset);
-                        NGRAPH_DEBUG << "cpu_external_function: change offset, old offset is "
-                                     << old_offset << ", new offset is " << offset << std::endl;
-                        offset += input_tensor->size();
-                    }
-                    for (auto user : concat->get_users())
-                    {
-                        if (dynamic_pointer_cast<ngraph::op::Concat>(user))
-                        {
-                            found_last_concat = false;
-                            break;
-                        }
-                    }
-                    if (found_last_concat)
-                    {
-                        for (auto arg : concat->get_arguments())
-                        {
-                            if (auto arg_concat = dynamic_pointer_cast<ngraph::op::Concat>(arg))
-                            {
-                                NGRAPH_DEBUG
-                                    << "cpu_external_function: call propagate_in_place_concat for "
-                                    << arg->get_name() << std::endl;
-                                propagate_in_place_concat(arg_concat);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
+    // In place concatenation optimization
+    process_in_place_concat(m_function->get_ordered_ops());
     // Intermediates
     if (m_function->get_temporary_pool_size())
...
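To make the offset bookkeeping in process_in_place_concat concrete: the first argument inherits the concat output's pool offset, and each subsequent argument starts where the previous one ends. Below is a minimal sketch of that arithmetic on plain structs; TensorInfo and assign_in_place_offsets are illustrative stand-ins, not the nGraph API.

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

struct TensorInfo
{
    size_t pool_offset; // byte offset into the memory pool
    size_t size;        // tensor size in bytes
};

// Assign each argument a contiguous slice of the concat output's pool region,
// mirroring the loop in process_in_place_concat.
void assign_in_place_offsets(const TensorInfo& output, std::vector<TensorInfo>& args)
{
    size_t offset = output.pool_offset;
    for (TensorInfo& arg : args)
    {
        arg.pool_offset = offset; // the arg now writes directly into the concat buffer
        offset += arg.size;       // the next arg starts where this one ends
    }
}

int main()
{
    TensorInfo out{1024, 48};
    std::vector<TensorInfo> args{{0, 16}, {0, 16}, {0, 16}};
    assign_in_place_offsets(out, args);
    for (const TensorInfo& a : args)
    {
        std::cout << a.pool_offset << "\n"; // prints 1024, 1040, 1056
    }
}
```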
@@ -158,7 +158,11 @@
                 void propagate_in_place_output(ngraph::descriptor::Output* res_src_output,
                                                std::string output_name,
                                                bool dex);
-                // For a chain of concat ops, propagate pool offsets
+                // Find in-place concat ops and set the appropriate memory pool offsets for their arguments
+                void process_in_place_concat(std::list<std::shared_ptr<Node>> nodes);
+                // For a chain of concat ops, propagate memory pool offsets
                 void propagate_in_place_concat(std::shared_ptr<ngraph::op::Concat> concat);
                 bool computes_result(Node* node);
...
@@ -14,6 +14,40 @@
 // limitations under the License.
 //*****************************************************************************
+/// The in-place-concat optimization makes the argument nodes of a concatenation node use the
+/// concatenation node's memory buffer for their outputs. As a result, we eliminate the memory
+/// copies from the argument nodes' buffers to the concatenation node's buffer. When there is a
+/// chain of in-place concatenation nodes, we propagate the memory buffer starting from the last
+/// concatenation node. Not all concatenation nodes can be optimized; this pass marks those that
+/// can be.
+///
+/// Example 1:
+///     parameter1 parameter2   parameter3 parameter4   parameter5 parameter6
+///          \         /             \         /             \         /
+///             add1                    add2                    add3
+///                 \                    |                     /
+///                              concat
+///
+/// Before optimization: the result of add1 is stored to the memory buffer assigned to add1, and
+/// likewise for add2 and add3; those results are then copied to the memory buffer assigned to
+/// concat.
+/// After optimization: the result of add1 is stored directly to the memory buffer assigned to
+/// concat, and likewise for add2 and add3. There is no need to copy those results.
+///
+/// Example 2:
+///     parameter1 parameter2   parameter3 parameter4
+///          \         /             \         /
+///             add1                    add2
+///                 \                  /
+///                  concat1      parameter5
+///                     |    \    /
+///                     |    add3
+///                      \    /
+///                      concat
+///
+/// After optimization: the result of add1 is stored directly to the memory buffer assigned to
+/// concat, and likewise for add2 and add3.
+#include <cassert>
 #include "ngraph/runtime/cpu/pass/cpu_memory_optimization.hpp"
 #include "ngraph/descriptor/output.hpp"
@@ -54,6 +88,9 @@ bool runtime::cpu::pass::CPUMemoryOptimization::run_on_function(std::shared_ptr<
     auto index = 0;
     for (descriptor::Input& input : concat->get_inputs())
     {
+        // no tensors with zero-sized dimensions after zero_dim_tensor_elimination
+        assert(shape_size(input.get_shape()) != 0);
         // check if input layout is padded
         auto input_md = mkldnn_utils::get_input_mkldnn_md(n.get(), index);
         index++;
@@ -65,14 +102,6 @@ bool runtime::cpu::pass::CPUMemoryOptimization::run_on_function(std::shared_ptr<
             break;
         }
-        if (shape_size(input.get_shape()) == 0)
-        {
-            NGRAPH_DEBUG << "cpu_post_layout_assignment: 0 length tensor, no in "
-                            "place concat";
-            in_place_concat = false;
-            break;
-        }
-
         const auto& output = input.get_output();
         auto arg = output.get_node();
         if (std::dynamic_pointer_cast<op::Constant>(arg) ||
...
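The chain rule in the pass's doc comment (propagate starting from the last concatenation node) boils down to a scan of a concat's users, as in found_last_concat above. Here is a minimal sketch of that check under a simplified Node type, not the nGraph classes:

```cpp
#include <memory>
#include <string>
#include <vector>

// Simplified stand-in for an nGraph node; illustrative only.
struct Node
{
    std::string kind; // e.g. "Concat", "Add"
    std::vector<std::shared_ptr<Node>> users;
};

// A concat is the last of its chain when none of its users is itself a concat;
// offset propagation starts only from such nodes, since an enclosing concat
// would otherwise drive the propagation and overwrite the offsets.
bool is_last_concat(const Node& concat)
{
    for (const auto& user : concat.users)
    {
        if (user->kind == "Concat")
        {
            return false;
        }
    }
    return true;
}
```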