Commit 02d4aa59 authored by Amy Zhuang, committed by Robert Kimball

Enable in place slice when slice's arg is function input. (#2214)

* Enable in place slice when slice's arg is function input.

* Add a corner case.

* Add codegen support.

* Set the correct offset for an in-place slice when there is a chain of in-place ops starting from a parameter.
parent 1234eb97
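For orientation before the diff: "in place slice" here means the slice's output tensor is not a fresh allocation but a view into its argument's buffer at a fixed byte offset, so when the argument is a function input the kernel can read straight from the caller's buffer. A minimal standalone sketch of that offset arithmetic (plain C++, not the nGraph API; the helper name is illustrative):

#include <cstddef>
#include <iostream>

// Byte offset of a row-aligned slice starting at `lower_row` into a
// row-major matrix with `cols` columns of `elem_size`-byte elements.
static std::size_t slice_byte_offset(std::size_t lower_row, std::size_t cols, std::size_t elem_size)
{
    return lower_row * cols * elem_size;
}

int main()
{
    // Slice<{2,0}..{4,4}> of a 4x4 float tensor begins 2 * 4 * sizeof(float)
    // = 32 bytes past the start of the argument's buffer; an in-place slice
    // simply reuses that address instead of copying the rows.
    std::cout << slice_byte_offset(2, 4, sizeof(float)) << std::endl; // prints 32
    return 0;
}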
@@ -210,6 +210,11 @@ namespace ngraph
// Find in-place slice ops and set appropriate memory pool offset for its output
void process_in_place_slice(std::list<std::shared_ptr<Node>> nodes);
// Propagate in-place slice when its arg comes from function input
void propagate_in_place_slice(ngraph::descriptor::Output* output,
size_t input_index,
size_t input_offset);
bool computes_result(Node* node);
void release_function() { m_function = nullptr; }
#if !defined(NGRAPH_DEX_ONLY)
@@ -255,6 +260,8 @@ namespace ngraph
bool m_direct_execution;
EntryPoint m_compiled_function;
std::unordered_map<std::string, std::string> m_variable_name_map;
std::unordered_map<std::string, std::pair<std::size_t, std::size_t>>
m_variable_input_index_offset_map;
std::unordered_map<std::string, CPUTensorRole> m_tensor_roles;
@@ -277,7 +284,10 @@ namespace ngraph
std::unordered_map<std::string, void*> tensor_data;
std::unordered_map<std::string, bool> tensor_stale;
std::unordered_map<std::string, std::string> tensor_alias;
std::unordered_map<std::string, size_t> function_input_name_index;
std::list<std::pair<std::reference_wrapper<void*>, size_t>> intermediates_offsets;
std::list<std::tuple<std::reference_wrapper<void*>, size_t, size_t>>
intermediate_input_index_offset;
std::list<
std::tuple<std::reference_wrapper<void*>, size_t, std::reference_wrapper<bool>>>
function_input_index;
......
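The new members above pair each tensor that aliases a function input with that input's index and its byte offset into the input's buffer, and propagate_in_place_slice pushes this pair down chains of in-place ops so the address can be patched in at call time. A rough sketch of that propagation under simplified stand-in types (not the actual nGraph data structures; names and helpers are illustrative):

#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Stand-in for the graph information the pass walks: each tensor lists the
// tensors produced by its in-place users and the extra offset each user adds.
struct InPlaceUsers
{
    std::vector<std::pair<std::string, std::size_t>> users; // (user tensor, extra offset)
};

// Record that `tensor` aliases function input `input_index` at `input_offset`,
// then visit every in-place user so a whole chain that starts at a parameter
// ends up with the correct cumulative offset.
void propagate_in_place_slice(
    const std::string& tensor,
    std::size_t input_index,
    std::size_t input_offset,
    const std::unordered_map<std::string, InPlaceUsers>& graph,
    std::unordered_map<std::string, std::pair<std::size_t, std::size_t>>& index_offset_map)
{
    index_offset_map[tensor] = {input_index, input_offset};
    auto it = graph.find(tensor);
    if (it == graph.end())
    {
        return;
    }
    for (const auto& user : it->second.users)
    {
        propagate_in_place_slice(
            user.first, input_index, input_offset + user.second, graph, index_offset_map);
    }
}

int main()
{
    // Parameter -> Slice (offset 32) -> Reshape (offset 0): the reshape's
    // output should resolve to input 0 at byte offset 32.
    std::unordered_map<std::string, InPlaceUsers> graph;
    graph["param0"] = {{{"slice0", 32}}};
    graph["slice0"] = {{{"reshape0", 0}}};
    std::unordered_map<std::string, std::pair<std::size_t, std::size_t>> index_offset;
    propagate_in_place_slice("param0", 0, 0, graph, index_offset);
    std::cout << index_offset["reshape0"].first << " "
              << index_offset["reshape0"].second << std::endl; // prints: 0 32
    return 0;
}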
@@ -235,11 +235,30 @@ bool runtime::cpu::pass::CPUMemoryOptimization::run_on_function(std::shared_ptr<
auto upper_bounds = slice->get_upper_bounds();
auto arg = slice->get_argument(0);
- if (std::dynamic_pointer_cast<op::Constant>(arg) ||
-     std::dynamic_pointer_cast<op::Parameter>(arg))
+ if (arg->is_constant())
{
    NGRAPH_DEBUG << "cpu_memory_optimization: " << arg->get_name()
-                << ": constant or parameter, no in place slice";
+                << ": constant, no in place slice";
continue;
}
bool no_in_place_slice = false;
if (arg->is_parameter())
{
for (auto user : slice->get_users())
{
if (user->is_output())
{
NGRAPH_DEBUG << "cpu_memory_optimization: slice between function input and "
"output, no in place slice";
no_in_place_slice = true;
break;
}
}
}
if (no_in_place_slice)
{
continue;
}
......
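The parameter check added above guards one corner case: a slice that sits directly between a function input and a function output. Its result tensor is a separate caller-provided output buffer, so it cannot alias the caller's input buffer and the copy has to stay. A minimal graph that takes this bail-out path, written in the same style as the tests below (illustrative only, not part of the commit):

Shape shape_a{4, 4};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
// The slice of a parameter is returned directly as the function result,
// so one of its users is an output and in-place slice is skipped.
auto S = make_shared<op::Slice>(A, Coordinate{0, 0}, Coordinate{2, 4});
auto f = make_shared<Function>(S, ParameterVector{A});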
@@ -1528,6 +1528,96 @@ NGRAPH_TEST(${BACKEND_NAME}, slice_matrix_axis_0_overlap)
EXPECT_EQ((vector<float>{12, 16, 20, 24, 28, 32, 36, 40}), read_vector<float>(result));
}
NGRAPH_TEST(${BACKEND_NAME}, slice_matrix_axis_0_in_place)
{
Shape shape_a{4, 4};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{2, 4};
auto D = make_shared<op::Slice>(A, Coordinate{0, 0}, Coordinate{2, 4});
auto E = make_shared<op::Slice>(A, Coordinate{2, 0}, Coordinate{4, 4});
auto r = make_shared<op::Add>(D, E);
auto f = make_shared<Function>(r, ParameterVector{A});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a, vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
auto result = backend->create_tensor(element::f32, shape_r);
backend->call_with_validate(backend->compile(f), {result}, {a});
EXPECT_EQ((vector<float>{10, 12, 14, 16, 18, 20, 22, 24}), read_vector<float>(result));
}
NGRAPH_TEST(${BACKEND_NAME}, slice_matrix_axis_0_in_place_twice)
{
Shape shape_a{4, 4};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{1, 4};
auto B = make_shared<op::Slice>(A, Coordinate{0, 0}, Coordinate{2, 4});
auto D = make_shared<op::Slice>(B, Coordinate{1, 0}, Coordinate{2, 4});
auto E = make_shared<op::Slice>(A, Coordinate{2, 0}, Coordinate{3, 4});
auto r = make_shared<op::Add>(D, E);
auto f = make_shared<Function>(r, ParameterVector{A});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a, vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
auto result = backend->create_tensor(element::f32, shape_r);
backend->call_with_validate(backend->compile(f), {result}, {a});
EXPECT_EQ((vector<float>{14, 16, 18, 20}), read_vector<float>(result));
}
NGRAPH_TEST(${BACKEND_NAME}, slice_matrix_axis_0_in_place_twice_overlap)
{
Shape shape_a{5, 4};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{2, 4};
auto B = make_shared<op::Slice>(A, Coordinate{1, 0}, Coordinate{5, 4});
auto D = make_shared<op::Slice>(B, Coordinate{1, 0}, Coordinate{3, 4});
auto E = make_shared<op::Slice>(B, Coordinate{2, 0}, Coordinate{4, 4});
auto r = make_shared<op::Add>(D, E);
auto f = make_shared<Function>(r, ParameterVector{A});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a,
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20});
auto result = backend->create_tensor(element::f32, shape_r);
backend->call_with_validate(backend->compile(f), {result}, {a});
EXPECT_EQ((vector<float>{22, 24, 26, 28, 30, 32, 34, 36}), read_vector<float>(result));
}
NGRAPH_TEST(${BACKEND_NAME}, slice_matrix_axis_0_in_place_with_reshape)
{
Shape shape_a{4, 5};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_r{2, 4};
auto B = make_shared<op::Slice>(A, Coordinate{1, 0}, Coordinate{4, 5});
auto C = make_shared<op::Reshape>(B, AxisVector{1, 0}, Shape{5, 3});
auto D = make_shared<op::Slice>(C, Coordinate{1, 0}, Coordinate{5, 3});
auto E = make_shared<op::Reshape>(D, AxisVector{1, 0}, Shape{3, 4});
auto r = make_shared<op::Slice>(E, Coordinate{1, 0}, Coordinate{3, 4});
auto f = make_shared<Function>(r, ParameterVector{A});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a,
vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20});
auto result = backend->create_tensor(element::f32, shape_r);
backend->call_with_validate(backend->compile(f), {result}, {a});
EXPECT_EQ((vector<float>{12, 13, 14, 15, 17, 18, 19, 20}), read_vector<float>(result));
}
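The expected values in the reshape test can be checked by hand (A holds 1..20 in row-major order):

B = Slice(A, {1,0}, {4,5})   = {{6,7,8,9,10}, {11,12,13,14,15}, {16,17,18,19,20}}       (3x5)
C = Reshape(B, {1,0}, {5,3}) = {{6,11,16}, {7,12,17}, {8,13,18}, {9,14,19}, {10,15,20}}  (5x3)
D = Slice(C, {1,0}, {5,3})   = {{7,12,17}, {8,13,18}, {9,14,19}, {10,15,20}}             (4x3)
E = Reshape(D, {1,0}, {3,4}) = {{7,8,9,10}, {12,13,14,15}, {17,18,19,20}}                (3x4)
r = Slice(E, {1,0}, {3,4})   = {{12,13,14,15}, {17,18,19,20}}

which flattens to the asserted {12, 13, 14, 15, 17, 18, 19, 20}.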
NGRAPH_TEST(${BACKEND_NAME}, slice_matrix_strided)
{
Shape shape_a{4, 4};
......