Commit 65600444 (unverified)
Authored Oct 25, 2018 by Jayaram Bobba; committed by GitHub on Oct 25, 2018
Merge pull request #1774 from NervanaSystems/ayzhuang/in-place-concat
Add in place concat optimization.
Parents: cf241d26, c5f4db5d
Showing 9 changed files with 519 additions and 16 deletions:
src/ngraph/pass/memory_layout.cpp                         +19  -13
src/ngraph/runtime/cpu/CMakeLists.txt                     +1   -0
src/ngraph/runtime/cpu/builder/concat.cpp                 +37  -3
src/ngraph/runtime/cpu/cpu_emitter.cpp                    +24  -0
src/ngraph/runtime/cpu/cpu_external_function.cpp          +108 -0
src/ngraph/runtime/cpu/cpu_external_function.hpp          +7   -0
src/ngraph/runtime/cpu/pass/cpu_memory_optimization.cpp   +223 -0
src/ngraph/runtime/cpu/pass/cpu_memory_optimization.hpp   +37  -0
test/backend_test.in.cpp                                  +63  -0
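Before the per-file diffs, a minimal standalone sketch of the idea (plain C++, not ngraph code; all names are illustrative): instead of materializing each concat argument in its own buffer and then copying it into the concat's buffer, the optimization assigns each argument's output tensor an offset inside the concat's memory-pool buffer, so the argument kernels write their results in place and the copies disappear.

#include <iostream>
#include <vector>

int main()
{
    // Three hypothetical ops (add1, add2, add3) each produce 4 floats that
    // feed one Concat along axis 0. After the optimization, their output
    // tensors alias disjoint slices of the concat's buffer.
    std::vector<float> concat_buffer(12);

    float* add1_out = concat_buffer.data() + 0; // pool offset 0
    float* add2_out = concat_buffer.data() + 4; // pool offset 4 floats
    float* add3_out = concat_buffer.data() + 8; // pool offset 8 floats

    // Each "kernel" writes directly into the concat buffer; no memcpy is needed.
    for (int i = 0; i < 4; i++)
    {
        add1_out[i] = 1.0f + i;
        add2_out[i] = 10.0f + i;
        add3_out[i] = 100.0f + i;
    }

    for (float v : concat_buffer)
    {
        std::cout << v << ' ';
    }
    std::cout << '\n'; // 1 2 3 4 10 11 12 13 100 101 102 103
}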
src/ngraph/pass/memory_layout.cpp
...
@@ -19,6 +19,7 @@
 #include "ngraph/log.hpp"
+#include "ngraph/op/concat.hpp"
 #include "ngraph/pass/liveness.hpp"
 #include "ngraph/pass/manager.hpp"
 #include "ngraph/pass/memory_layout.hpp"
...
@@ -43,22 +44,27 @@ bool pass::MemoryLayout::run_on_function(shared_ptr<ngraph::Function> function)
 if (auto op = std::dynamic_pointer_cast<op::Op>(node))
 {
-    if (auto op_annotations = op->get_op_annotations())
+    // concat in_place_oi should be treated differently
+    if (!std::dynamic_pointer_cast<op::Concat>(node))
     {
-        for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
+        if (auto op_annotations = op->get_op_annotations())
         {
-            auto output = &node->get_outputs().at(oi_pair.output).get_tensor();
-            auto input = &node->get_inputs().at(oi_pair.input).get_tensor();
-            auto input_node = node->get_inputs().at(oi_pair.input).get_output().get_node();
-
-            // For destructive kernel, this should be the last use
-            // Non-destructive kernels can pass through if memory sharing is disabled
-            if ((node->liveness_free_list.count(input) != 0 ||
-                 (m_disable_memory_sharing && !oi_pair.destructive)) &&
-                node->liveness_new_list.count(output) != 0)
+            for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
             {
-                in_place_outputs.insert({output, input});
-                reused_inputs.insert(input);
+                auto output = &node->get_outputs().at(oi_pair.output).get_tensor();
+                auto input = &node->get_inputs().at(oi_pair.input).get_tensor();
+                auto input_node = node->get_inputs().at(oi_pair.input).get_output().get_node();
+
+                // For destructive kernel, this should be the last use
+                // Non-destructive kernels can pass through if memory sharing is disabled
+                if ((node->liveness_free_list.count(input) != 0 ||
+                     (m_disable_memory_sharing && !oi_pair.destructive)) &&
+                    node->liveness_new_list.count(output) != 0)
+                {
+                    in_place_outputs.insert({output, input});
+                    reused_inputs.insert(input);
+                }
             }
         }
     }
 }
...
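The liveness condition in the hunk above is dense, so here is a hedged restatement as a free function (OiPair and can_reuse_input are invented for illustration, not ngraph's API): an output may share its input's buffer when the output is newly defined at this node and either the input dies here (safe even for destructive kernels) or memory sharing is disabled and the kernel is non-destructive.

#include <cstddef>
#include <set>

struct OiPair
{
    std::size_t output;
    std::size_t input;
    bool destructive;
};

// Toy restatement of the reuse test applied to each in-place output/input pair.
static bool can_reuse_input(const std::set<const void*>& liveness_free_list,
                            const std::set<const void*>& liveness_new_list,
                            const void* input,
                            const void* output,
                            const OiPair& oi_pair,
                            bool disable_memory_sharing)
{
    return (liveness_free_list.count(input) != 0 ||
            (disable_memory_sharing && !oi_pair.destructive)) &&
           liveness_new_list.count(output) != 0;
}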
src/ngraph/runtime/cpu/CMakeLists.txt
...
@@ -100,6 +100,7 @@ set(SRC
 pass/cpu_layout.cpp
 pass/cpu_loop_kernel_fusion.cpp
 pass/cpu_mat_fusion.cpp
+pass/cpu_memory_optimization.cpp
 pass/cpu_post_layout_optimizations.cpp
 pass/cpu_rnn_fusion.cpp
 pass/cpu_workspace_insertion.cpp
...
src/ngraph/runtime/cpu/builder/concat.cpp
...
@@ -32,8 +32,8 @@ namespace ngraph
 template <>
 void Builder::BUILDER_DECL(ngraph::op::Concat)
 {
-    auto axis =
-        (static_cast<const ngraph::op::Concat*>(node))->get_concatenation_axis();
+    auto concat = static_cast<const ngraph::op::Concat*>(node);
+    auto axis = concat->get_concatenation_axis();

     auto& functors = external_function->get_functors();
...
@@ -48,10 +48,45 @@ namespace ngraph
         arg_shapes.emplace_back(arg.get_shape());
     }
 }
+auto nargs = args.size();

 auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
 auto out_shape = out[0].get_shape();
+auto element_size = concat->get_input_element_type(0).size();
+
+if (auto op_annotations = concat->get_op_annotations())
+{
+    auto in_place_oi_pairs = op_annotations->get_in_place_oi_pairs();
+    if (in_place_oi_pairs.size() > 0)
+    {
+        auto functor = [&, arg_tensors, nargs, out_shape, arg_shapes, element_size](
+            CPURuntimeContext* ctx) {
+            auto out_size = shape_size(out_shape) * element_size;
+            auto offset = 0;
+            for (size_t i = 0; i < nargs; i++)
+            {
+                auto arg_size = shape_size(arg_shapes[i]) * element_size;
+                // if the argument pointer does not fall within the concat output buffer
+                // (caused by propagate_in_place_output or propagate_in_place_input),
+                // we need to copy the data; otherwise, we can skip the copy.
+                if (arg_tensors[i] < out_tensor ||
+                    arg_tensors[i] >= reinterpret_cast<char*>(out_tensor) + out_size)
+                {
+                    memcpy(reinterpret_cast<char*>(out_tensor) + offset,
+                           arg_tensors[i],
+                           arg_size);
+                }
+                offset += arg_size;
+            }
+        };
+        functors.emplace_back(functor);
+        return;
+    }
+}

 if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
 {
     auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
...
@@ -65,7 +100,6 @@ namespace ngraph
 size_t concat_dim =
     (dynamic_cast<const ngraph::op::Concat*>(node))->get_concatenation_axis();
-auto nargs = args.size();

 auto concat_index =
     mkldnn_emitter->build_concat(inputs_data_desc, result_desc, concat_dim);
 auto& deps = mkldnn_emitter->get_primitive_deps(concat_index);
...
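The pointer-range test inside the new functor is the crux: it is what lets arguments that were already produced inside the output buffer skip their memcpy. A hedged standalone sketch of the same check (emit_concat_arg is a hypothetical helper, not part of the commit; like the original, it assumes all tensors live in one flat memory pool so raw pointer comparison is meaningful):

#include <cstddef>
#include <cstring>

// Copy an argument into the concat output buffer only if it was materialized
// outside of it; arguments already produced in place inside
// [out, out + out_size) are skipped.
static void emit_concat_arg(char* out,
                            std::size_t offset,
                            std::size_t out_size,
                            const char* arg,
                            std::size_t arg_size)
{
    if (arg < out || arg >= out + out_size)
    {
        std::memcpy(out + offset, arg, arg_size);
    }
    // else: nothing to do, the argument's kernel already wrote into the buffer
}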
src/ngraph/runtime/cpu/cpu_emitter.cpp
...
@@ -1034,6 +1034,30 @@ namespace ngraph
 template <>
 void CPU_Emitter::EMITTER_DECL(ngraph::op::Concat)
 {
+    auto concat = static_cast<const ngraph::op::Concat*>(node);
+    if (auto op_annotations = concat->get_op_annotations())
+    {
+        auto in_place_oi_pairs = op_annotations->get_in_place_oi_pairs();
+        if (in_place_oi_pairs.size() > 0)
+        {
+            auto offset = 0;
+            for (auto i = 0; i < args.size(); i++)
+            {
+                writer << "if (" << args[i].get_name() << " < " << out[0].get_name()
+                       << " || " << args[i].get_name() << " >= " << out[0].get_name()
+                       << " + " << out[0].get_size() * out[0].get_element_type().size()
+                       << ")\n";
+                writer.block_begin();
+                writer << "memcpy(" << out[0].get_name() << " + " << offset << ", "
+                       << args[i].get_name() << ", "
+                       << args[i].get_size() * out[0].get_element_type().size() << ");\n";
+                writer.block_end();
+                offset += args[i].get_size() * out[0].get_element_type().size();
+            }
+            return;
+        }
+    }
     auto result_shape = out[0].get_shape();

 #if USE_EIGEN_CORE_INLINE == 1
...
src/ngraph/runtime/cpu/cpu_external_function.cpp
...
@@ -166,6 +166,7 @@
 #include "ngraph/runtime/cpu/pass/cpu_horizontal_fusion.hpp"
 #include "ngraph/runtime/cpu/pass/cpu_layout.hpp"
 #include "ngraph/runtime/cpu/pass/cpu_mat_fusion.hpp"
+#include "ngraph/runtime/cpu/pass/cpu_memory_optimization.hpp"
 #include "ngraph/runtime/cpu/pass/cpu_post_layout_optimizations.hpp"
 #include "ngraph/runtime/cpu/pass/cpu_rnn_fusion.hpp"
 #include "ngraph/runtime/cpu/pass/cpu_workspace_insertion.hpp"
...
@@ -629,6 +630,9 @@ using namespace ngraph::runtime;
     }
 }

+// In place concatenation optimization
+process_in_place_concat(ordered_ops);
+
 writer << "bool " << current_function->get_name() << "_t_en[" << tensor_index << "];\n";

 writer << "extern \"C\" void " << current_function->get_name();
...
@@ -1047,6 +1051,7 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma
 pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this);
 pass_manager.register_pass<runtime::cpu::pass::CPULayout>(this);
 pass_manager.register_pass<runtime::cpu::pass::CPUPostLayoutOptimizations>();
+pass_manager.register_pass<runtime::cpu::pass::CPUMemoryOptimization>();
 pass_manager.register_pass<ngraph::pass::GetOutputElementElimination>();
 pass_manager.get_state().set_visualize_tree_ops_map(runtime::cpu::get_visualize_tree_ops_map());
 }
...
@@ -1209,6 +1214,105 @@ void runtime::cpu::CPU_ExternalFunction::propagate_in_place_output(
     } while (propagate_further);
 }

+void runtime::cpu::CPU_ExternalFunction::process_in_place_concat(
+    std::list<std::shared_ptr<Node>> nodes)
+{
+    for (shared_ptr<Node> node : nodes)
+    {
+        if (auto concat = std::dynamic_pointer_cast<ngraph::op::Concat>(node))
+        {
+            if (auto op_annotations = concat->get_op_annotations())
+            {
+                auto in_place_oi_pairs = op_annotations->get_in_place_oi_pairs();
+                if (in_place_oi_pairs.size() > 0)
+                {
+                    auto output_tensor = &concat->get_output_tensor();
+                    auto offset = output_tensor->get_pool_offset();
+                    for (auto arg : concat->get_arguments())
+                    {
+                        auto input_node = std::dynamic_pointer_cast<ngraph::op::Op>(arg);
+                        auto input_tensor = &input_node->get_output_tensor();
+                        auto old_offset = input_tensor->get_pool_offset();
+                        input_tensor->set_pool_offset(offset);
+                        NGRAPH_DEBUG << "cpu_external_function: change offset, old offset is "
+                                     << old_offset << ", new offset is " << offset << std::endl;
+                        offset += input_tensor->size();
+                    }
+
+                    bool found_last_concat = true;
+                    for (auto user : concat->get_users())
+                    {
+                        if (auto user_concat = dynamic_pointer_cast<ngraph::op::Concat>(user))
+                        {
+                            if (auto user_op_annotations = user_concat->get_op_annotations())
+                            {
+                                auto user_in_place_oi_pairs =
+                                    user_op_annotations->get_in_place_oi_pairs();
+                                if (user_in_place_oi_pairs.size() > 0)
+                                {
+                                    found_last_concat = false;
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                    if (found_last_concat)
+                    {
+                        for (auto arg : concat->get_arguments())
+                        {
+                            if (auto arg_concat = dynamic_pointer_cast<ngraph::op::Concat>(arg))
+                            {
+                                NGRAPH_DEBUG << "cpu_external_function: call propagate_in_place_concat for "
+                                             << arg->get_name() << std::endl;
+                                propagate_in_place_concat(arg_concat);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+void runtime::cpu::CPU_ExternalFunction::propagate_in_place_concat(
+    shared_ptr<ngraph::op::Concat> concat)
+{
+    std::deque<std::shared_ptr<ngraph::op::Concat>> stack;
+    stack.push_front(concat);
+
+    while (stack.size() > 0)
+    {
+        auto it = stack.front();
+        stack.pop_front();
+        if (auto op_annotations = it->get_op_annotations())
+        {
+            auto in_place_oi_pairs = op_annotations->get_in_place_oi_pairs();
+            if (in_place_oi_pairs.size() > 0)
+            {
+                auto output_tensor = &it->get_output_tensor();
+                auto offset = output_tensor->get_pool_offset();
+                for (auto arg : it->get_arguments())
+                {
+                    auto input_node = std::dynamic_pointer_cast<ngraph::op::Op>(arg);
+                    auto input_tensor = &input_node->get_output_tensor();
+                    auto old_offset = input_tensor->get_pool_offset();
+                    input_tensor->set_pool_offset(offset);
+                    NGRAPH_DEBUG
+                        << "cpu_external_function, propagate: change offset, old offset is "
+                        << old_offset << ", new offset is " << offset << std::endl;
+                    offset += input_tensor->size();
+
+                    if (auto arg_concat = std::dynamic_pointer_cast<ngraph::op::Concat>(arg))
+                    {
+                        stack.push_front(arg_concat);
+                    }
+                }
+            }
+        }
+    }
+}
+
 void runtime::cpu::CPU_ExternalFunction::build()
 {
     if (m_is_built)
...
@@ -1270,6 +1374,10 @@ void runtime::cpu::CPU_ExternalFunction::build()
 }

 // Build executor

+// In place concatenation optimization
+process_in_place_concat(m_function->get_ordered_ops());
+
 // Intermediates
 if (m_function->get_temporary_pool_size())
 {
...
src/ngraph/runtime/cpu/cpu_external_function.hpp
...
@@ -40,6 +40,7 @@
 #endif

 #include "ngraph/function.hpp"
+#include "ngraph/op/concat.hpp"
 #include "ngraph/pass/manager.hpp"
 #include "ngraph/runtime/cpu/cpu_call_frame.hpp"
 #include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
...
@@ -189,6 +190,12 @@
 void propagate_in_place_output(ngraph::descriptor::Output* res_src_output,
                                std::string output_name,
                                bool dex);
+
+// Find in-place concat ops and set appropriate memory pool offset for its arguments
+void process_in_place_concat(std::list<std::shared_ptr<Node>> nodes);
+
+// For a chain of concat ops, propagate memory pool offsets
+void propagate_in_place_concat(std::shared_ptr<ngraph::op::Concat> concat);
+
 bool computes_result(Node* node);
 void release_function() { m_function = nullptr; }
#if !defined(NGRAPH_DEX_ONLY)
...
src/ngraph/runtime/cpu/pass/cpu_memory_optimization.cpp
0 → 100644
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
/// In-place-concat optimization makes the argument nodes of a concatenation node use the concatenation node's memory buffer
/// for their outputs. As a result, we eliminate memory copies from the memory buffers of the argument nodes to
/// that of the concatenation node. When there is a chain of in place concatenation nodes, we propagate the
/// memory buffer starting from the last concatenation node. Not all concatenation nodes can be optimized. This pass
/// marks all the nodes that can be optimized.
///
/// Example 1:
/// parameter1 parameter2 parameter3 parameter4 parameter5 parameter6
/// \ / \ / \ /
/// add1 add2 add3
/// \ | /
/// concat
///
/// Before optimization: the result of add1 is stored in the memory buffer assigned to add1, and likewise for add2 and
/// add3; those results are then copied into the memory buffer assigned to concat.
/// After optimization: the result of add1 is stored directly in the memory buffer assigned to concat, and likewise for
/// add2 and add3; there is no need to copy those results.
///
///
/// Example 2:
/// parameter1 parameter2 parameter3 parameter4
/// \ / \ /
/// add1 add2
/// \ /
/// concat1 parameter5
/// | \ /
/// | add3
/// \ /
/// concat
///
/// After optimization: the result of add1 is stored directly in the memory buffer assigned to concat, and likewise for add2 and add3.
#include "ngraph/runtime/cpu/pass/cpu_memory_optimization.hpp"
#include "ngraph/descriptor/output.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace ngraph;

bool runtime::cpu::pass::CPUMemoryOptimization::run_on_function(std::shared_ptr<Function> function)
{
    for (auto n : function->get_ordered_ops())
    {
        if (auto concat = std::dynamic_pointer_cast<op::Concat>(n))
        {
            auto shape = concat->get_input_shape(0);
            auto axis = concat->get_concatenation_axis();
            auto product = 1;
            for (int i = 0; i < axis; i++)
            {
                product *= shape[i];
            }
            if (product != 1)
            {
                NGRAPH_DEBUG << "cpu_memory_optimization: The product of Concat's shape "
                                "before concat axis is not 1, no in place concat";
                continue;
            }

            bool in_place_concat = true;
            auto output_md = mkldnn_utils::get_output_mkldnn_md(n.get(), 0);
            auto output_format = static_cast<mkldnn::memory::format>(output_md.data.format);
            for (size_t i = 0; i < n->get_input_size(); i++)
            {
                auto input_md = mkldnn_utils::get_input_mkldnn_md(n.get(), i);
                auto input_format = static_cast<mkldnn::memory::format>(input_md.data.format);
                if (output_format != input_format)
                {
                    NGRAPH_DEBUG << "cpu_memory_optimization: input format is different from "
                                    "output format, no in place concat";
                    in_place_concat = false;
                    break;
                }
            }
            if (!in_place_concat)
            {
                continue;
            }

            AxisVector axis_list = ngraph::get_default_order(shape);
            auto index = 0;
            for (descriptor::Input& input : concat->get_inputs())
            {
                // no tensors with zero-sized dimensions after zero_dim_tensor_elimination
                NGRAPH_ASSERT(shape_size(input.get_shape()) != 0);

                // check if input layout is padded
                auto input_md = mkldnn_utils::get_input_mkldnn_md(n.get(), index);
                index++;
                if (mkldnn_utils::is_mkldnn_padded_layout(input_md, axis_list))
                {
                    NGRAPH_DEBUG
                        << "cpu_memory_optimization: padded input layout, no in place concat";
                    in_place_concat = false;
                    break;
                }

                const auto& output = input.get_output();
                auto arg = output.get_node();
                if (std::dynamic_pointer_cast<op::Constant>(arg) ||
                    std::dynamic_pointer_cast<op::Parameter>(arg))
                {
                    NGRAPH_DEBUG << "cpu_memory_optimization: " << arg->get_name()
                                 << ": constant or parameter, no in place concat";
                    in_place_concat = false;
                    break;
                }

                NGRAPH_ASSERT(arg->get_output_size() == 1);

                if (!std::dynamic_pointer_cast<op::Concat>(arg))
                {
                    if (auto op = std::dynamic_pointer_cast<op::Op>(arg))
                    {
                        auto annotation = op->get_op_annotations();
                        if (annotation && annotation->get_in_place_oi_pairs().size() > 0)
                        {
                            NGRAPH_DEBUG << "cpu_memory_optimization: " << arg->get_name()
                                         << ": in place non-concat op, no in place concat";
                            in_place_concat = false;
                            break;
                        }
                    }
                }

                if (output.get_inputs().size() != 1)
                {
                    // check if we can do in place concat
                    auto concat_count = 0;
                    for (auto output_input : output.get_inputs())
                    {
                        auto user = output_input->get_node();
                        if (std::dynamic_pointer_cast<op::Concat>(user))
                        {
                            concat_count++;
                            if (concat_count == 2)
                            {
                                NGRAPH_DEBUG << "cpu_memory_optimization: multiple "
                                                "concat users, no in place concat";
                                in_place_concat = false;
                                break;
                            }
                        }
                    }
                    if (!in_place_concat)
                    {
                        break;
                    }

                    for (auto user : arg->get_users())
                    {
                        if ((user != concat))
                        {
                            if (auto op = std::dynamic_pointer_cast<op::Op>(user))
                            {
                                if (auto op_annotations = op->get_op_annotations())
                                {
                                    if (op_annotations->get_in_place_oi_pairs().size() > 0)
                                    {
                                        NGRAPH_DEBUG << "cpu_memory_optimization: "
                                                        "in place oi, no in place concat";
                                        in_place_concat = false;
                                        break;
                                    }
                                }
                            }
                        }
                    }
                    if (!in_place_concat)
                    {
                        break;
                    }
                    else if (!is_post_dominated(arg.get(), n.get()))
                    {
                        NGRAPH_DEBUG << "cpu_memory_optimization: "
                                        "not post dominated, no in place concat";
                        in_place_concat = false;
                        break;
                    }
                }
            }

            if (in_place_concat)
            {
                auto op_annotations = concat->get_op_annotations();
                if (op_annotations)
                {
                    op_annotations->add_in_place_oi_pair({0, 0, false});
                }
                else
                {
                    op_annotations = std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
                    op_annotations->add_in_place_oi_pair({0, 0, false});
                    concat->set_op_annotations(op_annotations);
                }
            }
        }
    }
    return false;
}
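To make Example 2 from the header comment concrete: with 1x1 f32 tensors (4 bytes each), the outer concat's buffer holds concat1 at offset 0 and the add3 argument at offset 8, and propagation then places add1 at offset 0 and add2 at offset 4 inside the same buffer. Below is a toy model of that propagation (ToyNode and propagate are invented stand-ins for illustration; the real traversal is propagate_in_place_concat in cpu_external_function.cpp above):

#include <cstddef>
#include <deque>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct ToyNode
{
    std::string name;
    std::size_t size; // bytes occupied by this node's output
    std::size_t pool_offset;
    bool is_concat;
    std::vector<std::shared_ptr<ToyNode>> args;
};

// Depth-first propagation: each argument is placed at a running offset inside
// its consumer's buffer, and nested concats forward their new offset to their
// own arguments in turn.
static void propagate(const std::shared_ptr<ToyNode>& root)
{
    std::deque<std::shared_ptr<ToyNode>> stack{root};
    while (!stack.empty())
    {
        auto it = stack.front();
        stack.pop_front();
        std::size_t offset = it->pool_offset;
        for (auto& arg : it->args)
        {
            arg->pool_offset = offset; // arg now writes inside its consumer's buffer
            offset += arg->size;
            if (arg->is_concat)
            {
                stack.push_front(arg);
            }
        }
    }
}

int main()
{
    auto add1 = std::make_shared<ToyNode>(ToyNode{"add1", 4, 0, false, {}});
    auto add2 = std::make_shared<ToyNode>(ToyNode{"add2", 4, 0, false, {}});
    auto add3 = std::make_shared<ToyNode>(ToyNode{"add3", 4, 0, false, {}});
    auto concat1 = std::make_shared<ToyNode>(ToyNode{"concat1", 8, 0, true, {add1, add2}});
    auto concat = std::make_shared<ToyNode>(ToyNode{"concat", 12, 0, true, {concat1, add3}});

    propagate(concat);

    for (const auto& n : {concat1, add1, add2, add3})
    {
        std::cout << n->name << " -> pool offset " << n->pool_offset << '\n';
    }
    // concat1 -> 0, add1 -> 0, add2 -> 4, add3 -> 8:
    // every argument ends up writing directly into concat's buffer.
}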
src/ngraph/runtime/cpu/pass/cpu_memory_optimization.hpp
0 → 100644
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/pass/pass.hpp"
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            namespace pass
            {
                class CPUMemoryOptimization : public ngraph::pass::FunctionPass
                {
                public:
                    bool run_on_function(std::shared_ptr<ngraph::Function> function) override;
                };
            }
        }
    }
}
test/backend_test.in.cpp
...
@@ -518,6 +518,69 @@ NGRAPH_TEST(${BACKEND_NAME}, concat_2d_tensor)
     EXPECT_EQ((vector<float>{1, 2, 3}), read_vector<float>(result));
 }

+NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_2d_tensor)
+{
+    Shape shape{1, 1};
+    auto A = make_shared<op::Parameter>(element::f32, shape);
+    auto B = make_shared<op::Parameter>(element::f32, shape);
+    auto add1 = make_shared<op::Add>(A, B);
+    auto C = make_shared<op::Parameter>(element::f32, shape);
+    auto D = make_shared<op::Parameter>(element::f32, shape);
+    auto add2 = make_shared<op::Add>(C, D);
+    auto subtract = make_shared<op::Subtract>(C, A);
+    Shape shape_r{3, 1};
+    auto f = make_shared<Function>(make_shared<op::Concat>(NodeVector{add1, add2, subtract}, 0),
+                                   op::ParameterVector{A, B, C, D});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Create some tensors for input/output
+    auto a = backend->create_tensor(element::f32, shape);
+    copy_data(a, vector<float>{1});
+    auto b = backend->create_tensor(element::f32, shape);
+    copy_data(b, vector<float>{2});
+    auto c = backend->create_tensor(element::f32, shape);
+    copy_data(c, vector<float>{3});
+    auto d = backend->create_tensor(element::f32, shape);
+    copy_data(d, vector<float>{4});
+    auto result = backend->create_tensor(element::f32, shape_r);
+
+    backend->call_with_validate(f, {result}, {a, b, c, d});
+    EXPECT_EQ((vector<float>{3, 7, 2}), read_vector<float>(result));
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_propagate_2d_tensor)
+{
+    Shape shape{1, 1};
+    auto A = make_shared<op::Parameter>(element::f32, shape);
+    auto B = make_shared<op::Parameter>(element::f32, shape);
+    auto add1 = make_shared<op::Add>(A, B);
+    auto C = make_shared<op::Parameter>(element::f32, shape);
+    auto D = make_shared<op::Parameter>(element::f32, shape);
+    auto add2 = make_shared<op::Add>(C, D);
+    auto concat1 = make_shared<op::Concat>(NodeVector{add1, add2}, 0);
+    auto subtract = make_shared<op::Subtract>(C, A);
+    Shape shape_r{3, 1};
+    auto f = make_shared<Function>(make_shared<op::Concat>(NodeVector{concat1, subtract}, 0),
+                                   op::ParameterVector{A, B, C, D});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Create some tensors for input/output
+    auto a = backend->create_tensor(element::f32, shape);
+    copy_data(a, vector<float>{1});
+    auto b = backend->create_tensor(element::f32, shape);
+    copy_data(b, vector<float>{2});
+    auto c = backend->create_tensor(element::f32, shape);
+    copy_data(c, vector<float>{3});
+    auto d = backend->create_tensor(element::f32, shape);
+    copy_data(d, vector<float>{4});
+    auto result = backend->create_tensor(element::f32, shape_r);
+
+    backend->call_with_validate(f, {result}, {a, b, c, d});
+    EXPECT_EQ((vector<float>{3, 7, 2}), read_vector<float>(result));
+}
+
// from numpy import *
// a=linspace(1,2*3*4*3*2,2*3*4*3*2)
// b=linspace(1000+1,1000+2*3*3*3*2,2*3*3*3*2)
...