ngraph · Commit 70e5973c (Unverified)

Authored Jan 10, 2020 by Scott Cyphers; committed by GitHub on Jan 10, 2020.

    Modernize a pass (#4055)

    Co-authored-by: Sang Ik Lee <sang.ik.lee@intel.com>

Parent: 6bd90ef4

Showing 2 changed files with 166 additions and 205 deletions:

    src/ngraph/runtime/cpu/pass/cpu_memory_assignment.cpp  +164 -203
    src/ngraph/runtime/cpu/pass/cpu_memory_assignment.hpp  +2 -2
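The recurring pattern in this commit: string-tag type checks (node->description() == "Concat") followed by an unchecked std::static_pointer_cast are collapsed into single typed tests via nGraph's is_type / as_type_ptr, and descriptor-level accessors (get_inputs(), get_outputs(), get_output_tensor(), get_arguments()) are replaced by the Output<Node> / Input<Node> handle API. A minimal stand-alone sketch of the test-and-cast half, using std::dynamic_pointer_cast as a stand-in for as_type_ptr and hypothetical Node/Concat types (not nGraph code):

#include <iostream>
#include <memory>

struct Node
{
    virtual ~Node() = default;
};

struct Concat : Node
{
    void do_concat_things() const { std::cout << "concat\n"; }
};

int main()
{
    std::shared_ptr<Node> node = std::make_shared<Concat>();

    // Old idiom: compare a type-tag string, then static_pointer_cast without
    // a check; the tag and the cast can silently drift apart.
    // New idiom: one checked test-and-cast yielding a typed pointer or null.
    if (auto concat = std::dynamic_pointer_cast<Concat>(node))
    {
        concat->do_concat_things(); // typed access, no separate cast needed
    }
}

as_type_ptr performs the check through nGraph's own type-info tables rather than C++ RTTI, but the calling pattern is the same as in this sketch.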
src/ngraph/runtime/cpu/pass/cpu_memory_assignment.cpp
@@ -60,9 +60,8 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
 {
     for (shared_ptr<Node> node : nodes)
     {
-        if (node->description() == "Concat")
+        if (auto concat = as_type_ptr<op::Concat>(node))
         {
-            auto concat = std::static_pointer_cast<ngraph::op::Concat>(node);
             if (auto op_annotations = concat->get_op_annotations())
             {
                 auto in_place_oi_pairs = op_annotations->get_in_place_oi_pairs();
@@ -72,9 +71,8 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
                     bool found_last_concat = true;
                     for (auto user : concat->get_users())
                     {
-                        if (user->description() == "Concat")
+                        if (auto user_concat = as_type_ptr<op::Concat>(user))
                         {
-                            auto user_concat = std::static_pointer_cast<ngraph::op::Concat>(user);
                             if (auto user_op_annotations = user_concat->get_op_annotations())
                             {
                                 auto user_in_place_oi_pairs =
@@ -90,14 +88,14 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
                     // start from the last concat
                     if (found_last_concat)
                     {
-                        auto output_tensor = &concat->get_output_tensor();
+                        auto output_tensor = &concat->output(0).get_tensor();
                         auto output_bufferID = get_bufferID(output_tensor);
                         auto offset = output_tensor->get_pool_offset();
                         size_t arg_index = 0;
-                        for (auto arg : concat->get_arguments())
+                        for (auto arg : concat->input_values())
                         {
-                            auto input_tensor = &arg->get_output_tensor();
+                            auto input_tensor = &arg.get_tensor();
                             auto input_bufferID = get_bufferID(input_tensor);
                             // same set, in place concat allowed
                             if (input_bufferID == output_bufferID)
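The hunk above swaps get_arguments(), which yields producer node pointers, for input_values(), which yields Output<Node> handles whose tensors are reachable directly via get_tensor(). A minimal sketch of that access pattern with a hypothetical OutputHandle stand-in (not nGraph's actual class):

#include <cstddef>
#include <iostream>
#include <vector>

struct Tensor
{
    std::size_t pool_offset = 0;
};

// Hypothetical stand-in for ngraph::Output<Node>: a small value type that
// identifies one producer output and exposes its tensor directly.
struct OutputHandle
{
    Tensor* tensor;
    Tensor& get_tensor() const { return *tensor; }
};

int main()
{
    Tensor a{64}, b{128};
    std::vector<OutputHandle> input_values{{&a}, {&b}}; // like concat->input_values()

    for (auto arg : input_values) // handles are cheap to copy by value
    {
        auto input_tensor = &arg.get_tensor(); // replaces arg->get_output_tensor()
        std::cout << input_tensor->pool_offset << "\n";
    }
}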
@@ -109,22 +107,17 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
                                     << old_offset << ", new offset is " << offset << std::endl;
                                 // check if need to propagate backward
-                                if (arg->is_op())
+                                auto arg_op = arg.get_node_shared_ptr();
+                                if (auto arg_op_annotations = arg_op->get_op_annotations())
                                 {
-                                    auto arg_op = std::static_pointer_cast<ngraph::op::Op>(arg);
-                                    if (auto arg_op_annotations = arg_op->get_op_annotations())
+                                    auto arg_in_place_oi_pairs =
+                                        arg_op_annotations->get_in_place_oi_pairs();
+                                    if (arg_in_place_oi_pairs.size() > 0)
                                     {
-                                        auto arg_in_place_oi_pairs =
-                                            arg_op_annotations->get_in_place_oi_pairs();
-                                        if (arg_in_place_oi_pairs.size() > 0)
-                                        {
-                                            auto input = &arg_op->get_inputs().at(0);
-                                            auto output_index = input->get_output().get_index();
-                                            NGRAPH_DEBUG << "cpu_memory_assignment: call "
-                                                            "propagate_in_place_concat for "
-                                                         << arg->get_name();
-                                            propagate_in_place_concat(arg_op, output_index);
-                                        }
+                                        NGRAPH_DEBUG << "cpu_memory_assignment: call "
+                                                        "propagate_in_place_concat for "
+                                                     << *arg_op;
+                                        propagate_in_place_concat(arg);
                                     }
                                 }
@@ -138,19 +131,19 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
     }
 }
 
 void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(
-    shared_ptr<ngraph::op::Op> op, size_t output_index)
+    const Output<Node>& output)
 {
-    if (op->description() == "Concat")
+    auto op = output.get_node_shared_ptr();
+    if (is_type<op::Concat>(op))
     {
-        auto output_tensor = &op->get_output_tensor();
+        auto output_tensor = &op->output(0).get_tensor();
         auto output_bufferID = get_bufferID(output_tensor);
         auto offset = output_tensor->get_pool_offset();
         size_t arg_index = 0;
-        for (auto arg : op->get_arguments())
+        for (auto arg : op->input_values())
         {
-            auto input_tensor = &arg->get_output_tensor();
+            auto input_tensor = &arg.get_tensor();
             auto input_bufferID = get_bufferID(input_tensor);
             // same set, in place concat allowed
             if (input_bufferID == output_bufferID)
@@ -161,21 +154,15 @@ void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(
                     << ", new offset is " << offset;
                 // check if need to propagate backward
-                if (arg->is_op())
+                auto arg_op = arg.get_node_shared_ptr();
+                if (auto arg_op_annotations = arg_op->get_op_annotations())
                 {
-                    auto arg_op = std::static_pointer_cast<ngraph::op::Op>(arg);
-                    if (auto arg_op_annotations = arg_op->get_op_annotations())
+                    auto arg_in_place_oi_pairs = arg_op_annotations->get_in_place_oi_pairs();
+                    if (arg_in_place_oi_pairs.size() > 0)
                     {
-                        auto arg_in_place_oi_pairs =
-                            arg_op_annotations->get_in_place_oi_pairs();
-                        if (arg_in_place_oi_pairs.size() > 0)
-                        {
-                            NGRAPH_DEBUG
-                                << "cpu_memory_assignment: call propagate_in_place_concat for "
-                                << arg->get_name();
-                            auto input = &op->get_inputs().at(arg_index);
-                            auto arg_output_index = input->get_output().get_index();
-                            propagate_in_place_concat(arg_op, arg_output_index);
-                        }
+                        NGRAPH_DEBUG
+                            << "cpu_memory_assignment: call propagate_in_place_concat for "
+                            << *arg_op;
+                        propagate_in_place_concat(arg);
                     }
                 }
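The two hunks above change propagate_in_place_concat to take a single Output<Node> in place of the old (shared_ptr<ngraph::op::Op>, output index) pair, so the node and the index can no longer fall out of sync at a call site. A sketch of the idea under stand-in types (OutputHandle here is hypothetical, not nGraph's Output<Node>):

#include <cstddef>
#include <iostream>
#include <memory>

struct Node
{
    const char* name = "concat_0";
};

// Hypothetical stand-in for ngraph::Output<Node>: the producing node and the
// output index travel together in one value.
struct OutputHandle
{
    std::shared_ptr<Node> node;
    std::size_t index = 0;
    std::shared_ptr<Node> get_node_shared_ptr() const { return node; }
    std::size_t get_index() const { return index; }
};

// Old shape: propagate(std::shared_ptr<Node> op, std::size_t output_index);
// new shape: one handle argument carrying both pieces.
void propagate(const OutputHandle& output)
{
    std::cout << output.get_node_shared_ptr()->name << " output "
              << output.get_index() << "\n";
}

int main()
{
    propagate(OutputHandle{std::make_shared<Node>(), 0});
}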
@@ -189,14 +176,14 @@ void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(
         auto op_annotations = op->get_op_annotations();
         for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
         {
-            if (oi_pair.output != output_index || oi_pair.destructive)
+            if (oi_pair.output != output.get_index() || oi_pair.destructive)
             {
                 continue;
             }
-            auto input_tensor = &op->get_inputs().at(oi_pair.input).get_tensor();
+            auto input_tensor = &op->input_value(oi_pair.input).get_tensor();
             auto input_bufferID = get_bufferID(input_tensor);
-            auto output_tensor = &op->get_outputs().at(oi_pair.output).get_tensor();
+            auto output_tensor = &op->output(oi_pair.output).get_tensor();
             auto output_bufferID = get_bufferID(output_tensor);
             // same set, in place op allowed
@@ -207,24 +194,18 @@ void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(
                 input_tensor->set_pool_offset(new_offset);
                 NGRAPH_DEBUG << "cpu_memory_assignment: change offset, old offset is " << old_offset
                              << ", new offset is " << new_offset;
-                auto input = &op->get_inputs().at(oi_pair.input);
-                auto arg = input->get_node();
+                auto input = op->input_value(oi_pair.input);
+                auto arg_op = input.get_node_shared_ptr();
                 // check if need to propagate backward
-                if (arg->is_op())
+                if (auto arg_op_annotations = arg_op->get_op_annotations())
                 {
-                    auto arg_op = std::static_pointer_cast<ngraph::op::Op>(arg);
-                    if (auto arg_op_annotations = arg_op->get_op_annotations())
+                    auto arg_in_place_oi_pairs = arg_op_annotations->get_in_place_oi_pairs();
+                    if (arg_in_place_oi_pairs.size() > 0)
                     {
-                        auto arg_in_place_oi_pairs =
-                            arg_op_annotations->get_in_place_oi_pairs();
-                        if (arg_in_place_oi_pairs.size() > 0)
-                        {
-                            auto arg_output_index = input->get_output().get_index();
-                            NGRAPH_DEBUG
-                                << "cpu_memory_assignment: call propagate_in_place_concat for "
-                                << arg->get_name();
-                            propagate_in_place_concat(arg_op, arg_output_index);
-                        }
+                        NGRAPH_DEBUG
+                            << "cpu_memory_assignment: call propagate_in_place_concat for "
+                            << *arg_op;
+                        propagate_in_place_concat(input);
                    }
                 }
@@ -238,20 +219,18 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_slice(
 {
     for (shared_ptr<Node>& node : nodes)
     {
-        if (node->description() == "Slice")
+        if (auto slice = as_type_ptr<op::Slice>(node))
         {
-            auto slice = std::static_pointer_cast<ngraph::op::Slice>(node);
             if (auto op_annotations = slice->get_op_annotations())
             {
                 auto in_place_oi_pairs = op_annotations->get_in_place_oi_pairs();
                 if (in_place_oi_pairs.size() > 0)
                 {
-                    auto input = &slice->get_inputs().at(0);
-                    auto arg = input->get_output().get_node();
-                    auto index = input->get_output().get_index();
-                    auto input_tensor = &arg->get_output_tensor(index);
+                    auto input = slice->input_value(0);
+                    auto arg = input.get_node_shared_ptr();
+                    auto input_tensor = &input.get_tensor();
                     auto input_bufferID = get_bufferID(input_tensor);
-                    auto output_tensor = &slice->get_output_tensor();
+                    auto output_tensor = &slice->output(0).get_tensor();
                     auto output_bufferID = get_bufferID(output_tensor);
                     // same set, in place slice allowed
@@ -277,17 +256,15 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_slice(
                         << old_offset << ", new offset is " << offset;
                     // check if need to propagate forward
-                    for (size_t i = 0; i < slice->get_output_size(); ++i)
+                    for (auto slice_output : slice->outputs())
                     {
-                        auto slice_output = &slice->get_outputs().at(i);
-                        for (auto slice_output_input : slice_output->get_inputs())
+                        for (auto slice_output_input : slice_output.get_target_inputs())
                         {
                             NGRAPH_DEBUG
                                 << "cpu_memory_assignment: call propagate_in_place_slice "
                                    "for output "
-                                << i << " of " << slice->get_name();
-                            propagate_in_place_slice(slice_output_input,
-                                                     slice_output_input->get_index());
+                                << slice_output.get_index() << " of " << *slice;
+                            propagate_in_place_slice(slice_output_input);
                         }
                     }
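The forward walk above now iterates output handles (slice->outputs()) and, for each, the downstream inputs that consume it (get_target_inputs()) instead of manual index loops over descriptors. A self-contained sketch with hypothetical handle types (not nGraph's):

#include <cstddef>
#include <iostream>
#include <vector>

struct InputHandle
{
    std::size_t index;
};

struct OutputHandle
{
    std::size_t index;
    std::vector<InputHandle> consumers;
    std::size_t get_index() const { return index; }
    const std::vector<InputHandle>& get_target_inputs() const { return consumers; }
};

int main()
{
    // One output (index 0) consumed by two downstream inputs, like slice->outputs().
    std::vector<OutputHandle> outputs = {{0, {{0}, {1}}}};
    for (auto output : outputs)                    // replaces the index-based loop
    {
        for (auto in : output.get_target_inputs()) // consumers of this output
        {
            std::cout << "output " << output.get_index()
                      << " feeds input " << in.index << "\n";
        }
    }
}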
@@ -297,48 +274,43 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_slice(
     }
 }
 
 void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_slice(
-    ngraph::descriptor::Input* input, size_t input_index)
+    const Input<Node>& input)
 {
-    std::deque<std::pair<ngraph::descriptor::Input*, size_t>> stack;
-    stack.push_front(std::pair<ngraph::descriptor::Input*, size_t>(input, input_index));
+    std::deque<Input<Node>> stack;
+    stack.push_front(input);
 
     while (stack.size() > 0)
     {
-        ngraph::descriptor::Input* in = stack.front().first;
-        auto index = stack.front().second;
+        Input<Node> in = stack.front();
         stack.pop_front();
-        auto node = in->get_node();
+        auto node = in.get_node();
         // let process_in_place_slice handle slice.
-        if (!node->is_op() || node->description() == "Slice")
+        if (is_type<op::Slice>(node))
         {
            continue;
        }
-        auto op = std::static_pointer_cast<ngraph::op::Op>(node);
-        if (auto op_annotations = op->get_op_annotations())
+        if (auto op_annotations = node->get_op_annotations())
        {
            for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
            {
-                if (oi_pair.input == index)
+                if (oi_pair.input == in.get_index())
                {
-                    auto input_tensor = &op->get_inputs().at(oi_pair.input).get_tensor();
+                    auto input_tensor = &node->input(oi_pair.input).get_tensor();
                    auto input_bufferID = get_bufferID(input_tensor);
                    size_t output_index = oi_pair.output;
-                    auto output_tensor = &op->get_outputs().at(output_index).get_tensor();
+                    auto output_tensor = &node->output(output_index).get_tensor();
                    auto output_bufferID = get_bufferID(output_tensor);
                    // same set, in place op allowed
                    if (input_bufferID == output_bufferID)
                    {
                        output_tensor->set_pool_offset(input_tensor->get_pool_offset());
-                        for (size_t i = 0; i < op->get_output_size(); ++i)
+                        for (auto op_output : node->outputs())
                        {
-                            auto op_output = &op->get_outputs().at(i);
-                            for (auto op_output_input : op_output->get_inputs())
+                            for (auto op_output_input : op_output.get_target_inputs())
                            {
-                                stack.push_front(std::pair<ngraph::descriptor::Input*, size_t>(
-                                    op_output_input, op_output_input->get_index()));
+                                stack.push_front(op_output_input);
                            }
                        }
                    }
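Because an Input<Node>-style handle already knows its own index, the worklist above shrinks from a deque of (descriptor pointer, index) pairs to a deque of handles. A sketch with a hypothetical InputHandle stand-in:

#include <cstddef>
#include <deque>
#include <iostream>

struct InputHandle
{
    std::size_t index;
    std::size_t get_index() const { return index; }
};

int main()
{
    // Was: std::deque<std::pair<descriptor::Input*, size_t>>, with the index
    // carried separately; here the handle carries it.
    std::deque<InputHandle> stack;
    stack.push_front(InputHandle{2});

    while (!stack.empty())
    {
        InputHandle in = stack.front(); // the index travels with the handle
        stack.pop_front();
        std::cout << "visiting input " << in.get_index() << "\n";
    }
}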
@@ -365,16 +337,16 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
         const shared_ptr<Node>& node = *it;
         if (node->is_parameter())
         {
-            auto output_tensor = &node->get_output_tensor();
+            auto output_tensor = &node->output(0).get_tensor();
             auto ele = std::pair<TensorRole, unordered_set<descriptor::Tensor*>>(
                 TensorRole::INPUT, unordered_set<descriptor::Tensor*>({output_tensor}));
             m_bufferID_to_tensorSets[count] = ele;
             m_tensor_to_bufferID[output_tensor] = count;
             count++;
         }
-        else if (node->is_constant())
+        else if (is_type<op::Constant>(node))
         {
-            auto output_tensor = &node->get_output_tensor();
+            auto output_tensor = &node->output(0).get_tensor();
             auto ele = std::pair<TensorRole, unordered_set<descriptor::Tensor*>>(
                 TensorRole::CONSTANT, unordered_set<descriptor::Tensor*>({output_tensor}));
             m_bufferID_to_tensorSets[count] = ele;
@@ -383,8 +355,8 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
         }
         else if (node->is_output())
         {
-            auto output_tensor = &node->get_output_tensor();
-            auto input_tensor = &node->get_inputs().at(0).get_tensor();
+            auto output_tensor = &node->output(0).get_tensor();
+            auto input_tensor = &node->input(0).get_tensor();
             auto bufferID = get_bufferID(input_tensor);
             NGRAPH_CHECK(bufferID <= count);
@@ -423,18 +395,18 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
                 auto cacheable = op_annotations->is_cacheable();
                 // in place concat
-                if (node->description() == "Concat")
+                if (is_type<op::Concat>(node))
                 {
-                    auto output_tensor = &node->get_output_tensor();
+                    auto output_tensor = &node->output(0).get_tensor();
                     auto ele = std::pair<TensorRole, unordered_set<descriptor::Tensor*>>(
                         TensorRole::INTERMEDIATE,
                         unordered_set<descriptor::Tensor*>({output_tensor}));
-                    for (auto& arg : node->get_arguments())
+                    for (auto& arg : node->input_values())
                     {
                         // when reusing memory, check cacheability
-                        if (!m_disable_memory_sharing && arg->is_op())
+                        if (!m_disable_memory_sharing)
                         {
-                            auto arg_op = std::static_pointer_cast<op::Op>(arg);
+                            auto arg_op = arg.get_node_shared_ptr();
                             if (auto arg_op_annotations = arg_op->get_op_annotations())
                             {
                                 // when reusing memory, ops with different cacheabilities should
@@ -449,7 +421,7 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
                         // because in-place slice before in-place concat cannot use the memory
                         // buffer of concat. In-place slice after in-place concat can use the
                         // memory buffer of concat.
-                        auto input_tensor = &arg->get_output_tensor();
+                        auto input_tensor = &arg.get_tensor();
                         if (in_place_slice_chain.find(input_tensor) !=
                             in_place_slice_chain.end())
                         {
@@ -494,21 +466,19 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
                 // other in place ops
                 for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
                 {
-                    auto input_tensor = &node->get_inputs().at(oi_pair.input).get_tensor();
+                    auto input_tensor = &node->input_value(oi_pair.input).get_tensor();
                     auto output_tensor =
-                        &node->get_outputs().at(oi_pair.output).get_tensor();
+                        &node->output(oi_pair.output).get_tensor();
                     // if destructive, do not put input tensor and output tensor into the
                     // same set.
                     if (!oi_pair.destructive)
                     {
                         bool no_in_place = false;
-                        auto input_node =
-                            node->get_inputs().at(oi_pair.input).get_output().get_node();
+                        auto input_op =
+                            node->input_value(oi_pair.input).get_node_shared_ptr();
                         // when reusing memory, check cacheability
-                        if (!m_disable_memory_sharing && input_node->is_op())
+                        if (!m_disable_memory_sharing)
                         {
-                            auto input_op = std::static_pointer_cast<op::Op>(input_node);
                             if (auto input_op_annotations = input_op->get_op_annotations())
                             {
                                 // when reusing memory, ops with different cacheabilities
@@ -527,7 +497,7 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
                 auto input_buffer_it = m_bufferID_to_tensorSets.find(bufferID);
                 NGRAPH_CHECK(input_buffer_it != m_bufferID_to_tensorSets.end());
-                if (node->description() == "Slice")
+                if (is_type<op::Slice>(node))
                 {
                     if (input_buffer_it->second.first != TensorRole::CONSTANT)
                     {
@@ -556,9 +526,9 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
             }
         }
         // process output tensors
-        for (size_t i = 0; i < node->get_output_size(); i++)
+        for (auto node_output : node->outputs())
         {
-            auto output_tensor = &node->get_outputs().at(i).get_tensor();
+            auto output_tensor = &node_output.get_tensor();
             // not in place, create a new set and insert into the map
             if (m_tensor_to_bufferID.find(output_tensor) == m_tensor_to_bufferID.end())
             {
@@ -617,9 +587,9 @@ void runtime::cpu::pass::CPUMemoryAssignment::liveness_analysis(
         const shared_ptr<Node>& node = *it;
         node->liveness_new_list.clear();
-        for (size_t i = 0; i < node->get_output_size(); ++i)
+        for (auto node_output : node->outputs())
         {
-            auto tensor = &node->get_output_tensor(i);
+            auto tensor = &node_output.get_tensor();
             auto bufferID = get_bufferID(tensor);
             if (allocated_sets.find(bufferID) == allocated_sets.end())
             {
@@ -635,9 +605,9 @@ void runtime::cpu::pass::CPUMemoryAssignment::liveness_analysis(
         const shared_ptr<Node>& node = *it;
         node->liveness_free_list.clear();
-        for (descriptor::Input& input_decl : node->get_inputs())
+        for (auto input_value : node->input_values())
         {
-            auto tensor = &input_decl.get_tensor();
+            auto tensor = &input_value.get_tensor();
             auto bufferID = get_bufferID(tensor);
             if (freed_sets.find(bufferID) == freed_sets.end())
             {
@@ -677,10 +647,9 @@ bool runtime::cpu::pass::CPUMemoryAssignment::run_on_function(shared_ptr<ngraph:
             if (cacheable)
             {
-                for (size_t i = 0; i < node->get_output_size(); ++i)
+                for (auto output : node->outputs())
                 {
-                    shared_ptr<descriptor::Tensor> tv = node->get_output_tensor_ptr(i);
-                    m_tensor_caching.insert(tv.get());
+                    m_tensor_caching.insert(&output.get_tensor());
                 }
             }
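The caching loop above now takes the address of each output's tensor directly rather than fetching a shared_ptr to it by index. A sketch of collecting non-owning tensor pointers from output handles (stand-in types, not nGraph's):

#include <iostream>
#include <unordered_set>
#include <vector>

struct Tensor
{
};

struct OutputHandle
{
    Tensor tensor;
    Tensor& get_tensor() { return tensor; }
};

int main()
{
    std::vector<OutputHandle> outputs(3);       // like node->outputs()
    std::unordered_set<Tensor*> tensor_caching; // like m_tensor_caching

    // Iterate by reference here so the stored addresses stay valid; in nGraph
    // the tensors live in the graph, so copying the handle is fine there.
    for (auto& output : outputs)
    {
        tensor_caching.insert(&output.get_tensor());
    }
    std::cout << tensor_caching.size() << "\n"; // prints 3
}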
@@ -698,107 +667,99 @@ bool runtime::cpu::pass::CPUMemoryAssignment::run_on_function(shared_ptr<ngraph:
         unordered_set<descriptor::Tensor*> no_free;
         unordered_set<descriptor::Tensor*> no_new;
 
-        if (node->is_op())
+        auto op = std::static_pointer_cast<op::Op>(node);
+        if (auto op_annotations = op->get_op_annotations())
         {
-            auto op = std::static_pointer_cast<op::Op>(node);
-            if (auto op_annotations = op->get_op_annotations())
+            for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
             {
-                for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
+                auto output_tensor = &node->output(oi_pair.output).get_tensor();
+                auto input_tensor = &node->input_value(oi_pair.input).get_tensor();
+                auto input_op = node->input_value(oi_pair.input).get_node_shared_ptr();
+                if (oi_pair.destructive && node->liveness_free_list.count(input_tensor) != 0 &&
+                    node->liveness_new_list.count(output_tensor) != 0)
                 {
-                    auto output_tensor = &node->get_outputs().at(oi_pair.output).get_tensor();
-                    auto input_tensor = &node->get_inputs().at(oi_pair.input).get_tensor();
-                    auto input_node =
-                        node->get_inputs().at(oi_pair.input).get_output().get_node();
-                    if (oi_pair.destructive && node->liveness_free_list.count(input_tensor) != 0 &&
-                        node->liveness_new_list.count(output_tensor) != 0)
+                    if (auto input_op_annotations = input_op->get_op_annotations())
                     {
-                        if (input_node->is_op())
+                        // when reusing memory, ops with different cacheabilities are using
+                        // different memory manager
+                        // and should not share the same buffer.
+                        if (!m_disable_memory_sharing &&
+                            input_op_annotations->is_cacheable() != op_annotations->is_cacheable())
                         {
-                            auto input_op = std::static_pointer_cast<op::Op>(input_node);
-                            if (auto input_op_annotations = input_op->get_op_annotations())
-                            {
-                                // when reusing memory, ops with different cacheabilities are using
-                                // different memory manager
-                                // and should not share the same buffer.
-                                if (!m_disable_memory_sharing &&
-                                    input_op_annotations->is_cacheable() !=
-                                        op_annotations->is_cacheable())
-                                {
-                                    NGRAPH_DEBUG << "cpu_memory_assignment: reusing memory with "
-                                                    "input and output have different cacheabilities, no "
-                                                    "destructive oi";
-                                    continue;
-                                }
-                            }
+                            NGRAPH_DEBUG << "cpu_memory_assignment: reusing memory with "
+                                            "input and output have different cacheabilities, no "
+                                            "destructive oi";
+                            continue;
                         }
+                    }
                     auto input_bufferID = get_bufferID(input_tensor);
                     auto output_bufferID = get_bufferID(output_tensor);
                     auto input_buffer_it = m_bufferID_to_tensorSets.find(input_bufferID);
                     NGRAPH_CHECK(input_buffer_it != m_bufferID_to_tensorSets.end());
                     // do not modify function inputs and constants, so no destructive oi
                     if (input_buffer_it->second.first == TensorRole::INPUT ||
                         input_buffer_it->second.first == TensorRole::CONSTANT)
                     {
                         NGRAPH_DEBUG << "cpu_memory_assignment: input is function input or "
                                         "constant, no destructive oi";
                         continue;
                     }
                     auto input_set = input_buffer_it->second.second;
                     // check buffer sizes, if required output buffer is larger than input
                     // buffer, do not reuse input buffer get the largest tensor size, which is
                     // the size of the memory buffer for the set
                     size_t input_size = input_tensor->size();
                     // get the smallest offset, which is the offset of the memory buffer for the
                     // set
                     size_t offset = input_tensor->get_pool_offset();
                     for (auto e : input_set)
                     {
                         if (e->size() > input_size)
                         {
                             input_size = e->size();
                         }
                         if (e->get_pool_offset() < offset)
                         {
                             offset = e->get_pool_offset();
                         }
                     }
                     auto output_buffer_it = m_bufferID_to_tensorSets.find(output_bufferID);
                     NGRAPH_CHECK(output_buffer_it != m_bufferID_to_tensorSets.end());
                     auto output_set = output_buffer_it->second.second;
                     size_t output_size = input_tensor->size();
                     // get the largest tensor size, which is the size of memory buffer for the
                     // set
                     for (auto e : output_set)
                     {
                         if (e->size() > output_size)
                         {
                             output_size = e->size();
                         }
                     }
                     if (input_size < output_size)
                     {
                         continue;
                     }
                     NGRAPH_DEBUG << "cpu_memory_assignment: last use of input tensor, "
                                     "destructive oi allowed:";
                     NGRAPH_DEBUG << "input_tensor is " << input_tensor->get_name();
                     NGRAPH_DEBUG << "output_tensor is " << output_tensor->get_name();
                     no_free.insert(input_tensor);
                     no_new.insert(output_tensor);
                     // set the tensor offset for tensors in the set containing the output tensor
                     // to the starting offset
                     // of the set of input tensor.
                     // do not combine those two sets.
                     // change the label of output tensor set to that of input tensor set
                     output_buffer_it->second.first = input_buffer_it->second.first;
                     for (auto& ele_t : output_set)
                     {
                         ele_t->set_pool_offset(offset);
                     }
                 }
             }
         }
src/ngraph/runtime/cpu/pass/cpu_memory_assignment.hpp
@@ -53,13 +53,13 @@ private:
    void process_in_place_concat(std::list<std::shared_ptr<Node>> nodes);
    // For a chain of concat ops, propagate memory pool offsets
-    void propagate_in_place_concat(std::shared_ptr<ngraph::op::Op> concat, size_t index);
+    void propagate_in_place_concat(const ngraph::Output<ngraph::Node>& concat);
    // Find in-place slice ops and set appropriate memory pool offset for its output
    void process_in_place_slice(std::list<std::shared_ptr<Node>> nodes);
    // propagate slice when its arg comes from function input
-    void propagate_in_place_slice(ngraph::descriptor::Input* input, size_t input_index);
+    void propagate_in_place_slice(const ngraph::Input<ngraph::Node>& input);
    // build buffer sets maps
    void build_buffer_sets_maps(std::list<std::shared_ptr<Node>>& ops);