submodule / ngraph / Commits / 70e5973c

Unverified commit 70e5973c, authored Jan 10, 2020 by Scott Cyphers; committed by GitHub on Jan 10, 2020.
Modernize a pass (#4055)

Co-authored-by: Sang Ik Lee <sang.ik.lee@intel.com>

parent 6bd90ef4
Showing 2 changed files with 166 additions and 205 deletions:

  src/ngraph/runtime/cpu/pass/cpu_memory_assignment.cpp  +164 -203
  src/ngraph/runtime/cpu/pass/cpu_memory_assignment.hpp  +2 -2
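Across both files the change follows one pattern: string-based type checks such as node->description() == "Concat" followed by a static_pointer_cast are replaced by the typed helpers is_type<T> / as_type_ptr<T>, and descriptor-level accessors (get_inputs(), get_outputs(), get_arguments(), explicitly carried output indices) are replaced by the Output<Node> / Input<Node> handle API (input_value(), output(i).get_tensor(), outputs(), get_target_inputs()). The snippet below is a minimal stand-alone sketch of the first half of that pattern; it uses plain std::dynamic_pointer_cast and invented stand-in classes rather than the ngraph types, purely to illustrate the "test and cast in one step" shape of the new code.

// Stand-alone sketch (stand-in types, not ngraph code): the "check the kind,
// then cast" pattern this commit replaces. dynamic_pointer_cast stands in for
// ngraph's as_type_ptr<T>.
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Node { virtual ~Node() = default; virtual std::string description() const = 0; };
struct Concat : Node { std::string description() const override { return "Concat"; } };
struct Add : Node { std::string description() const override { return "Add"; } };

int main()
{
    std::vector<std::shared_ptr<Node>> nodes{std::make_shared<Add>(), std::make_shared<Concat>()};
    for (const auto& node : nodes)
    {
        // Old style: compare a string tag, then cast unconditionally.
        if (node->description() == "Concat")
        {
            auto concat = std::static_pointer_cast<Concat>(node);
            std::cout << "old-style match: " << concat->description() << "\n";
        }
        // New style: one typed query that both tests and casts.
        if (auto concat = std::dynamic_pointer_cast<Concat>(node))
        {
            std::cout << "new-style match: " << concat->description() << "\n";
        }
    }
}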
src/ngraph/runtime/cpu/pass/cpu_memory_assignment.cpp (view file @ 70e5973c)
@@ -60,9 +60,8 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
 {
     for (shared_ptr<Node> node : nodes)
     {
-        if (node->description() == "Concat")
+        if (auto concat = as_type_ptr<op::Concat>(node))
         {
-            auto concat = std::static_pointer_cast<ngraph::op::Concat>(node);
             if (auto op_annotations = concat->get_op_annotations())
             {
                 auto in_place_oi_pairs = op_annotations->get_in_place_oi_pairs();
@@ -72,9 +71,8 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
                     bool found_last_concat = true;
                     for (auto user : concat->get_users())
                     {
-                        if (user->description() == "Concat")
+                        if (auto user_concat = as_type_ptr<op::Concat>(user))
                         {
-                            auto user_concat = std::static_pointer_cast<ngraph::op::Concat>(user);
                             if (auto user_op_annotations = user_concat->get_op_annotations())
                             {
                                 auto user_in_place_oi_pairs =
@@ -90,14 +88,14 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
                 // start from the last concat
                 if (found_last_concat)
                 {
-                    auto output_tensor = &concat->get_output_tensor();
+                    auto output_tensor = &concat->output(0).get_tensor();
                     auto output_bufferID = get_bufferID(output_tensor);
                     auto offset = output_tensor->get_pool_offset();
                     size_t arg_index = 0;
-                    for (auto arg : concat->get_arguments())
+                    for (auto arg : concat->input_values())
                     {
-                        auto input_tensor = &arg->get_output_tensor();
+                        auto input_tensor = &arg.get_tensor();
                         auto input_bufferID = get_bufferID(input_tensor);
                         // same set, in place concat allowed
                         if (input_bufferID == output_bufferID)
@@ -109,22 +107,17 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
                                 << old_offset << ", new offset is " << offset << std::endl;
                             // check if need to propagate backward
-                            if (arg->is_op())
+                            auto arg_op = arg.get_node_shared_ptr();
+                            if (auto arg_op_annotations = arg_op->get_op_annotations())
                             {
-                                auto arg_op = std::static_pointer_cast<ngraph::op::Op>(arg);
-                                if (auto arg_op_annotations = arg_op->get_op_annotations())
+                                auto arg_in_place_oi_pairs =
+                                    arg_op_annotations->get_in_place_oi_pairs();
+                                if (arg_in_place_oi_pairs.size() > 0)
                                 {
-                                    auto arg_in_place_oi_pairs =
-                                        arg_op_annotations->get_in_place_oi_pairs();
-                                    if (arg_in_place_oi_pairs.size() > 0)
-                                    {
-                                        auto input = &arg_op->get_inputs().at(0);
-                                        auto output_index = input->get_output().get_index();
-                                        NGRAPH_DEBUG << "cpu_memory_assignment: call "
-                                                        "propagate_in_place_concat for "
-                                                     << arg->get_name();
-                                        propagate_in_place_concat(arg_op, output_index);
-                                    }
+                                    NGRAPH_DEBUG << "cpu_memory_assignment: call "
+                                                    "propagate_in_place_concat for "
+                                                 << *arg_op;
+                                    propagate_in_place_concat(arg);
                                 }
                             }
-                            }
@@ -138,19 +131,19 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_concat(
     }
 }

-void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(
-    shared_ptr<ngraph::op::Op> op, size_t output_index)
+void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(const Output<Node>& output)
 {
-    if (op->description() == "Concat")
+    auto op = output.get_node_shared_ptr();
+    if (is_type<op::Concat>(op))
     {
-        auto output_tensor = &op->get_output_tensor();
+        auto output_tensor = &op->output(0).get_tensor();
         auto output_bufferID = get_bufferID(output_tensor);
         auto offset = output_tensor->get_pool_offset();
         size_t arg_index = 0;
-        for (auto arg : op->get_arguments())
+        for (auto arg : op->input_values())
         {
-            auto input_tensor = &arg->get_output_tensor();
+            auto input_tensor = &arg.get_tensor();
             auto input_bufferID = get_bufferID(input_tensor);
             // same set, in place concat allowed
             if (input_bufferID == output_bufferID)
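The new overload above takes a single Output<Node> instead of a (shared_ptr<op::Op>, size_t output_index) pair, because the handle already carries both the producing node (get_node_shared_ptr()) and the output index (get_index()). Below is a stand-alone sketch of that signature change; the types and names are invented stand-ins for illustration, not the real ngraph classes.

// Stand-alone sketch (stand-in types): an output handle that bundles the
// producing node with the output index, so the extra size_t parameter
// disappears from the call.
#include <cstddef>
#include <iostream>
#include <memory>

struct Node { const char* name = "concat"; };

struct OutputHandle
{
    std::shared_ptr<Node> node;
    size_t index = 0;
    std::shared_ptr<Node> get_node_shared_ptr() const { return node; }
    size_t get_index() const { return index; }
};

// Old shape of the call: propagate(op, output_index)
void propagate_old(const std::shared_ptr<Node>& op, size_t output_index)
{
    std::cout << op->name << " output " << output_index << "\n";
}

// New shape of the call: the handle already knows its node and index.
void propagate_new(const OutputHandle& output)
{
    std::cout << output.get_node_shared_ptr()->name << " output " << output.get_index() << "\n";
}

int main()
{
    auto n = std::make_shared<Node>();
    propagate_old(n, 0);
    propagate_new(OutputHandle{n, 0});
}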
@@ -161,21 +154,15 @@ void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(
                         << ", new offset is " << offset;
                     // check if need to propagate backward
-                    if (arg->is_op())
+                    auto arg_op = arg.get_node_shared_ptr();
+                    if (auto arg_op_annotations = arg_op->get_op_annotations())
                     {
-                        auto arg_op = std::static_pointer_cast<ngraph::op::Op>(arg);
-                        if (auto arg_op_annotations = arg_op->get_op_annotations())
+                        auto arg_in_place_oi_pairs =
+                            arg_op_annotations->get_in_place_oi_pairs();
+                        if (arg_in_place_oi_pairs.size() > 0)
                         {
-                            auto arg_in_place_oi_pairs =
-                                arg_op_annotations->get_in_place_oi_pairs();
-                            if (arg_in_place_oi_pairs.size() > 0)
-                            {
-                                NGRAPH_DEBUG
-                                    << "cpu_memory_assignment: call propagate_in_place_concat for "
-                                    << arg->get_name();
-                                auto input = &op->get_inputs().at(arg_index);
-                                auto arg_output_index = input->get_output().get_index();
-                                propagate_in_place_concat(arg_op, arg_output_index);
-                            }
+                            NGRAPH_DEBUG
+                                << "cpu_memory_assignment: call propagate_in_place_concat for "
+                                << *arg_op;
+                            propagate_in_place_concat(arg);
                         }
                     }
-                    }
@@ -189,14 +176,14 @@ void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(
         auto op_annotations = op->get_op_annotations();
         for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
         {
-            if (oi_pair.output != output_index || oi_pair.destructive)
+            if (oi_pair.output != output.get_index() || oi_pair.destructive)
             {
                 continue;
             }
-            auto input_tensor = &op->get_inputs().at(oi_pair.input).get_tensor();
+            auto input_tensor = &op->input_value(oi_pair.input).get_tensor();
             auto input_bufferID = get_bufferID(input_tensor);
-            auto output_tensor = &op->get_outputs().at(oi_pair.output).get_tensor();
+            auto output_tensor = &op->output(oi_pair.output).get_tensor();
             auto output_bufferID = get_bufferID(output_tensor);
             // same set, in place op allowed
@@ -207,24 +194,18 @@ void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_concat(
                     input_tensor->set_pool_offset(new_offset);
                     NGRAPH_DEBUG << "cpu_memory_assignment: change offset, old offset is "
                                  << old_offset << ", new offset is " << new_offset;
-                    auto input = &op->get_inputs().at(oi_pair.input);
-                    auto arg = input->get_node();
+                    auto input = op->input_value(oi_pair.input);
+                    auto arg_op = input.get_node_shared_ptr();
                     // check if need to propagate backward
-                    if (arg->is_op())
+                    if (auto arg_op_annotations = arg_op->get_op_annotations())
                     {
-                        auto arg_op = std::static_pointer_cast<ngraph::op::Op>(arg);
-                        if (auto arg_op_annotations = arg_op->get_op_annotations())
+                        auto arg_in_place_oi_pairs =
+                            arg_op_annotations->get_in_place_oi_pairs();
+                        if (arg_in_place_oi_pairs.size() > 0)
                         {
-                            auto arg_in_place_oi_pairs =
-                                arg_op_annotations->get_in_place_oi_pairs();
-                            if (arg_in_place_oi_pairs.size() > 0)
-                            {
-                                auto arg_output_index = input->get_output().get_index();
-                                NGRAPH_DEBUG
-                                    << "cpu_memory_assignment: call propagate_in_place_concat for "
-                                    << arg->get_name();
-                                propagate_in_place_concat(arg_op, arg_output_index);
-                            }
+                            NGRAPH_DEBUG
+                                << "cpu_memory_assignment: call propagate_in_place_concat for "
+                                << *arg_op;
+                            propagate_in_place_concat(input);
                         }
                     }
@@ -238,20 +219,18 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_slice(
 {
     for (shared_ptr<Node>& node : nodes)
     {
-        if (node->description() == "Slice")
+        if (auto slice = as_type_ptr<op::Slice>(node))
         {
-            auto slice = std::static_pointer_cast<ngraph::op::Slice>(node);
             if (auto op_annotations = slice->get_op_annotations())
             {
                 auto in_place_oi_pairs = op_annotations->get_in_place_oi_pairs();
                 if (in_place_oi_pairs.size() > 0)
                 {
-                    auto input = &slice->get_inputs().at(0);
-                    auto arg = input->get_output().get_node();
-                    auto index = input->get_output().get_index();
-                    auto input_tensor = &arg->get_output_tensor(index);
+                    auto input = slice->input_value(0);
+                    auto arg = input.get_node_shared_ptr();
+                    auto input_tensor = &input.get_tensor();
                     auto input_bufferID = get_bufferID(input_tensor);
-                    auto output_tensor = &slice->get_output_tensor();
+                    auto output_tensor = &slice->output(0).get_tensor();
                     auto output_bufferID = get_bufferID(output_tensor);
                     // same set, in place slice allowed
@@ -277,17 +256,15 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_slice(
                             << old_offset << ", new offset is " << offset;
                         // check if need to propagate forward
-                        for (size_t i = 0; i < slice->get_output_size(); ++i)
+                        for (auto slice_output : slice->outputs())
                         {
-                            auto slice_output = &slice->get_outputs().at(i);
-                            for (auto slice_output_input : slice_output->get_inputs())
+                            for (auto slice_output_input : slice_output.get_target_inputs())
                             {
                                 NGRAPH_DEBUG
                                     << "cpu_memory_assignment: call propagate_in_place_slice "
                                        "for output "
-                                    << i << " of " << slice->get_name();
-                                propagate_in_place_slice(slice_output_input,
-                                                         slice_output_input->get_index());
+                                    << slice_output.get_index() << " of " << *slice;
+                                propagate_in_place_slice(slice_output_input);
                             }
                         }
@@ -297,48 +274,43 @@ void runtime::cpu::pass::CPUMemoryAssignment::process_in_place_slice(
     }
 }

-void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_slice(
-    ngraph::descriptor::Input* input, size_t input_index)
+void runtime::cpu::pass::CPUMemoryAssignment::propagate_in_place_slice(const Input<Node>& input)
 {
-    std::deque<std::pair<ngraph::descriptor::Input*, size_t>> stack;
-    stack.push_front(std::pair<ngraph::descriptor::Input*, size_t>(input, input_index));
+    std::deque<Input<Node>> stack;
+    stack.push_front(input);

     while (stack.size() > 0)
     {
-        ngraph::descriptor::Input* in = stack.front().first;
-        auto index = stack.front().second;
+        Input<Node> in = stack.front();
         stack.pop_front();
-        auto node = in->get_node();
+        auto node = in.get_node();
         // let process_in_place_slice handle slice.
-        if (!node->is_op() || node->description() == "Slice")
+        if (is_type<op::Slice>(node))
        {
            continue;
        }
-        auto op = std::static_pointer_cast<ngraph::op::Op>(node);
-        if (auto op_annotations = op->get_op_annotations())
+        if (auto op_annotations = node->get_op_annotations())
        {
            for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
            {
-                if (oi_pair.input == index)
+                if (oi_pair.input == in.get_index())
                {
-                    auto input_tensor = &op->get_inputs().at(oi_pair.input).get_tensor();
+                    auto input_tensor = &node->input(oi_pair.input).get_tensor();
                    auto input_bufferID = get_bufferID(input_tensor);
                    size_t output_index = oi_pair.output;
-                    auto output_tensor = &op->get_outputs().at(output_index).get_tensor();
+                    auto output_tensor = &node->output(output_index).get_tensor();
                    auto output_bufferID = get_bufferID(output_tensor);
                    // same set, in place op allowed
                    if (input_bufferID == output_bufferID)
                    {
                        output_tensor->set_pool_offset(input_tensor->get_pool_offset());
-                        for (size_t i = 0; i < op->get_output_size(); ++i)
+                        for (auto op_output : node->outputs())
                        {
-                            auto op_output = &op->get_outputs().at(i);
-                            for (auto op_output_input : op_output->get_inputs())
+                            for (auto op_output_input : op_output.get_target_inputs())
                            {
-                                stack.push_front(std::pair<ngraph::descriptor::Input*, size_t>(
-                                    op_output_input, op_output_input->get_index()));
+                                stack.push_front(op_output_input);
                            }
                        }
                    }
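propagate_in_place_slice keeps its worklist traversal, but the deque now holds Input<Node> handles directly instead of (descriptor::Input*, size_t) pairs, and downstream consumers come from get_target_inputs(). The following stand-alone sketch shows the same worklist shape with invented stand-in types; the node ids and wiring are made up purely for illustration.

// Stand-alone sketch (stand-in types): a worklist of input handles that each
// carry their own index, mirroring the deque<Input<Node>> rewrite above.
#include <cstddef>
#include <deque>
#include <iostream>
#include <vector>

struct InputHandle
{
    int node_id;   // which consumer node this input belongs to
    size_t index;  // which of that node's inputs it is
};

int main()
{
    // Pretend node 1 feeds an input of node 3; nodes 2 and 3 feed nothing.
    std::vector<std::vector<InputHandle>> downstream = {
        {{1, 0}, {2, 0}}, {{3, 1}}, {}, {}};

    std::deque<InputHandle> stack;
    stack.push_front(InputHandle{1, 0});
    while (!stack.empty())
    {
        InputHandle in = stack.front();
        stack.pop_front();
        std::cout << "visit node " << in.node_id << " input " << in.index << "\n";
        // propagate forward: every input fed by this node's outputs goes on the stack
        for (const auto& next : downstream[in.node_id])
        {
            stack.push_front(next);
        }
    }
}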
@@ -365,16 +337,16 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
         const shared_ptr<Node>& node = *it;
         if (node->is_parameter())
         {
-            auto output_tensor = &node->get_output_tensor();
+            auto output_tensor = &node->output(0).get_tensor();
             auto ele = std::pair<TensorRole, unordered_set<descriptor::Tensor*>>(
                 TensorRole::INPUT, unordered_set<descriptor::Tensor*>({output_tensor}));
             m_bufferID_to_tensorSets[count] = ele;
             m_tensor_to_bufferID[output_tensor] = count;
             count++;
         }
-        else if (node->is_constant())
+        else if (is_type<op::Constant>(node))
         {
-            auto output_tensor = &node->get_output_tensor();
+            auto output_tensor = &node->output(0).get_tensor();
             auto ele = std::pair<TensorRole, unordered_set<descriptor::Tensor*>>(
                 TensorRole::CONSTANT, unordered_set<descriptor::Tensor*>({output_tensor}));
             m_bufferID_to_tensorSets[count] = ele;
@@ -383,8 +355,8 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
         }
         else if (node->is_output())
         {
-            auto output_tensor = &node->get_output_tensor();
-            auto input_tensor = &node->get_inputs().at(0).get_tensor();
+            auto output_tensor = &node->output(0).get_tensor();
+            auto input_tensor = &node->input(0).get_tensor();
             auto bufferID = get_bufferID(input_tensor);
             NGRAPH_CHECK(bufferID <= count);
@@ -423,18 +395,18 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
             auto cacheable = op_annotations->is_cacheable();
             // in place concat
-            if (node->description() == "Concat")
+            if (is_type<op::Concat>(node))
             {
-                auto output_tensor = &node->get_output_tensor();
+                auto output_tensor = &node->output(0).get_tensor();
                 auto ele = std::pair<TensorRole, unordered_set<descriptor::Tensor*>>(
                     TensorRole::INTERMEDIATE, unordered_set<descriptor::Tensor*>({output_tensor}));
-                for (auto& arg : node->get_arguments())
+                for (auto& arg : node->input_values())
                 {
                     // when reusing memory, check cacheability
-                    if (!m_disable_memory_sharing && arg->is_op())
+                    if (!m_disable_memory_sharing)
                     {
-                        auto arg_op = std::static_pointer_cast<op::Op>(arg);
+                        auto arg_op = arg.get_node_shared_ptr();
                         if (auto arg_op_annotations = arg_op->get_op_annotations())
                         {
                             // when reusing memory, ops with different cacheabilities should
@@ -449,7 +421,7 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
                     // because in-place slice before in-place concat cannot use the memory
                     // buffer of concat. In-place slice after in-place concat can use the
                     // memory buffer of concat.
-                    auto input_tensor = &arg->get_output_tensor();
+                    auto input_tensor = &arg.get_tensor();
                     if (in_place_slice_chain.find(input_tensor) != in_place_slice_chain.end())
                     {
@@ -494,21 +466,19 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
                 // other in place ops
                 for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
                 {
-                    auto input_tensor = &node->get_inputs().at(oi_pair.input).get_tensor();
-                    auto output_tensor = &node->get_outputs().at(oi_pair.output).get_tensor();
+                    auto input_tensor = &node->input_value(oi_pair.input).get_tensor();
+                    auto output_tensor = &node->output(oi_pair.output).get_tensor();
                     // if destructive, do not put input tensor and output tensor into the
                     // same set.
                     if (!oi_pair.destructive)
                     {
                         bool no_in_place = false;
-                        auto input_node =
-                            node->get_inputs().at(oi_pair.input).get_output().get_node();
+                        auto input_op = node->input_value(oi_pair.input).get_node_shared_ptr();
                         // when reusing memory, check cacheability
-                        if (!m_disable_memory_sharing && input_node->is_op())
+                        if (!m_disable_memory_sharing)
                         {
-                            auto input_op = std::static_pointer_cast<op::Op>(input_node);
                             if (auto input_op_annotations = input_op->get_op_annotations())
                             {
                                 // when reusing memory, ops with different cacheabilities
@@ -527,7 +497,7 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
                     auto input_buffer_it = m_bufferID_to_tensorSets.find(bufferID);
                     NGRAPH_CHECK(input_buffer_it != m_bufferID_to_tensorSets.end());
-                    if (node->description() == "Slice")
+                    if (is_type<op::Slice>(node))
                     {
                         if (input_buffer_it->second.first != TensorRole::CONSTANT)
                         {
@@ -556,9 +526,9 @@ void runtime::cpu::pass::CPUMemoryAssignment::build_buffer_sets_maps(list<shared
             }
         }
         // process output tensors
-        for (size_t i = 0; i < node->get_output_size(); i++)
+        for (auto node_output : node->outputs())
         {
-            auto output_tensor = &node->get_outputs().at(i).get_tensor();
+            auto output_tensor = &node_output.get_tensor();
             // not in place, create a new set and insert into the map
             if (m_tensor_to_bufferID.find(output_tensor) == m_tensor_to_bufferID.end())
             {
@@ -617,9 +587,9 @@ void runtime::cpu::pass::CPUMemoryAssignment::liveness_analysis(
         const shared_ptr<Node>& node = *it;
         node->liveness_new_list.clear();
-        for (size_t i = 0; i < node->get_output_size(); ++i)
+        for (auto node_output : node->outputs())
         {
-            auto tensor = &node->get_output_tensor(i);
+            auto tensor = &node_output.get_tensor();
             auto bufferID = get_bufferID(tensor);
             if (allocated_sets.find(bufferID) == allocated_sets.end())
             {
@@ -635,9 +605,9 @@ void runtime::cpu::pass::CPUMemoryAssignment::liveness_analysis(
         const shared_ptr<Node>& node = *it;
         node->liveness_free_list.clear();
-        for (descriptor::Input& input_decl : node->get_inputs())
+        for (auto input_value : node->input_values())
         {
-            auto tensor = &input_decl.get_tensor();
+            auto tensor = &input_value.get_tensor();
             auto bufferID = get_bufferID(tensor);
             if (freed_sets.find(bufferID) == freed_sets.end())
             {
@@ -677,10 +647,9 @@ bool runtime::cpu::pass::CPUMemoryAssignment::run_on_function(shared_ptr<ngraph:
             if (cacheable)
             {
-                for (size_t i = 0; i < node->get_output_size(); ++i)
+                for (auto output : node->outputs())
                 {
-                    shared_ptr<descriptor::Tensor> tv = node->get_output_tensor_ptr(i);
-                    m_tensor_caching.insert(tv.get());
+                    m_tensor_caching.insert(&output.get_tensor());
                 }
             }
         }
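Here, as in several earlier hunks, an index loop over get_output_size() with get_output_tensor_ptr(i) / get_outputs().at(i) gives way to a range-for over node->outputs(), reading the tensor through output.get_tensor(). A stand-alone sketch of that loop rewrite, using invented stand-in types rather than the ngraph classes:

// Stand-alone sketch (stand-in types): index loop vs. range-for over output
// handles, the shape of the tensor-caching change above.
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct Tensor { std::string name; };

struct OutputHandle
{
    Tensor* tensor;
    Tensor& get_tensor() const { return *tensor; }
};

struct Node
{
    std::vector<Tensor> tensors{{"out0"}, {"out1"}};
    size_t get_output_size() const { return tensors.size(); }
    Tensor* get_output_tensor_ptr(size_t i) { return &tensors[i]; }
    std::vector<OutputHandle> outputs()
    {
        std::vector<OutputHandle> result;
        for (auto& t : tensors) result.push_back(OutputHandle{&t});
        return result;
    }
};

int main()
{
    Node node;
    // old style: explicit index
    for (size_t i = 0; i < node.get_output_size(); ++i)
        std::cout << node.get_output_tensor_ptr(i)->name << "\n";
    // new style: iterate the output handles directly
    for (auto output : node.outputs())
        std::cout << output.get_tensor().name << "\n";
}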
@@ -698,107 +667,99 @@ bool runtime::cpu::pass::CPUMemoryAssignment::run_on_function(shared_ptr<ngraph:
             unordered_set<descriptor::Tensor*> no_free;
             unordered_set<descriptor::Tensor*> no_new;
-            if (node->is_op())
+            auto op = std::static_pointer_cast<op::Op>(node);
+            if (auto op_annotations = op->get_op_annotations())
             {
-                auto op = std::static_pointer_cast<op::Op>(node);
-                if (auto op_annotations = op->get_op_annotations())
+                for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
                 {
-                    for (auto oi_pair : op_annotations->get_in_place_oi_pairs())
-                    {
-                        auto output_tensor = &node->get_outputs().at(oi_pair.output).get_tensor();
-                        auto input_tensor = &node->get_inputs().at(oi_pair.input).get_tensor();
-                        auto input_node =
-                            node->get_inputs().at(oi_pair.input).get_output().get_node();
+                    auto output_tensor = &node->output(oi_pair.output).get_tensor();
+                    auto input_tensor = &node->input_value(oi_pair.input).get_tensor();
+                    auto input_op = node->input_value(oi_pair.input).get_node_shared_ptr();
                     if (oi_pair.destructive && node->liveness_free_list.count(input_tensor) != 0 &&
                         node->liveness_new_list.count(output_tensor) != 0)
                     {
-                        if (input_node->is_op())
+                        if (auto input_op_annotations = input_op->get_op_annotations())
                         {
-                            auto input_op = std::static_pointer_cast<op::Op>(input_node);
-                            if (auto input_op_annotations = input_op->get_op_annotations())
-                            {
                             // when reusing memory, ops with different cacheabilities are using
                             // different memory manager
                             // and should not share the same buffer.
                             if (!m_disable_memory_sharing &&
                                 input_op_annotations->is_cacheable() !=
                                     op_annotations->is_cacheable())
                             {
                                 NGRAPH_DEBUG << "cpu_memory_assignment: reusing memory with "
                                                 "input and output have different cacheabilities, no "
                                                 "destructive oi";
                                 continue;
                             }
-                            }
                         }
                         auto input_bufferID = get_bufferID(input_tensor);
                         auto output_bufferID = get_bufferID(output_tensor);
                         auto input_buffer_it = m_bufferID_to_tensorSets.find(input_bufferID);
                         NGRAPH_CHECK(input_buffer_it != m_bufferID_to_tensorSets.end());
                         // do not modify function inputs and constants, so no destructive oi
                         if (input_buffer_it->second.first == TensorRole::INPUT ||
                             input_buffer_it->second.first == TensorRole::CONSTANT)
                         {
                             NGRAPH_DEBUG << "cpu_memory_assignment: input is function input or "
                                             "constant, no destructive oi";
                             continue;
                         }
                         auto input_set = input_buffer_it->second.second;
                         // check buffer sizes, if required output buffer is larger than input
                         // buffer, do not reuse input buffer get the largest tensor size, which is
                         // the size of the memory buffer for the set
                         size_t input_size = input_tensor->size();
                         // get the smallest offset, which is the offset of the memory buffer for the
                         // set
                         size_t offset = input_tensor->get_pool_offset();
                         for (auto e : input_set)
                         {
                             if (e->size() > input_size)
                             {
                                 input_size = e->size();
                             }
                             if (e->get_pool_offset() < offset)
                             {
                                 offset = e->get_pool_offset();
                             }
                         }
                         auto output_buffer_it = m_bufferID_to_tensorSets.find(output_bufferID);
                         NGRAPH_CHECK(output_buffer_it != m_bufferID_to_tensorSets.end());
                         auto output_set = output_buffer_it->second.second;
                         size_t output_size = input_tensor->size();
                         // get the largest tensor size, which is the size of memory buffer for the
                         // set
                         for (auto e : output_set)
                         {
                             if (e->size() > output_size)
                             {
                                 output_size = e->size();
                             }
                         }
                         if (input_size < output_size)
                         {
                             continue;
                         }
                         NGRAPH_DEBUG << "cpu_memory_assignment: last use of input tensor, "
                                         "destructive oi allowed:";
                         NGRAPH_DEBUG << "input_tensor is " << input_tensor->get_name();
                         NGRAPH_DEBUG << "output_tensor is " << output_tensor->get_name();
                         no_free.insert(input_tensor);
                         no_new.insert(output_tensor);
                         // set the tensor offset for tensors in the set containing the output tensor
                         // to the starting offset
                         // of the set of input tensor.
                         // do not combine those two sets.
                         // change the label of output tensor set to that of input tensor set
                         output_buffer_it->second.first = input_buffer_it->second.first;
                         for (auto& ele_t : output_set)
                         {
                             ele_t->set_pool_offset(offset);
                         }
                     }
                 }
             }
-            }
src/ngraph/runtime/cpu/pass/cpu_memory_assignment.hpp (view file @ 70e5973c)
@@ -53,13 +53,13 @@ private:
     void process_in_place_concat(std::list<std::shared_ptr<Node>> nodes);
     // For a chain of concat ops, propagate memory pool offsets
-    void propagate_in_place_concat(std::shared_ptr<ngraph::op::Op> concat, size_t index);
+    void propagate_in_place_concat(const ngraph::Output<ngraph::Node>& concat);
     // Find in-place slice ops and set appropriate memory pool offset for its output
     void process_in_place_slice(std::list<std::shared_ptr<Node>> nodes);
     // propagate slice when its arg comes from function input
-    void propagate_in_place_slice(ngraph::descriptor::Input* input, size_t input_index);
+    void propagate_in_place_slice(const ngraph::Input<ngraph::Node>& input);
     // build buffer sets maps
     void build_buffer_sets_maps(std::list<std::shared_ptr<Node>>& ops);