ngraph / Commits / 0b99a7a1

Commit 0b99a7a1, authored Mar 12, 2018 by fenglei.tian

    Merge remote-tracking branch 'origin/master' into tfl/gpu_dot_back

Parents: bc4aefed, 41a883b1

Showing 21 changed files with 451 additions and 125 deletions.
src/ngraph/graph_util.hpp                              +1    -0
src/ngraph/pass/assign_placement.hpp                   +1    -0
src/ngraph/runtime/cpu/cpu_call_frame.cpp              +1    -0
src/ngraph/runtime/cpu/cpu_emitter.cpp                 +0    -0
src/ngraph/runtime/cpu/cpu_external_function.cpp       +46   -57
src/ngraph/runtime/cpu/cpu_external_function.hpp       +7    -0
src/ngraph/runtime/cpu/cpu_runtime_context.hpp         +1    -0
src/ngraph/runtime/cpu/mkldnn_emitter.cpp              +112  -0
src/ngraph/runtime/cpu/mkldnn_emitter.hpp              +29   -0
src/ngraph/runtime/cpu/pass/cpu_assignment.cpp         +51   -8
src/ngraph/runtime/cpu/pass/cpu_layout.cpp             +0    -0
src/ngraph/runtime/cpu/pass/cpu_nop_elimination.cpp    +1    -0
src/ngraph/runtime/external_function.hpp               +4    -1
src/ngraph/runtime/manager.cpp                         +1    -0
src/ngraph/runtime/manager.hpp                         +1    -0
src/tools/nbench/nbench.cpp                            +79   -12
test/backend_test.in.cpp                               +0    -1
test/cpu_fusion.cpp                                    +8    -16
test/util/benchmark.cpp                                +98   -25
test/util/benchmark.hpp                                +9    -1
test/util/test_tools.hpp                               +1    -4
src/ngraph/graph_util.hpp

@@ -16,6 +16,7 @@
#pragma once

#include <functional>
#include <list>
#include <memory>
#include <string>
src/ngraph/pass/assign_placement.hpp

@@ -17,6 +17,7 @@
#pragma once

#include <exception>
#include <functional>
#include <sstream>

#include "ngraph/pass/pass.hpp"
src/ngraph/runtime/cpu/cpu_call_frame.cpp

@@ -146,6 +146,7 @@ void runtime::cpu::CPU_CallFrame::setup_runtime_context()
    }
    const auto& mkldnn_emitter = m_external_function->get_mkldnn_emitter();
    ctx->mkldnn_primitives = mkldnn_emitter->get_mkldnn_primitives().data();
+   ctx->mkldnn_workspaces = mkldnn_emitter->get_mkldnn_workspaces().data();
}

void runtime::cpu::CPU_CallFrame::cleanup_runtime_context()
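A self-contained sketch of the borrowing pattern this hunk extends (the names below are illustrative, not the project's): the runtime context stores raw .data() pointers into vectors that the emitter continues to own, so those vectors must not reallocate or be destroyed while a call frame is live.

    #include <vector>

    struct RuntimeCtx
    {
        int* const* primitives; // borrowed view into the emitter's storage
    };

    int main()
    {
        std::vector<int*> owned;      // stands in for the emitter's primitive list
        RuntimeCtx ctx{owned.data()}; // pointer stays valid only while 'owned'
        (void)ctx;                    // is neither resized nor destroyed
    }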
src/ngraph/runtime/cpu/cpu_emitter.cpp

(This diff is collapsed in the page capture.)
src/ngraph/runtime/cpu/cpu_external_function.cpp

@@ -258,7 +258,7 @@ runtime::cpu::CPU_ExternalFunction::CPU_ExternalFunction(
    const shared_ptr<ngraph::Function>& function, bool release_function)
    : ngraph::runtime::ExternalFunction(function, release_function)
    , m_compiled_function(nullptr)
-   , m_emit_timing(std::getenv("NGRAPH_CPU_EMIT_TIMING") != nullptr)
+   , m_emit_timing(false)
    , m_use_tbb(std::getenv("NGRAPH_CPU_USE_TBB") != nullptr)
    , m_function_name(function->get_name())
{

@@ -275,6 +275,8 @@ void runtime::cpu::CPU_ExternalFunction::compile()
        return;
    }
+   m_emit_timing = m_timing | (std::getenv("NGRAPH_CPU_EMIT_TIMING") != nullptr);
    m_mkldnn_emitter.reset(new MKLDNNEmitter());

    ngraph::pass::Manager pass_manager;

@@ -370,6 +372,7 @@ using namespace ngraph::runtime;
    {
        writer << "// Declare debug timers\n";
        vector<string> names;
+       size_t index = 0;
        for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
        {
            for (shared_ptr<Node> node : current_function->get_ordered_ops())

@@ -377,59 +380,43 @@ using namespace ngraph::runtime;
                if (!node->is_parameter() && !node->is_constant())
                {
                    names.push_back(node->get_name());
+                   m_name_index_map.insert({node->get_name(), index++});
                }
            }
        }
-       for (const string& s : names)
-       {
-           writer << "ngraph::stopwatch timer_" << s << ";\n";
-       }
+       writer << "ngraph::stopwatch timers[" << names.size() << "];\n";
        writer << "extern \"C\" size_t get_debug_timer_count() { return " << names.size() << "; }\n";
        writer << "extern \"C\" const char* get_debug_timer_name(size_t index)\n";
        writer << "{\n";
        writer.indent++;
-       writer << "const char* rc;\n";
-       writer << "switch(index)\n";
-       writer << "{\n";
-       writer.indent++;
-       for (size_t i = 0; i < names.size(); i++)
-       {
-           writer << "case " << i << ": rc = \"" << names[i] << "\"; break;\n";
-       }
-       writer << "default: rc = \"\";\n";
-       writer << "}\n";
-       writer << "return rc;\n";
+       writer << "static const char* timer_names[" << names.size() << "] =\n";
+       writer << "{\n";
+       writer.indent++;
+       vector<string> quoted_names;
+       for (const string& name : names)
+       {
+           quoted_names.push_back("\"" + name + "\"");
+       }
+       writer << emit_string_array(quoted_names, 100 - (4 * 2 + 1));
+       writer << "\n};\n";
+       writer.indent--;
+       writer << "return timer_names[index];\n";
        writer.indent--;
        writer << "}\n";
        writer << "extern \"C\" const size_t get_debug_timer_microseconds(size_t index)\n";
        writer << "{\n";
        writer.indent++;
-       writer << "size_t rc;\n";
-       writer << "switch(index)\n";
-       writer << "{\n";
-       for (size_t i = 0; i < names.size(); i++)
-       {
-           writer << "case " << i << ": rc = timer_" << names[i]
-                  << ".get_total_microseconds(); break;\n";
-       }
-       writer << "default: rc = 0;\n";
-       writer << "}\n";
-       writer << "return rc;\n";
+       writer << "return (index < " << names.size()
+              << " ? timers[index].get_total_microseconds() : 0);\n";
        writer.indent--;
        writer << "}\n";
        writer << "extern \"C\" const size_t get_debug_timer_call_count(size_t index)\n";
        writer << "{\n";
        writer.indent++;
-       writer << "size_t rc;\n";
-       writer << "switch(index)\n";
-       writer << "{\n";
-       for (size_t i = 0; i < names.size(); i++)
-       {
-           writer << "case " << i << ": rc = timer_" << names[i] << ".get_call_count(); break;\n";
-       }
-       writer << "default: rc = 0;\n";
-       writer << "}\n";
-       writer << "return rc;\n";
+       writer << "return (index < " << names.size() << " ? timers[index].get_call_count() : 0);\n";
        writer.indent--;
        writer << "}\n";
        writer << "\n";

@@ -443,15 +430,11 @@ using namespace ngraph::runtime;
            const ngraph::op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
            if (c)
            {
+               m_active_constants.push_back(node);
                shared_ptr<descriptor::TensorView> tv = node->get_outputs()[0].get_tensor_view();
-               auto c_value_strings = c->get_value_strings();
-               writer << "static " << tv->get_tensor().get_element_type().c_type_string() << " "
-                      << tv->get_tensor().get_name() << "[" << c_value_strings.size() << "] =\n";
-               writer << "{\n";
-               writer.indent++;
-               writer << emit_string_array(c_value_strings, 100 - writer.indent * 4);
-               writer.indent--;
-               writer << "\n};\n\n";
+               string type = tv->get_tensor().get_element_type().c_type_string();
+               writer << "static " << type << "* " << tv->get_tensor().get_name() << " = (("
+                      << type << "*)(" << c->get_data_ptr() << "));\n";
                m_variable_name_map[tv->get_tensor().get_name()] = tv->get_tensor().get_name();
            }
        }

@@ -702,10 +685,6 @@ using namespace ngraph::runtime;
                       << "(G, [&](const tbb::flow::continue_msg &msg)\n{\n";
                writer.indent++;
            }
-           if (m_emit_timing)
-           {
-               emit_debug_function_entry(writer, node.get(), in, out);
-           }
            if (runtime::cpu::IsTracingEnabled() &&
                current_function->get_name() == m_function_name)
            {

@@ -713,14 +692,21 @@ using namespace ngraph::runtime;
                }
            }

-           writer << "\n// " << node->get_name() << "(";
-           vector<string> parameter_nodes = node_input_names;
-           parameter_nodes.insert(
-               parameter_nodes.end(), node_output_names.begin(), node_output_names.end());
-           writer << join(parameter_nodes);
-           writer << ")\n";
+           if (!node->is_parameter() && !node->is_constant())
+           {
+               writer << "\n// " << node->get_name() << "(";
+               vector<string> parameter_nodes = node_input_names;
+               parameter_nodes.insert(
+                   parameter_nodes.end(), node_output_names.begin(), node_output_names.end());
+               writer << join(parameter_nodes);
+               writer << ")\n";
+           }

            // Emit operation body
+           if (!node->is_parameter() && !node->is_constant())
+           {
+               emit_debug_function_entry(writer, node.get(), in, out);
+           }
            string func_name;
            auto it = match_functions.find(node.get());
            if (it == match_functions.end())

@@ -745,10 +731,7 @@ using namespace ngraph::runtime;
            // Emit operation epilogue
            if (!node->is_parameter() && !node->is_constant())
            {
-               if (m_emit_timing)
-               {
-                   emit_debug_function_exit(writer, node.get(), in, out);
-               }
+               emit_debug_function_exit(writer, node.get(), in, out);
                if (runtime::cpu::IsTracingEnabled() &&
                    current_function->get_name() == m_function_name)
                {

@@ -909,7 +892,10 @@ void runtime::cpu::CPU_ExternalFunction::emit_debug_function_entry(
    const std::vector<TensorViewWrapper>& in,
    const std::vector<TensorViewWrapper>& out)
{
-   writer << "timer_" << node->get_name() << ".start();\n";
+   if (m_emit_timing)
+   {
+       writer << "timers[" << m_name_index_map[node->get_name()] << "].start();\n";
+   }
}

void runtime::cpu::CPU_ExternalFunction::emit_debug_function_exit(

@@ -918,7 +904,10 @@ void runtime::cpu::CPU_ExternalFunction::emit_debug_function_exit(
    const std::vector<TensorViewWrapper>& in,
    const std::vector<TensorViewWrapper>& out)
{
-   writer << "timer_" << node->get_name() << ".stop();\n";
+   if (m_emit_timing)
+   {
+       writer << "timers[" << m_name_index_map[node->get_name()] << "].stop();\n";
+   }
}

bool runtime::cpu::CPU_ExternalFunction::is_functionally_identical(
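To make the timer refactor above concrete, here is a hedged sketch of the code the writer << lines now generate (a count of three and the node names are invented for illustration; the real name list comes from emit_string_array). One timers[] array indexed through m_name_index_map replaces the per-node timer_<name> stopwatches, so the lookup functions no longer need a switch over every node name.

    ngraph::stopwatch timers[3];
    extern "C" size_t get_debug_timer_count() { return 3; }
    extern "C" const char* get_debug_timer_name(size_t index)
    {
        static const char* timer_names[3] = {"Add_0", "Dot_1", "Result_2"};
        return timer_names[index];
    }
    extern "C" const size_t get_debug_timer_microseconds(size_t index)
    {
        return (index < 3 ? timers[index].get_total_microseconds() : 0);
    }
    extern "C" const size_t get_debug_timer_call_count(size_t index)
    {
        return (index < 3 ? timers[index].get_call_count() : 0);
    }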
src/ngraph/runtime/cpu/cpu_external_function.hpp

@@ -17,6 +17,7 @@
#pragma once

#include <functional>
+#include <map>
#include <memory>
#include <string>
#include <typeindex>

@@ -119,6 +120,12 @@ namespace ngraph
        bool m_emit_timing;
        bool m_use_tbb;

        std::unordered_map<std::string, std::string> m_variable_name_map;
+       std::map<std::string, size_t> m_name_index_map;
+
+       // Because we are directly accessing the constant data stored in the
+       // Constant ops we need to keep a list of shared_ptr to each Constant
+       // so they don't get freed before we are done with them
+       std::vector<std::shared_ptr<Node>> m_active_constants;

        LayoutDescriptorPtrs parameter_layout_descriptors;
        LayoutDescriptorPtrs result_layout_descriptors;
src/ngraph/runtime/cpu/cpu_runtime_context.hpp

@@ -37,6 +37,7 @@ namespace ngraph
        {
            int64_t* op_durations;
            mkldnn::primitive* const* mkldnn_primitives;
+           char* const* mkldnn_workspaces;
        };
    }
}
src/ngraph/runtime/cpu/mkldnn_emitter.cpp

@@ -21,6 +21,7 @@
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"

using namespace ngraph::runtime::cpu;

@@ -36,12 +37,24 @@ const std::vector<mkldnn::primitive*>& MKLDNNEmitter::get_mkldnn_primitives() co
    return m_mkldnn_primitives;
}

const std::vector<char*>& MKLDNNEmitter::get_mkldnn_workspaces()
{
    return m_workspace_bufs;
}

size_t MKLDNNEmitter::insert_primitive(mkldnn::primitive* primitive)
{
    m_mkldnn_primitives.emplace_back(primitive);
    return (m_mkldnn_primitives.size() - 1);
}

size_t MKLDNNEmitter::insert_workspace(std::unique_ptr<MKLDNNWorkspace>& workspace)
{
    m_workspace_bufs.push_back(workspace.get()->buf);
    m_workspaces.push_back(std::move(workspace));
    return (m_workspaces.size() - 1);
}

const std::vector<size_t>& MKLDNNEmitter::get_primitive_deps(size_t index) const
{
    return m_primitive_deps.at(index);

@@ -321,6 +334,105 @@ size_t MKLDNNEmitter::build_pooling_forward(mkldnn::algorithm pooling_algorithm,
    return primitive_index;
}

size_t MKLDNNEmitter::build_pooling_backward(mkldnn::algorithm pooling_algorithm,
                                             const mkldnn::memory::desc& diff_dst_desc,
                                             const mkldnn::memory::desc& diff_src_desc,
                                             const ngraph::Strides& window_strides,
                                             const ngraph::Shape& window_shape,
                                             const ngraph::Shape& padding_below,
                                             const ngraph::Shape& padding_above)
{
    size_t input_index = build_memory_primitive(diff_dst_desc);
    size_t result_index = build_memory_primitive(diff_src_desc);

    size_t primitive_index = insert_primitive(new mkldnn::pooling_backward(
        {{pooling_algorithm,
          diff_src_desc,
          diff_dst_desc,
          mkldnn::memory::dims(window_strides.begin(), window_strides.end()),
          mkldnn::memory::dims(window_shape.begin(), window_shape.end()),
          mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
          mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
          mkldnn::padding_kind::zero},
         mkldnn_utils::global_cpu_engine,
         {{mkldnn::prop_kind::forward_training,
           pooling_algorithm,
           diff_src_desc,
           diff_dst_desc,
           mkldnn::memory::dims(window_strides.begin(), window_strides.end()),
           mkldnn::memory::dims(window_shape.begin(), window_shape.end()),
           mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
           mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
           mkldnn::padding_kind::zero},
          mkldnn_utils::global_cpu_engine}},
        *m_mkldnn_primitives[input_index],
        *m_mkldnn_primitives[result_index]));

    m_primitive_deps[primitive_index] = {input_index, result_index};
    return primitive_index;
}

size_t MKLDNNEmitter::build_max_pooling_backward(mkldnn::algorithm pooling_algorithm,
                                                 const mkldnn::memory::desc& fprop_src_desc,
                                                 const mkldnn::memory::desc& diff_dst_desc,
                                                 const mkldnn::memory::desc& diff_src_desc,
                                                 const ngraph::Strides& window_strides,
                                                 const ngraph::Shape& window_shape,
                                                 const ngraph::Shape& padding_below,
                                                 const ngraph::Shape& padding_above)
{
    size_t fprop_src_index = build_memory_primitive(fprop_src_desc);
    size_t diff_dst_index = build_memory_primitive(diff_dst_desc);
    size_t diff_src_index = build_memory_primitive(diff_src_desc);

    mkldnn::pooling_forward::primitive_desc fwd_pd{
        {mkldnn::prop_kind::forward_training,
         pooling_algorithm,
         diff_src_desc,
         diff_dst_desc,
         mkldnn::memory::dims(window_strides.begin(), window_strides.end()),
         mkldnn::memory::dims(window_shape.begin(), window_shape.end()),
         mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
         mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
         mkldnn::padding_kind::zero},
        mkldnn_utils::global_cpu_engine};

    auto ws_index = build_memory_primitive(fwd_pd.workspace_primitive_desc().desc());

    // Allocate workspace
    // TODO (jbobba): Might need to align memory
    auto ws = std::unique_ptr<MKLDNNWorkspace>(
        new MKLDNNWorkspace(fwd_pd.workspace_primitive_desc().get_size()));
    auto ws_buf_index = insert_workspace(ws);

    size_t fwd_primitive_index = insert_primitive(new mkldnn::pooling_forward(
        fwd_pd,
        *m_mkldnn_primitives[fprop_src_index],
        *m_mkldnn_primitives[diff_src_index], // HACK - Uses diff_src buffer. Safe since diff_src > fprop_dst
        *m_mkldnn_primitives[ws_index]));

    size_t bwd_primitive_index = insert_primitive(new mkldnn::pooling_backward(
        {{pooling_algorithm,
          diff_src_desc,
          diff_dst_desc,
          mkldnn::memory::dims(window_strides.begin(), window_strides.end()),
          mkldnn::memory::dims(window_shape.begin(), window_shape.end()),
          mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
          mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
          mkldnn::padding_kind::zero},
         mkldnn_utils::global_cpu_engine,
         fwd_pd},
        *m_mkldnn_primitives[diff_dst_index],
        *m_mkldnn_primitives[ws_index],
        *m_mkldnn_primitives[diff_src_index]));

    m_primitive_deps[fwd_primitive_index] = {fprop_src_index, diff_src_index, ws_index, ws_buf_index};
    m_primitive_deps[bwd_primitive_index] = {diff_dst_index, ws_index, diff_src_index, ws_buf_index};
    return bwd_primitive_index;
}

size_t MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc,
                                    const mkldnn::memory::desc& result_desc)
{
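The ownership handoff in insert_workspace is worth isolating. Below is a minimal, self-contained sketch of the same pattern with generic types (not the project's code, which uses malloc/free inside MKLDNNWorkspace): the registry takes unique_ptr ownership and keeps a parallel vector of raw buffer pointers that the runtime context can later borrow via .data().

    #include <cstddef>
    #include <memory>
    #include <vector>

    struct Workspace
    {
        explicit Workspace(std::size_t size) : buf(new char[size]) {}
        std::unique_ptr<char[]> buf;
    };

    class Registry
    {
    public:
        std::size_t insert_workspace(std::unique_ptr<Workspace>& w)
        {
            m_bufs.push_back(w->buf.get());
            m_owners.push_back(std::move(w)); // caller's pointer is emptied here
            return m_owners.size() - 1;       // callers keep an index, not a pointer
        }
        char* const* data() { return m_bufs.data(); }

    private:
        std::vector<std::unique_ptr<Workspace>> m_owners;
        std::vector<char*> m_bufs;
    };

Returning indices rather than pointers lets the generated code refer to primitives and workspaces positionally through the runtime context, while the emitter remains the single owner.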
src/ngraph/runtime/cpu/mkldnn_emitter.hpp

@@ -35,6 +35,14 @@ namespace ngraph
        class CPU_ExternalFunction;
        class TensorViewWrapper;

        class MKLDNNWorkspace
        {
        public:
            MKLDNNWorkspace(size_t size) { buf = reinterpret_cast<char*>(malloc(size)); }
            ~MKLDNNWorkspace() { free(buf); }
            char* buf;
        };

        class MKLDNNEmitter
        {
        public:

@@ -42,8 +50,10 @@ namespace ngraph
            ~MKLDNNEmitter();

            const std::vector<mkldnn::primitive*>& get_mkldnn_primitives() const;
+           const std::vector<char*>& get_mkldnn_workspaces();

            size_t insert_primitive(mkldnn::primitive* primitive);
+           size_t insert_workspace(std::unique_ptr<MKLDNNWorkspace>& workspace);
            const std::vector<size_t>& get_primitive_deps(size_t index) const;

            // TODO(jmenon): Get rid of TensorViewWrappers at some point

@@ -109,6 +119,23 @@ namespace ngraph
                                         const ngraph::Shape& padding_below,
                                         const ngraph::Shape& padding_above);

            size_t build_pooling_backward(mkldnn::algorithm pooling_algorithm,
                                          const mkldnn::memory::desc& diff_dst_desc,
                                          const mkldnn::memory::desc& diff_src_desc,
                                          const ngraph::Strides& window_strides,
                                          const ngraph::Shape& window_shape,
                                          const ngraph::Shape& padding_below,
                                          const ngraph::Shape& padding_above);

            size_t build_max_pooling_backward(mkldnn::algorithm pooling_algorithm,
                                              const mkldnn::memory::desc& fprop_src_desc,
                                              const mkldnn::memory::desc& diff_dst_desc,
                                              const mkldnn::memory::desc& diff_src_desc,
                                              const ngraph::Strides& window_strides,
                                              const ngraph::Shape& window_shape,
                                              const ngraph::Shape& padding_below,
                                              const ngraph::Shape& padding_above);

            size_t build_reorder(const mkldnn::memory::desc& input_desc,
                                 const mkldnn::memory::desc& result_desc);

@@ -129,6 +156,8 @@ namespace ngraph
            std::vector<mkldnn::primitive*> m_mkldnn_primitives;
            std::vector<mkldnn::stream> m_mkldnn_streams;
            std::unordered_map<size_t, std::vector<size_t>> m_primitive_deps;
+           std::vector<std::unique_ptr<MKLDNNWorkspace>> m_workspaces;
+           std::vector<char*> m_workspace_bufs;
        };
    }
}
src/ngraph/runtime/cpu/pass/cpu_assignment.cpp

@@ -29,6 +29,7 @@
#include "ngraph/ops/avg_pool.hpp"
#include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/convolution.hpp"
+#include "ngraph/ops/max_pool.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"

@@ -245,10 +246,48 @@ namespace ngraph
                    }
                }

                template <>
                void CPUAssignment::ASSIGN_DECL(ngraph::op::MaxPool)
                {
                    auto max_pool = static_cast<op::MaxPool*>(node);

                    auto arg0_shape = node->get_input_shape(0);
                    auto arg0_rank = arg0_shape.size();
                    auto result_shape = node->get_output_shape(0);

                    if (arg0_rank == 4 && max_pool->get_window_shape().size() == 2 &&
                        node->get_input_element_type(0) == element::f32)
                    {
                        auto op_annotations =
                            std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
                        op_annotations->set_mkldnn_op(true);
                        max_pool->set_op_annotations(op_annotations);
                    }
                }

                template <>
                void CPUAssignment::ASSIGN_DECL(ngraph::op::MaxPoolBackprop)
                {
                    auto max_pool = static_cast<op::MaxPoolBackprop*>(node);

                    auto arg1_shape = node->get_input_shape(1);
                    auto arg1_rank = arg1_shape.size();
                    auto result_shape = node->get_output_shape(0);

                    if (arg1_rank == 4 && max_pool->get_window_shape().size() == 2 &&
                        node->get_input_element_type(1) == element::f32)
                    {
                        auto op_annotations =
                            std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
                        op_annotations->set_mkldnn_op(true);
                        max_pool->set_op_annotations(op_annotations);
                    }
                }

                template <>
                void CPUAssignment::ASSIGN_DECL(ngraph::op::Relu)
                {
-                   auto avg_pool = static_cast<op::Relu*>(node);
+                   auto relu = static_cast<op::Relu*>(node);

                    auto arg0_shape = node->get_input_shape(0);
                    auto arg0_rank = arg0_shape.size();

@@ -260,7 +299,7 @@ namespace ngraph
                        auto op_annotations =
                            std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
                        op_annotations->set_mkldnn_op(true);
-                       avg_pool->set_op_annotations(op_annotations);
+                       relu->set_op_annotations(op_annotations);
                    }
                }

@@ -280,18 +319,19 @@ namespace ngraph
                template <>
                void CPUAssignment::ASSIGN_DECL(ngraph::op::ReluBackprop)
                {
-                   auto avg_pool = static_cast<op::ReluBackprop*>(node);
+                   auto relu_bprop = static_cast<op::ReluBackprop*>(node);

                    auto arg0_shape = node->get_input_shape(0);
                    auto arg0_rank = arg0_shape.size();
                    auto result_shape = node->get_output_shape(0);

-                   if (arg0_rank == 4 && node->get_input_element_type(0) == element::f32)
+                   if ((arg0_rank == 4 || arg0_rank == 2) &&
+                       node->get_input_element_type(0) == element::f32)
                    {
                        auto op_annotations =
                            std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
                        op_annotations->set_mkldnn_op(true);
-                       avg_pool->set_op_annotations(op_annotations);
+                       relu_bprop->set_op_annotations(op_annotations);
                    }
                }

@@ -313,6 +353,9 @@ namespace ngraph
static const runtime::cpu::pass::AssignOpMap s_dispatcher{
    {TI(ngraph::op::Add), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Add>},
    {TI(ngraph::op::AvgPool), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::AvgPool>},
    {TI(ngraph::op::AvgPoolBackprop),
     &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::AvgPoolBackprop>},
    {TI(ngraph::op::BatchNorm), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::BatchNorm>},
    {TI(ngraph::op::Convolution),
     &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Convolution>},

@@ -320,13 +363,13 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
     &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ConvolutionBackpropData>},
    {TI(ngraph::op::ConvolutionBackpropFilters),
     &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ConvolutionBackpropFilters>},
    {TI(ngraph::op::MaxPool), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::MaxPool>},
    {TI(ngraph::op::MaxPoolBackprop),
     &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::MaxPoolBackprop>},
    {TI(ngraph::op::ConvolutionBias),
     &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ConvolutionBias>},
    {TI(ngraph::op::ConvolutionBiasBackpropFiltersBias),
     &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ConvolutionBiasBackpropFiltersBias>},
    {TI(ngraph::op::AvgPool), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::AvgPool>},
    {TI(ngraph::op::AvgPoolBackprop),
     &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::AvgPoolBackprop>},
    {TI(ngraph::op::Relu), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Relu>},
    {TI(ngraph::op::ReluBackprop),
     &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ReluBackprop>},
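The two new MaxPool handlers repeat an eligibility predicate that also appears in the other pooling handlers. As a standalone sketch (the helper name is hypothetical), the test is simply: rank-4 float32 input with a 2-D pooling window.

    #include <cstddef>
    #include <vector>

    // Mirrors the condition in the handlers above: only 4-D f32 tensors with
    // 2-D windows are tagged for MKL-DNN; everything else falls back to the
    // default emitter.
    bool eligible_for_mkldnn(const std::vector<std::size_t>& input_shape,
                             const std::vector<std::size_t>& window_shape,
                             bool is_f32)
    {
        return input_shape.size() == 4 && window_shape.size() == 2 && is_f32;
    }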
src/ngraph/runtime/cpu/pass/cpu_layout.cpp

(This diff is collapsed in the page capture.)
src/ngraph/runtime/cpu/pass/cpu_nop_elimination.cpp

@@ -14,6 +14,7 @@
 * limitations under the License.
 *******************************************************************************/

#include <functional>
#include <memory>
#include <typeindex>
#include <typeinfo>
src/ngraph/runtime/external_function.hpp

@@ -19,6 +19,7 @@
#include <memory>

#include "ngraph/function.hpp"
#include "ngraph/log.hpp"

namespace ngraph
{

@@ -34,6 +35,7 @@ namespace ngraph
            : m_function(function)
            , m_release_function(release_function)
            , m_is_compiled(false)
+           , m_timing(false)
        {
        }

@@ -42,12 +44,13 @@ namespace ngraph
    public:
        virtual ~ExternalFunction() {}
        virtual std::shared_ptr<CallFrame> make_call_frame() = 0;
+       void set_emit_timing(bool enable) { m_timing = enable; }
        const std::shared_ptr<ngraph::Function> get_function() { return m_function; }

    protected:
        std::shared_ptr<ngraph::Function> m_function;
        bool m_release_function;
        bool m_is_compiled;
+       bool m_timing;
    };
}
src/ngraph/runtime/manager.cpp

@@ -15,6 +15,7 @@
 *******************************************************************************/

#include <dlfcn.h>
#include <functional>
#include <iostream>
#include <sstream>
#include <string>
src/ngraph/runtime/manager.hpp

@@ -16,6 +16,7 @@
#pragma once

#include <functional>
#include <map>
#include <memory>
#include <string>
src/tools/nbench/nbench.cpp

@@ -21,34 +21,42 @@
// sample models are under ../../test/models

#include <fstream>
#include <ngraph/file_util.hpp>
#include <ngraph/runtime/backend.hpp>
#include <ngraph/runtime/call_frame.hpp>
#include <ngraph/runtime/manager.hpp>
#include <ngraph/util.hpp>

#include "util/benchmark.hpp"
#include "util/test_tools.hpp"

using namespace std;
using namespace ngraph;

int main(int argc, char** argv)
{
-   string model = "model.json";
-   string backend = "INTERPRETER";
-   int iter = 10;
+   string model;
+   string backend = "CPU";
+   int iterations = 10;
    bool failed = false;
+   bool statistics = false;
+   bool timing_detail = false;
    for (size_t i = 1; i < argc; i++)
    {
-       if (string(argv[i]) == "-f")
+       string arg = argv[i];
+       if (arg == "-f" || arg == "--file")
        {
            model = argv[++i];
        }
-       else if (string(argv[i]) == "-b")
+       else if (arg == "-b" || arg == "--backend")
        {
            backend = argv[++i];
        }
-       else if (string(argv[i]) == "-i")
+       else if (arg == "-i" || arg == "--iterations")
        {
            try
            {
-               iter = stoi(argv[++i]);
+               iterations = stoi(argv[++i]);
            }
            catch (...)
            {

@@ -56,6 +64,19 @@ int main(int argc, char** argv)
                failed = true;
            }
        }
+       else if (arg == "-s" || arg == "--statistics")
+       {
+           statistics = true;
+       }
+       else if (arg == "--timing_detail")
+       {
+           timing_detail = true;
+       }
+       else
+       {
+           cout << "Unknown option: " << arg << endl;
+           failed = true;
+       }
    }
    if (!static_cast<bool>(ifstream(model)))
    {

@@ -73,12 +94,58 @@ SYNOPSIS
        nbench [-f <filename>] [-b <backend>] [-i <iterations>]

OPTIONS
-       -f                 model json file to use (default: model.json)
-       -b                 Backend to use (default: INTERPRETER)
-       -i                 Iterations (default: 10)
+       -f|--file          Serialized model file
+       -b|--backend       Backend to use (default: CPU)
+       -i|--iterations    Iterations (default: 10)
+       -s|--statistics    Display op statistics
+       --timing_detail    Gather detailed timing
)###";
        return 1;
    }

-   cout << "Benchmarking " << model << ", " << backend << " backend, " << iter
-        << " iterations.\n";
-   run_benchmark(model, backend, iter);
+   const string json_string = file_util::read_file_to_string(model);
+   stringstream ss(json_string);
+   shared_ptr<Function> f = deserialize(ss);
+   if (statistics)
+   {
+       cout << "statistics:" << endl;
+       cout << "total nodes: " << f->get_ops().size() << endl;
+       size_t total_constant_bytes = 0;
+       unordered_map<string, size_t> op_list;
+       for (shared_ptr<Node> node : f->get_ordered_ops())
+       {
+           string name = node->get_name();
+           string op_name = name.substr(0, name.find('_'));
+           string shape_name = "{" + join(node->get_outputs()[0].get_shape()) + "}";
+           op_list[op_name + shape_name]++;
+
+           if (op_name == "Constant")
+           {
+               const Shape& shape = node->get_outputs()[0].get_shape();
+               size_t const_size = node->get_outputs()[0].get_element_type().size();
+               if (shape.size() == 0)
+               {
+                   total_constant_bytes += const_size;
+               }
+               else
+               {
+                   total_constant_bytes +=
+                       (const_size * shape_size(node->get_outputs()[0].get_shape()));
+               }
+           }
+       }
+       cout << "Total Constant size: " << total_constant_bytes << " bytes\n";
+       for (const pair<string, size_t>& op_info : op_list)
+       {
+           cout << op_info.first << ": " << op_info.second << " ops" << endl;
+       }
+   }
+   else if (iterations > 0)
+   {
+       cout << "Benchmarking " << model << ", " << backend << " backend, " << iterations
+            << " iterations.\n";
+       run_benchmark(f, backend, iterations, timing_detail);
+   }

    return 0;
}
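Hypothetical invocations exercising the new flags (model.json is a placeholder path):

    nbench -f model.json -b CPU -i 100 --timing_detail
    nbench --file model.json --statistics

The first runs 100 timed iterations with per-op timing compiled in; the second only prints the node counts and constant sizes, since the statistics branch skips benchmarking entirely.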
test/backend_test.in.cpp

@@ -5370,7 +5370,6 @@ TEST(${BACKEND_NAME}, numeric_double_inf)

TEST(${BACKEND_NAME}, abc_tbb)
{
-   SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
    ONLY_ENABLE_TEST_FOR("CPU", "${BACKEND_NAME}");
    // Force TBB flow graph generation in the CPU backend
test/cpu_fusion.cpp

@@ -109,14 +109,10 @@ TEST(cpu_fusion, gemm_cpu_broadcast_row)
    auto A = make_shared<op::Parameter>(element::f32, shapeA);
    auto B = make_shared<op::Parameter>(element::f32, shapeB);
-   auto reshape_w = make_shared<op::Reshape>(A, AxisVector{1, 0}, Shape{2, 3});
-   auto reshape_x = make_shared<op::Reshape>(B, AxisVector{1, 0}, Shape{3, 2});
-   auto one = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{1.0f, 1.0f});
+   auto bias = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{2.0f, 3.0f});
-   auto broadcast = make_shared<op::Broadcast>(one, shapeC, AxisSet{0});
    auto cg = make_shared<op::MatmulBias>(
-       A, B, one, A->get_shape(), B->get_shape(), true, true, AxisSet{0});
+       A, B, bias, A->get_shape(), B->get_shape(), true, true, AxisSet{0});
    auto f = make_shared<Function>(cg, op::ParameterVector{A, B});

@@ -136,8 +132,8 @@ TEST(cpu_fusion, gemm_cpu_broadcast_row)
    copy_data(b, dataB);

    cf->call({a, b}, {result});
-   vector<float> expected{10, 28, 37, 109};
-   ASSERT_TRUE(read_vector<float>(result) == expected);
+   vector<float> expected{11, 30, 38, 111};
+   EXPECT_EQ(read_vector<float>(result), expected);
}

TEST(cpu_fusion, gemm_cpu_broadcast_column)

@@ -148,14 +144,10 @@ TEST(cpu_fusion, gemm_cpu_broadcast_column)
    auto A = make_shared<op::Parameter>(element::f32, shapeA);
    auto B = make_shared<op::Parameter>(element::f32, shapeB);
-   auto reshape_w = make_shared<op::Reshape>(A, AxisVector{1, 0}, Shape{2, 3});
-   auto reshape_x = make_shared<op::Reshape>(B, AxisVector{1, 0}, Shape{3, 2});
-   auto one = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{1.0f, 1.0f});
+   auto bias = op::Constant::create<float>(element::f32, Shape{2}, std::vector<float>{2.0f, 3.0f});
-   auto broadcast = make_shared<op::Broadcast>(one, shapeC, AxisSet{1});
    auto cg = make_shared<op::MatmulBias>(
-       A, B, one, A->get_shape(), B->get_shape(), true, true, AxisSet{1});
+       A, B, bias, A->get_shape(), B->get_shape(), true, true, AxisSet{1});
    auto f = make_shared<Function>(cg, op::ParameterVector{A, B});

@@ -175,8 +167,8 @@ TEST(cpu_fusion, gemm_cpu_broadcast_column)
    copy_data(b, dataB);

    cf->call({a, b}, {result});
-   vector<float> expected{10, 28, 37, 109};
-   ASSERT_TRUE(read_vector<float>(result) == expected);
+   vector<float> expected{11, 29, 39, 111};
+   EXPECT_EQ(read_vector<float>(result), expected);
}

TEST(cpu_fusion, gemm_cpu_broadcast_matrix)
test/util/benchmark.cpp

@@ -17,56 +17,127 @@
#include <iomanip>

#include "benchmark.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/external_function.hpp"
#include "ngraph/runtime/manager.hpp"
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
#include "random.hpp"

-std::multimap<size_t, std::string>
-    aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data)
-{
-    std::unordered_map<std::string, size_t> timing;
-    for (const ngraph::runtime::PerformanceCounter& p : perf_data)
-    {
-        std::string op = p.name().substr(0, p.name().find('_'));
-        timing[op] += p.microseconds();
-    }
-
-    std::multimap<size_t, std::string> rc;
-    for (const std::pair<std::string, size_t>& t : timing)
-    {
-        rc.insert({t.second, t.first});
-    }
-    return rc;
-}
+using namespace std;
+using namespace ngraph;
+
+shared_ptr<Node> find_node(const string& name, shared_ptr<Function> func)
+{
+    static unordered_map<string, shared_ptr<Node>> node_map;
+    if (node_map.empty())
+    {
+        vector<shared_ptr<Function>> fs;
+        traverse_functions(func, [&](shared_ptr<Function> f) { fs.push_back(f); });
+        for (shared_ptr<Function> f : fs)
+        {
+            for (shared_ptr<Node> node : f->get_ops())
+            {
+                node_map.insert({node->get_name(), node});
+            }
+        }
+    }
+    return node_map[name];
+}
+
+multimap<size_t, string> aggregate_timing_details(
+    const vector<runtime::PerformanceCounter>& perf_data, shared_ptr<Function> f)
+{
+    unordered_map<string, size_t> timing;
+    for (const runtime::PerformanceCounter& p : perf_data)
+    {
+        shared_ptr<Node> node = find_node(p.name(), f);
+        string op = p.name().substr(0, p.name().find('_'));
+        string shape_name = "{" + join(node->get_outputs()[0].get_shape()) + "}";
+        timing[op + shape_name] += p.microseconds();
+    }
+
+    multimap<size_t, string> rc;
+    for (const pair<string, size_t>& t : timing)
+    {
+        rc.insert({t.second, t.first});
+    }
+    return rc;
+}
+
+multimap<size_t, string> aggregate_timing(const vector<runtime::PerformanceCounter>& perf_data)
+{
+    unordered_map<string, size_t> timing;
+    for (const runtime::PerformanceCounter& p : perf_data)
+    {
+        string op = p.name().substr(0, p.name().find('_'));
+        timing[op] += p.microseconds();
+    }
+
+    multimap<size_t, string> rc;
+    for (const pair<string, size_t>& t : timing)
+    {
+        rc.insert({t.second, t.first});
+    }
+    return rc;
+}

-void run_benchmark(const std::string& json_path, const std::string& backend_name, size_t iterations)
-{
-    using namespace std;
-    using namespace ngraph;
-    string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
-    bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
-    if (!emit_timing)
-    {
-        cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
-    }
-
-    ngraph::test::Uniform<float> rng{-1, 1, 0};
+void run_benchmark(const string& json_path,
+                   const string& backend_name,
+                   size_t iterations,
+                   bool timing_detail)
+{
+    stopwatch timer;
+    timer.start();
    const string json_string = file_util::read_file_to_string(json_path);
    stringstream ss(json_string);
    shared_ptr<Function> f = deserialize(ss);
+    timer.stop();
+    cout << "deserialize time: " << timer.get_milliseconds() << "ms" << endl;
+    run_benchmark(f, backend_name, iterations, timing_detail);
+}

-    stopwatch build_time;
-    build_time.start();
+void print_times(const multimap<size_t, string>& timing)
+{
+    // set the column widths
+    int name_width = 0;
+    int time_width = 0;
+    for (const pair<size_t, string>& p : timing)
+    {
+        name_width = max(name_width, static_cast<int>(p.second.size()));
+        stringstream ss;
+        ss.imbue(locale(""));
+        ss << p.first;
+        time_width = max(time_width, static_cast<int>(ss.str().size()));
+    }
+    for (auto it = timing.rbegin(); it != timing.rend(); it++)
+    {
+        cout << setw(name_width + 2) << left << it->second << " " << setw(time_width + 2) << right
+             << it->first << "us\n";
+    }
+}
+
+void run_benchmark(shared_ptr<Function> f,
+                   const string& backend_name,
+                   size_t iterations,
+                   bool timing_detail)
+{
+    test::Uniform<float> rng{-1, 1, 0};
+    stopwatch timer;
+    timer.start();
    auto manager = runtime::Manager::get(backend_name);
    auto external = manager->compile(f);
+    external->set_emit_timing(timing_detail);
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(external);
-    build_time.stop();
-    cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
+    timer.stop();
+    cout.imbue(locale(""));
+    cout << "compile time: " << timer.get_milliseconds() << "ms" << endl;

    vector<shared_ptr<runtime::TensorView>> args;
    for (shared_ptr<op::Parameter> param : f->get_parameters())

@@ -100,9 +171,11 @@ void run_benchmark(const std::string& json_path, const std::string& backend_name
        return p1.total_microseconds() > p2.total_microseconds();
    });
    multimap<size_t, string> timing = aggregate_timing(perf_data);
-   for (auto it = timing.rbegin(); it != timing.rend(); it++)
-   {
-       cout.imbue(locale(""));
-       cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
-   }
+   multimap<size_t, string> timing_details = aggregate_timing_details(perf_data, f);
+
+   cout << "\n---- Aggregate times per op type ----\n";
+   print_times(timing);
+
+   cout << "\n---- Aggregate times per op type/shape ----\n";
+   print_times(timing_details);
}
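As a worked example of the bucketing rule shared by both aggregators (the counter name below is hypothetical): nGraph names nodes "<Op>_<id>", so the op type is everything before the first underscore. aggregate_timing buckets by that prefix alone; aggregate_timing_details additionally appends the node's first output shape.

    #include <iostream>
    #include <string>

    int main()
    {
        std::string name = "Add_37"; // a performance-counter name
        std::string op = name.substr(0, name.find('_'));
        std::cout << op << "\n";     // prints "Add"; the detailed variant would
                                     // key on something like "Add{2,3}"
    }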
test/util/benchmark.hpp

@@ -18,13 +18,21 @@
#include <map>
#include <ngraph/function.hpp>
#include <ngraph/runtime/call_frame.hpp>

#include "test_tools.hpp"

/// performance test utilities
std::multimap<size_t, std::string>
    aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data);

+void run_benchmark(std::shared_ptr<ngraph::Function> f,
+                   const std::string& backend_name,
+                   size_t iterations,
+                   bool timing_detail);
+
void run_benchmark(const std::string& json_path,
                   const std::string& backend_name,
-                  size_t iterations);
+                  size_t iterations,
+                  bool timing_detail = false);
test/util/test_tools.hpp

@@ -36,11 +36,8 @@
#define ONLY_ENABLE_TEST_FOR(backend_to_enable, current_backend) \
    if (backend_to_enable != current_backend) \
    { \
        NGRAPH_INFO << "Skipped test for " << current_backend; \
        return; \
-   } \
-   else \
-   { \
-       NGRAPH_INFO << "Enabled test for " << current_backend; \
    }

namespace ngraph