Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
a95fe1ff
Unverified
Commit
a95fe1ff
authored
Mar 12, 2018
by
Jayaram Bobba
Committed by
GitHub
Mar 12, 2018
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' into jbobba/batchnorm-inference
parents
30135ca3
41a883b1
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
261 additions
and
128 deletions
+261
-128
graph_util.hpp
src/ngraph/graph_util.hpp
+1
-0
assign_placement.hpp
src/ngraph/pass/assign_placement.hpp
+1
-0
cpu_emitter.cpp
src/ngraph/runtime/cpu/cpu_emitter.cpp
+4
-11
cpu_external_function.cpp
src/ngraph/runtime/cpu/cpu_external_function.cpp
+46
-57
cpu_external_function.hpp
src/ngraph/runtime/cpu/cpu_external_function.hpp
+7
-0
cpu_nop_elimination.cpp
src/ngraph/runtime/cpu/pass/cpu_nop_elimination.cpp
+1
-0
external_function.hpp
src/ngraph/runtime/external_function.hpp
+4
-1
manager.cpp
src/ngraph/runtime/manager.cpp
+1
-0
manager.hpp
src/ngraph/runtime/manager.hpp
+1
-0
nbench.cpp
src/tools/nbench/nbench.cpp
+79
-12
backend_test.in.cpp
test/backend_test.in.cpp
+0
-1
cpu_fusion.cpp
test/cpu_fusion.cpp
+8
-16
benchmark.cpp
test/util/benchmark.cpp
+98
-25
benchmark.hpp
test/util/benchmark.hpp
+9
-1
test_tools.hpp
test/util/test_tools.hpp
+1
-4
No files found.
src/ngraph/graph_util.hpp
View file @
a95fe1ff
...
...
@@ -16,6 +16,7 @@
#pragma once
#include <functional>
#include <list>
#include <memory>
#include <string>
...
...
src/ngraph/pass/assign_placement.hpp
View file @
a95fe1ff
...
...
@@ -17,6 +17,7 @@
#pragma once
#include <exception>
#include <functional>
#include <sstream>
#include "ngraph/pass/pass.hpp"
...
...
src/ngraph/runtime/cpu/cpu_emitter.cpp
View file @
a95fe1ff
...
...
@@ -224,10 +224,6 @@ namespace ngraph
}
#endif
//TODO: This could be further optimized to reduce the impact of memcpy by either
//a) emitting customized code for initializing output/bias
//b) emitting two cblas calls (one for gemm on W and x and the second for gemm on Bias and E^T + the result of the first gemm)
//@jbobba suggests b) is more efficient but we should benchmark both
template
<>
void
CPU_Emitter
::
EMITTER_DECL
(
ngraph
::
op
::
MatmulBias
)
{
...
...
@@ -316,13 +312,10 @@ namespace ngraph
writer
<<
"};
\n
"
;
writer
<<
"cblas::cblas_sgemm("
<<
"cblas::Layout::RowMajor, "
<<
cnotranspose
<<
ctranspose
<<
arg2_shape
[
0
]
<<
", "
<<
arg2_shape
[
1
]
<<
", 1"
<<
",
\n
"
<<
" 1.0f, ones_col,"
<<
max
(
1UL
,
arg2_shape
[
1
])
<<
", "
<<
args
[
2
].
get_name
()
<<
", "
<<
"1"
<<
", "
<<
"cblas::Layout::RowMajor, "
<<
cnotranspose
<<
cnotranspose
<<
arg2_shape
[
0
]
<<
", "
<<
arg2_shape
[
1
]
<<
", 1,
\n
"
<<
"1.0f, "
<<
args
[
2
].
get_name
()
<<
", 1, "
<<
"ones_col, "
<<
max
(
1UL
,
arg2_shape
[
1
])
<<
", "
<<
"1.0f"
<<
",
\n
"
<<
" "
<<
out
[
0
].
get_name
()
<<
", "
...
...
src/ngraph/runtime/cpu/cpu_external_function.cpp
View file @
a95fe1ff
...
...
@@ -258,7 +258,7 @@ runtime::cpu::CPU_ExternalFunction::CPU_ExternalFunction(
const
shared_ptr
<
ngraph
::
Function
>&
function
,
bool
release_function
)
:
ngraph
::
runtime
::
ExternalFunction
(
function
,
release_function
)
,
m_compiled_function
(
nullptr
)
,
m_emit_timing
(
std
::
getenv
(
"NGRAPH_CPU_EMIT_TIMING"
)
!=
nullptr
)
,
m_emit_timing
(
false
)
,
m_use_tbb
(
std
::
getenv
(
"NGRAPH_CPU_USE_TBB"
)
!=
nullptr
)
,
m_function_name
(
function
->
get_name
())
{
...
...
@@ -275,6 +275,8 @@ void runtime::cpu::CPU_ExternalFunction::compile()
return
;
}
m_emit_timing
=
m_timing
|
(
std
::
getenv
(
"NGRAPH_CPU_EMIT_TIMING"
)
!=
nullptr
);
m_mkldnn_emitter
.
reset
(
new
MKLDNNEmitter
());
ngraph
::
pass
::
Manager
pass_manager
;
...
...
@@ -353,6 +355,7 @@ using namespace ngraph::runtime;
{
writer
<<
"// Declare debug timers
\n
"
;
vector
<
string
>
names
;
size_t
index
=
0
;
for
(
shared_ptr
<
Function
>
current_function
:
pass_manager
.
get_state
().
get_functions
())
{
for
(
shared_ptr
<
Node
>
node
:
current_function
->
get_ordered_ops
())
...
...
@@ -360,59 +363,43 @@ using namespace ngraph::runtime;
if
(
!
node
->
is_parameter
()
&&
!
node
->
is_constant
())
{
names
.
push_back
(
node
->
get_name
());
m_name_index_map
.
insert
({
node
->
get_name
(),
index
++
});
}
}
}
for
(
const
string
&
s
:
names
)
{
writer
<<
"ngraph::stopwatch timer_"
<<
s
<<
";
\n
"
;
}
writer
<<
"ngraph::stopwatch timers["
<<
names
.
size
()
<<
"];
\n
"
;
writer
<<
"extern
\"
C
\"
size_t get_debug_timer_count() { return "
<<
names
.
size
()
<<
"; }
\n
"
;
writer
<<
"extern
\"
C
\"
const char* get_debug_timer_name(size_t index)
\n
"
;
writer
<<
"{
\n
"
;
writer
.
indent
++
;
writer
<<
"const char* rc;
\n
"
;
writer
<<
"switch(index)
\n
"
;
writer
<<
"static const char* timer_names["
<<
names
.
size
()
<<
"] =
\n
"
;
writer
<<
"{
\n
"
;
for
(
size_t
i
=
0
;
i
<
names
.
size
();
i
++
)
writer
.
indent
++
;
vector
<
string
>
quoted_names
;
for
(
const
string
&
name
:
names
)
{
writer
<<
"case "
<<
i
<<
": rc =
\"
"
<<
names
[
i
]
<<
"
\"
; break;
\n
"
;
quoted_names
.
push_back
(
"
\"
"
+
name
+
"
\"
"
)
;
}
writer
<<
"default: rc =
\"\"
;
\n
"
;
writer
<<
"}
\n
"
;
writer
<<
"return rc;
\n
"
;
writer
<<
emit_string_array
(
quoted_names
,
100
-
(
4
*
2
+
1
));
writer
<<
"
\n
};
\n
"
;
writer
.
indent
--
;
writer
<<
"return timer_names[index];
\n
"
;
writer
.
indent
--
;
writer
<<
"}
\n
"
;
writer
<<
"extern
\"
C
\"
const size_t get_debug_timer_microseconds(size_t index)
\n
"
;
writer
<<
"{
\n
"
;
writer
.
indent
++
;
writer
<<
"size_t rc;
\n
"
;
writer
<<
"switch(index)
\n
"
;
writer
<<
"{
\n
"
;
for
(
size_t
i
=
0
;
i
<
names
.
size
();
i
++
)
{
writer
<<
"case "
<<
i
<<
": rc = timer_"
<<
names
[
i
]
<<
".get_total_microseconds(); break;
\n
"
;
}
writer
<<
"default: rc = 0;
\n
"
;
writer
<<
"}
\n
"
;
writer
<<
"return rc;
\n
"
;
writer
<<
"return (index < "
<<
names
.
size
()
<<
" ? timers[index].get_total_microseconds() : 0);
\n
"
;
writer
.
indent
--
;
writer
<<
"}
\n
"
;
writer
<<
"extern
\"
C
\"
const size_t get_debug_timer_call_count(size_t index)
\n
"
;
writer
<<
"{
\n
"
;
writer
.
indent
++
;
writer
<<
"size_t rc;
\n
"
;
writer
<<
"switch(index)
\n
"
;
writer
<<
"{
\n
"
;
for
(
size_t
i
=
0
;
i
<
names
.
size
();
i
++
)
{
writer
<<
"case "
<<
i
<<
": rc = timer_"
<<
names
[
i
]
<<
".get_call_count(); break;
\n
"
;
}
writer
<<
"default: rc = 0;
\n
"
;
writer
<<
"}
\n
"
;
writer
<<
"return rc;
\n
"
;
writer
<<
"return (index < "
<<
names
.
size
()
<<
" ? timers[index].get_call_count() : 0);
\n
"
;
writer
.
indent
--
;
writer
<<
"}
\n
"
;
writer
<<
"
\n
"
;
...
...
@@ -426,15 +413,11 @@ using namespace ngraph::runtime;
const
ngraph
::
op
::
Constant
*
c
=
dynamic_cast
<
ngraph
::
op
::
Constant
*>
(
node
.
get
());
if
(
c
)
{
m_active_constants
.
push_back
(
node
);
shared_ptr
<
descriptor
::
TensorView
>
tv
=
node
->
get_outputs
()[
0
].
get_tensor_view
();
auto
c_value_strings
=
c
->
get_value_strings
();
writer
<<
"static "
<<
tv
->
get_tensor
().
get_element_type
().
c_type_string
()
<<
" "
<<
tv
->
get_tensor
().
get_name
()
<<
"["
<<
c_value_strings
.
size
()
<<
"] =
\n
"
;
writer
<<
"{
\n
"
;
writer
.
indent
++
;
writer
<<
emit_string_array
(
c_value_strings
,
100
-
writer
.
indent
*
4
);
writer
.
indent
--
;
writer
<<
"
\n
};
\n\n
"
;
string
type
=
tv
->
get_tensor
().
get_element_type
().
c_type_string
();
writer
<<
"static "
<<
type
<<
"* "
<<
tv
->
get_tensor
().
get_name
()
<<
" = (("
<<
type
<<
"*)("
<<
c
->
get_data_ptr
()
<<
"));
\n
"
;
m_variable_name_map
[
tv
->
get_tensor
().
get_name
()]
=
tv
->
get_tensor
().
get_name
();
}
}
...
...
@@ -685,10 +668,6 @@ using namespace ngraph::runtime;
<<
"(G, [&](const tbb::flow::continue_msg &msg)
\n
{
\n
"
;
writer
.
indent
++
;
}
if
(
m_emit_timing
)
{
emit_debug_function_entry
(
writer
,
node
.
get
(),
in
,
out
);
}
if
(
runtime
::
cpu
::
IsTracingEnabled
()
&&
current_function
->
get_name
()
==
m_function_name
)
{
...
...
@@ -696,14 +675,21 @@ using namespace ngraph::runtime;
}
}
writer
<<
"
\n
// "
<<
node
->
get_name
()
<<
"("
;
vector
<
string
>
parameter_nodes
=
node_input_names
;
parameter_nodes
.
insert
(
parameter_nodes
.
end
(),
node_output_names
.
begin
(),
node_output_names
.
end
());
writer
<<
join
(
parameter_nodes
);
writer
<<
")
\n
"
;
if
(
!
node
->
is_parameter
()
&&
!
node
->
is_constant
())
{
writer
<<
"
\n
// "
<<
node
->
get_name
()
<<
"("
;
vector
<
string
>
parameter_nodes
=
node_input_names
;
parameter_nodes
.
insert
(
parameter_nodes
.
end
(),
node_output_names
.
begin
(),
node_output_names
.
end
());
writer
<<
join
(
parameter_nodes
);
writer
<<
")
\n
"
;
}
// Emit operation body
if
(
!
node
->
is_parameter
()
&&
!
node
->
is_constant
())
{
emit_debug_function_entry
(
writer
,
node
.
get
(),
in
,
out
);
}
string
func_name
;
auto
it
=
match_functions
.
find
(
node
.
get
());
if
(
it
==
match_functions
.
end
())
...
...
@@ -728,10 +714,7 @@ using namespace ngraph::runtime;
// Emit operation epilogue
if
(
!
node
->
is_parameter
()
&&
!
node
->
is_constant
())
{
if
(
m_emit_timing
)
{
emit_debug_function_exit
(
writer
,
node
.
get
(),
in
,
out
);
}
emit_debug_function_exit
(
writer
,
node
.
get
(),
in
,
out
);
if
(
runtime
::
cpu
::
IsTracingEnabled
()
&&
current_function
->
get_name
()
==
m_function_name
)
{
...
...
@@ -892,7 +875,10 @@ void runtime::cpu::CPU_ExternalFunction::emit_debug_function_entry(
const
std
::
vector
<
TensorViewWrapper
>&
in
,
const
std
::
vector
<
TensorViewWrapper
>&
out
)
{
writer
<<
"timer_"
<<
node
->
get_name
()
<<
".start();
\n
"
;
if
(
m_emit_timing
)
{
writer
<<
"timers["
<<
m_name_index_map
[
node
->
get_name
()]
<<
"].start();
\n
"
;
}
}
void
runtime
::
cpu
::
CPU_ExternalFunction
::
emit_debug_function_exit
(
...
...
@@ -901,7 +887,10 @@ void runtime::cpu::CPU_ExternalFunction::emit_debug_function_exit(
const
std
::
vector
<
TensorViewWrapper
>&
in
,
const
std
::
vector
<
TensorViewWrapper
>&
out
)
{
writer
<<
"timer_"
<<
node
->
get_name
()
<<
".stop();
\n
"
;
if
(
m_emit_timing
)
{
writer
<<
"timers["
<<
m_name_index_map
[
node
->
get_name
()]
<<
"].stop();
\n
"
;
}
}
bool
runtime
::
cpu
::
CPU_ExternalFunction
::
is_functionally_identical
(
...
...
src/ngraph/runtime/cpu/cpu_external_function.hpp
View file @
a95fe1ff
...
...
@@ -17,6 +17,7 @@
#pragma once
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <typeindex>
...
...
@@ -119,6 +120,12 @@ namespace ngraph
bool
m_emit_timing
;
bool
m_use_tbb
;
std
::
unordered_map
<
std
::
string
,
std
::
string
>
m_variable_name_map
;
std
::
map
<
std
::
string
,
size_t
>
m_name_index_map
;
// Because we are directly accessing the constant data stored in the
// Constant ops we need to keep a list of shared_ptr to each Constant
// so they don't get freed before we are done with them
std
::
vector
<
std
::
shared_ptr
<
Node
>>
m_active_constants
;
LayoutDescriptorPtrs
parameter_layout_descriptors
;
LayoutDescriptorPtrs
result_layout_descriptors
;
...
...
src/ngraph/runtime/cpu/pass/cpu_nop_elimination.cpp
View file @
a95fe1ff
...
...
@@ -14,6 +14,7 @@
* limitations under the License.
*******************************************************************************/
#include <functional>
#include <memory>
#include <typeindex>
#include <typeinfo>
...
...
src/ngraph/runtime/external_function.hpp
View file @
a95fe1ff
...
...
@@ -19,6 +19,7 @@
#include <memory>
#include "ngraph/function.hpp"
#include "ngraph/log.hpp"
namespace
ngraph
{
...
...
@@ -34,6 +35,7 @@ namespace ngraph
:
m_function
(
function
)
,
m_release_function
(
release_function
)
,
m_is_compiled
(
false
)
,
m_timing
(
false
)
{
}
...
...
@@ -42,12 +44,13 @@ namespace ngraph
public
:
virtual
~
ExternalFunction
()
{}
virtual
std
::
shared_ptr
<
CallFrame
>
make_call_frame
()
=
0
;
void
set_emit_timing
(
bool
enable
)
{
m_timing
=
enable
;
}
const
std
::
shared_ptr
<
ngraph
::
Function
>
get_function
()
{
return
m_function
;
}
protected
:
std
::
shared_ptr
<
ngraph
::
Function
>
m_function
;
bool
m_release_function
;
bool
m_is_compiled
;
bool
m_timing
;
};
}
}
src/ngraph/runtime/manager.cpp
View file @
a95fe1ff
...
...
@@ -15,6 +15,7 @@
*******************************************************************************/
#include <dlfcn.h>
#include <functional>
#include <iostream>
#include <sstream>
#include <string>
...
...
src/ngraph/runtime/manager.hpp
View file @
a95fe1ff
...
...
@@ -16,6 +16,7 @@
#pragma once
#include <functional>
#include <map>
#include <memory>
#include <string>
...
...
src/tools/nbench/nbench.cpp
View file @
a95fe1ff
...
...
@@ -21,34 +21,42 @@
// sample models are under ../../test/models
#include <fstream>
#include <ngraph/file_util.hpp>
#include <ngraph/runtime/backend.hpp>
#include <ngraph/runtime/call_frame.hpp>
#include <ngraph/runtime/manager.hpp>
#include <ngraph/util.hpp>
#include "util/benchmark.hpp"
#include "util/test_tools.hpp"
using
namespace
std
;
using
namespace
ngraph
;
int
main
(
int
argc
,
char
**
argv
)
{
string
model
=
"model.json"
;
string
backend
=
"
INTERPRETER
"
;
int
iter
=
10
;
string
model
;
string
backend
=
"
CPU
"
;
int
iter
ations
=
10
;
bool
failed
=
false
;
bool
statistics
=
false
;
bool
timing_detail
=
false
;
for
(
size_t
i
=
1
;
i
<
argc
;
i
++
)
{
if
(
string
(
argv
[
i
])
==
"-f"
)
string
arg
=
argv
[
i
];
if
(
arg
==
"-f"
||
arg
==
"--file"
)
{
model
=
argv
[
++
i
];
}
else
if
(
string
(
argv
[
i
])
==
"-b
"
)
else
if
(
arg
==
"-b"
||
arg
==
"--backend
"
)
{
backend
=
argv
[
++
i
];
}
else
if
(
string
(
argv
[
i
])
==
"-i
"
)
else
if
(
arg
==
"-i"
||
arg
==
"--iterations
"
)
{
try
{
iter
=
stoi
(
argv
[
++
i
]);
iter
ations
=
stoi
(
argv
[
++
i
]);
}
catch
(...)
{
...
...
@@ -56,6 +64,19 @@ int main(int argc, char** argv)
failed
=
true
;
}
}
else
if
(
arg
==
"-s"
||
arg
==
"--statistics"
)
{
statistics
=
true
;
}
else
if
(
arg
==
"--timing_detail"
)
{
timing_detail
=
true
;
}
else
{
cout
<<
"Unknown option: "
<<
arg
<<
endl
;
failed
=
true
;
}
}
if
(
!
static_cast
<
bool
>
(
ifstream
(
model
)))
{
...
...
@@ -73,12 +94,58 @@ SYNOPSIS
nbench [-f <filename>] [-b <backend>] [-i <iterations>]
OPTIONS
-f model json file to use (default: model.json)
-b Backend to use (default: INTERPRETER)
-i Iterations (default: 10)
-f|--file Serialized model file
-b|--backend Backend to use (default: CPU)
-i|--iterations Iterations (default: 10)
-s|--statistics Display op stastics
--timing_detail Gather detailed timing
)###"
;
return
1
;
}
cout
<<
"Benchmarking "
<<
model
<<
", "
<<
backend
<<
" backend, "
<<
iter
<<
" iterations.
\n
"
;
run_benchmark
(
model
,
backend
,
iter
);
const
string
json_string
=
file_util
::
read_file_to_string
(
model
);
stringstream
ss
(
json_string
);
shared_ptr
<
Function
>
f
=
deserialize
(
ss
);
if
(
statistics
)
{
cout
<<
"statistics:"
<<
endl
;
cout
<<
"total nodes: "
<<
f
->
get_ops
().
size
()
<<
endl
;
size_t
total_constant_bytes
=
0
;
unordered_map
<
string
,
size_t
>
op_list
;
for
(
shared_ptr
<
Node
>
node
:
f
->
get_ordered_ops
())
{
string
name
=
node
->
get_name
();
string
op_name
=
name
.
substr
(
0
,
name
.
find
(
'_'
));
string
shape_name
=
"{"
+
join
(
node
->
get_outputs
()[
0
].
get_shape
())
+
"}"
;
op_list
[
op_name
+
shape_name
]
++
;
if
(
op_name
==
"Constant"
)
{
const
Shape
&
shape
=
node
->
get_outputs
()[
0
].
get_shape
();
size_t
const_size
=
node
->
get_outputs
()[
0
].
get_element_type
().
size
();
if
(
shape
.
size
()
==
0
)
{
total_constant_bytes
+=
const_size
;
}
else
{
total_constant_bytes
+=
(
const_size
*
shape_size
(
node
->
get_outputs
()[
0
].
get_shape
()));
}
}
}
cout
<<
"Total Constant size: "
<<
total_constant_bytes
<<
" bytes
\n
"
;
for
(
const
pair
<
string
,
size_t
>&
op_info
:
op_list
)
{
cout
<<
op_info
.
first
<<
": "
<<
op_info
.
second
<<
" ops"
<<
endl
;
}
}
else
if
(
iterations
>
0
)
{
cout
<<
"Benchmarking "
<<
model
<<
", "
<<
backend
<<
" backend, "
<<
iterations
<<
" iterations.
\n
"
;
run_benchmark
(
f
,
backend
,
iterations
,
timing_detail
);
}
return
0
;
}
test/backend_test.in.cpp
View file @
a95fe1ff
...
...
@@ -5370,7 +5370,6 @@ TEST(${BACKEND_NAME}, numeric_double_inf)
TEST
(
$
{
BACKEND_NAME
},
abc_tbb
)
{
SKIP_TEST_FOR
(
"GPU"
,
"${BACKEND_NAME}"
);
ONLY_ENABLE_TEST_FOR
(
"CPU"
,
"${BACKEND_NAME}"
);
// Force TBB flow graph generation in the CPU backend
...
...
test/cpu_fusion.cpp
View file @
a95fe1ff
...
...
@@ -109,14 +109,10 @@ TEST(cpu_fusion, gemm_cpu_broadcast_row)
auto
A
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
shapeA
);
auto
B
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
shapeB
);
auto
reshape_w
=
make_shared
<
op
::
Reshape
>
(
A
,
AxisVector
{
1
,
0
},
Shape
{
2
,
3
});
auto
reshape_x
=
make_shared
<
op
::
Reshape
>
(
B
,
AxisVector
{
1
,
0
},
Shape
{
3
,
2
});
auto
one
=
op
::
Constant
::
create
<
float
>
(
element
::
f32
,
Shape
{
2
},
std
::
vector
<
float
>
{
1.0
f
,
1.0
f
});
auto
bias
=
op
::
Constant
::
create
<
float
>
(
element
::
f32
,
Shape
{
2
},
std
::
vector
<
float
>
{
2.0
f
,
3.0
f
});
auto
broadcast
=
make_shared
<
op
::
Broadcast
>
(
one
,
shapeC
,
AxisSet
{
0
});
auto
cg
=
make_shared
<
op
::
MatmulBias
>
(
A
,
B
,
one
,
A
->
get_shape
(),
B
->
get_shape
(),
true
,
true
,
AxisSet
{
0
});
A
,
B
,
bias
,
A
->
get_shape
(),
B
->
get_shape
(),
true
,
true
,
AxisSet
{
0
});
auto
f
=
make_shared
<
Function
>
(
cg
,
op
::
ParameterVector
{
A
,
B
});
...
...
@@ -136,8 +132,8 @@ TEST(cpu_fusion, gemm_cpu_broadcast_row)
copy_data
(
b
,
dataB
);
cf
->
call
({
a
,
b
},
{
result
});
vector
<
float
>
expected
{
1
0
,
28
,
37
,
109
};
ASSERT_TRUE
(
read_vector
<
float
>
(
result
)
==
expected
);
vector
<
float
>
expected
{
1
1
,
30
,
38
,
111
};
EXPECT_EQ
(
read_vector
<
float
>
(
result
),
expected
);
}
TEST
(
cpu_fusion
,
gemm_cpu_broadcast_column
)
...
...
@@ -148,14 +144,10 @@ TEST(cpu_fusion, gemm_cpu_broadcast_column)
auto
A
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
shapeA
);
auto
B
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
shapeB
);
auto
reshape_w
=
make_shared
<
op
::
Reshape
>
(
A
,
AxisVector
{
1
,
0
},
Shape
{
2
,
3
});
auto
reshape_x
=
make_shared
<
op
::
Reshape
>
(
B
,
AxisVector
{
1
,
0
},
Shape
{
3
,
2
});
auto
one
=
op
::
Constant
::
create
<
float
>
(
element
::
f32
,
Shape
{
2
},
std
::
vector
<
float
>
{
1.0
f
,
1.0
f
});
auto
bias
=
op
::
Constant
::
create
<
float
>
(
element
::
f32
,
Shape
{
2
},
std
::
vector
<
float
>
{
2.0
f
,
3.0
f
});
auto
broadcast
=
make_shared
<
op
::
Broadcast
>
(
one
,
shapeC
,
AxisSet
{
1
});
auto
cg
=
make_shared
<
op
::
MatmulBias
>
(
A
,
B
,
one
,
A
->
get_shape
(),
B
->
get_shape
(),
true
,
true
,
AxisSet
{
1
});
A
,
B
,
bias
,
A
->
get_shape
(),
B
->
get_shape
(),
true
,
true
,
AxisSet
{
1
});
auto
f
=
make_shared
<
Function
>
(
cg
,
op
::
ParameterVector
{
A
,
B
});
...
...
@@ -175,8 +167,8 @@ TEST(cpu_fusion, gemm_cpu_broadcast_column)
copy_data
(
b
,
dataB
);
cf
->
call
({
a
,
b
},
{
result
});
vector
<
float
>
expected
{
1
0
,
28
,
37
,
109
};
ASSERT_TRUE
(
read_vector
<
float
>
(
result
)
==
expected
);
vector
<
float
>
expected
{
1
1
,
29
,
39
,
111
};
EXPECT_EQ
(
read_vector
<
float
>
(
result
),
expected
);
}
TEST
(
cpu_fusion
,
gemm_cpu_broadcast_matrix
)
...
...
test/util/benchmark.cpp
View file @
a95fe1ff
...
...
@@ -17,56 +17,127 @@
#include <iomanip>
#include "benchmark.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/external_function.hpp"
#include "ngraph/runtime/manager.hpp"
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
#include "random.hpp"
std
::
multimap
<
size_t
,
std
::
string
>
aggregate_timing
(
const
std
::
vector
<
ngraph
::
runtime
::
PerformanceCounter
>&
perf_data
)
using
namespace
std
;
using
namespace
ngraph
;
shared_ptr
<
Node
>
find_node
(
const
string
&
name
,
shared_ptr
<
Function
>
func
)
{
st
d
::
unordered_map
<
std
::
string
,
size_t
>
timing
;
for
(
const
ngraph
::
runtime
::
PerformanceCounter
&
p
:
perf_data
)
st
atic
unordered_map
<
string
,
shared_ptr
<
Node
>>
node_map
;
if
(
node_map
.
empty
()
)
{
std
::
string
op
=
p
.
name
().
substr
(
0
,
p
.
name
().
find
(
'_'
));
timing
[
op
]
+=
p
.
microseconds
();
vector
<
shared_ptr
<
Function
>>
fs
;
traverse_functions
(
func
,
[
&
](
shared_ptr
<
Function
>
f
)
{
fs
.
push_back
(
f
);
});
for
(
shared_ptr
<
Function
>
f
:
fs
)
{
for
(
shared_ptr
<
Node
>
node
:
f
->
get_ops
())
{
node_map
.
insert
({
node
->
get_name
(),
node
});
}
}
}
return
node_map
[
name
];
}
multimap
<
size_t
,
string
>
aggregate_timing_details
(
const
vector
<
runtime
::
PerformanceCounter
>&
perf_data
,
shared_ptr
<
Function
>
f
)
{
unordered_map
<
string
,
size_t
>
timing
;
for
(
const
runtime
::
PerformanceCounter
&
p
:
perf_data
)
{
shared_ptr
<
Node
>
node
=
find_node
(
p
.
name
(),
f
);
string
op
=
p
.
name
().
substr
(
0
,
p
.
name
().
find
(
'_'
));
string
shape_name
=
"{"
+
join
(
node
->
get_outputs
()[
0
].
get_shape
())
+
"}"
;
timing
[
op
+
shape_name
]
+=
p
.
microseconds
();
}
std
::
multimap
<
size_t
,
std
::
string
>
rc
;
for
(
const
std
::
pair
<
std
::
string
,
size_t
>&
t
:
timing
)
multimap
<
size_t
,
string
>
rc
;
for
(
const
pair
<
string
,
size_t
>&
t
:
timing
)
{
rc
.
insert
({
t
.
second
,
t
.
first
});
}
return
rc
;
}
void
run_benchmark
(
const
std
::
string
&
json_path
,
const
std
::
string
&
backend_name
,
size_t
iterations
)
multimap
<
size_t
,
string
>
aggregate_timing
(
const
vector
<
runtime
::
PerformanceCounter
>&
perf_data
)
{
using
namespace
std
;
using
namespace
ngraph
;
string
env_var_name
=
"NGRAPH_"
+
backend_name
+
"_EMIT_TIMING"
;
bool
emit_timing
=
(
std
::
getenv
(
env_var_name
.
c_str
())
!=
nullptr
);
if
(
!
emit_timing
)
unordered_map
<
string
,
size_t
>
timing
;
for
(
const
runtime
::
PerformanceCounter
&
p
:
perf_data
)
{
string
op
=
p
.
name
().
substr
(
0
,
p
.
name
().
find
(
'_'
));
timing
[
op
]
+=
p
.
microseconds
();
}
multimap
<
size_t
,
string
>
rc
;
for
(
const
pair
<
string
,
size_t
>&
t
:
timing
)
{
cout
<<
"To get per-op timing set the environment variable "
<<
env_var_name
<<
"
\n
"
;
rc
.
insert
({
t
.
second
,
t
.
first
})
;
}
return
rc
;
}
ngraph
::
test
::
Uniform
<
float
>
rng
{
-
1
,
1
,
0
};
void
run_benchmark
(
const
string
&
json_path
,
const
string
&
backend_name
,
size_t
iterations
,
bool
timing_detail
)
{
stopwatch
timer
;
timer
.
start
();
const
string
json_string
=
file_util
::
read_file_to_string
(
json_path
);
stringstream
ss
(
json_string
);
shared_ptr
<
Function
>
f
=
deserialize
(
ss
);
timer
.
stop
();
cout
<<
"deserialize time: "
<<
timer
.
get_milliseconds
()
<<
"ms"
<<
endl
;
run_benchmark
(
f
,
backend_name
,
iterations
,
timing_detail
);
}
stopwatch
build_time
;
build_time
.
start
();
void
print_times
(
const
multimap
<
size_t
,
string
>&
timing
)
{
// set the column widths
int
name_width
=
0
;
int
time_width
=
0
;
for
(
const
pair
<
size_t
,
string
>&
p
:
timing
)
{
name_width
=
max
(
name_width
,
static_cast
<
int
>
(
p
.
second
.
size
()));
stringstream
ss
;
ss
.
imbue
(
locale
(
""
));
ss
<<
p
.
first
;
time_width
=
max
(
time_width
,
static_cast
<
int
>
(
ss
.
str
().
size
()));
}
for
(
auto
it
=
timing
.
rbegin
();
it
!=
timing
.
rend
();
it
++
)
{
cout
<<
setw
(
name_width
+
2
)
<<
left
<<
it
->
second
<<
" "
<<
setw
(
time_width
+
2
)
<<
right
<<
it
->
first
<<
"us
\n
"
;
}
}
void
run_benchmark
(
shared_ptr
<
Function
>
f
,
const
string
&
backend_name
,
size_t
iterations
,
bool
timing_detail
)
{
test
::
Uniform
<
float
>
rng
{
-
1
,
1
,
0
};
stopwatch
timer
;
timer
.
start
();
auto
manager
=
runtime
::
Manager
::
get
(
backend_name
);
auto
external
=
manager
->
compile
(
f
);
external
->
set_emit_timing
(
timing_detail
);
auto
backend
=
manager
->
allocate_backend
();
auto
cf
=
backend
->
make_call_frame
(
external
);
build_time
.
stop
();
cout
<<
"build_time "
<<
build_time
.
get_milliseconds
()
<<
"ms"
<<
endl
;
timer
.
stop
();
cout
.
imbue
(
locale
(
""
));
cout
<<
"compile time: "
<<
timer
.
get_milliseconds
()
<<
"ms"
<<
endl
;
vector
<
shared_ptr
<
runtime
::
TensorView
>>
args
;
for
(
shared_ptr
<
op
::
Parameter
>
param
:
f
->
get_parameters
())
...
...
@@ -100,9 +171,11 @@ void run_benchmark(const std::string& json_path, const std::string& backend_name
return
p1
.
total_microseconds
()
>
p2
.
total_microseconds
();
});
multimap
<
size_t
,
string
>
timing
=
aggregate_timing
(
perf_data
);
for
(
auto
it
=
timing
.
rbegin
();
it
!=
timing
.
rend
();
it
++
)
{
cout
.
imbue
(
locale
(
""
));
cout
<<
setw
(
15
)
<<
left
<<
it
->
second
<<
" "
<<
setw
(
10
)
<<
right
<<
it
->
first
<<
"us
\n
"
;
}
multimap
<
size_t
,
string
>
timing_details
=
aggregate_timing_details
(
perf_data
,
f
);
cout
<<
"
\n
---- Aggregate times per op type ----
\n
"
;
print_times
(
timing
);
cout
<<
"
\n
---- Aggregate times per op type/shape ----
\n
"
;
print_times
(
timing_details
);
}
test/util/benchmark.hpp
View file @
a95fe1ff
...
...
@@ -18,13 +18,21 @@
#include <map>
#include <ngraph/function.hpp>
#include <ngraph/runtime/call_frame.hpp>
#include "test_tools.hpp"
/// performance test utilities
std
::
multimap
<
size_t
,
std
::
string
>
aggregate_timing
(
const
std
::
vector
<
ngraph
::
runtime
::
PerformanceCounter
>&
perf_data
);
void
run_benchmark
(
std
::
shared_ptr
<
ngraph
::
Function
>
f
,
const
std
::
string
&
backend_name
,
size_t
iterations
,
bool
timing_detail
);
void
run_benchmark
(
const
std
::
string
&
json_path
,
const
std
::
string
&
backend_name
,
size_t
iterations
);
size_t
iterations
,
bool
timing_detail
=
false
);
test/util/test_tools.hpp
View file @
a95fe1ff
...
...
@@ -36,11 +36,8 @@
#define ONLY_ENABLE_TEST_FOR(backend_to_enable, current_backend) \
if (backend_to_enable != current_backend) \
{ \
NGRAPH_INFO << "Skipped test for " << current_backend; \
return; \
} \
else \
{ \
NGRAPH_INFO << "Enabled test for " << current_backend; \
}
namespace
ngraph
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment