Commit d34fb157 (unverified)
Authored Jul 24, 2019 by Robert Kimball; committed by GitHub on Jul 24, 2019
Merge pull request #3284 from NervanaSystems/bob/nbench_db

Add nbench support for double buffering

Parents: a509de7b, 47626835

Showing 14 changed files with 893 additions and 114 deletions
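In outline: the change adds a Chrome-tracing event recorder (src/ngraph/runtime/chrome_trace.cpp/.hpp), instruments HostTensor reads/writes and the INTERPRETER executable with trace events, gives INTExecutable pipelined create_input_tensor/create_output_tensor overloads, and adds a --double_buffer mode to nbench backed by a new run_benchmark_pipelined driver.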
src/ngraph/CMakeLists.txt                             +2   -0
src/ngraph/runtime/chrome_trace.cpp                 +240   -0
src/ngraph/runtime/chrome_trace.hpp                 +144   -0
src/ngraph/runtime/host_tensor.cpp                    +4   -0
src/ngraph/runtime/interpreter/int_executable.cpp    +75   -0
src/ngraph/runtime/interpreter/int_executable.hpp    +12   -0
src/tools/nbench/CMakeLists.txt                       +2   -0
src/tools/nbench/benchmark.cpp                        +4 -108
src/tools/nbench/benchmark.hpp                        +0   -4
src/tools/nbench/benchmark_pipelined.cpp            +186   -0
src/tools/nbench/benchmark_pipelined.hpp             +33   -0
src/tools/nbench/benchmark_utils.cpp                +116   -0
src/tools/nbench/benchmark_utils.hpp                 +57   -0
src/tools/nbench/nbench.cpp                          +18   -2
src/ngraph/CMakeLists.txt

@@ -463,6 +463,8 @@ set (SRC
     runtime/backend.hpp
     runtime/backend_manager.cpp
     runtime/backend_manager.hpp
+    runtime/chrome_trace.cpp
+    runtime/chrome_trace.hpp
     runtime/executable.cpp
     runtime/executable.hpp
     runtime/host_tensor.cpp
src/ngraph/runtime/chrome_trace.cpp  (new file, 0 → 100644)

//*****************************************************************************
// Copyright 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include "chrome_trace.hpp"
#include "ngraph/log.hpp"
using namespace std;
using namespace ngraph;

static bool read_tracing_env_var()
{
    static const bool is_enabled = (getenv("NGRAPH_ENABLE_TRACING") != nullptr);
    return is_enabled;
}

mutex runtime::event::Manager::s_file_mutex;
bool runtime::event::Manager::s_tracing_enabled = read_tracing_env_var();

runtime::event::Duration::Duration(const string& name, const string& category, const string& args)
{
    if (Manager::is_tracing_enabled())
    {
        m_start = Manager::get_current_microseconds();
        m_stop = 0;
        m_name = name;
        m_category = category;
        m_args = args;
    }
}

void runtime::event::Duration::stop()
{
    if (Manager::is_tracing_enabled())
    {
        m_stop = Manager::get_current_microseconds();
    }
}

void runtime::event::Duration::write()
{
    if (Manager::is_tracing_enabled())
    {
        size_t stop_time = (m_stop != 0 ? m_stop : Manager::get_current_microseconds());
        lock_guard<mutex> lock(Manager::get_mutex());
        ofstream& out = runtime::event::Manager::get_output_stream();
        if (out.is_open() == false)
        {
            runtime::event::Manager::open();
        }
        else
        {
            Manager::get_output_stream() << ",\n";
        }
        Manager::get_output_stream()
            << R"({"name":")" << m_name << R"(","cat":")" << m_category
            << R"(","ph":"X","pid":)" << Manager::get_process_id() << R"(,"tid":)"
            << Manager::get_thread_id() << R"(,"ts":)" << m_start << R"(,"dur":)"
            << (stop_time - m_start);
        if (!m_args.empty())
        {
            out << R"(,"args":)" << m_args;
        }
        out << "}";
    }
}

runtime::event::Object::Object(const string& name, const string& args)
    : m_name{name}
    , m_id{static_cast<size_t>(chrono::high_resolution_clock::now().time_since_epoch().count())}
{
    if (Manager::is_tracing_enabled())
    {
        lock_guard<mutex> lock(Manager::get_mutex());
        ofstream& out = runtime::event::Manager::get_output_stream();
        if (out.is_open() == false)
        {
            runtime::event::Manager::open();
        }
        else
        {
            Manager::get_output_stream() << ",\n";
        }
        out << R"({"name":")" << m_name << R"(","ph":"N","id":")" << m_id << R"(","ts":)"
            << Manager::get_current_microseconds() << R"(,"pid":)" << Manager::get_process_id()
            << R"(,"tid":)" << Manager::get_thread_id();
        if (!args.empty())
        {
            out << R"(,"args":)" << args;
        }
        out << "}";
        write_snapshot(out, args);
    }
}

void runtime::event::Object::snapshot(const string& args)
{
    if (Manager::is_tracing_enabled())
    {
        lock_guard<mutex> lock(Manager::get_mutex());
        ofstream& out = runtime::event::Manager::get_output_stream();
        if (out.is_open() == false)
        {
            runtime::event::Manager::open();
        }
        else
        {
            Manager::get_output_stream() << ",\n";
        }
        write_snapshot(out, args);
    }
}

void runtime::event::Object::write_snapshot(ostream& out, const string& args)
{
    out << R"({"name":")" << m_name << R"(","ph":"O","id":")" << m_id << R"(","ts":)"
        << Manager::get_current_microseconds() << R"(,"pid":)" << Manager::get_process_id()
        << R"(,"tid":)" << Manager::get_thread_id();
    if (!args.empty())
    {
        out << R"(,"args":)" << args;
    }
    out << "}";
}

void runtime::event::Object::destroy()
{
    if (Manager::is_tracing_enabled())
    {
        lock_guard<mutex> lock(Manager::get_mutex());
        ofstream& out = runtime::event::Manager::get_output_stream();
        if (out.is_open() == false)
        {
            runtime::event::Manager::open();
        }
        else
        {
            Manager::get_output_stream() << ",\n";
        }
        out << R"({"name":")" << m_name << R"(","ph":"D","id":")" << m_id << R"(","ts":)"
            << Manager::get_current_microseconds() << R"(,"pid":)" << Manager::get_process_id()
            << R"(,"tid":)" << Manager::get_thread_id() << "}";
    }
}

void runtime::event::Manager::open(const string& path)
{
    ofstream& out = get_output_stream();
    if (out.is_open() == false)
    {
        out.open(path, ios_base::trunc);
        out << "[\n";
    }
}

void runtime::event::Manager::close()
{
    ofstream& out = get_output_stream();
    if (out.is_open())
    {
        out << "\n]\n";
        out.close();
    }
}

ofstream& runtime::event::Manager::get_output_stream()
{
    static ofstream s_event_log;
    return s_event_log;
}

const string& runtime::event::Manager::get_process_id()
{
    static const string s_pid = to_string(getpid());
    return s_pid;
}

void runtime::event::Manager::enable_event_tracing()
{
    s_tracing_enabled = true;
}

void runtime::event::Manager::disable_event_tracing()
{
    s_tracing_enabled = false;
}

bool runtime::event::Manager::is_event_tracing_enabled()
{
    return s_tracing_enabled;
}

string runtime::event::Manager::get_thread_id()
{
    thread::id tid = this_thread::get_id();
    static map<thread::id, string> tid_map;
    auto it = tid_map.find(tid);
    string rc;
    if (it == tid_map.end())
    {
        stringstream ss;
        ss << "\"" << tid << "\"";
        rc = ss.str();
        tid_map.insert({tid, rc});
    }
    else
    {
        rc = it->second;
    }
    return rc;
}
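For orientation, Duration::write() above emits a Chrome-trace "complete" event (ph "X"). With purely illustrative timestamps and IDs, a single record in the trace file would look like:

{"name":"write","cat":"HostTensor","ph":"X","pid":12345,"tid":"140123456","ts":1050,"dur":42}

Note that the pid is written bare while the tid is quoted, matching get_process_id() and get_thread_id() above.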
src/ngraph/runtime/chrome_trace.hpp  (new file, 0 → 100644)

//*****************************************************************************
// Copyright 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <chrono>
#include <fstream>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#ifdef _WIN32
#include <windows.h>
// windows.h must be before processthreadsapi.h so we need this comment
#include <processthreadsapi.h>
#define getpid() GetCurrentProcessId()
#else
#include <unistd.h>
#endif
namespace ngraph
{
    namespace runtime
    {
        namespace event
        {
            class Duration;
            class Object;
            class Manager;
        }
    }
}
//
// This class records timestamps for a given user defined event and
// produces output in the chrome tracing format that can be used to view
// the events of a running program
//
// Following is the format of a trace event
//
// {
// "name": "myName",
// "cat": "category,list",
// "ph": "B",
// "ts": 12345,
// "pid": 123,
// "tid": 456,
// "args": {
// "someArg": 1,
// "anotherArg": {
// "value": "my value"
// }
// }
// }
//
// The trace file format is defined here:
// https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
//
// The trace file can be viewed by Chrome browser using the
// URL: chrome://tracing/
//
// More information about this is at:
// http://dev.chromium.org/developers/how-tos/trace-event-profiling-tool
class ngraph::runtime::event::Manager
{
    friend class Duration;
    friend class Object;

public:
    static void open(const std::string& path = "runtime_event_trace.json");
    static void close();
    static bool is_tracing_enabled() { return s_tracing_enabled; }
    static void enable_event_tracing();
    static void disable_event_tracing();
    static bool is_event_tracing_enabled();

private:
    static std::ofstream& get_output_stream();
    static const std::string& get_process_id();
    static size_t get_current_microseconds()
    {
        return std::chrono::high_resolution_clock::now().time_since_epoch().count() / 1000;
    }
    static std::string get_thread_id();
    static std::mutex& get_mutex() { return s_file_mutex; }

    static std::ostream s_ostream;
    static std::mutex s_file_mutex;
    static bool s_tracing_enabled;
};
class ngraph::runtime::event::Duration
{
public:
    explicit Duration(const std::string& name,
                      const std::string& category,
                      const std::string& args = "");

    ~Duration() { write(); }

    /// \brief Stop the timer without writing the data to the log file. To write the data,
    ///     call the `write` method. Calls to stop() are optional.
    void stop();

    /// \brief Write the log data to the log file for this event.
    ///     This function has an implicit stop() if stop() has not been previously called.
    void write();

    Duration(const Duration&) = delete;
    Duration& operator=(Duration const&) = delete;

private:
    std::string to_json() const;

    size_t m_start;
    size_t m_stop;
    std::string m_name;
    std::string m_category;
    std::string m_args;
};
class ngraph::runtime::event::Object
{
public:
    Object(const std::string& name, const std::string& args);
    void snapshot(const std::string& args);
    void destroy();

private:
    void write_snapshot(std::ostream& out, const std::string& args);

    const std::string m_name;
    size_t m_id;
};
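To make the new API concrete, here is a minimal usage sketch (not part of the diff; do_work and main are illustrative). Duration is an RAII timer whose destructor writes the event, and Manager controls the trace file:

#include "ngraph/runtime/chrome_trace.hpp"

using namespace ngraph;

void do_work() // hypothetical function being profiled
{
    runtime::event::Duration timer("do_work", "Example");
    // ... the code being measured; ~Duration() writes the event ...
}

int main()
{
    // Tracing can also be enabled by setting NGRAPH_ENABLE_TRACING in the environment.
    runtime::event::Manager::enable_event_tracing();
    do_work();
    runtime::event::Manager::close(); // terminates the JSON array in runtime_event_trace.json
}

The resulting file loads directly into chrome://tracing, as the header comment above notes.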
src/ngraph/runtime/host_tensor.cpp

@@ -18,6 +18,7 @@
 #include <memory>
 #include "ngraph/descriptor/layout/dense_tensor_layout.hpp"
+#include "ngraph/runtime/chrome_trace.hpp"
 #include "ngraph/runtime/host_tensor.hpp"
 #include "ngraph/util.hpp"
@@ -96,6 +97,8 @@ const char* runtime::HostTensor::get_data_ptr() const
 void runtime::HostTensor::write(const void* source, size_t n)
 {
+    runtime::event::Duration d1("write", "HostTensor");
     if (n > m_buffer_size)
     {
         throw out_of_range("write access past end of tensor");
@@ -106,6 +109,7 @@ void runtime::HostTensor::write(const void* source, size_t n)
 void runtime::HostTensor::read(void* target, size_t n) const
 {
+    runtime::event::Duration d1("read", "HostTensor");
     if (n > m_buffer_size)
     {
         throw out_of_range("read access past end of tensor");
src/ngraph/runtime/interpreter/int_executable.cpp

@@ -30,6 +30,7 @@
 #include "ngraph/pass/manager.hpp"
 #include "ngraph/pass/memory_layout.hpp"
 #include "ngraph/runtime/backend_manager.hpp"
+#include "ngraph/runtime/chrome_trace.hpp"
 #include "ngraph/serializer.hpp"
 #include "ngraph/util.hpp"
@@ -74,6 +75,8 @@ runtime::interpreter::INTExecutable::INTExecutable(const std::string& model_stri
 bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
                                                const vector<shared_ptr<runtime::Tensor>>& inputs)
 {
+    runtime::event::Duration d1("call", "Interpreter");
     // convert inputs to HostTensor
     vector<shared_ptr<HostTensor>> func_inputs;
     for (auto tensor : inputs)
@@ -122,6 +125,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
     for (const NodeWrapper& wrapped : m_wrapped_nodes)
     {
         auto op = wrapped.get_node();
+        runtime::event::Duration d2(op->description(), "Interpreter");
         auto type_id = wrapped.get_typeid();
         if (type_id == OP_TYPEID::Parameter)
         {
@@ -304,3 +308,74 @@ void runtime::interpreter::INTExecutable::save(ostream& out)
     string model = serialize(m_function, 0);
     writer.write("model", model.data(), model.size());
 }
+
+shared_ptr<ngraph::op::Parameter>
+    runtime::interpreter::INTExecutable::get_parameter(size_t index) const
+{
+    const ParameterVector& parameters = get_parameters();
+    NGRAPH_CHECK(index < parameters.size(), "create_tensor for input out of bounds");
+    return parameters[index];
+}
+
+shared_ptr<ngraph::op::Result> runtime::interpreter::INTExecutable::get_result(size_t index) const
+{
+    const ResultVector& results = get_results();
+    NGRAPH_CHECK(index < results.size(), "create_tensor for input out of bounds");
+    return results[index];
+}
+
+shared_ptr<runtime::Tensor>
+    runtime::interpreter::INTExecutable::create_input_tensor(size_t input_index)
+{
+    shared_ptr<op::Parameter> parameter = get_parameter(input_index);
+    return make_shared<runtime::HostTensor>(parameter->get_element_type(), parameter->get_shape());
+}
+
+shared_ptr<runtime::Tensor>
+    runtime::interpreter::INTExecutable::create_output_tensor(size_t output_index)
+{
+    shared_ptr<op::Result> result = get_result(output_index);
+    return make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape());
+}
+
+vector<shared_ptr<runtime::Tensor>>
+    runtime::interpreter::INTExecutable::create_input_tensor(size_t input_index,
+                                                             size_t pipeline_depth)
+{
+    vector<shared_ptr<runtime::HostTensor>> tensors;
+    shared_ptr<op::Parameter> parameter = get_parameter(input_index);
+    for (size_t i = 0; i < pipeline_depth; i++)
+    {
+        shared_ptr<runtime::HostTensor> tensor;
+        auto t = make_shared<runtime::HostTensor>(parameter->get_element_type(),
+                                                  parameter->get_shape());
+        tensor = static_pointer_cast<runtime::HostTensor>(t);
+        tensors.push_back(tensor);
+    }
+    vector<shared_ptr<runtime::Tensor>> result_tensors;
+    for (const shared_ptr<runtime::HostTensor>& tensor : tensors)
+    {
+        result_tensors.push_back(tensor);
+    }
+    return result_tensors;
+}
+
+vector<shared_ptr<runtime::Tensor>>
+    runtime::interpreter::INTExecutable::create_output_tensor(size_t output_index,
+                                                              size_t pipeline_depth)
+{
+    vector<shared_ptr<runtime::HostTensor>> tensors;
+    shared_ptr<op::Result> result = get_result(output_index);
+    for (size_t i = 0; i < pipeline_depth; i++)
+    {
+        shared_ptr<runtime::HostTensor> tensor;
+        auto t = make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape());
+        tensor = static_pointer_cast<runtime::HostTensor>(t);
+        tensors.push_back(tensor);
+    }
+    vector<shared_ptr<runtime::Tensor>> result_tensors;
+    for (const shared_ptr<runtime::HostTensor>& tensor : tensors)
+    {
+        result_tensors.push_back(tensor);
+    }
+    return result_tensors;
+}
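The pipelined overloads above are what the double-buffered nbench path consumes. A condensed sketch of the calling pattern (the backend name and indices are illustrative, not part of the diff):

auto backend = runtime::Backend::create("INTERPRETER");
auto exec = backend->compile(f, false /* timing_detail */); // f: shared_ptr<Function>
// Two tensors for input 0: element [0] serves even iterations,
// element [1] odd ones (see benchmark_pipelined.cpp below).
vector<shared_ptr<runtime::Tensor>> staged = exec->create_input_tensor(0, 2);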
src/ngraph/runtime/interpreter/int_executable.hpp

@@ -186,9 +186,21 @@ public:
     std::vector<PerformanceCounter> get_performance_data() const override;
+
+    std::shared_ptr<runtime::Tensor> create_input_tensor(size_t input_index) override;
+
+    std::shared_ptr<runtime::Tensor> create_output_tensor(size_t output_index) override;
+
+    std::vector<std::shared_ptr<runtime::Tensor>>
+        create_input_tensor(size_t input_index, size_t pipeline_depth) override;
+
+    std::vector<std::shared_ptr<runtime::Tensor>>
+        create_output_tensor(size_t output_index, size_t pipeline_depth) override;

 private:
     INTExecutable(const std::string& model_string);
+    std::shared_ptr<ngraph::op::Parameter> get_parameter(size_t index) const;
+    std::shared_ptr<ngraph::op::Result> get_result(size_t index) const;
     int get_alignment() const { return 64; }
     bool m_is_compiled = false;
     bool m_nan_check_enabled = false;
src/tools/nbench/CMakeLists.txt

@@ -17,6 +17,8 @@
 set (SRC
     nbench.cpp
     benchmark.cpp
+    benchmark_pipelined.cpp
+    benchmark_utils.cpp
 )

 add_executable(nbench ${SRC})
src/tools/nbench/benchmark.cpp

@@ -14,12 +14,8 @@
 // limitations under the License.
 //*****************************************************************************

-#include <random>
-#if defined(__x86_64__) || defined(__amd64__)
-#include <xmmintrin.h>
-#endif
 #include "benchmark.hpp"
+#include "benchmark_utils.hpp"
 #include "ngraph/file_util.hpp"
 #include "ngraph/runtime/backend.hpp"
 #include "ngraph/runtime/host_tensor.hpp"
@@ -30,106 +26,6 @@
 using namespace std;
 using namespace ngraph;

-static default_random_engine s_random_engine;
-
-void set_denormals_flush_to_zero()
-{
-#if defined(__x86_64__) || defined(__amd64__)
-    // Avoids perf impact from denormals while benchmarking with random data
-    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
-    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
-#endif
-}
-
-template <typename T>
-void init_int_tv(shared_ptr<runtime::Tensor> tv, T min, T max)
-{
-    size_t size = tv->get_element_count();
-    uniform_int_distribution<T> dist(min, max);
-    vector<T> vec(size);
-    for (T& element : vec)
-    {
-        element = dist(s_random_engine);
-    }
-    tv->write(vec.data(), vec.size() * sizeof(T));
-}
-
-template <>
-void init_int_tv<char>(shared_ptr<runtime::Tensor> tv, char min, char max)
-{
-    size_t size = tv->get_element_count();
-    uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
-    vector<char> vec(size);
-    for (char& element : vec)
-    {
-        element = static_cast<char>(dist(s_random_engine));
-    }
-    tv->write(vec.data(), vec.size() * sizeof(char));
-}
-
-template <>
-void init_int_tv<int8_t>(shared_ptr<runtime::Tensor> tv, int8_t min, int8_t max)
-{
-    size_t size = tv->get_element_count();
-    uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
-    vector<int8_t> vec(size);
-    for (int8_t& element : vec)
-    {
-        element = static_cast<int8_t>(dist(s_random_engine));
-    }
-    tv->write(vec.data(), vec.size() * sizeof(int8_t));
-}
-
-template <>
-void init_int_tv<uint8_t>(shared_ptr<runtime::Tensor> tv, uint8_t min, uint8_t max)
-{
-    size_t size = tv->get_element_count();
-    uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
-    vector<uint8_t> vec(size);
-    for (uint8_t& element : vec)
-    {
-        element = static_cast<uint8_t>(dist(s_random_engine));
-    }
-    tv->write(vec.data(), vec.size() * sizeof(uint8_t));
-}
-
-template <typename T>
-void init_real_tv(shared_ptr<runtime::Tensor> tv, T min, T max)
-{
-    size_t size = tv->get_element_count();
-    uniform_real_distribution<T> dist(min, max);
-    vector<T> vec(size);
-    for (T& element : vec)
-    {
-        element = dist(s_random_engine);
-    }
-    tv->write(vec.data(), vec.size() * sizeof(T));
-}
-
-static void random_init(shared_ptr<runtime::Tensor> tv)
-{
-    element::Type et = tv->get_element_type();
-    switch (et.get_type_enum())
-    {
-    case element::Type_t::boolean: init_int_tv<char>(tv, 0, 1); break;
-    case element::Type_t::f32: init_real_tv<float>(tv, -1, 1); break;
-    case element::Type_t::f64: init_real_tv<double>(tv, -1, 1); break;
-    case element::Type_t::i8: init_int_tv<int8_t>(tv, -1, 1); break;
-    case element::Type_t::i16: init_int_tv<int16_t>(tv, -1, 1); break;
-    case element::Type_t::i32: init_int_tv<int32_t>(tv, 0, 1); break;
-    case element::Type_t::i64: init_int_tv<int64_t>(tv, 0, 1); break;
-    case element::Type_t::u8: init_int_tv<uint8_t>(tv, 0, 1); break;
-    case element::Type_t::u16: init_int_tv<uint16_t>(tv, 0, 1); break;
-    case element::Type_t::u32: init_int_tv<uint32_t>(tv, 0, 1); break;
-    case element::Type_t::u64: init_int_tv<uint64_t>(tv, 0, 1); break;
-    case element::Type_t::undefined:
-    case element::Type_t::dynamic:
-    case element::Type_t::bf16:
-    case element::Type_t::f16:
-    default: throw runtime_error("unsupported type");
-    }
-}

 vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
                                                   const string& backend_name,
                                                   size_t iterations,
@@ -140,7 +36,7 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
     stopwatch timer;
     timer.start();
     auto backend = runtime::Backend::create(backend_name);
-    auto compiled_func = backend->compile(f, timing_detail);
+    auto exec = backend->compile(f, timing_detail);
     timer.stop();
     cout.imbue(locale(""));
     cout << "compile time: " << timer.get_milliseconds() << "ms" << endl;
@@ -201,7 +97,7 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
             }
         }
     }
-    compiled_func->call(results, args);
+    exec->call(results, args);
     if (copy_data)
     {
         for (size_t result_index = 0; result_index < results.size(); result_index++)
@@ -217,6 +113,6 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
     float time = t1.get_milliseconds();
     cout << time / iterations << "ms per iteration" << endl;
-    vector<runtime::PerformanceCounter> perf_data = compiled_func->get_performance_data();
+    vector<runtime::PerformanceCounter> perf_data = exec->get_performance_data();
     return perf_data;
 }
src/tools/nbench/benchmark.hpp

@@ -24,10 +24,6 @@
 #include "ngraph/function.hpp"
 #include "ngraph/runtime/performance_counter.hpp"

 /// performance test utilities
 std::multimap<size_t, std::string>
     aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data);

 std::vector<ngraph::runtime::PerformanceCounter>
     run_benchmark(std::shared_ptr<ngraph::Function> f,
                   const std::string& backend_name,
                   size_t iterations,
src/tools/nbench/benchmark_pipelined.cpp  (new file, 0 → 100644)

//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <array>
#include <condition_variable>
#include <mutex>
#include <thread>
#include "benchmark.hpp"
#include "benchmark_utils.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;

class TensorCollection
{
public:
    vector<shared_ptr<runtime::HostTensor>> parameter_data;
    vector<shared_ptr<runtime::HostTensor>> result_data;
    vector<shared_ptr<runtime::Tensor>> input_tensors;
    vector<shared_ptr<runtime::Tensor>> output_tensors;

private:
};

static mutex s_mutex;
static condition_variable s_condition;
static size_t current_iteration = 0;
static size_t s_iterations;
static size_t s_warmup_iterations;
static stopwatch s_timer;

static void thread_entry(runtime::Executable* exec,
                         const TensorCollection& tensors,
                         size_t pipeline_stage)
{
    bool data_written = false;
    const vector<shared_ptr<runtime::Tensor>>& args = tensors.input_tensors;
    const vector<shared_ptr<runtime::Tensor>>& results = tensors.output_tensors;
    while (current_iteration < s_iterations + s_warmup_iterations)
    {
        if (!data_written)
        {
            for (size_t arg_index = 0; arg_index < args.size(); arg_index++)
            {
                const shared_ptr<runtime::Tensor>& arg = args[arg_index];
                if (arg->get_stale())
                {
                    const shared_ptr<runtime::HostTensor>& data =
                        tensors.parameter_data[arg_index];
                    arg->write(data->get_data_ptr(),
                               data->get_element_count() * data->get_element_type().size());
                }
            }
            data_written = true;
        }
        unique_lock<mutex> lock(s_mutex);
        if ((current_iteration & 1) != pipeline_stage)
        {
            s_condition.wait(lock);
        }
        else
        {
            if (current_iteration == s_warmup_iterations)
            {
                s_timer.start();
            }
            // our turn to run
            exec->call(results, args);
            current_iteration++;
            data_written = false;
            s_condition.notify_all();
            lock.unlock();
            for (size_t result_index = 0; result_index < results.size(); result_index++)
            {
                const shared_ptr<runtime::HostTensor>& data = tensors.result_data[result_index];
                const shared_ptr<runtime::Tensor>& result = results[result_index];
                result->read(data->get_data_ptr(),
                             data->get_element_count() * data->get_element_type().size());
            }
        }
    }
}
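// How the two stages interleave: each thread owns one TensorCollection and only
// calls exec->call() when (current_iteration & 1) matches its pipeline_stage.
// While one stage executes, the other refills its stale inputs and drains its
// outputs, overlapping host-side copies with execution -- the double buffering
// that the new --double_buffer flag exercises.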
vector<runtime::PerformanceCounter> run_benchmark_pipelined(shared_ptr<Function> f,
                                                            const string& backend_name,
                                                            size_t iterations,
                                                            bool timing_detail,
                                                            int warmup_iterations,
                                                            bool copy_data)
{
    constexpr size_t pipeline_depth = 2;
    s_iterations = iterations;
    s_warmup_iterations = warmup_iterations;
    array<TensorCollection, pipeline_depth> tensor_collections;
    stopwatch timer;
    timer.start();
    auto backend = runtime::Backend::create(backend_name);
    auto exec = backend->compile(f, timing_detail);
    timer.stop();
    cout.imbue(locale(""));
    cout << "compile time: " << timer.get_milliseconds() << "ms" << endl;

    set_denormals_flush_to_zero();

    // Create random input data for all input tensors
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        for (shared_ptr<op::Parameter> param : f->get_parameters())
        {
            auto tensor_data =
                make_shared<runtime::HostTensor>(param->get_element_type(), param->get_shape());
            random_init(tensor_data);
            tensor_collections[i].parameter_data.push_back(tensor_data);
        }
    }

    // Create output tensors for all outputs
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        for (shared_ptr<Node> result : f->get_results())
        {
            auto tensor_data =
                make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape());
            tensor_collections[i].result_data.push_back(tensor_data);
        }
    }

    // Create input tensors for all Parameters
    array<vector<shared_ptr<runtime::Tensor>>, pipeline_depth> input_tensors_array;
    size_t input_index = 0;
    for (shared_ptr<op::Parameter> param : f->get_parameters())
    {
        auto input_tensors = exec->create_input_tensor(input_index++, pipeline_depth);
        for (size_t i = 0; i < pipeline_depth; i++)
        {
            tensor_collections[i].input_tensors.push_back(input_tensors[i]);
        }
    }

    // Create output tensors for all Results
    array<vector<shared_ptr<runtime::Tensor>>, pipeline_depth> output_tensors_array;
    size_t output_index = 0;
    for (shared_ptr<Node> result : f->get_results())
    {
        auto output_tensors = exec->create_output_tensor(output_index++, pipeline_depth);
        for (size_t i = 0; i < pipeline_depth; i++)
        {
            tensor_collections[i].output_tensors.push_back(output_tensors[i]);
        }
    }

    thread threads[pipeline_depth];
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        threads[i] = thread(thread_entry, exec.get(), tensor_collections[i], i);
    }

    for (size_t i = 0; i < pipeline_depth; i++)
    {
        threads[i].join();
    }
    s_timer.stop();

    float time = s_timer.get_milliseconds();
    cout << time / iterations << "ms per iteration" << endl;

    vector<runtime::PerformanceCounter> perf_data = exec->get_performance_data();
    return perf_data;
}
src/tools/nbench/benchmark_pipelined.hpp  (new file, 0 → 100644)

//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "ngraph/function.hpp"
#include "ngraph/runtime/performance_counter.hpp"
std::vector<ngraph::runtime::PerformanceCounter>
    run_benchmark_pipelined(std::shared_ptr<ngraph::Function> f,
                            const std::string& backend_name,
                            size_t iterations,
                            bool timing_detail,
                            int warmup_iterations,
                            bool copy_data);
src/tools/nbench/benchmark_utils.cpp  (new file, 0 → 100644)

//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#if defined(__x86_64__) || defined(__amd64__)
#include <xmmintrin.h>
#endif
#include "benchmark_utils.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;

template <>
void init_int_tensor<char>(shared_ptr<runtime::Tensor> tensor, char min, char max)
{
    size_t size = tensor->get_element_count();
    uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
    vector<char> vec(size);
    for (char& element : vec)
    {
        element = static_cast<char>(dist(get_random_engine()));
    }
    tensor->write(vec.data(), vec.size() * sizeof(char));
}

template <>
void init_int_tensor<int8_t>(shared_ptr<runtime::Tensor> tensor, int8_t min, int8_t max)
{
    size_t size = tensor->get_element_count();
    uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
    vector<int8_t> vec(size);
    for (int8_t& element : vec)
    {
        element = static_cast<int8_t>(dist(get_random_engine()));
    }
    tensor->write(vec.data(), vec.size() * sizeof(int8_t));
}

template <>
void init_int_tensor<uint8_t>(shared_ptr<runtime::Tensor> tensor, uint8_t min, uint8_t max)
{
    size_t size = tensor->get_element_count();
    uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
    vector<uint8_t> vec(size);
    for (uint8_t& element : vec)
    {
        element = static_cast<uint8_t>(dist(get_random_engine()));
    }
    tensor->write(vec.data(), vec.size() * sizeof(uint8_t));
}

void set_denormals_flush_to_zero()
{
#if defined(__x86_64__) || defined(__amd64__)
    // Avoids perf impact from denormals while benchmarking with random data
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#endif
}

void random_init(shared_ptr<runtime::Tensor> tensor)
{
    element::Type et = tensor->get_element_type();
#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
#endif
    switch (et.get_type_enum())
    {
    case element::Type_t::boolean: init_int_tensor<char>(tensor, 0, 1); break;
    case element::Type_t::f32: init_real_tensor<float>(tensor, -1, 1); break;
    case element::Type_t::f64: init_real_tensor<double>(tensor, -1, 1); break;
    case element::Type_t::i8: init_int_tensor<int8_t>(tensor, -1, 1); break;
    case element::Type_t::i16: init_int_tensor<int16_t>(tensor, -1, 1); break;
    case element::Type_t::i32: init_int_tensor<int32_t>(tensor, 0, 1); break;
    case element::Type_t::i64: init_int_tensor<int64_t>(tensor, 0, 1); break;
    case element::Type_t::u8: init_int_tensor<uint8_t>(tensor, 0, 1); break;
    case element::Type_t::u16: init_int_tensor<uint16_t>(tensor, 0, 1); break;
    case element::Type_t::u32: init_int_tensor<uint32_t>(tensor, 0, 1); break;
    case element::Type_t::u64: init_int_tensor<uint64_t>(tensor, 0, 1); break;
    case element::Type_t::undefined:
    case element::Type_t::dynamic:
    case element::Type_t::bf16:
    case element::Type_t::f16:
    default: throw runtime_error("unsupported type");
    }
#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
#pragma GCC diagnostic pop
#endif
}

default_random_engine& get_random_engine()
{
    static std::default_random_engine s_random_engine;
    return s_random_engine;
}
src/tools/nbench/benchmark_utils.hpp  (new file, 0 → 100644)

//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <random>
#include "benchmark.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
void set_denormals_flush_to_zero();

void random_init(std::shared_ptr<ngraph::runtime::Tensor> tensor);

std::default_random_engine& get_random_engine();

template <typename T>
void init_int_tensor(std::shared_ptr<ngraph::runtime::Tensor> tensor, T min, T max)
{
    size_t size = tensor->get_element_count();
    std::uniform_int_distribution<T> dist(min, max);
    std::vector<T> vec(size);
    for (T& element : vec)
    {
        element = dist(get_random_engine());
    }
    tensor->write(vec.data(), vec.size() * sizeof(T));
}

template <typename T>
void init_real_tensor(std::shared_ptr<ngraph::runtime::Tensor> tensor, T min, T max)
{
    size_t size = tensor->get_element_count();
    std::uniform_real_distribution<T> dist(min, max);
    std::vector<T> vec(size);
    for (T& element : vec)
    {
        element = dist(get_random_engine());
    }
    tensor->write(vec.data(), vec.size() * sizeof(T));
}
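As a quick illustration of these helpers (the shape and types here are chosen arbitrarily and are not part of the diff):

#include "benchmark_utils.hpp"
#include "ngraph/runtime/host_tensor.hpp"

int main()
{
    using namespace ngraph;
    // random_init dispatches on the tensor's element type.
    auto real_tensor = std::make_shared<runtime::HostTensor>(element::f32, Shape{2, 3});
    random_init(real_tensor);
    // Or pick an explicit integer range for a specific element type.
    auto int_tensor = std::make_shared<runtime::HostTensor>(element::i32, Shape{4});
    init_int_tensor<int32_t>(int_tensor, 0, 9);
}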
src/tools/nbench/nbench.cpp

@@ -24,6 +24,7 @@
 #include <iomanip>
 #include "benchmark.hpp"
+#include "benchmark_pipelined.hpp"
 #include "ngraph/distributed.hpp"
 #include "ngraph/except.hpp"
 #include "ngraph/file_util.hpp"
@@ -181,6 +182,7 @@ int main(int argc, char** argv)
     int warmup_iterations = 1;
     bool copy_data = true;
     bool dot_file = false;
+    bool double_buffer = false;
     for (size_t i = 1; i < argc; i++)
     {
@@ -229,6 +231,10 @@ int main(int argc, char** argv)
         {
             directory = argv[++i];
         }
+        else if (arg == "--double_buffer")
+        {
+            double_buffer = true;
+        }
         else if (arg == "-w" || arg == "--warmup_iterations")
         {
             try
@@ -283,6 +289,7 @@ OPTIONS
        -w|--warmup_iterations   Number of warm-up iterations
        --no_copy_data           Disable copy of input/result data every iteration
        --dot                    Generate Graphviz dot file
+       --double_buffer          Double buffer inputs and outputs
)###";
    return 1;
}
@@ -420,8 +427,17 @@ OPTIONS
     {
         cout << "\n---- Benchmark ----\n";
         shared_ptr<Function> f = deserialize(model);
-        auto perf_data =
-            run_benchmark(f, backend, iterations, timing_detail, warmup_iterations, copy_data);
+        vector<runtime::PerformanceCounter> perf_data;
+        if (double_buffer)
+        {
+            perf_data = run_benchmark_pipelined(
+                f, backend, iterations, timing_detail, warmup_iterations, copy_data);
+        }
+        else
+        {
+            perf_data =
+                run_benchmark(f, backend, iterations, timing_detail, warmup_iterations, copy_data);
+        }
         auto perf_shape = to_perf_shape(f, perf_data);
         aggregate_perf_data.insert(
             aggregate_perf_data.end(), perf_shape.begin(), perf_shape.end());
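With these pieces in place, an nbench invocation that passes --double_buffer routes through run_benchmark_pipelined, while all other runs keep the original run_benchmark path. A hypothetical invocation might look like `nbench -f model.json -b INTERPRETER -i 100 --double_buffer`, where only --double_buffer (and -w above) are confirmed by this diff; the model/backend/iteration flags are assumed from nbench's usual conventions.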