Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
da7a15f8
Commit
da7a15f8
authored
Jul 16, 2019
by
Robert Kimball
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
separate benchmark and pipelined benchmark
parent
8b768fee
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
355 additions
and
13 deletions
+355
-13
CMakeLists.txt
src/tools/nbench/CMakeLists.txt
+2
-0
benchmark.cpp
src/tools/nbench/benchmark.cpp
+0
-0
benchmark.hpp
src/tools/nbench/benchmark.hpp
+0
-12
benchmark_pipelined.cpp
src/tools/nbench/benchmark_pipelined.cpp
+142
-0
benchmark_pipelined.hpp
src/tools/nbench/benchmark_pipelined.hpp
+33
-0
benchmark_utils.cpp
src/tools/nbench/benchmark_utils.cpp
+116
-0
benchmark_utils.hpp
src/tools/nbench/benchmark_utils.hpp
+60
-0
nbench.cpp
src/tools/nbench/nbench.cpp
+2
-1
No files found.
src/tools/nbench/CMakeLists.txt
View file @
da7a15f8
...
@@ -17,6 +17,8 @@
...
@@ -17,6 +17,8 @@
set
(
SRC
set
(
SRC
nbench.cpp
nbench.cpp
benchmark.cpp
benchmark.cpp
benchmark_pipelined.cpp
benchmark_utils.cpp
)
)
add_executable
(
nbench
${
SRC
}
)
add_executable
(
nbench
${
SRC
}
)
...
...
src/tools/nbench/benchmark.cpp
View file @
da7a15f8
This diff is collapsed.
Click to expand it.
src/tools/nbench/benchmark.hpp
View file @
da7a15f8
...
@@ -24,21 +24,9 @@
...
@@ -24,21 +24,9 @@
#include "ngraph/function.hpp"
#include "ngraph/function.hpp"
#include "ngraph/runtime/performance_counter.hpp"
#include "ngraph/runtime/performance_counter.hpp"
/// performance test utilities
std
::
multimap
<
size_t
,
std
::
string
>
aggregate_timing
(
const
std
::
vector
<
ngraph
::
runtime
::
PerformanceCounter
>&
perf_data
);
std
::
vector
<
ngraph
::
runtime
::
PerformanceCounter
>
run_benchmark
(
std
::
shared_ptr
<
ngraph
::
Function
>
f
,
std
::
vector
<
ngraph
::
runtime
::
PerformanceCounter
>
run_benchmark
(
std
::
shared_ptr
<
ngraph
::
Function
>
f
,
const
std
::
string
&
backend_name
,
const
std
::
string
&
backend_name
,
size_t
iterations
,
size_t
iterations
,
bool
timing_detail
,
bool
timing_detail
,
int
warmup_iterations
,
int
warmup_iterations
,
bool
copy_data
);
bool
copy_data
);
std
::
vector
<
ngraph
::
runtime
::
PerformanceCounter
>
run_benchmark_double_buffered
(
std
::
shared_ptr
<
ngraph
::
Function
>
f
,
const
std
::
string
&
backend_name
,
size_t
iterations
,
bool
timing_detail
,
int
warmup_iterations
,
bool
copy_data
);
src/tools/nbench/benchmark_pipelined.cpp
0 → 100644
View file @
da7a15f8
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "benchmark.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
#include "benchmark_utils.hpp"
using
namespace
std
;
using
namespace
ngraph
;
/// \brief Compiles \p f on the requested backend and prepares the host data and
///        backend input tensors needed for a pipelined (multi-buffered) benchmark run.
///
/// NOTE(review): this function is still work in progress. The live code compiles the
/// function, prints the compile time, creates random host data and allocates the
/// pipelined input tensors, but it never executes the graph. The timed loop only
/// exists as the disabled prototype kept in the comments below, so \p iterations,
/// \p warmup_iterations and \p copy_data are currently unused.
///
/// \param f                 Function to compile.
/// \param backend_name      Name handed to runtime::Backend::create.
/// \param iterations        Unused by the live code (see note above).
/// \param timing_detail     Forwarded as the second argument of Backend::compile.
/// \param warmup_iterations Unused by the live code (see note above).
/// \param copy_data         Unused by the live code (see note above).
/// \return Whatever exec->get_performance_data() reports for the compiled executable.
vector<runtime::PerformanceCounter> run_benchmark_pipelined(shared_ptr<Function> f,
                                                            const string& backend_name,
                                                            size_t iterations,
                                                            bool timing_detail,
                                                            int warmup_iterations,
                                                            bool copy_data)
{
    constexpr size_t pipeline_depth = 2;

    // Time the backend creation + compilation step and report it.
    stopwatch timer;
    timer.start();
    auto backend = runtime::Backend::create(backend_name);
    auto exec = backend->compile(f, timing_detail);
    timer.stop();
    cout.imbue(locale(""));
    cout << "compile time: " << timer.get_milliseconds() << "ms" << endl;
    set_denormals_flush_to_zero();

    // Create random input data for all input tensors, one set per pipeline stage.
    // The vectors are filled in place so no per-stage temporary has to be copied.
    array<vector<shared_ptr<runtime::HostTensor>>, pipeline_depth> parameters_data_set;
    // Only referenced by the disabled prototype below; kept so it can be re-enabled.
    array<vector<shared_ptr<runtime::HostTensor>>, pipeline_depth> results_data_set;
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        // Bind by const reference: copying the shared_ptr would only add
        // an atomic ref-count increment/decrement per parameter.
        for (const shared_ptr<op::Parameter>& param : f->get_parameters())
        {
            auto tensor_data =
                make_shared<runtime::HostTensor>(param->get_element_type(), param->get_shape());
            random_init(tensor_data);
            parameters_data_set[i].push_back(tensor_data);
        }
    }

    // Create input tensors for all Parameters. Only the parameter index is needed
    // here, so iterate by index instead of over the (unused) Parameter nodes.
    array<vector<shared_ptr<runtime::Tensor>>, pipeline_depth> input_tensors_array;
    const size_t parameter_count = f->get_parameters().size();
    for (size_t input_index = 0; input_index < parameter_count; input_index++)
    {
        auto input_tensors = exec->create_input_tensor(input_index, pipeline_depth);
        for (size_t i = 0; i < pipeline_depth; i++)
        {
            input_tensors_array[i].push_back(input_tensors[i]);
        }
    }

    // // Create output tensors for all Results
    // array<vector<shared_ptr<runtime::Tensor>>, pipeline_depth> output_tensors_array;
    // for (shared_ptr<Node> out : f->get_results())
    // {
    //     auto output_tensors = backend->create_tensor(out->get_element_type(), out->get_shape());
    //     output_tensors_array[i] = output_tensors;
    // }

    // Only referenced by the disabled prototype below; kept so it can be re-enabled.
    stopwatch t1;

    // // Before we start we write the first iteration's data
    // size_t buffer_number = 0;
    // auto args = input_tensors_array[buffer_number];
    // auto args_data = parameters_data_set[buffer_number];
    // for (size_t arg_index = 0; arg_index < args.size(); arg_index++)
    // {
    //     const shared_ptr<runtime::Tensor>& arg = args[arg_index];
    //     const shared_ptr<runtime::HostTensor>& data = args_data[arg_index];
    //     arg->begin_write(data->get_data_ptr(),
    //                      data->get_element_count() * data->get_element_type().size(),
    //                      buffer_number);
    // }

    // const vector<shared_ptr<runtime::Tensor>>& results = output_tensors[buffer_number];
    // const vector<shared_ptr<runtime::HostTensor>>& results_data = results_data_set[buffer_number];
    // for (size_t i = 0; i < iterations + warmup_iterations; i++)
    // {
    //     if (i == warmup_iterations)
    //     {
    //         t1.start();
    //     }
    //     future<void> exec_future = exec->begin_execute(results, args);
    //     if (i > 0)
    //     {
    //         for (size_t result_index = 0; result_index < results.size(); result_index++)
    //         {
    //             const shared_ptr<runtime::HostTensor>& data = results_data[result_index];
    //             const shared_ptr<runtime::Tensor>& result = results[result_index];
    //             result->begin_read(data->get_data_ptr(),
    //                                data->get_element_count() * data->get_element_type().size(),
    //                                (buffer_number - 1) & 1);
    //         }
    //     }
    //     buffer_number = (buffer_number + 1) & 1;
    //     for (size_t arg_index = 0; arg_index < args.size(); arg_index++)
    //     {
    //         const shared_ptr<runtime::Tensor>& arg = args[arg_index];
    //         const shared_ptr<runtime::HostTensor>& data = args_data[arg_index];
    //         arg->begin_write(data->get_data_ptr(),
    //                          data->get_element_count() * data->get_element_type().size(),
    //                          buffer_number);
    //     }
    //     exec_future.get();
    // }
    // for (size_t result_index = 0; result_index < results.size(); result_index++)
    // {
    //     const shared_ptr<runtime::HostTensor>& data = results_data[result_index];
    //     const shared_ptr<runtime::Tensor>& result = results[result_index];
    //     result->begin_read(data->get_data_ptr(),
    //                        data->get_element_count() * data->get_element_type().size(),
    //                        (buffer_number - 1) & 1);
    // }
    // t1.stop();
    // float time = t1.get_milliseconds();
    // cout << time / iterations << "ms per iteration" << endl;

    vector<runtime::PerformanceCounter> perf_data = exec->get_performance_data();
    return perf_data;
}
src/tools/nbench/benchmark_pipelined.hpp
0 → 100644
View file @
da7a15f8
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "ngraph/function.hpp"
#include "ngraph/runtime/performance_counter.hpp"
/// \brief Entry point for the pipelined (multi-buffered) nbench benchmark.
///
/// \param f                 Function to compile and benchmark.
/// \param backend_name      Name of the backend to create.
/// \param iterations        Requested number of timed iterations.
/// \param timing_detail     Passed to the backend's compile call.
/// \param warmup_iterations Requested number of untimed warm-up iterations.
/// \param copy_data         NOTE(review): presumably controls whether input data is
///                          copied on every iteration; confirm against the
///                          implementation.
/// \return Performance counters reported by the compiled executable.
///
/// NOTE(review): in the implementation added with this declaration the timed loop is
/// still commented out, so iterations, warmup_iterations and copy_data do not yet
/// influence the result.
std::vector<ngraph::runtime::PerformanceCounter>
    run_benchmark_pipelined(std::shared_ptr<ngraph::Function> f,
                            const std::string& backend_name,
                            size_t iterations,
                            bool timing_detail,
                            int warmup_iterations,
                            bool copy_data);
src/tools/nbench/benchmark_utils.cpp
0 → 100644
View file @
da7a15f8
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#if defined(__x86_64__) || defined(__amd64__)
#include <xmmintrin.h>
#endif
#include "benchmark_utils.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
using
namespace
std
;
using
namespace
ngraph
;
template
<>
void
init_int_tensor
<
char
>
(
shared_ptr
<
runtime
::
Tensor
>
tensor
,
char
min
,
char
max
)
{
size_t
size
=
tensor
->
get_element_count
();
uniform_int_distribution
<
int16_t
>
dist
(
static_cast
<
short
>
(
min
),
static_cast
<
short
>
(
max
));
vector
<
char
>
vec
(
size
);
for
(
char
&
element
:
vec
)
{
element
=
static_cast
<
char
>
(
dist
(
get_random_engine
()));
}
tensor
->
write
(
vec
.
data
(),
vec
.
size
()
*
sizeof
(
char
));
}
template
<>
void
init_int_tensor
<
int8_t
>
(
shared_ptr
<
runtime
::
Tensor
>
tensor
,
int8_t
min
,
int8_t
max
)
{
size_t
size
=
tensor
->
get_element_count
();
uniform_int_distribution
<
int16_t
>
dist
(
static_cast
<
short
>
(
min
),
static_cast
<
short
>
(
max
));
vector
<
int8_t
>
vec
(
size
);
for
(
int8_t
&
element
:
vec
)
{
element
=
static_cast
<
int8_t
>
(
dist
(
get_random_engine
()));
}
tensor
->
write
(
vec
.
data
(),
vec
.
size
()
*
sizeof
(
int8_t
));
}
template
<>
void
init_int_tensor
<
uint8_t
>
(
shared_ptr
<
runtime
::
Tensor
>
tensor
,
uint8_t
min
,
uint8_t
max
)
{
size_t
size
=
tensor
->
get_element_count
();
uniform_int_distribution
<
int16_t
>
dist
(
static_cast
<
short
>
(
min
),
static_cast
<
short
>
(
max
));
vector
<
uint8_t
>
vec
(
size
);
for
(
uint8_t
&
element
:
vec
)
{
element
=
static_cast
<
uint8_t
>
(
dist
(
get_random_engine
()));
}
tensor
->
write
(
vec
.
data
(),
vec
.
size
()
*
sizeof
(
uint8_t
));
}
/// \brief Makes the calling thread treat denormal floating-point values as zero.
///
/// On x86-64 builds this sets both the flush-to-zero (FTZ) and denormals-are-zero (DAZ)
/// bits of the MXCSR register; on every other target it compiles to a no-op.
void set_denormals_flush_to_zero()
{
#if defined(__x86_64__) || defined(__amd64__)
    // Avoids perf impact from denormals while benchmarking with random data
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

    // The _MM_SET_DENORMALS_ZERO_MODE / _MM_DENORMALS_ZERO_ON macros are declared in
    // <pmmintrin.h>, which this file does not include. Set the DAZ bit (bit 6 of
    // MXCSR) through the <xmmintrin.h> primitives instead so the build does not
    // depend on a transitive include.
    constexpr unsigned int mxcsr_denormals_are_zero = 0x0040;
    _mm_setcsr(_mm_getcsr() | mxcsr_denormals_are_zero);
#endif
}
/// \brief Fills \p tensor with random data chosen according to its element type.
///
/// Value ranges used: f32/f64 are drawn from uniform_real_distribution(-1, 1),
/// i8/i16 from {-1, 0, 1}, and boolean, i32, i64 and all unsigned types from {0, 1}.
///
/// \throws std::runtime_error ("unsupported type") for undefined, dynamic, bf16, f16
///         and any enumerator not listed in the switch.
void random_init(shared_ptr<runtime::Tensor> tensor)
{
    element::Type tensor_type = tensor->get_element_type();

// Promote missing-enumerator warnings to errors for this switch so that a newly added
// element type cannot be forgotten here (skipped on GCC 4.8).
#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
#endif
    switch (tensor_type.get_type_enum())
    {
    // Floating point types
    case element::Type_t::f32: init_real_tensor<float>(tensor, -1, 1); break;
    case element::Type_t::f64: init_real_tensor<double>(tensor, -1, 1); break;

    // Signed integer types
    case element::Type_t::i8: init_int_tensor<int8_t>(tensor, -1, 1); break;
    case element::Type_t::i16: init_int_tensor<int16_t>(tensor, -1, 1); break;
    case element::Type_t::i32: init_int_tensor<int32_t>(tensor, 0, 1); break;
    case element::Type_t::i64: init_int_tensor<int64_t>(tensor, 0, 1); break;

    // Unsigned integer types
    case element::Type_t::u8: init_int_tensor<uint8_t>(tensor, 0, 1); break;
    case element::Type_t::u16: init_int_tensor<uint16_t>(tensor, 0, 1); break;
    case element::Type_t::u32: init_int_tensor<uint32_t>(tensor, 0, 1); break;
    case element::Type_t::u64: init_int_tensor<uint64_t>(tensor, 0, 1); break;

    // Booleans are stored as char
    case element::Type_t::boolean: init_int_tensor<char>(tensor, 0, 1); break;

    // Everything else cannot be randomly initialized
    case element::Type_t::undefined:
    case element::Type_t::dynamic:
    case element::Type_t::bf16:
    case element::Type_t::f16:
    default: throw runtime_error("unsupported type");
    }
#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
#pragma GCC diagnostic pop
#endif
}
/// \brief Gives access to the single random engine shared by all tensor initializers.
///
/// The engine is a default-constructed function-local static, so it is created on
/// first use and the same object is returned by every call.
std::default_random_engine& get_random_engine()
{
    static default_random_engine shared_engine;
    return shared_engine;
}
src/tools/nbench/benchmark_utils.hpp
0 → 100644
View file @
da7a15f8
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <random>
#include "benchmark.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
using
namespace
std
;
using
namespace
ngraph
;
/// \brief On x86-64 builds enables the flush-to-zero and denormals-are-zero modes so
///        that denormal values produced by random data do not distort timings.
///        Does nothing on other targets.
void set_denormals_flush_to_zero();

/// \brief Fills \p tensor with random values chosen according to its element type.
/// \throws std::runtime_error if the element type is not supported
///         (undefined, dynamic, bf16, f16).
void random_init(shared_ptr<runtime::Tensor> tensor);

/// \brief Returns a reference to the random engine shared by the init_*_tensor
///        helpers declared in this header.
std::default_random_engine& get_random_engine();
/// \brief Fills \p tensor with uniformly distributed random integers of type T.
///
/// One value per tensor element is drawn from the shared engine returned by
/// get_random_engine() and the whole buffer is then written to the tensor in a
/// single write() call.
///
/// \param tensor Tensor to fill; its get_element_count() determines how many values
///               are generated.
/// \param min    Lower bound handed to uniform_int_distribution<T>.
/// \param max    Upper bound handed to uniform_int_distribution<T>.
template <typename T>
void init_int_tensor(shared_ptr<runtime::Tensor> tensor, T min, T max)
{
    const size_t element_count = tensor->get_element_count();
    uniform_int_distribution<T> value_picker(min, max);

    vector<T> values(element_count);
    for (size_t index = 0; index < element_count; ++index)
    {
        values[index] = value_picker(get_random_engine());
    }

    tensor->write(values.data(), element_count * sizeof(T));
}
/// \brief Fills \p tensor with uniformly distributed random floating-point values of
///        type T.
///
/// One value per tensor element is drawn from the shared engine returned by
/// get_random_engine() and the whole buffer is then written to the tensor in a
/// single write() call.
///
/// \param tensor Tensor to fill; its get_element_count() determines how many values
///               are generated.
/// \param min    Lower bound handed to uniform_real_distribution<T>.
/// \param max    Upper bound handed to uniform_real_distribution<T>.
template <typename T>
void init_real_tensor(shared_ptr<runtime::Tensor> tensor, T min, T max)
{
    const size_t element_count = tensor->get_element_count();
    uniform_real_distribution<T> value_picker(min, max);

    vector<T> values(element_count);
    for (size_t index = 0; index < element_count; ++index)
    {
        values[index] = value_picker(get_random_engine());
    }

    tensor->write(values.data(), element_count * sizeof(T));
}
src/tools/nbench/nbench.cpp
View file @
da7a15f8
...
@@ -24,6 +24,7 @@
...
@@ -24,6 +24,7 @@
#include <iomanip>
#include <iomanip>
#include "benchmark.hpp"
#include "benchmark.hpp"
#include "benchmark_pipelined.hpp"
#include "ngraph/distributed.hpp"
#include "ngraph/distributed.hpp"
#include "ngraph/except.hpp"
#include "ngraph/except.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/file_util.hpp"
...
@@ -429,7 +430,7 @@ OPTIONS
...
@@ -429,7 +430,7 @@ OPTIONS
vector
<
runtime
::
PerformanceCounter
>
perf_data
;
vector
<
runtime
::
PerformanceCounter
>
perf_data
;
if
(
double_buffer
)
if
(
double_buffer
)
{
{
perf_data
=
run_benchmark_
double_buffer
ed
(
perf_data
=
run_benchmark_
pipelin
ed
(
f
,
backend
,
iterations
,
timing_detail
,
warmup_iterations
,
copy_data
);
f
,
backend
,
iterations
,
timing_detail
,
warmup_iterations
,
copy_data
);
}
}
else
else
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment