Unverified Commit bced1048 authored by Diego Caballero, committed by GitHub

[nBench] Add support to dump result tensors (#3958)

* [nBench] Add support to dump result tensors

A new flag '--dump_results' is added to dump result tensors to standard output (std::cout).

* address feedback

* fix unhandled enum cases in the switch
Co-authored-by: Ashok Emani <ashok.emani@intel.com>
Co-authored-by: Robert Kimball <robert.kimball@intel.com>
parent 2a8487e4
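Before the diff, a quick note on the core mechanism: the new helper reinterprets a result tensor's raw buffer as a concrete element type and streams the elements to std::cout. A minimal standalone sketch of the same pattern, with no ngraph dependency (the buffer contents and names here are hypothetical):

#include <cstddef>
#include <cstdint>
#include <iostream>

// Same shape as the helper added below: interpret a raw buffer as elements
// of TYPE and print them space-separated on one line.
template <class TYPE>
static void dump_elements(const void* data, size_t num_elements)
{
    const TYPE* typed = static_cast<const TYPE*>(data);
    for (size_t i = 0; i < num_elements; ++i)
    {
        std::cout << typed[i] << " ";
    }
    std::cout << std::endl;
}

int main()
{
    float f32_buf[] = {1.5f, -2.0f, 3.25f};
    dump_elements<float>(f32_buf, 3); // prints: 1.5 -2 3.25
    return 0;
}

One quirk this pattern carries over: i8/u8 elements stream as characters, since operator<< treats int8_t and uint8_t as char types rather than integers.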
@@ -26,12 +26,71 @@
using namespace std;
using namespace ngraph;
/// Utility to dump a single HostTensor to std::cout.
template <class TYPE>
static void dump_tensor_elements(runtime::HostTensor& tensor)
{
    TYPE* data_ptr = tensor.get_data_ptr<TYPE>();
    size_t numElements = tensor.get_element_count();
    for (size_t i = 0; i < numElements; ++i)
    {
        cout << data_ptr[i] << " ";
    }
    cout << endl;
}

/// Utility to dump all the result tensors to std::cout.
static void dump_result_tensors(vector<shared_ptr<runtime::HostTensor>>& results)
{
    cout << "============================================================================\n";
    cout << "---- Dumping result tensors \n";
    cout << "============================================================================\n";

    unsigned i = 0;
    for (auto& result : results)
    {
        cout << "Result tensor #" << i << ": " << result->get_name() << endl;
        auto element_type = result->get_element_type();
// Promote unhandled-enum warnings to errors so that adding a new element type
// without extending this switch breaks the build. GCC 4.8 is excluded because
// it does not handle these diagnostic pragmas reliably.
#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
#endif
        switch (element_type)
        {
        case (element::Type_t::f32): dump_tensor_elements<float>(*result); break;
        case (element::Type_t::u8): dump_tensor_elements<uint8_t>(*result); break;
        case (element::Type_t::i8): dump_tensor_elements<int8_t>(*result); break;
        case (element::Type_t::u16): dump_tensor_elements<uint16_t>(*result); break;
        case (element::Type_t::i16): dump_tensor_elements<int16_t>(*result); break;
        case (element::Type_t::i32): dump_tensor_elements<int32_t>(*result); break;
        case (element::Type_t::i64): dump_tensor_elements<int64_t>(*result); break;
        case (element::Type_t::f64): dump_tensor_elements<double>(*result); break;
        case (element::Type_t::u32): dump_tensor_elements<uint32_t>(*result); break;
        case (element::Type_t::u64): dump_tensor_elements<uint64_t>(*result); break;
        case (element::Type_t::boolean): dump_tensor_elements<char>(*result); break;
        case (element::Type_t::bf16): dump_tensor_elements<bfloat16>(*result); break;
        case (element::Type_t::f16): dump_tensor_elements<float16>(*result); break;
        case element::Type_t::u1: throw runtime_error("unsupported type");
        case element::Type_t::undefined: throw runtime_error("unsupported type");
        case element::Type_t::dynamic: throw runtime_error("unsupported type");
        default: NGRAPH_UNREACHABLE("Type not implemented yet");
        }
#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic pop
#endif
        ++i;
    }
}
vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
                                                  const string& backend_name,
                                                  size_t iterations,
                                                  bool timing_detail,
                                                  size_t warmup_iterations,
                                                  bool copy_data,
                                                  bool dump_results)
{
    stopwatch timer;
    timer.start();
@@ -115,6 +174,11 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
    ss << time / iterations << "ms per iteration" << endl;
    cout << ss.str();

    if (dump_results)
    {
        dump_result_tensors(result_data);
    }

    vector<runtime::PerformanceCounter> perf_data = exec->get_performance_data();
    return perf_data;
}
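The unhandled-enum fix in this commit relies on a compiler trick worth calling out: temporarily promoting -Wswitch and -Wswitch-enum to errors, so a switch over element::Type_t that misses an enumerator fails to compile rather than silently doing nothing at runtime. A self-contained sketch of the technique (the enum here is hypothetical; the version guard mirrors the diff and skips GCC 4.8, presumably due to unreliable pragma support there):

#include <iostream>

enum class Color { red, green, blue };

static void print_color(Color c)
{
// Save the diagnostic state, then turn unhandled-enumerator warnings into
// hard errors for the switch below.
#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
#endif
    switch (c)
    {
    case Color::red: std::cout << "red\n"; break;
    case Color::green: std::cout << "green\n"; break;
    // Deleting any case above now fails the build under GCC/Clang instead
    // of producing a silent no-op at runtime.
    case Color::blue: std::cout << "blue\n"; break;
    }
#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic pop
#endif
}

int main()
{
    print_color(Color::green);
    return 0;
}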
@@ -29,4 +29,5 @@ std::vector<ngraph::runtime::PerformanceCounter> run_benchmark(std::shared_ptr<n
                                                                size_t iterations,
                                                                bool timing_detail,
                                                                size_t warmup_iterations,
                                                                bool copy_data,
                                                                bool dump_results);
@@ -184,6 +184,7 @@ int main(int argc, char** argv)
    bool visualize = false;
    int warmup_iterations = 1;
    bool copy_data = true;
    bool dump_results = false;
    bool dot_file = false;
    bool double_buffer = false;
@@ -223,6 +224,10 @@ int main(int argc, char** argv)
        {
            copy_data = false;
        }
        else if (arg == "--dump_results")
        {
            dump_results = true;
        }
        else if (arg == "-v" || arg == "--visualize")
        {
            visualize = true;
@@ -292,6 +297,7 @@ OPTIONS
    --timing_detail          Gather detailed timing
    -w|--warmup_iterations   Number of warm-up iterations
    --no_copy_data           Disable copy of input/result data every iteration
    --dump_results           Dump result tensors to standard output
    --dot                    Generate Graphviz dot file
    --double_buffer          Double buffer inputs and outputs
)###";
@@ -441,13 +447,20 @@ OPTIONS
    vector<runtime::PerformanceCounter> perf_data;
    if (double_buffer)
    {
        NGRAPH_CHECK(!dump_results,
                     "'dump_results' not implemented in double buffer mode");
        perf_data = run_benchmark_pipelined(
            f, backend, iterations, timing_detail, warmup_iterations, copy_data);
    }
    else
    {
        perf_data = run_benchmark(f,
                                  backend,
                                  iterations,
                                  timing_detail,
                                  warmup_iterations,
                                  copy_data,
                                  dump_results);
    }
    auto perf_shape = to_perf_shape(f, perf_data);
    aggregate_perf_data.insert(
...