Unverified Commit 33140eff authored by Robert Kimball's avatar Robert Kimball Committed by GitHub

Enhancements to nbench (#622)

* fix detailed timing flag

* more detailed info
parent c7d6d7f1
......@@ -136,6 +136,38 @@ public:
StaticInitializers() { ngraph::file_util::remove_directory(s_output_dir); }
};
static string emit_string_array(const vector<string>& s, size_t max_line_length)
{
stringstream ss;
stringstream line;
for (size_t i = 0; i < s.size(); i++)
{
if (i != 0)
{
line << ",";
}
stringstream value;
value << s[i];
string value_string = value.str();
if (static_cast<size_t>(line.tellp()) + value_string.size() + 1 <= max_line_length)
{
if (i > 0)
{
line << " ";
}
line << value_string;
}
else
{
ss << line.str() << "\n";
line.str("");
line << value_string;
}
}
ss << line.str();
return ss.str();
}
static StaticInitializers s_static_initializers;
#define TI(x) type_index(typeid(x))
......@@ -226,7 +258,7 @@ runtime::cpu::CPU_ExternalFunction::CPU_ExternalFunction(
const shared_ptr<ngraph::Function>& function, bool release_function)
: ngraph::runtime::ExternalFunction(function, release_function)
, m_compiled_function(nullptr)
, m_emit_timing(std::getenv("NGRAPH_CPU_EMIT_TIMING") != nullptr)
, m_emit_timing(false)
, m_use_tbb(std::getenv("NGRAPH_CPU_USE_TBB") != nullptr)
, m_function_name(function->get_name())
{
......@@ -243,6 +275,8 @@ void runtime::cpu::CPU_ExternalFunction::compile()
return;
}
m_emit_timing = m_timing | (std::getenv("NGRAPH_CPU_EMIT_TIMING") != nullptr);
m_mkldnn_emitter.reset(new MKLDNNEmitter());
ngraph::pass::Manager pass_manager;
......@@ -338,6 +372,7 @@ using namespace ngraph::runtime;
{
writer << "// Declare debug timers\n";
vector<string> names;
size_t index = 0;
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
{
for (shared_ptr<Node> node : current_function->get_ordered_ops())
......@@ -345,59 +380,43 @@ using namespace ngraph::runtime;
if (!node->is_parameter() && !node->is_constant())
{
names.push_back(node->get_name());
m_name_index_map.insert({node->get_name(), index++});
}
}
}
for (const string& s : names)
{
writer << "ngraph::stopwatch timer_" << s << ";\n";
}
writer << "ngraph::stopwatch timers[" << names.size() << "];\n";
writer << "extern \"C\" size_t get_debug_timer_count() { return " << names.size()
<< "; }\n";
writer << "extern \"C\" const char* get_debug_timer_name(size_t index)\n";
writer << "{\n";
writer.indent++;
writer << "const char* rc;\n";
writer << "switch(index)\n";
writer << "static const char* timer_names[" << names.size() << "] =\n";
writer << "{\n";
for (size_t i = 0; i < names.size(); i++)
writer.indent++;
vector<string> quoted_names;
for (const string& name : names)
{
writer << "case " << i << ": rc = \"" << names[i] << "\"; break;\n";
quoted_names.push_back("\"" + name + "\"");
}
writer << "default: rc = \"\";\n";
writer << "}\n";
writer << "return rc;\n";
writer << emit_string_array(quoted_names, 100 - (4 * 2 + 1));
writer << "\n};\n";
writer.indent--;
writer << "return timer_names[index];\n";
writer.indent--;
writer << "}\n";
writer << "extern \"C\" const size_t get_debug_timer_microseconds(size_t index)\n";
writer << "{\n";
writer.indent++;
writer << "size_t rc;\n";
writer << "switch(index)\n";
writer << "{\n";
for (size_t i = 0; i < names.size(); i++)
{
writer << "case " << i << ": rc = timer_" << names[i]
<< ".get_total_microseconds(); break;\n";
}
writer << "default: rc = 0;\n";
writer << "}\n";
writer << "return rc;\n";
writer << "return (index < " << names.size()
<< " ? timers[index].get_total_microseconds() : 0);\n";
writer.indent--;
writer << "}\n";
writer << "extern \"C\" const size_t get_debug_timer_call_count(size_t index)\n";
writer << "{\n";
writer.indent++;
writer << "size_t rc;\n";
writer << "switch(index)\n";
writer << "{\n";
for (size_t i = 0; i < names.size(); i++)
{
writer << "case " << i << ": rc = timer_" << names[i] << ".get_call_count(); break;\n";
}
writer << "default: rc = 0;\n";
writer << "}\n";
writer << "return rc;\n";
writer << "return (index < " << names.size() << " ? timers[index].get_call_count() : 0);\n";
writer.indent--;
writer << "}\n";
writer << "\n";
......@@ -666,10 +685,6 @@ using namespace ngraph::runtime;
<< "(G, [&](const tbb::flow::continue_msg &msg)\n{\n";
writer.indent++;
}
if (m_emit_timing)
{
emit_debug_function_entry(writer, node.get(), in, out);
}
if (runtime::cpu::IsTracingEnabled() &&
current_function->get_name() == m_function_name)
{
......@@ -677,14 +692,21 @@ using namespace ngraph::runtime;
}
}
if (!node->is_parameter() && !node->is_constant())
{
writer << "\n// " << node->get_name() << "(";
vector<string> parameter_nodes = node_input_names;
parameter_nodes.insert(
parameter_nodes.end(), node_output_names.begin(), node_output_names.end());
writer << join(parameter_nodes);
writer << ")\n";
}
// Emit operation body
if (!node->is_parameter() && !node->is_constant())
{
emit_debug_function_entry(writer, node.get(), in, out);
}
string func_name;
auto it = match_functions.find(node.get());
if (it == match_functions.end())
......@@ -708,11 +730,8 @@ using namespace ngraph::runtime;
// Emit operation epilogue
if (!node->is_parameter() && !node->is_constant())
{
if (m_emit_timing)
{
emit_debug_function_exit(writer, node.get(), in, out);
}
if (runtime::cpu::IsTracingEnabled() &&
current_function->get_name() == m_function_name)
{
......@@ -873,7 +892,10 @@ void runtime::cpu::CPU_ExternalFunction::emit_debug_function_entry(
const std::vector<TensorViewWrapper>& in,
const std::vector<TensorViewWrapper>& out)
{
writer << "timer_" << node->get_name() << ".start();\n";
if (m_emit_timing)
{
writer << "timers[" << m_name_index_map[node->get_name()] << "].start();\n";
}
}
void runtime::cpu::CPU_ExternalFunction::emit_debug_function_exit(
......@@ -882,7 +904,10 @@ void runtime::cpu::CPU_ExternalFunction::emit_debug_function_exit(
const std::vector<TensorViewWrapper>& in,
const std::vector<TensorViewWrapper>& out)
{
writer << "timer_" << node->get_name() << ".stop();\n";
if (m_emit_timing)
{
writer << "timers[" << m_name_index_map[node->get_name()] << "].stop();\n";
}
}
bool runtime::cpu::CPU_ExternalFunction::is_functionally_identical(
......
......@@ -17,6 +17,7 @@
#pragma once
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <typeindex>
......@@ -119,6 +120,7 @@ namespace ngraph
bool m_emit_timing;
bool m_use_tbb;
std::unordered_map<std::string, std::string> m_variable_name_map;
std::map<std::string, size_t> m_name_index_map;
// Because we are directly accessing the constant data stored in the
// Constant ops we need to keep a list of shared_ptr to each Constant
......
......@@ -19,6 +19,7 @@
#include <memory>
#include "ngraph/function.hpp"
#include "ngraph/log.hpp"
namespace ngraph
{
......@@ -34,6 +35,7 @@ namespace ngraph
: m_function(function)
, m_release_function(release_function)
, m_is_compiled(false)
, m_timing(false)
{
}
......@@ -42,12 +44,13 @@ namespace ngraph
public:
virtual ~ExternalFunction() {}
virtual std::shared_ptr<CallFrame> make_call_frame() = 0;
void set_emit_timing(bool enable) { m_timing = enable; }
const std::shared_ptr<ngraph::Function> get_function() { return m_function; }
protected:
std::shared_ptr<ngraph::Function> m_function;
bool m_release_function;
bool m_is_compiled;
bool m_timing;
};
}
}
......@@ -21,34 +21,42 @@
// sample models are under ../../test/models
#include <fstream>
#include <ngraph/file_util.hpp>
#include <ngraph/runtime/backend.hpp>
#include <ngraph/runtime/call_frame.hpp>
#include <ngraph/runtime/manager.hpp>
#include <ngraph/util.hpp>
#include "util/benchmark.hpp"
#include "util/test_tools.hpp"
using namespace std;
using namespace ngraph;
int main(int argc, char** argv)
{
string model = "model.json";
string backend = "INTERPRETER";
int iter = 10;
string model;
string backend = "CPU";
int iterations = 10;
bool failed = false;
bool statistics = false;
bool timing_detail = false;
for (size_t i = 1; i < argc; i++)
{
if (string(argv[i]) == "-f")
string arg = argv[i];
if (arg == "-f" || arg == "--file")
{
model = argv[++i];
}
else if (string(argv[i]) == "-b")
else if (arg == "-b" || arg == "--backend")
{
backend = argv[++i];
}
else if (string(argv[i]) == "-i")
else if (arg == "-i" || arg == "--iterations")
{
try
{
iter = stoi(argv[++i]);
iterations = stoi(argv[++i]);
}
catch (...)
{
......@@ -56,6 +64,19 @@ int main(int argc, char** argv)
failed = true;
}
}
else if (arg == "-s" || arg == "--statistics")
{
statistics = true;
}
else if (arg == "--timing_detail")
{
timing_detail = true;
}
else
{
cout << "Unknown option: " << arg << endl;
failed = true;
}
}
if (!static_cast<bool>(ifstream(model)))
{
......@@ -73,12 +94,58 @@ SYNOPSIS
nbench [-f <filename>] [-b <backend>] [-i <iterations>]
OPTIONS
-f model json file to use (default: model.json)
-b Backend to use (default: INTERPRETER)
-i Iterations (default: 10)
-f|--file Serialized model file
-b|--backend Backend to use (default: CPU)
-i|--iterations Iterations (default: 10)
-s|--statistics Display op stastics
--timing_detail Gather detailed timing
)###";
return 1;
}
cout << "Benchmarking " << model << ", " << backend << " backend, " << iter << " iterations.\n";
run_benchmark(model, backend, iter);
const string json_string = file_util::read_file_to_string(model);
stringstream ss(json_string);
shared_ptr<Function> f = deserialize(ss);
if (statistics)
{
cout << "statistics:" << endl;
cout << "total nodes: " << f->get_ops().size() << endl;
size_t total_constant_bytes = 0;
unordered_map<string, size_t> op_list;
for (shared_ptr<Node> node : f->get_ordered_ops())
{
string name = node->get_name();
string op_name = name.substr(0, name.find('_'));
string shape_name = "{" + join(node->get_outputs()[0].get_shape()) + "}";
op_list[op_name + shape_name]++;
if (op_name == "Constant")
{
const Shape& shape = node->get_outputs()[0].get_shape();
size_t const_size = node->get_outputs()[0].get_element_type().size();
if (shape.size() == 0)
{
total_constant_bytes += const_size;
}
else
{
total_constant_bytes +=
(const_size * shape_size(node->get_outputs()[0].get_shape()));
}
}
}
cout << "Total Constant size: " << total_constant_bytes << " bytes\n";
for (const pair<string, size_t>& op_info : op_list)
{
cout << op_info.first << ": " << op_info.second << " ops" << endl;
}
}
else if (iterations > 0)
{
cout << "Benchmarking " << model << ", " << backend << " backend, " << iterations
<< " iterations.\n";
run_benchmark(f, backend, iterations, timing_detail);
}
return 0;
}
......@@ -17,56 +17,127 @@
#include <iomanip>
#include "benchmark.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/external_function.hpp"
#include "ngraph/runtime/manager.hpp"
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
#include "random.hpp"
std::multimap<size_t, std::string>
aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data)
using namespace std;
using namespace ngraph;
shared_ptr<Node> find_node(const string& name, shared_ptr<Function> func)
{
std::unordered_map<std::string, size_t> timing;
for (const ngraph::runtime::PerformanceCounter& p : perf_data)
static unordered_map<string, shared_ptr<Node>> node_map;
if (node_map.empty())
{
std::string op = p.name().substr(0, p.name().find('_'));
timing[op] += p.microseconds();
vector<shared_ptr<Function>> fs;
traverse_functions(func, [&](shared_ptr<Function> f) { fs.push_back(f); });
for (shared_ptr<Function> f : fs)
{
for (shared_ptr<Node> node : f->get_ops())
{
node_map.insert({node->get_name(), node});
}
}
}
return node_map[name];
}
std::multimap<size_t, std::string> rc;
for (const std::pair<std::string, size_t>& t : timing)
multimap<size_t, string>
aggregate_timing_details(const vector<runtime::PerformanceCounter>& perf_data,
shared_ptr<Function> f)
{
unordered_map<string, size_t> timing;
for (const runtime::PerformanceCounter& p : perf_data)
{
shared_ptr<Node> node = find_node(p.name(), f);
string op = p.name().substr(0, p.name().find('_'));
string shape_name = "{" + join(node->get_outputs()[0].get_shape()) + "}";
timing[op + shape_name] += p.microseconds();
}
multimap<size_t, string> rc;
for (const pair<string, size_t>& t : timing)
{
rc.insert({t.second, t.first});
}
return rc;
}
void run_benchmark(const std::string& json_path, const std::string& backend_name, size_t iterations)
multimap<size_t, string> aggregate_timing(const vector<runtime::PerformanceCounter>& perf_data)
{
using namespace std;
using namespace ngraph;
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
unordered_map<string, size_t> timing;
for (const runtime::PerformanceCounter& p : perf_data)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
string op = p.name().substr(0, p.name().find('_'));
timing[op] += p.microseconds();
}
ngraph::test::Uniform<float> rng{-1, 1, 0};
multimap<size_t, string> rc;
for (const pair<string, size_t>& t : timing)
{
rc.insert({t.second, t.first});
}
return rc;
}
void run_benchmark(const string& json_path,
const string& backend_name,
size_t iterations,
bool timing_detail)
{
stopwatch timer;
timer.start();
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = deserialize(ss);
timer.stop();
cout << "deserialize time: " << timer.get_milliseconds() << "ms" << endl;
run_benchmark(f, backend_name, iterations, timing_detail);
}
void print_times(const multimap<size_t, string>& timing)
{
// set the column widths
int name_width = 0;
int time_width = 0;
for (const pair<size_t, string>& p : timing)
{
name_width = max(name_width, static_cast<int>(p.second.size()));
stringstream ss;
ss.imbue(locale(""));
ss << p.first;
time_width = max(time_width, static_cast<int>(ss.str().size()));
}
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout << setw(name_width + 2) << left << it->second << " " << setw(time_width + 2) << right
<< it->first << "us\n";
}
}
stopwatch build_time;
build_time.start();
void run_benchmark(shared_ptr<Function> f,
const string& backend_name,
size_t iterations,
bool timing_detail)
{
test::Uniform<float> rng{-1, 1, 0};
stopwatch timer;
timer.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
external->set_emit_timing(timing_detail);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
timer.stop();
cout.imbue(locale(""));
cout << "compile time: " << timer.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
......@@ -100,9 +171,11 @@ void run_benchmark(const std::string& json_path, const std::string& backend_name
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = aggregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
multimap<size_t, string> timing_details = aggregate_timing_details(perf_data, f);
cout << "\n---- Aggregate times per op type ----\n";
print_times(timing);
cout << "\n---- Aggregate times per op type/shape ----\n";
print_times(timing_details);
}
......@@ -18,13 +18,21 @@
#include <map>
#include <ngraph/function.hpp>
#include <ngraph/runtime/call_frame.hpp>
#include "test_tools.hpp"
/// performance test utilities
std::multimap<size_t, std::string>
aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data);
void run_benchmark(std::shared_ptr<ngraph::Function> f,
const std::string& backend_name,
size_t iterations,
bool timing_detail);
void run_benchmark(const std::string& json_path,
const std::string& backend_name,
size_t iterations);
size_t iterations,
bool timing_detail = false);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment