Unverified commit 9779dc81, authored by Robert Kimball, committed by GitHub

Add nbench support for processing all models in a directory (#1518)

* only print details if details enabled

* refactor print routines to main file

* refactor for multi dir support

* dir support prints nice results
parent 43bcb2b8
......@@ -14,12 +14,10 @@
// limitations under the License.
//*****************************************************************************
#include <iomanip>
#include <random>
#include "benchmark.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/runtime/tensor_view.hpp"
......@@ -29,93 +27,6 @@
using namespace std;
using namespace ngraph;
// Aggregate counter timings per op type and output shape.
//
// The op type is the counter name up to its first '_' (the whole name when it
// has none). The shape is that of output 0 of the node whose name matches the
// counter, searched among the ops of every function that traverse_functions
// visits starting at `func`.
//
// Returns a multimap from summed microseconds to "<op> {<shape>} <count>".
// Throws std::out_of_range (from unordered_map::at) when a counter names a node
// that was not found.
multimap<size_t, string>
    aggregate_timing_details(const vector<runtime::PerformanceCounter>& perf_data,
                             shared_ptr<Function> func)
{
    // Collect the functions first, then index all of their ops by node name.
    vector<shared_ptr<Function>> functions;
    traverse_functions(func,
                       [&functions](shared_ptr<Function> visited) { functions.push_back(visited); });

    unordered_map<string, shared_ptr<Node>> nodes_by_name;
    for (const shared_ptr<Function>& function : functions)
    {
        for (const shared_ptr<Node>& op_node : function->get_ops())
        {
            nodes_by_name.insert({op_node->get_name(), op_node});
        }
    }

    unordered_map<string, size_t> total_us;
    unordered_map<string, size_t> occurrences;
    for (const runtime::PerformanceCounter& counter : perf_data)
    {
        const shared_ptr<Node> measured = nodes_by_name.at(counter.name());
        const string op_type = counter.name().substr(0, counter.name().find('_'));
        const string key = op_type + " {" + join(measured->get_outputs()[0].get_shape()) + "} ";
        total_us[key] += counter.microseconds();
        occurrences[key] += 1;
    }

    // Key the result by total time; the occurrence count is appended to the label.
    multimap<size_t, string> by_total;
    for (const auto& entry : total_us)
    {
        by_total.insert({entry.second, entry.first + to_string(occurrences[entry.first])});
    }
    return by_total;
}
// Total the measured microseconds per op type, where the op type is the counter
// name up to its first '_' (the whole name when it has none).
//
// Returns a multimap from each total to its op type.
multimap<size_t, string> aggregate_timing(const vector<runtime::PerformanceCounter>& perf_data)
{
    unordered_map<string, size_t> total_by_op;
    for (const runtime::PerformanceCounter& counter : perf_data)
    {
        const string op_type = counter.name().substr(0, counter.name().find('_'));
        total_by_op[op_type] += counter.microseconds();
    }

    multimap<size_t, string> by_total;
    for (const auto& entry : total_by_op)
    {
        by_total.insert({entry.second, entry.first});
    }
    return by_total;
}
void run_benchmark(const string& json_path,
const string& backend_name,
size_t iterations,
bool timing_detail,
int warmup_iterations)
{
stopwatch timer;
timer.start();
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = deserialize(ss);
timer.stop();
cout << "deserialize time: " << timer.get_milliseconds() << "ms" << endl;
run_benchmark(f, backend_name, iterations, timing_detail, warmup_iterations);
}
// Print every timing entry as "<label> <time>us", one per line, from the
// largest time down to the smallest.
void print_times(const multimap<size_t, string>& timing)
{
    // Column widths: the longest label, and the longest time once rendered
    // through the user's preferred locale (locale("")).
    int label_width = 0;
    int value_width = 0;
    for (const auto& row : timing)
    {
        const int label_len = static_cast<int>(row.second.size());
        label_width = max(label_width, label_len);

        stringstream rendered;
        rendered.imbue(locale(""));
        rendered << row.first;
        const int value_len = static_cast<int>(rendered.str().size());
        value_width = max(value_width, value_len);
    }

    // A multimap iterates in ascending key order, so walk it backwards.
    auto it = timing.rbegin();
    while (it != timing.rend())
    {
        cout << setw(label_width + 2) << left << it->second;
        cout << " ";
        cout << setw(value_width + 2) << right << it->first;
        cout << "us\n";
        ++it;
    }
}
// File-local random engine. It is default-constructed, so it starts from the
// engine's default seed.
// NOTE(review): presumably consumed by the random_init helpers further down;
// their bodies are not visible here -- confirm.
static default_random_engine s_random_engine;
template <typename T>
......@@ -236,11 +147,11 @@ static void random_init(shared_ptr<runtime::TensorView> tv)
}
}
void run_benchmark(shared_ptr<Function> f,
const string& backend_name,
size_t iterations,
bool timing_detail,
int warmup_iterations)
vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
const string& backend_name,
size_t iterations,
bool timing_detail,
int warmup_iterations)
{
stopwatch timer;
timer.start();
......@@ -294,17 +205,5 @@ void run_benchmark(shared_ptr<Function> f,
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = backend->get_performance_data(f);
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = aggregate_timing(perf_data);
multimap<size_t, string> timing_details = aggregate_timing_details(perf_data, f);
cout << "\n---- Aggregate times per op type ----\n";
print_times(timing);
cout << "\n---- Aggregate times per op type/shape/count ----\n";
print_times(timing_details);
return perf_data;
}
......@@ -28,14 +28,8 @@
// Sums the microseconds of the given counters per op type and returns the
// totals as a multimap keyed by total time, with the op type as mapped value.
// NOTE(review): in the matching definition the op type is the counter name up
// to its first '_' -- confirm against the definition this header declares.
std::multimap<size_t, std::string>
aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data);
// Benchmarks the already-deserialized function `f` on the backend named by
// `backend_name`.
// NOTE(review): only a fragment of the definition is visible; it reports the
// average milliseconds per iteration on stdout. Confirm the exact roles of
// `timing_detail` and `warmup_iterations` against the definition.
void run_benchmark(std::shared_ptr<ngraph::Function> f,
const std::string& backend_name,
size_t iterations,
bool timing_detail,
int warmup_iterations);
// Convenience overload: reads the serialized model at `json_path`,
// deserializes it, prints the "deserialize time", and forwards to the
// shared_ptr<Function> overload with the remaining arguments.
// `timing_detail` defaults to false and `warmup_iterations` to 1.
void run_benchmark(const std::string& json_path,
const std::string& backend_name,
size_t iterations,
bool timing_detail = false,
int warmup_iterations = 1);
// Benchmarks `f` on the backend named by `backend_name` and returns the
// performance counters that the backend reports for `f`.
// NOTE(review): only a fragment of the definition is visible; confirm the
// exact roles of `timing_detail` and `warmup_iterations` against it.
std::vector<ngraph::runtime::PerformanceCounter> run_benchmark(std::shared_ptr<ngraph::Function> f,
const std::string& backend_name,
size_t iterations,
bool timing_detail,
int warmup_iterations);
......@@ -21,9 +21,11 @@
// sample models are under ../../test/models
#include <fstream>
#include <iomanip>
#include "benchmark.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/visualize_tree.hpp"
#include "ngraph/runtime/backend.hpp"
......@@ -33,6 +35,121 @@
using namespace std;
using namespace ngraph;
class PerfShape : public ngraph::runtime::PerformanceCounter
{
public:
PerfShape(const runtime::PerformanceCounter& p, Shape s)
: PerformanceCounter(p)
, shape(s)
{
}
Shape shape;
};
unordered_map<string, shared_ptr<Node>> get_node_map(shared_ptr<Function> func)
{
unordered_map<string, shared_ptr<Node>> node_map;
vector<shared_ptr<Function>> fs;
traverse_functions(func, [&](shared_ptr<Function> f) { fs.push_back(f); });
for (shared_ptr<Function> f : fs)
{
for (shared_ptr<Node> node : f->get_ops())
{
node_map.insert({node->get_name(), node});
}
}
return node_map;
}
vector<PerfShape> to_perf_shape(shared_ptr<Function> f,
const vector<runtime::PerformanceCounter>& perf_data)
{
vector<PerfShape> result;
auto node_map = get_node_map(f);
for (const runtime::PerformanceCounter& p : perf_data)
{
auto node = node_map[p.name()];
Shape shape = node->get_outputs()[0].get_shape();
result.push_back(PerfShape(p, shape));
}
return result;
}
multimap<size_t, string> aggregate_timing_details(const vector<PerfShape>& perf_data)
{
unordered_map<string, size_t> timing;
unordered_map<string, size_t> count;
for (const PerfShape& p : perf_data)
{
string op = p.name().substr(0, p.name().find('_'));
string shape_name = " {" + join(p.shape) + "} ";
timing[op + shape_name] += p.microseconds();
count[op + shape_name] += 1;
}
multimap<size_t, string> rc;
for (const pair<string, size_t>& t : timing)
{
rc.insert({t.second, t.first + to_string(count[t.first])});
}
return rc;
}
multimap<size_t, string> aggregate_timing(const vector<PerfShape>& perf_data)
{
unordered_map<string, size_t> timing;
for (const PerfShape& p : perf_data)
{
string op = p.name().substr(0, p.name().find('_'));
timing[op] += p.microseconds();
}
multimap<size_t, string> rc;
for (const pair<string, size_t>& t : timing)
{
rc.insert({t.second, t.first});
}
return rc;
}
// Print every timing entry as "<label> <time>us", one per line, from the
// largest time down to the smallest.
void print_times(const multimap<size_t, string>& timing)
{
    // Column widths: the longest label, and the longest time once rendered
    // through the user's preferred locale (locale("")).
    int label_width = 0;
    int value_width = 0;
    for (const auto& row : timing)
    {
        const int label_len = static_cast<int>(row.second.size());
        label_width = max(label_width, label_len);

        stringstream rendered;
        rendered.imbue(locale(""));
        rendered << row.first;
        const int value_len = static_cast<int>(rendered.str().size());
        value_width = max(value_width, value_len);
    }

    // A multimap iterates in ascending key order, so walk it backwards.
    auto it = timing.rbegin();
    while (it != timing.rend())
    {
        cout << setw(label_width + 2) << left << it->second;
        cout << " ";
        cout << setw(value_width + 2) << right << it->first;
        cout << "us\n";
        ++it;
    }
}
// Print the aggregate timing tables for `perf_data`.
//
// Nothing is printed unless `timing_detail` is true. When it is, two tables go
// to stdout: totals per op type, then totals per op type/shape with the number
// of counters in each group. `perf_data` is taken by value, so the caller's
// vector is left untouched by the sort below.
void print_results(vector<PerfShape> perf_data, bool timing_detail)
{
    // No output is produced without timing_detail, so skip the sort and both
    // aggregation passes entirely.
    if (!timing_detail)
    {
        return;
    }

    // Order counters by descending total time before aggregating.
    sort(perf_data.begin(), perf_data.end(), [](const PerfShape& p1, const PerfShape& p2) {
        return p1.total_microseconds() > p2.total_microseconds();
    });
    const multimap<size_t, string> timing = aggregate_timing(perf_data);
    const multimap<size_t, string> timing_details = aggregate_timing_details(perf_data);

    cout << "\n---- Aggregate times per op type ----\n";
    print_times(timing);

    cout << "\n---- Aggregate times per op type/shape/count ----\n";
    print_times(timing_details);
}
int main(int argc, char** argv)
{
string model;
......@@ -190,6 +307,7 @@ OPTIONS
else if (!directory.empty())
{
vector<string> models;
vector<PerfShape> aggregate_perf_data;
file_util::iterate_files(directory,
[&](const string& file, bool is_dir) {
if (!is_dir)
......@@ -198,27 +316,35 @@ OPTIONS
}
},
true);
unordered_map<string, Shape> shape_info;
for (const string& m : models)
{
try
{
shared_ptr<Function> f = deserialize(m);
cout << "Benchmarking " << m << ", " << backend << " backend, " << iterations
<< " iterations.\n";
run_benchmark(f, backend, iterations, timing_detail, warmup_iterations);
// cout << "Benchmarking " << m << ", " << backend << " backend, " << iterations
// << " iterations.\n";
auto perf_data =
run_benchmark(f, backend, iterations, timing_detail, warmup_iterations);
auto perf_shape = to_perf_shape(f, perf_data);
aggregate_perf_data.insert(
aggregate_perf_data.end(), perf_shape.begin(), perf_shape.end());
}
catch (exception e)
{
cout << "Exception caught on '" << m << "'\n" << e.what() << endl;
}
}
print_results(aggregate_perf_data, timing_detail);
}
else if (iterations > 0)
{
shared_ptr<Function> f = deserialize(model);
cout << "Benchmarking " << model << ", " << backend << " backend, " << iterations
<< " iterations.\n";
run_benchmark(f, backend, iterations, timing_detail, warmup_iterations);
auto perf_data = run_benchmark(f, backend, iterations, timing_detail, warmup_iterations);
auto perf_shape = to_perf_shape(f, perf_data);
print_results(perf_shape, timing_detail);
}
return 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment