Unverified Commit 9779dc81 authored by Robert Kimball's avatar Robert Kimball Committed by GitHub

Add nbench support for processing all models in a directory (#1518)

* only print details if details enabled

* refactor print routines to main file

* refactor for multi dir support

* dir support prints nice results
parent 43bcb2b8
...@@ -14,12 +14,10 @@ ...@@ -14,12 +14,10 @@
// limitations under the License. // limitations under the License.
//***************************************************************************** //*****************************************************************************
#include <iomanip>
#include <random> #include <random>
#include "benchmark.hpp" #include "benchmark.hpp"
#include "ngraph/file_util.hpp" #include "ngraph/file_util.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/runtime/backend.hpp" #include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor_view.hpp" #include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/runtime/tensor_view.hpp" #include "ngraph/runtime/tensor_view.hpp"
...@@ -29,93 +27,6 @@ ...@@ -29,93 +27,6 @@
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
// Aggregate timing per "op-type {output-shape}" bucket, with the call count
// appended to each label.  The result is keyed by total microseconds so a
// reverse iteration yields slowest-first.
multimap<size_t, string>
    aggregate_timing_details(const vector<runtime::PerformanceCounter>& perf_data,
                             shared_ptr<Function> func)
{
    // Index every node of func (and any nested functions) by name so each
    // performance counter can be matched back to its node's output shape.
    unordered_map<string, shared_ptr<Node>> name_to_node;
    traverse_functions(func, [&](shared_ptr<Function> sub) {
        for (const shared_ptr<Node>& op_node : sub->get_ops())
        {
            name_to_node.insert({op_node->get_name(), op_node});
        }
    });
    unordered_map<string, size_t> total_us;
    unordered_map<string, size_t> call_count;
    for (const runtime::PerformanceCounter& counter : perf_data)
    {
        const string& full_name = counter.name();
        // Node names look like "<OpType>_<id>"; keep only the op type.
        string key = full_name.substr(0, full_name.find('_'));
        shared_ptr<Node> matched = name_to_node.at(full_name);
        key += " {" + join(matched->get_outputs()[0].get_shape()) + "} ";
        total_us[key] += counter.microseconds();
        call_count[key] += 1;
    }
    multimap<size_t, string> by_time;
    for (const auto& entry : total_us)
    {
        // Append the call count to the label; the multimap key sorts by time.
        by_time.insert({entry.second, entry.first + to_string(call_count[entry.first])});
    }
    return by_time;
}
// Sum microseconds per op type (the node-name prefix before the first '_')
// and return the totals keyed by time, so iteration order is ascending time.
multimap<size_t, string> aggregate_timing(const vector<runtime::PerformanceCounter>& perf_data)
{
    unordered_map<string, size_t> total_us;
    for (const runtime::PerformanceCounter& counter : perf_data)
    {
        const string& full_name = counter.name();
        total_us[full_name.substr(0, full_name.find('_'))] += counter.microseconds();
    }
    multimap<size_t, string> by_time;
    for (const auto& entry : total_us)
    {
        by_time.insert({entry.second, entry.first});
    }
    return by_time;
}
void run_benchmark(const string& json_path,
const string& backend_name,
size_t iterations,
bool timing_detail,
int warmup_iterations)
{
stopwatch timer;
timer.start();
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = deserialize(ss);
timer.stop();
cout << "deserialize time: " << timer.get_milliseconds() << "ms" << endl;
run_benchmark(f, backend_name, iterations, timing_detail, warmup_iterations);
}
// Pretty-print timing rows, largest time first, padding both columns to the
// widest entry.  Widths are measured with the user's default locale imbued
// (so grouping separators are accounted for), while the values themselves
// are printed through cout's current locale.
void print_times(const multimap<size_t, string>& timing)
{
    // Pass 1: find the widest name and the widest formatted time.
    int name_width = 0;
    int time_width = 0;
    for (const auto& entry : timing)
    {
        name_width = max(name_width, static_cast<int>(entry.second.size()));
        stringstream formatted;
        formatted.imbue(locale(""));
        formatted << entry.first;
        time_width = max(time_width, static_cast<int>(formatted.str().size()));
    }
    // Pass 2: reverse iteration over the multimap gives descending time.
    for (auto it = timing.rbegin(); it != timing.rend(); ++it)
    {
        cout << setw(name_width + 2) << left << it->second << " " << setw(time_width + 2) << right
             << it->first << "us\n";
    }
}
static default_random_engine s_random_engine; static default_random_engine s_random_engine;
template <typename T> template <typename T>
...@@ -236,11 +147,11 @@ static void random_init(shared_ptr<runtime::TensorView> tv) ...@@ -236,11 +147,11 @@ static void random_init(shared_ptr<runtime::TensorView> tv)
} }
} }
void run_benchmark(shared_ptr<Function> f, vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
const string& backend_name, const string& backend_name,
size_t iterations, size_t iterations,
bool timing_detail, bool timing_detail,
int warmup_iterations) int warmup_iterations)
{ {
stopwatch timer; stopwatch timer;
timer.start(); timer.start();
...@@ -294,17 +205,5 @@ void run_benchmark(shared_ptr<Function> f, ...@@ -294,17 +205,5 @@ void run_benchmark(shared_ptr<Function> f,
cout << time / iterations << "ms per iteration" << endl; cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = backend->get_performance_data(f); vector<runtime::PerformanceCounter> perf_data = backend->get_performance_data(f);
sort(perf_data.begin(), return perf_data;
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = aggregate_timing(perf_data);
multimap<size_t, string> timing_details = aggregate_timing_details(perf_data, f);
cout << "\n---- Aggregate times per op type ----\n";
print_times(timing);
cout << "\n---- Aggregate times per op type/shape/count ----\n";
print_times(timing_details);
} }
...@@ -28,14 +28,8 @@ ...@@ -28,14 +28,8 @@
std::multimap<size_t, std::string> std::multimap<size_t, std::string>
aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data); aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data);
void run_benchmark(std::shared_ptr<ngraph::Function> f, std::vector<ngraph::runtime::PerformanceCounter> run_benchmark(std::shared_ptr<ngraph::Function> f,
const std::string& backend_name, const std::string& backend_name,
size_t iterations, size_t iterations,
bool timing_detail, bool timing_detail,
int warmup_iterations); int warmup_iterations);
void run_benchmark(const std::string& json_path,
const std::string& backend_name,
size_t iterations,
bool timing_detail = false,
int warmup_iterations = 1);
...@@ -21,9 +21,11 @@ ...@@ -21,9 +21,11 @@
// sample models are under ../../test/models // sample models are under ../../test/models
#include <fstream> #include <fstream>
#include <iomanip>
#include "benchmark.hpp" #include "benchmark.hpp"
#include "ngraph/file_util.hpp" #include "ngraph/file_util.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/pass/manager.hpp" #include "ngraph/pass/manager.hpp"
#include "ngraph/pass/visualize_tree.hpp" #include "ngraph/pass/visualize_tree.hpp"
#include "ngraph/runtime/backend.hpp" #include "ngraph/runtime/backend.hpp"
...@@ -33,6 +35,121 @@ ...@@ -33,6 +35,121 @@
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
// A PerformanceCounter augmented with the output shape of the node that
// produced it, enabling per-op/per-shape timing aggregation.
class PerfShape : public ngraph::runtime::PerformanceCounter
{
public:
    PerfShape(const runtime::PerformanceCounter& counter, Shape output_shape)
        : PerformanceCounter(counter)
        , shape(output_shape)
    {
    }
    // Output shape of the first output of the corresponding node.
    Shape shape;
};
unordered_map<string, shared_ptr<Node>> get_node_map(shared_ptr<Function> func)
{
unordered_map<string, shared_ptr<Node>> node_map;
vector<shared_ptr<Function>> fs;
traverse_functions(func, [&](shared_ptr<Function> f) { fs.push_back(f); });
for (shared_ptr<Function> f : fs)
{
for (shared_ptr<Node> node : f->get_ops())
{
node_map.insert({node->get_name(), node});
}
}
return node_map;
}
vector<PerfShape> to_perf_shape(shared_ptr<Function> f,
const vector<runtime::PerformanceCounter>& perf_data)
{
vector<PerfShape> result;
auto node_map = get_node_map(f);
for (const runtime::PerformanceCounter& p : perf_data)
{
auto node = node_map[p.name()];
Shape shape = node->get_outputs()[0].get_shape();
result.push_back(PerfShape(p, shape));
}
return result;
}
multimap<size_t, string> aggregate_timing_details(const vector<PerfShape>& perf_data)
{
unordered_map<string, size_t> timing;
unordered_map<string, size_t> count;
for (const PerfShape& p : perf_data)
{
string op = p.name().substr(0, p.name().find('_'));
string shape_name = " {" + join(p.shape) + "} ";
timing[op + shape_name] += p.microseconds();
count[op + shape_name] += 1;
}
multimap<size_t, string> rc;
for (const pair<string, size_t>& t : timing)
{
rc.insert({t.second, t.first + to_string(count[t.first])});
}
return rc;
}
multimap<size_t, string> aggregate_timing(const vector<PerfShape>& perf_data)
{
unordered_map<string, size_t> timing;
for (const PerfShape& p : perf_data)
{
string op = p.name().substr(0, p.name().find('_'));
timing[op] += p.microseconds();
}
multimap<size_t, string> rc;
for (const pair<string, size_t>& t : timing)
{
rc.insert({t.second, t.first});
}
return rc;
}
// Pretty-print timing rows, largest time first, padding both columns to the
// widest entry.  Widths are measured with the user's default locale imbued
// (so grouping separators are accounted for), while the values themselves
// are printed through cout's current locale.
void print_times(const multimap<size_t, string>& timing)
{
    // Pass 1: find the widest name and the widest formatted time.
    int name_width = 0;
    int time_width = 0;
    for (const auto& entry : timing)
    {
        name_width = max(name_width, static_cast<int>(entry.second.size()));
        stringstream formatted;
        formatted.imbue(locale(""));
        formatted << entry.first;
        time_width = max(time_width, static_cast<int>(formatted.str().size()));
    }
    // Pass 2: reverse iteration over the multimap gives descending time.
    for (auto it = timing.rbegin(); it != timing.rend(); ++it)
    {
        cout << setw(name_width + 2) << left << it->second << " " << setw(time_width + 2) << right
             << it->first << "us\n";
    }
}
// Print aggregate timing tables for the collected counters.  All reporting is
// gated on timing_detail; with it disabled this function is a no-op.
//
// perf_data:     counters (with shapes) gathered from one or more models
// timing_detail: when false, print nothing
void print_results(vector<PerfShape> perf_data, bool timing_detail)
{
    if (!timing_detail)
    {
        // Nothing will be printed, so skip the aggregation work entirely.
        return;
    }
    // NOTE(review): a sort of perf_data by total_microseconds was removed here;
    // its order was never consumed -- aggregation hashes by op name and
    // print_times orders rows by time on its own -- so output is unchanged.
    multimap<size_t, string> timing = aggregate_timing(perf_data);
    multimap<size_t, string> timing_details = aggregate_timing_details(perf_data);
    cout << "\n---- Aggregate times per op type ----\n";
    print_times(timing);
    cout << "\n---- Aggregate times per op type/shape/count ----\n";
    print_times(timing_details);
}
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
string model; string model;
...@@ -190,6 +307,7 @@ OPTIONS ...@@ -190,6 +307,7 @@ OPTIONS
else if (!directory.empty()) else if (!directory.empty())
{ {
vector<string> models; vector<string> models;
vector<PerfShape> aggregate_perf_data;
file_util::iterate_files(directory, file_util::iterate_files(directory,
[&](const string& file, bool is_dir) { [&](const string& file, bool is_dir) {
if (!is_dir) if (!is_dir)
...@@ -198,27 +316,35 @@ OPTIONS ...@@ -198,27 +316,35 @@ OPTIONS
} }
}, },
true); true);
unordered_map<string, Shape> shape_info;
for (const string& m : models) for (const string& m : models)
{ {
try try
{ {
shared_ptr<Function> f = deserialize(m); shared_ptr<Function> f = deserialize(m);
cout << "Benchmarking " << m << ", " << backend << " backend, " << iterations // cout << "Benchmarking " << m << ", " << backend << " backend, " << iterations
<< " iterations.\n"; // << " iterations.\n";
run_benchmark(f, backend, iterations, timing_detail, warmup_iterations); auto perf_data =
run_benchmark(f, backend, iterations, timing_detail, warmup_iterations);
auto perf_shape = to_perf_shape(f, perf_data);
aggregate_perf_data.insert(
aggregate_perf_data.end(), perf_shape.begin(), perf_shape.end());
} }
catch (exception e) catch (exception e)
{ {
cout << "Exception caught on '" << m << "'\n" << e.what() << endl; cout << "Exception caught on '" << m << "'\n" << e.what() << endl;
} }
} }
print_results(aggregate_perf_data, timing_detail);
} }
else if (iterations > 0) else if (iterations > 0)
{ {
shared_ptr<Function> f = deserialize(model); shared_ptr<Function> f = deserialize(model);
cout << "Benchmarking " << model << ", " << backend << " backend, " << iterations cout << "Benchmarking " << model << ", " << backend << " backend, " << iterations
<< " iterations.\n"; << " iterations.\n";
run_benchmark(f, backend, iterations, timing_detail, warmup_iterations); auto perf_data = run_benchmark(f, backend, iterations, timing_detail, warmup_iterations);
auto perf_shape = to_perf_shape(f, perf_data);
print_results(perf_shape, timing_detail);
} }
return 0; return 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment