Commit 02b04376 authored by Robert Kimball, committed by Scott Cyphers

PerformanceCounter rework (#2790)

* rework PerformanceCounter class

* Update the Intel GPU backend to the new PerformanceCounter and enable performance collection on the interpreter
parent d3f3a1ff
......@@ -1459,7 +1459,7 @@ void runtime::cpu::CPU_ExternalFunction::build(ngraph::pass::PassConfig& pass_co
enables.emplace_back(enable);
enable_nodename_list.emplace_back(make_pair(enable, node->get_name()));
m_perf_counters.emplace_back(node->get_name().c_str(), 0, 0);
m_perf_counters.emplace_back(node, 0, 0);
}
if ((std::getenv("NGRAPH_DEX_DEBUG") != nullptr))
......@@ -1826,10 +1826,15 @@ const vector<runtime::PerformanceCounter>& runtime::cpu::CPU_ExternalFunction::g
size_t count = get_count();
if (m_perf_counters.size() == 0)
{
map<string, shared_ptr<const Node>> name_map;
for (auto n : m_function->get_ops())
{
name_map.insert({n->get_name(), n});
}
for (size_t i = 0; i < count; i++)
{
m_perf_counters.push_back(
{get_name(i), get_microseconds(i), get_call_count(i)});
shared_ptr<const Node> n = name_map[get_name(i)];
m_perf_counters.push_back({n, get_microseconds(i), get_call_count(i)});
}
}
else
......
......@@ -816,9 +816,15 @@ void runtime::gpu::GPUExternalFunction::get_performance_data(
if (get_count && get_name && get_microseconds && get_call_count)
{
size_t count = get_count();
map<string, shared_ptr<const Node>> name_map;
for (auto n : m_function->get_ops())
{
name_map.insert({n->get_name(), n});
}
for (size_t i = 0; i < count; i++)
{
rc.push_back({get_name(i), get_microseconds(i), get_call_count(i)});
shared_ptr<const Node> n = name_map[get_name(i)];
rc.push_back({n, get_microseconds(i), get_call_count(i)});
}
}
}
......
......@@ -173,6 +173,11 @@ vector<runtime::PerformanceCounter>
{
const map<cldnn::primitive_id, cldnn::event>& primitives =
m_cldnn_network->get_executed_primitives();
map<string, shared_ptr<const Node>> name_map;
for (auto n : m_function->get_ops())
{
name_map.insert({n->get_name(), n});
}
for (const auto& p : primitives)
{
// Let's generate the primitive name that matches to the name in Function
......@@ -188,7 +193,8 @@ vector<runtime::PerformanceCounter>
.count();
}
}
const runtime::PerformanceCounter perf_counter(primitive_name.c_str(), usec, 1);
shared_ptr<const Node> n = name_map[primitive_name];
const runtime::PerformanceCounter perf_counter(n, usec, 1);
rc.push_back(perf_counter);
}
}
......
......@@ -37,6 +37,7 @@ using descriptor::layout::DenseTensorLayout;
runtime::interpreter::INTExecutable::INTExecutable(const shared_ptr<Function>& function,
bool enable_performance_collection)
: m_performance_counters_enabled{enable_performance_collection}
{
m_is_compiled = true;
pass::Manager pass_manager;
......@@ -103,7 +104,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
// for each ordered op in the graph
for (const NodeWrapper& wrapped : m_wrapped_nodes)
{
const Node* op = &wrapped.get_node();
auto op = wrapped.get_node();
auto type_id = wrapped.get_typeid();
if (type_id == OP_TYPEID::Parameter)
{
......@@ -178,7 +179,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
}
if (m_nan_check_enabled)
{
perform_nan_check(op_outputs, op);
perform_nan_check(op_outputs, op.get());
}
}
......@@ -207,7 +208,7 @@ void runtime::interpreter::INTExecutable::generate_calls(const element::Type& ty
case element::Type_t::undefined:
case element::Type_t::dynamic:
case element::Type_t::bf16:
ss << "unsupported element type " << type << " op " << op.get_node().get_name();
ss << "unsupported element type " << type << " op " << op.get_node()->get_name();
throw ngraph_error(ss.str());
}
}
......@@ -221,11 +222,9 @@ vector<runtime::PerformanceCounter>
runtime::interpreter::INTExecutable::get_performance_data() const
{
vector<runtime::PerformanceCounter> rc;
for (const pair<const Node*, stopwatch> p : m_timer_map)
for (const pair<shared_ptr<const Node>, stopwatch> p : m_timer_map)
{
rc.emplace_back(p.first->get_name().c_str(),
p.second.get_total_microseconds(),
p.second.get_call_count());
rc.emplace_back(p.first, p.second.get_total_microseconds(), p.second.get_call_count());
}
return rc;
}
......
......@@ -177,7 +177,7 @@ private:
bool m_is_compiled = false;
bool m_nan_check_enabled = false;
bool m_performance_counters_enabled = false;
std::unordered_map<const Node*, stopwatch> m_timer_map;
std::unordered_map<std::shared_ptr<const Node>, stopwatch> m_timer_map;
std::vector<NodeWrapper> m_wrapped_nodes;
std::unordered_map<const Node*, std::shared_ptr<RNGState>> m_states;
std::set<std::string> m_unsupported_op_name_list;
......@@ -195,7 +195,7 @@ private:
const std::vector<std::shared_ptr<HostTensor>>& out,
const std::vector<std::shared_ptr<HostTensor>>& args)
{
const Node& node = node_wrapper.get_node();
const Node& node = *node_wrapper.get_node();
// We want to check that every OP_TYPEID enumeration is included in the list.
// These GCC flags enable compile-time checking so that if an enumeration
......
......@@ -52,7 +52,7 @@ class ngraph::runtime::interpreter::NodeWrapper
public:
NodeWrapper(const std::shared_ptr<const ngraph::Node>& node);
const Node& get_node() const { return *m_node; }
std::shared_ptr<const Node> get_node() const { return m_node; }
ngraph::runtime::interpreter::OP_TYPEID get_typeid() const { return m_typeid; }
private:
std::shared_ptr<const ngraph::Node> m_node;
......
......@@ -19,6 +19,8 @@
#include <cstddef>
#include <string>
#include "ngraph/node.hpp"
namespace ngraph
{
namespace runtime
......@@ -26,20 +28,20 @@ namespace ngraph
class PerformanceCounter
{
public:
PerformanceCounter(const char* n, size_t us, size_t calls)
: m_name(n)
PerformanceCounter(const std::shared_ptr<const Node>& n, size_t us, size_t calls)
: m_node(n)
, m_total_microseconds(us)
, m_call_count(calls)
{
}
const std::string& name() const { return m_name; }
std::shared_ptr<const Node> get_node() const { return m_node; }
size_t total_microseconds() const { return m_total_microseconds; }
size_t microseconds() const
{
return m_call_count == 0 ? 0 : m_total_microseconds / m_call_count;
}
size_t call_count() const { return m_call_count; }
std::string m_name;
std::shared_ptr<const Node> m_node;
size_t m_total_microseconds;
size_t m_call_count;
};
......
......@@ -53,33 +53,18 @@ public:
Shape shape;
};
// Builds a name -> node lookup table for `func`.
//
// Every function that traverse_functions() reports for `func` is collected
// first; then each op returned by get_ops() on those functions is registered
// under its get_name(). If two ops share a name, the one registered first is
// kept (later duplicates are ignored).
unordered_map<string, shared_ptr<Node>> get_node_map(shared_ptr<Function> func)
{
    // Gather the functions up front so the op walk below is a plain nested loop.
    vector<shared_ptr<Function>> functions;
    traverse_functions(
        func, [&functions](shared_ptr<Function> visited) { functions.push_back(visited); });

    unordered_map<string, shared_ptr<Node>> nodes_by_name;
    for (const shared_ptr<Function>& current : functions)
    {
        for (const shared_ptr<Node>& op : current->get_ops())
        {
            nodes_by_name.emplace(op->get_name(), op);
        }
    }
    return nodes_by_name;
}
vector<PerfShape> to_perf_shape(shared_ptr<Function> f,
const vector<runtime::PerformanceCounter>& perf_data)
{
vector<PerfShape> result;
auto node_map = get_node_map(f);
for (const runtime::PerformanceCounter& p : perf_data)
{
auto node = node_map[p.name()];
auto node = p.get_node();
if (node == nullptr)
{
ostringstream os;
os << "Can't find \"" << p.name() << "\" in Function \"" << f->get_name() << "\".";
os << "Can't find \"" << node->get_name() << "\" in Function \"" << f->get_name()
<< "\".";
throw runtime_error(os.str());
}
......@@ -95,7 +80,8 @@ multimap<size_t, string> aggregate_timing_details(const vector<PerfShape>& perf_
unordered_map<string, size_t> count;
for (const PerfShape& p : perf_data)
{
string op = p.name().substr(0, p.name().find('_'));
auto node = p.get_node();
string op = node->get_name().substr(0, node->get_name().find('_'));
string shape_name = " {" + join(p.shape) + "} ";
timing[op + shape_name] += p.microseconds();
count[op + shape_name] += 1;
......@@ -114,7 +100,8 @@ multimap<size_t, string> aggregate_timing(const vector<PerfShape>& perf_data)
unordered_map<string, size_t> timing;
for (const PerfShape& p : perf_data)
{
string op = p.name().substr(0, p.name().find('_'));
auto node = p.get_node();
string op = node->get_name().substr(0, node->get_name().find('_'));
timing[op] += p.microseconds();
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment