Commit 2c7cacd2 authored by Jayaram Bobba, committed by Scott Cyphers

Add support for perf counter collection in DEX (#1831)

* Add support for perf counter collection in DEX

* Addressed PR feedback
parent a20c710b
......@@ -78,9 +78,7 @@ bool runtime::cpu::CPU_Backend::compile(shared_ptr<Function> func)
if (instance.m_external_function == nullptr)
{
instance.m_external_function = make_shared<CPU_ExternalFunction>(func);
#if !defined(NGRAPH_DEX_ONLY)
instance.m_external_function->m_emit_timing = instance.m_performance_counters_enabled;
#endif
auto cf = instance.m_external_function->make_call_frame();
instance.m_call_frame = dynamic_pointer_cast<CPU_CallFrame>(cf);
}
......@@ -109,8 +107,6 @@ void runtime::cpu::CPU_Backend::remove_compiled_function(shared_ptr<Function> fu
m_function_map.erase(func);
}
#if !defined(NGRAPH_DEX_ONLY)
void runtime::cpu::CPU_Backend::enable_performance_data(shared_ptr<Function> func, bool enable)
{
FunctionInstance& instance = m_function_map[func];
......@@ -131,28 +127,10 @@ vector<runtime::PerformanceCounter>
const FunctionInstance& instance = it->second;
if (instance.m_external_function != nullptr)
{
auto* engine = instance.m_external_function->m_execution_engine.get();
if (engine)
{
auto get_count = engine->find_function<size_t()>("get_debug_timer_count");
auto get_name = engine->find_function<const char*(size_t)>("get_debug_timer_name");
auto get_microseconds =
engine->find_function<size_t(size_t)>("get_debug_timer_microseconds");
auto get_call_count =
engine->find_function<size_t(size_t)>("get_debug_timer_call_count");
if (get_count && get_name && get_microseconds && get_call_count)
{
size_t count = get_count();
for (size_t i = 0; i < count; i++)
{
rc.push_back({get_name(i), get_microseconds(i), get_call_count(i)});
}
}
}
rc.insert(rc.end(),
instance.m_external_function->get_perf_counters().begin(),
instance.m_external_function->get_perf_counters().end());
}
}
return rc;
}
#endif
......@@ -53,11 +53,9 @@ namespace ngraph
void remove_compiled_function(std::shared_ptr<Function> func) override;
#if !defined(NGRAPH_DEX_ONLY)
void enable_performance_data(std::shared_ptr<Function> func, bool enable) override;
// Returns, by value, the performance counters gathered for a function.
// The visible implementation appends the range returned by the function
// instance's m_external_function->get_perf_counters() when that external
// function is non-null, and otherwise returns whatever the result vector
// already holds.
// NOTE(review): the lookup of `func` in the function map is not visible in
// this hunk — confirm behavior for a function that was never compiled.
std::vector<PerformanceCounter>
get_performance_data(std::shared_ptr<Function> func) const override;
#endif
private:
class FunctionInstance
......
......@@ -45,6 +45,7 @@
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_emitter.hpp"
#include "ngraph/runtime/performance_counter.hpp"
namespace ngraph
{
......@@ -138,6 +139,8 @@ namespace ngraph
const std::string& directory,
const std::string& filename);
// Returns a read-only reference to a vector of PerformanceCounter entries.
// CPU_Backend::get_performance_data calls this for both begin() and end()
// and appends the resulting range to its own result vector.
// NOTE(review): presumably backed by the m_perf_counters member — confirm
// against the definition in the .cpp file.
const std::vector<PerformanceCounter>& get_perf_counters();
#if defined(NGRAPH_HALIDE)
std::unordered_map<std::string, Halide::Func>& get_halide_functions()
{
......@@ -182,7 +185,7 @@ namespace ngraph
std::string output_name,
bool dex);
bool computes_result(Node* node);
// Drops this object's shared ownership of m_function, leaving it null.
void release_function()
{
    m_function.reset();
}
#if !defined(NGRAPH_DEX_ONLY)
void emit_debug_function_entry(codegen::CodeWriter& writer,
Node* node,
......@@ -204,22 +207,9 @@ namespace ngraph
std::string emit_op_as_function(const Node&, const std::string& function_name);
std::string strip_comments(const std::string&);
#endif
// Drops this object's shared ownership of m_function, leaving it null.
void release_function()
{
    m_function.reset();
}
std::shared_ptr<ngraph::Function> m_function;
bool m_release_function;
bool m_use_tbb;
EntryPoint m_compiled_function;
std::unordered_map<std::string, std::string> m_variable_name_map;
#if !defined(NGRAPH_DEX_ONLY)
bool m_is_compiled;
std::unique_ptr<codegen::Compiler> m_compiler;
std::unique_ptr<codegen::ExecutionEngine> m_execution_engine;
bool m_emit_timing;
std::map<std::string, size_t> m_name_index_map;
......@@ -227,8 +217,17 @@ namespace ngraph
// Constant ops we need to keep a list of shared_ptr to each Constant
// so they don't get freed before we are done with them
std::vector<std::shared_ptr<Node>> m_active_constants;
#endif
std::shared_ptr<ngraph::Function> m_function;
bool m_release_function;
bool m_emit_timing;
bool m_use_tbb;
EntryPoint m_compiled_function;
std::unordered_map<std::string, std::string> m_variable_name_map;
std::unordered_map<std::string, CPUTensorRole> m_tensor_roles;
LayoutDescriptorPtrs parameter_layout_descriptors;
......@@ -257,6 +256,7 @@ namespace ngraph
std::unordered_map<std::string, std::shared_ptr<CPU_ExternalFunction>> callees;
bool m_is_built;
bool m_direct_execution;
std::vector<runtime::PerformanceCounter> m_perf_counters;
#if defined(NGRAPH_HALIDE)
std::unordered_map<std::string, Halide::Func> halide_functions;
......
......@@ -34,9 +34,11 @@ namespace ngraph
}
// Returns the counter's name (m_name) by const reference.
const std::string& name() const
{
    return m_name;
}
// Returns the accumulated time stored in m_total_microseconds.
size_t total_microseconds() const
{
    return m_total_microseconds;
}
// Returns the time per call: m_total_microseconds divided by m_call_count.
// A counter that has never been called reports 0 instead of performing an
// integer division by zero, which is undefined behavior.
size_t microseconds() const
{
    if (m_call_count == 0)
    {
        return 0;
    }
    return m_total_microseconds / m_call_count;
}
// Returns the number of calls stored in m_call_count.
size_t call_count() const
{
    return m_call_count;
}
private:
std::string m_name;
size_t m_total_microseconds;
size_t m_call_count;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment