Commit 636cce2d authored by Jai Menon, committed by Scott Cyphers

Jmenon/ctx (#450)

* CPU: Runtime Context and Tracing

* CPU: Tracing controls

* CPU: Misc

* CPU: Restrict profiling to outermost function

* CPU: Address review comments

* Formatting fix
parent 60fc850a
@@ -170,6 +170,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/cpu_emitter.cpp
runtime/cpu/cpu_external_function.cpp
runtime/cpu/cpu_tensor_view_wrapper.cpp
runtime/cpu/cpu_tracing.cpp
runtime/cpu/ops/matmul_bias.cpp
runtime/cpu/pass/cpu_fusion.cpp
)
@@ -18,6 +18,7 @@
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
#include "ngraph/runtime/cpu/cpu_tracing.hpp"
#include "ngraph/runtime/host_tensor_view.hpp"
using namespace std;
@@ -28,6 +29,12 @@ runtime::cpu::CPU_CallFrame::CPU_CallFrame(std::shared_ptr<CPU_ExternalFunction>
: m_external_function(external_function)
, m_compiled_function(compiled_function)
{
setup_runtime_context();
}
runtime::cpu::CPU_CallFrame::~CPU_CallFrame()
{
cleanup_runtime_context();
}
void runtime::cpu::CPU_CallFrame::tensor_call(
@@ -50,7 +57,12 @@ void runtime::cpu::CPU_CallFrame::tensor_call(
}
// Invoke compiled computation
m_compiled_function(inputs.data(), outputs.data());
m_compiled_function(inputs.data(), outputs.data(), ctx);
if (runtime::cpu::IsTracingEnabled())
{
GenerateTimeline(m_external_function->get_op_attrs(), ctx->op_durations);
}
}
void runtime::cpu::CPU_CallFrame::call(
@@ -96,3 +108,20 @@ vector<runtime::PerformanceCounter> runtime::cpu::CPU_CallFrame::get_performance
}
return rc;
}
void runtime::cpu::CPU_CallFrame::setup_runtime_context()
{
ctx = new CPURuntimeContext;
ctx->op_durations = nullptr;
if (runtime::cpu::IsTracingEnabled())
{
ctx->op_durations = new int64_t[m_external_function->get_op_attrs().size()];
}
}
void runtime::cpu::CPU_CallFrame::cleanup_runtime_context()
{
delete[] ctx->op_durations;
delete ctx;
}
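Taken together, the call-frame changes above give each CPU_CallFrame its own CPURuntimeContext: the constructor allocates it (with one op_durations slot per profiled op when NGRAPH_CPU_TRACING is set), tensor_call() passes it to the compiled function and renders a timeline afterwards, and the destructor frees it. A minimal sketch of the same flow, with an invented driver function (run_once) standing in for the call frame and using only the headers added in this commit:

    #include <cstdint>
    #include <vector>

    #include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
    #include "ngraph/runtime/cpu/cpu_tracing.hpp"

    using ngraph::runtime::cpu::CPURuntimeContext;
    using ngraph::runtime::cpu::OpAttributes;

    // Sketch only: mirrors setup_runtime_context()/tensor_call()/cleanup_runtime_context().
    void run_once(void (*compiled)(void**, void**, CPURuntimeContext*),
                  void** inputs,
                  void** outputs,
                  const std::vector<OpAttributes>& op_attrs)
    {
        CPURuntimeContext ctx;
        ctx.op_durations = nullptr;
        std::vector<int64_t> durations;
        if (ngraph::runtime::cpu::IsTracingEnabled())
        {
            durations.resize(op_attrs.size(), 0); // one slot per emitted op
            ctx.op_durations = durations.data();
        }

        compiled(inputs, outputs, &ctx); // generated code fills op_durations

        if (ngraph::runtime::cpu::IsTracingEnabled())
        {
            ngraph::runtime::cpu::GenerateTimeline(op_attrs, ctx.op_durations); // writes timeline.json
        }
    }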
@@ -22,6 +22,7 @@
#include "ngraph/function.hpp"
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/tensor_view.hpp"
namespace ngraph
@@ -35,7 +36,7 @@ namespace ngraph
class CPU_CallFrame;
class CPU_ExternalFunction;
using EntryPoint_t = void(void** inputs, void** outputs);
using EntryPoint_t = void(void** inputs, void** outputs, CPURuntimeContext* ctx);
using EntryPoint = std::function<EntryPoint_t>;
@@ -45,6 +46,7 @@ namespace ngraph
public:
CPU_CallFrame(std::shared_ptr<CPU_ExternalFunction> external_function,
EntryPoint compiled_function);
~CPU_CallFrame();
/// @brief Invoke the function with values matching the signature of the function.
///
@@ -61,9 +63,13 @@ namespace ngraph
std::vector<ngraph::runtime::PerformanceCounter>
get_performance_data() const override;
void setup_runtime_context();
void cleanup_runtime_context();
protected:
std::shared_ptr<CPU_ExternalFunction> m_external_function;
EntryPoint m_compiled_function;
CPURuntimeContext* ctx;
};
}
}
@@ -1014,7 +1014,7 @@ void runtime::cpu::CPU_Emitter::EmitFunctionCall(
writer << "\n};\n";
writer << "\n";
writer << function->get_name() << "(args, out);\n";
writer << function->get_name() << "(args, out, ctx);\n";
}
writer.indent--;
writer << "}\n";
@@ -1093,13 +1093,13 @@ void runtime::cpu::CPU_Emitter::EmitReduce(codegen::CodeWriter& writer,
writer << "{ // " << n->get_name() << " 3\n";
writer.indent++;
string type = f_result_element_type.c_type_string();
writer << "auto f = [](" << type << " x, " << type << " y) -> " << type << "\n{";
writer << "auto f = [&](" << type << " x, " << type << " y) -> " << type << "\n{";
writer.indent++;
writer << "\n";
writer << type << " result;\n";
writer << "void* args[] = {&x, &y};\n";
writer << "void* out[] = {&result};\n";
writer << reduction_function->get_name() << "(args, out);\n";
writer << reduction_function->get_name() << "(args, out, ctx);\n";
writer << "return result;\n";
writer.indent--;
writer << "};\n";
@@ -1129,13 +1129,13 @@ void runtime::cpu::CPU_Emitter::EmitReduce(codegen::CodeWriter& writer,
writer << "{ // " << n->get_name() << " 5\n";
writer.indent++;
string type = f_result_element_type.c_type_string();
writer << "auto f = [](" << type << " x, " << type << " y) -> " << type << "\n{";
writer << "auto f = [&](" << type << " x, " << type << " y) -> " << type << "\n{";
writer.indent++;
writer << "\n";
writer << type << " result;\n";
writer << "void* args[] = {&x, &y};\n";
writer << "void* out[] = {&result};\n";
writer << reduction_function->get_name() << "(args, out);\n";
writer << reduction_function->get_name() << "(args, out, ctx);\n";
writer << "return result;\n";
writer.indent--;
writer << "};\n";
@@ -1161,13 +1161,13 @@ void runtime::cpu::CPU_Emitter::EmitReduce(codegen::CodeWriter& writer,
writer << "{ // " << n->get_name() << " 7\n";
writer.indent++;
string type = f_result_element_type.c_type_string();
writer << "auto f = [](" << type << " x, " << type << " y) -> " << type << "\n{";
writer << "auto f = [&](" << type << " x, " << type << " y) -> " << type << "\n{";
writer.indent++;
writer << "\n";
writer << type << " result;\n";
writer << "void* args[] = {&x, &y};\n";
writer << "void* out[] = {&result};\n";
writer << reduction_function->get_name() << "(args, out);\n";
writer << reduction_function->get_name() << "(args, out, ctx);\n";
writer << "return result;\n";
writer.indent--;
writer << "};\n";
@@ -1183,13 +1183,13 @@ void runtime::cpu::CPU_Emitter::EmitReduce(codegen::CodeWriter& writer,
writer.indent++;
string type = f_result_element_type.c_type_string();
writer << "auto f = [](" << type << " x, " << type << " y) -> " << type << "\n{";
writer << "auto f = [&](" << type << " x, " << type << " y) -> " << type << "\n{";
writer.indent++;
writer << "\n";
writer << type << " result;\n";
writer << "void* args[] = {&x, &y};\n";
writer << "void* out[] = {&result};\n";
writer << reduction_function->get_name() << "(args, out);\n";
writer << reduction_function->get_name() << "(args, out, ctx);\n";
writer << "return result;\n";
writer.indent--;
writer << "};\n";
@@ -1211,13 +1211,13 @@ void runtime::cpu::CPU_Emitter::EmitReduce(codegen::CodeWriter& writer,
string type = f_result_element_type.c_type_string();
writer << "auto f = [](" << type << " x, " << type << " y) -> " << type << "\n{";
writer << "auto f = [&](" << type << " x, " << type << " y) -> " << type << "\n{";
writer.indent++;
writer << "\n";
writer << type << " result;\n";
writer << "void* args[] = {&x, &y};\n";
writer << "void* out[] = {&result};\n";
writer << reduction_function->get_name() << "(args, out);\n";
writer << reduction_function->get_name() << "(args, out, ctx);\n";
writer << "return result;\n";
writer.indent--;
writer << "};\n";
@@ -2194,13 +2194,13 @@ void runtime::cpu::CPU_Emitter::EmitReduceWindow(
writer.indent++;
string type = f_result_element_type.c_type_string();
writer << "auto f = [](" << type << " x, " << type << " y) -> " << type << "\n{";
writer << "auto f = [&](" << type << " x, " << type << " y) -> " << type << "\n{";
writer.indent++;
writer << "\n";
writer << type << " result;\n";
writer << "void* args[] = {&x, &y};\n";
writer << "void* out[] = {&result};\n";
writer << reduction_function->get_name() << "(args, out);\n";
writer << reduction_function->get_name() << "(args, out, ctx);\n";
writer << "return result;\n";
writer.indent--;
writer << "};\n";
@@ -2238,24 +2238,24 @@ void runtime::cpu::CPU_Emitter::EmitSelectAndScatter(
string type = n->get_output_element_type(0).c_type_string();
writer << "auto f_select = [](" << type << " x, " << type << " y) -> char\n{";
writer << "auto f_select = [&](" << type << " x, " << type << " y) -> char\n{";
writer.indent++;
writer << "\n";
writer << "char result;\n";
writer << "void* args[] = {&x, &y};\n";
writer << "void* out[] = {&result};\n";
writer << selection_function->get_name() << "(args, out);\n";
writer << selection_function->get_name() << "(args, out, ctx);\n";
writer << "return result;\n";
writer.indent--;
writer << "};\n";
writer << "auto f_scatter = [](" << type << " x, " << type << " y) -> " << type << "\n{";
writer << "auto f_scatter = [&](" << type << " x, " << type << " y) -> " << type << "\n{";
writer.indent++;
writer << "\n";
writer << type << " result;\n";
writer << "void* args[] = {&x, &y};\n";
writer << "void* out[] = {&result};\n";
writer << scatter_function->get_name() << "(args, out);\n";
writer << scatter_function->get_name() << "(args, out, ctx);\n";
writer << "return result;\n";
writer.indent--;
writer << "};\n";
@@ -94,6 +94,7 @@
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_emitter.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
#include "ngraph/runtime/cpu/cpu_tracing.hpp"
#include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/runtime/host_tensor_view.hpp"
@@ -261,6 +262,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/kernel/avg_pool.hpp"
#include "ngraph/runtime/kernel/broadcast.hpp"
#include "ngraph/runtime/kernel/concat.hpp"
@@ -398,7 +400,8 @@ using namespace ngraph::runtime;
writer << "// Declare all functions\n";
for (shared_ptr<Function> f : pass_manager.get_state().get_functions())
{
writer << "extern \"C\" void " << f->get_name() << "(void** inputs, void** outputs);\n";
writer << "extern \"C\" void " << f->get_name()
<< "(void** inputs, void** outputs, cpu::CPURuntimeContext* ctx);\n";
}
writer << "\n";
@@ -477,7 +480,7 @@ using namespace ngraph::runtime;
}
writer << "extern \"C\" void " << current_function->get_name();
writer << "(void** inputs, void** outputs)\n";
writer << "(void** inputs, void** outputs, cpu::CPURuntimeContext* ctx)\n";
writer << "{\n";
writer.indent++;
@@ -487,6 +490,13 @@ using namespace ngraph::runtime;
writer << "tbb::flow::graph G;\n\n";
}
// Execution tracing support
if (runtime::cpu::IsTracingEnabled() && current_function->get_name() == function_name)
{
writer << "cpu::Timestamp start_ts;\n"
<< "int profiler_count = 0;\n\n";
}
bool temporaries_used = false;
size_t worst_case_tmp_size = 0;
for (shared_ptr<Node> node : current_function->get_ordered_ops())
@@ -610,12 +620,14 @@ using namespace ngraph::runtime;
throw ngraph_error("Unhandled op during code generation : " + node->description());
}
vector<TensorViewWrapper> in;
vector<string> node_input_names, node_output_names;
for (const descriptor::Input& input : node->get_inputs())
{
const descriptor::Output& output = input.get_output();
shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
in.push_back(
TensorViewWrapper(tv, m_variable_name_map[tv->get_tensor().get_name()]));
node_input_names.emplace_back(tv->get_tensor().get_name());
}
vector<TensorViewWrapper> out;
for (const descriptor::Output& output : node->get_outputs())
@@ -623,11 +635,17 @@
shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
out.push_back(
TensorViewWrapper(tv, m_variable_name_map[tv->get_tensor().get_name()]));
node_output_names.emplace_back(tv->get_tensor().get_name());
}
// Emit operation prologue
if (!node->is_parameter() && !node->is_constant())
{
if (current_function->get_name() == function_name)
{
m_op_attrs.emplace_back(
node->description(), node_output_names, node_input_names);
}
if (m_use_tbb)
{
writer << "tbb::flow::continue_node<tbb::flow::continue_msg> "
@@ -640,6 +658,11 @@
{
emit_debug_function_entry(writer, node.get(), in, out);
}
if (runtime::cpu::IsTracingEnabled() &&
current_function->get_name() == function_name)
{
writer << "start_ts = cpu::Clock::now();\n";
}
}
// Emit operation body
@@ -664,7 +687,7 @@
{
names.push_back(tv.get_name());
}
writer << func_name << "(" << join(names) << ");\n";
writer << func_name << "(" << join(names) << ", ctx);\n";
}
// Emit operation epilogue
@@ -675,6 +698,13 @@
{
emit_debug_function_exit(writer, node.get(), in, out);
}
if (runtime::cpu::IsTracingEnabled() &&
current_function->get_name() == function_name)
{
writer << "ctx->op_durations[profiler_count++] = "
<< "(std::chrono::duration_cast<cpu::Timescale>(cpu::Clock::now() - "
"start_ts)).count();\n";
}
if (m_use_tbb)
{
writer.indent--;
@@ -867,6 +897,7 @@ string runtime::cpu::CPU_ExternalFunction::emit_op_as_function(const Node& node,
writer << tvw.get_type() << "* " << tvw.get_name();
out.push_back(tvw);
}
writer << ",\ncpu::CPURuntimeContext* ctx";
writer.indent--;
writer << "\n)\n";
writer << "{\n";
@@ -18,9 +18,11 @@
#include <functional>
#include <memory>
#include <string>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
#include <vector>
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/codegen/compiler.hpp"
@@ -47,6 +49,21 @@ namespace ngraph
using OpMap = std::unordered_map<std::type_index, OpFunction>;
struct OpAttributes
{
std::string Description;
std::vector<std::string> Outputs;
std::vector<std::string> Inputs;
OpAttributes(const std::string& desc,
const std::vector<std::string>& outputs,
const std::vector<std::string>& inputs)
: Description(desc)
, Outputs(outputs)
, Inputs(inputs)
{
}
};
class CPU_ExternalFunction : public ngraph::runtime::ExternalFunction,
public std::enable_shared_from_this<CPU_ExternalFunction>
{
@@ -57,6 +74,7 @@ namespace ngraph
bool release_function = true);
std::shared_ptr<ngraph::runtime::CallFrame> make_call_frame();
const std::vector<OpAttributes>& get_op_attrs() const { return m_op_attrs; }
protected:
void compile();
@@ -88,6 +106,7 @@ namespace ngraph
bool m_emit_timing;
bool m_use_tbb;
std::unordered_map<std::string, std::string> m_variable_name_map;
std::vector<OpAttributes> m_op_attrs;
};
}
}
// ----------------------------------------------------------------------------
// Copyright 2018 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#pragma once
#include <chrono>
#include <cstdint>
namespace ngraph
{
namespace runtime
{
namespace cpu
{
typedef std::chrono::high_resolution_clock Clock;
typedef std::chrono::time_point<Clock> Timestamp;
typedef std::chrono::microseconds Timescale;
extern "C" {
struct CPURuntimeContext
{
int64_t* op_durations;
};
}
}
}
}
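CPURuntimeContext is declared inside an extern "C" block because it crosses the boundary between the precompiled runtime and the JIT-compiled function bodies, so both sides must agree on a plain C-compatible layout; the Clock/Timestamp/Timescale typedefs mean the recorded durations are microsecond counts. Not part of the commit, but one way to guard that layout assumption would be a compile-time check along these lines:

    // Hedged sketch: a sanity check one could place next to the struct.
    #include <type_traits>
    #include "ngraph/runtime/cpu/cpu_runtime_context.hpp"

    static_assert(std::is_standard_layout<ngraph::runtime::cpu::CPURuntimeContext>::value,
                  "CPURuntimeContext is shared with generated code and must keep a C-compatible layout");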
// ----------------------------------------------------------------------------
// Copyright 2018 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#include <fstream>
#include <map>
#include "cpu_tracing.hpp"
void ngraph::runtime::cpu::to_json(nlohmann::json& json, const TraceEvent& event)
{
std::map<std::string, std::string> args;
for (size_t i = 0; i < event.Inputs.size(); i++)
{
args["Input" + std::to_string(i + 1)] = event.Inputs[i];
}
for (size_t i = 0; i < event.Outputs.size(); i++)
{
args["Output" + std::to_string(i + 1)] = event.Outputs[i];
}
json = nlohmann::json{{"ph", event.Phase},
{"cat", event.Category},
{"name", event.Name},
{"pid", event.PID},
{"tid", event.TID},
{"ts", event.Timestamp},
{"dur", event.Duration},
{"args", args}};
}
void ngraph::runtime::cpu::GenerateTimeline(const std::vector<OpAttributes>& op_attrs,
int64_t* op_durations)
{
nlohmann::json timeline;
std::list<TraceEvent> trace;
std::ofstream out("timeline.json");
int64_t ts = 0;
for (size_t i = 0; i < op_attrs.size(); i++)
{
trace.emplace_back("X",
"Op",
op_attrs[i].Description,
0,
0,
ts,
op_durations[i],
op_attrs[i].Outputs,
op_attrs[i].Inputs);
ts += op_durations[i];
}
timeline["traceEvents"] = trace;
out << timeline;
out.close();
return;
}
bool ngraph::runtime::cpu::IsTracingEnabled()
{
return (std::getenv("NGRAPH_CPU_TRACING") != nullptr);
}
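The ph/cat/name/pid/tid/ts/dur/args fields written by to_json() appear to follow the Chrome trace event format, so the timeline.json produced here should load directly in chrome://tracing or a compatible viewer. Since ngraph/json.hpp already provides nlohmann::json, the file can also be read back programmatically; a small self-contained sketch (only the "timeline.json" name comes from GenerateTimeline above, the rest is assumed):

    #include <cstdint>
    #include <fstream>
    #include <iostream>
    #include <string>

    #include "ngraph/json.hpp"

    int main()
    {
        std::ifstream in("timeline.json");
        nlohmann::json timeline;
        in >> timeline; // parse the file GenerateTimeline wrote

        for (const auto& ev : timeline["traceEvents"])
        {
            // "dur" is in microseconds because cpu::Timescale is std::chrono::microseconds.
            std::cout << ev.at("name").get<std::string>() << ": "
                      << ev.at("dur").get<int64_t>() << " us\n";
        }
        return 0;
    }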
// ----------------------------------------------------------------------------
// Copyright 2018 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#pragma once
#include <cstdint>
#include <list>
#include <string>
#include <vector>
#include "ngraph/json.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
struct TraceEvent
{
// This should be a single character
// but the JSON encoder nlohmann::json
// is broken and doesn't handle character fields
std::string Phase;
std::string Category;
const std::string& Name;
unsigned int PID;
unsigned int TID;
int64_t Timestamp;
int64_t Duration;
const std::vector<std::string>& Outputs;
const std::vector<std::string>& Inputs;
TraceEvent(const std::string& ph,
const std::string& cat,
const std::string& name,
unsigned int pid,
unsigned int tid,
int64_t ts,
int64_t dur,
const std::vector<std::string>& outputs,
const std::vector<std::string>& inputs)
: Phase(ph)
, Category(cat)
, Name(name)
, PID(pid)
, TID(tid)
, Timestamp(ts)
, Duration(dur)
, Outputs(outputs)
, Inputs(inputs)
{
}
};
void to_json(nlohmann::json& json, const TraceEvent& event);
void GenerateTimeline(const std::vector<OpAttributes>& op_attrs, int64_t* op_durations);
bool IsTracingEnabled();
}
}
}
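Two details of TraceEvent are worth noting. First, Name, Outputs, and Inputs are reference members: an event borrows the strings owned by the external function's OpAttributes rather than copying them, so it must not outlive them. Second, because to_json() is declared in the same namespace as TraceEvent, nlohmann::json finds it via argument-dependent lookup, which is what lets GenerateTimeline assign a std::list<TraceEvent> straight into the json object. A small usage sketch with invented names and values:

    #include <string>
    #include <vector>

    #include "ngraph/json.hpp"
    #include "ngraph/runtime/cpu/cpu_tracing.hpp"

    void example()
    {
        // Named lvalues: the event stores references to these, not copies.
        std::string name = "Add_3";              // invented op name
        std::vector<std::string> outs = {"t2"};  // invented tensor names
        std::vector<std::string> ins = {"t0", "t1"};

        ngraph::runtime::cpu::TraceEvent ev(
            "X", "Op", name, /*pid*/ 0, /*tid*/ 0, /*ts*/ 0, /*dur*/ 42, outs, ins);

        nlohmann::json j = ev; // finds ngraph::runtime::cpu::to_json via ADL
    }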