Commit 71cc8bbf authored by Nick Korovaiko, committed by Scott Cyphers

Tracing for CPU (#1956)

* tracing

* count tracepoint

* address Scott's feedback

* merge

* fix an unused var warning
parent ff98d02a
...@@ -19,12 +19,26 @@ ...@@ -19,12 +19,26 @@
#include "ngraph/runtime/aligned_buffer.hpp" #include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_debugger.hpp" #include "ngraph/runtime/cpu/cpu_debugger.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp" #include "ngraph/runtime/cpu/cpu_external_function.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp" #include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/cpu/cpu_tracing.hpp" #include "ngraph/runtime/cpu/cpu_tracing.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
/// \brief Runs the wrapped callback on every m_count-th invocation.
///
/// A count of zero disables the tracepoint: the callback is never run and the
/// invocation counter is left untouched.
/// \param outputs Passed through unchanged to the wrapped callback
/// \param name Passed through unchanged to the wrapped callback
void runtime::cpu::CPU_CountTracepoint::operator()(void** outputs, const std::string& name)
{
    if (m_count != 0)
    {
        m_iteration += 1;
        if (m_iteration >= m_count)
        {
            m_callback(outputs, name);
            // Restart the count towards the next firing.
            m_iteration = 0;
        }
    }
}
runtime::cpu::CPU_Debugger::CPU_Debugger(ngraph::runtime::cpu::CPU_CallFrame& callframe) runtime::cpu::CPU_Debugger::CPU_Debugger(ngraph::runtime::cpu::CPU_CallFrame& callframe)
: m_callframe(callframe) : m_callframe(callframe)
{ {
...@@ -73,16 +87,28 @@ void runtime::cpu::CPU_Debugger::call(const std::vector<std::shared_ptr<runtime: ...@@ -73,16 +87,28 @@ void runtime::cpu::CPU_Debugger::call(const std::vector<std::shared_ptr<runtime:
m_callframe.inner_call(m_outputs, m_inputs); m_callframe.inner_call(m_outputs, m_inputs);
} }
bool runtime::cpu::CPU_Debugger::add_breakpoint(std::shared_ptr<Node> op) std::tuple<bool, size_t> runtime::cpu::CPU_Debugger::find_pc_for_node(std::shared_ptr<Node> op)
{ {
auto external_function = m_callframe.m_external_function; auto external_function = m_callframe.m_external_function;
auto ctx = m_callframe.ctx;
auto i_pos = std::find( auto i_pos = std::find(
external_function->op_names.begin(), external_function->op_names.end(), op->get_name()); external_function->op_names.begin(), external_function->op_names.end(), op->get_name());
if (i_pos != external_function->op_names.end()) if (i_pos != external_function->op_names.end())
{ {
auto pc = static_cast<size_t>(std::distance(external_function->op_names.begin(), i_pos)); auto pc = static_cast<size_t>(std::distance(external_function->op_names.begin(), i_pos));
ctx->breakpoints.insert(pc); return std::tuple<bool, size_t>{true, pc};
}
return std::tuple<bool, size_t>{false, 0};
}
bool runtime::cpu::CPU_Debugger::add_breakpoint(std::shared_ptr<Node> op)
{
bool found;
size_t pc;
std::tie(found, pc) = find_pc_for_node(op);
if (found)
{
m_callframe.ctx->breakpoints.insert(pc);
return true; return true;
} }
return false; return false;
...@@ -90,14 +116,12 @@ bool runtime::cpu::CPU_Debugger::add_breakpoint(std::shared_ptr<Node> op) ...@@ -90,14 +116,12 @@ bool runtime::cpu::CPU_Debugger::add_breakpoint(std::shared_ptr<Node> op)
bool runtime::cpu::CPU_Debugger::delete_breakpoint(std::shared_ptr<Node> op) bool runtime::cpu::CPU_Debugger::delete_breakpoint(std::shared_ptr<Node> op)
{ {
auto external_function = m_callframe.m_external_function; bool found;
auto ctx = m_callframe.ctx; size_t pc;
auto i_pos = std::find( std::tie(found, pc) = find_pc_for_node(op);
external_function->op_names.begin(), external_function->op_names.end(), op->get_name()); if (found)
if (i_pos != external_function->op_names.end())
{ {
auto pc = static_cast<size_t>(std::distance(external_function->op_names.begin(), i_pos)); m_callframe.ctx->breakpoints.erase(pc);
ctx->breakpoints.erase(pc);
return true; return true;
} }
return false; return false;
...@@ -108,3 +132,61 @@ void* runtime::cpu::CPU_Debugger::inspect(std::shared_ptr<Node> op, size_t outpu ...@@ -108,3 +132,61 @@ void* runtime::cpu::CPU_Debugger::inspect(std::shared_ptr<Node> op, size_t outpu
return m_callframe.m_external_function->tensor_data.at(op->get_name() + "_" + return m_callframe.m_external_function->tensor_data.at(op->get_name() + "_" +
to_string(output_index)); to_string(output_index));
} }
bool runtime::cpu::CPU_Debugger::add_tracepoint(
std::shared_ptr<Node> op, const std::function<void(void**, const std::string&)>& callback)
{
auto external_function = m_callframe.m_external_function;
bool found;
size_t pc;
std::tie(found, pc) = find_pc_for_node(op);
if (found)
{
if (replaced_functors.count(pc) != 0)
{
return false;
}
auto op_name = op->get_name();
std::vector<void**> poutputs;
for (size_t i = 0; i < op->get_outputs().size(); i++)
{
poutputs.push_back(&external_function->get_tensor_data(op_name + "_" + to_string(i)));
}
auto original_functor = external_function->functors.at(pc);
auto trace_functor = [poutputs, callback, original_functor, op_name](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
original_functor(ctx, ectx);
std::vector<void*> outputs;
for (auto pout : poutputs)
{
outputs.push_back(*pout);
}
callback(outputs.data(), op_name);
};
replaced_functors[pc] = original_functor;
external_function->functors.at(pc) = trace_functor;
return true;
}
return false;
}
/// \brief Remove a tracepoint from a node, putting back the functor it replaced.
///
/// \param op Node whose tracepoint should be removed
/// \return true if a tracepoint was installed on the node and has been removed;
///         false if find_pc_for_node does not find the node or the node has no
///         tracepoint installed
bool runtime::cpu::CPU_Debugger::delete_tracepoint(std::shared_ptr<Node> op)
{
    bool found;
    size_t pc;
    std::tie(found, pc) = find_pc_for_node(op);
    if (!found)
    {
        return false;
    }

    auto replaced = replaced_functors.find(pc);
    if (replaced == replaced_functors.end())
    {
        // Nothing was traced at this position, so there is nothing to restore.
        return false;
    }

    m_callframe.m_external_function->functors.at(pc) = replaced->second;
    // Drop the saved functor: add_tracepoint refuses any position that still
    // has an entry here, so leaving it behind would block re-tracing this node.
    replaced_functors.erase(replaced);
    return true;
}
...@@ -17,8 +17,11 @@ ...@@ -17,8 +17,11 @@
#pragma once #pragma once
#include <functional> #include <functional>
#include <map>
#include <memory> #include <memory>
#include <string> #include <string>
#include <string>
#include <tuple>
#include <vector> #include <vector>
#include "ngraph/function.hpp" #include "ngraph/function.hpp"
...@@ -33,6 +36,26 @@ namespace ngraph ...@@ -33,6 +36,26 @@ namespace ngraph
{ {
namespace cpu namespace cpu
{ {
/// \brief A convenience class that wraps user's callback to run it every *count* iterations
class CPU_CountTracepoint
{
public:
    /// \param callback User callback to forward to; a copy is stored
    /// \param count Number of invocations between two runs of \p callback
    CPU_CountTracepoint(const std::function<void(void**, const std::string&)>& callback,
                        size_t count)
        : m_callback(callback)
        , m_count(count)
    {
    }

    /// \brief Invocation entry point; has the same signature as the wrapped callback
    void operator()(void** outputs, const std::string& name);

private:
    std::function<void(void**, const std::string&)> m_callback;
    size_t m_count;
    // Invocation counter, starts at zero
    size_t m_iteration = 0;
};
class CPU_Debugger class CPU_Debugger
{ {
public: public:
...@@ -56,13 +79,22 @@ namespace ngraph ...@@ -56,13 +79,22 @@ namespace ngraph
/// \brief Remove a breakpoint from a node /// \brief Remove a breakpoint from a node
bool delete_breakpoint(std::shared_ptr<Node> op); bool delete_breakpoint(std::shared_ptr<Node> op);
/// \brief Add a tracepoint to a node
bool
add_tracepoint(std::shared_ptr<Node> op,
const std::function<void(void**, const std::string&)>& callback);
/// \brief Remove a tracepoint from a node
bool delete_tracepoint(std::shared_ptr<Node> op);
void* inspect(std::shared_ptr<Node> op, size_t output_index = 0); void* inspect(std::shared_ptr<Node> op, size_t output_index = 0);
protected: protected:
// Returns a tuple with the following items <found, pc>
std::tuple<bool, size_t> find_pc_for_node(std::shared_ptr<Node> op);
CPU_Debugger(const CPU_Debugger&) = delete; CPU_Debugger(const CPU_Debugger&) = delete;
CPU_Debugger(CPU_Debugger&&) = delete; CPU_Debugger(CPU_Debugger&&) = delete;
CPU_Debugger& operator=(const CPU_Debugger&) = delete; CPU_Debugger& operator=(const CPU_Debugger&) = delete;
std::map<size_t, CPUKernelFunctor> replaced_functors;
CPU_CallFrame& m_callframe; CPU_CallFrame& m_callframe;
std::vector<std::shared_ptr<runtime::Tensor>> m_inputs; std::vector<std::shared_ptr<runtime::Tensor>> m_inputs;
std::vector<std::shared_ptr<runtime::Tensor>> m_outputs; std::vector<std::shared_ptr<runtime::Tensor>> m_outputs;
......
...@@ -224,3 +224,127 @@ TEST(debugger, resume) ...@@ -224,3 +224,127 @@ TEST(debugger, resume)
ASSERT_EQ(*static_cast<int*>(dbg.inspect(absn)), 777); ASSERT_EQ(*static_cast<int*>(dbg.inspect(absn)), 777);
ASSERT_EQ(*static_cast<int*>(dbg.inspect(neg)), -777); ASSERT_EQ(*static_cast<int*>(dbg.inspect(neg)), -777);
} }
// A tracepoint on a node must fire once per call while it is installed and
// must stop firing after it has been deleted.
TEST(tracer, basic)
{
    Shape shape{};
    auto A = make_shared<op::Parameter>(element::i32, shape);
    auto B = make_shared<op::Parameter>(element::i32, shape);

    auto add = make_shared<op::Add>(A, B);
    auto absn = make_shared<op::Abs>(add);
    auto neg = make_shared<op::Negative>(absn);

    auto f = make_shared<Function>(neg, op::ParameterVector{A, B});

    shared_ptr<runtime::Backend> backend = runtime::Backend::create("CPU");

    shared_ptr<runtime::Tensor> a = backend->create_tensor(element::i32, shape);
    shared_ptr<runtime::Tensor> b = backend->create_tensor(element::i32, shape);
    shared_ptr<runtime::Tensor> result = backend->create_tensor(element::i32, shape);

    vector<int> dataA{-1};
    vector<int> dataB{-776};
    copy_data(a, dataA);
    copy_data(b, dataB);

    auto cf =
        std::dynamic_pointer_cast<ngraph::runtime::cpu::CPU_Backend>(backend)->get_call_frame(f);

    ngraph::runtime::cpu::CPU_Debugger dbg(*cf);

    int good_or_bad_value = -777;
    // Counts tracer invocations so the test cannot pass without the tracer running.
    int tracer_calls = 0;
    auto add_tracer = [&good_or_bad_value, &tracer_calls](void** values,
                                                          const std::string& /*name*/) {
        ++tracer_calls;
        ASSERT_EQ(static_cast<int*>(values[0])[0], good_or_bad_value);
    };

    ASSERT_TRUE(dbg.add_tracepoint(add, add_tracer));
    dbg.call({result}, {a, b});
    ASSERT_EQ(tracer_calls, 1);

    ASSERT_TRUE(dbg.delete_tracepoint(add));
    // If the tracer still ran it would now see -777 and fail against 777.
    good_or_bad_value = 777;
    dbg.call({result}, {a, b});
    ASSERT_EQ(tracer_calls, 1);
}
// A CPU_CountTracepoint configured with count == num_iterations must forward to
// the user's callback exactly once, on the last of num_iterations calls.
TEST(tracer, count_tracepoint)
{
    Shape shape{};
    auto A = make_shared<op::Parameter>(element::i32, shape);
    auto B = make_shared<op::Parameter>(element::i32, shape);

    auto add = make_shared<op::Add>(A, B);

    auto f = make_shared<Function>(add, op::ParameterVector{A, B});

    shared_ptr<runtime::Backend> backend = runtime::Backend::create("CPU");

    shared_ptr<runtime::Tensor> a = backend->create_tensor(element::i32, shape);
    shared_ptr<runtime::Tensor> b = backend->create_tensor(element::i32, shape);
    shared_ptr<runtime::Tensor> result = backend->create_tensor(element::i32, shape);

    auto cf =
        std::dynamic_pointer_cast<ngraph::runtime::cpu::CPU_Backend>(backend)->get_call_frame(f);

    ngraph::runtime::cpu::CPU_Debugger dbg(*cf);

    const size_t num_iterations = 10;
    const size_t offset = 5;

    // Sum seen on the final iteration: offset + (num_iterations - 1).
    const int expected_value = static_cast<int>(num_iterations - 1 + offset);
    int callback_calls = 0;

    std::function<void(void**, const std::string&)> callback =
        [expected_value, &callback_calls](void** values, const std::string& /*name*/) {
            ++callback_calls;
            ASSERT_EQ(static_cast<int*>(values[0])[0], expected_value);
        };

    ngraph::runtime::cpu::CPU_CountTracepoint count_tracepoint(callback, num_iterations);
    // Install once: a second add_tracepoint on the same node is rejected anyway.
    ASSERT_TRUE(dbg.add_tracepoint(add, count_tracepoint));

    for (size_t i = 0; i < num_iterations; i++)
    {
        vector<int> dataA{static_cast<int>(offset)};
        vector<int> dataB{static_cast<int>(i)};
        copy_data(a, dataA);
        copy_data(b, dataB);
        dbg.call({result}, {a, b});
    }

    // Without this check the test would pass even if the callback never ran.
    ASSERT_EQ(callback_calls, 1);
}
// A user-written tracer that keeps its own countdown must be able to inspect
// the node's output on the last of num_iterations calls.
TEST(tracer, conditional_tracepoint)
{
    Shape shape{};
    auto A = make_shared<op::Parameter>(element::i32, shape);
    auto B = make_shared<op::Parameter>(element::i32, shape);

    auto add = make_shared<op::Add>(A, B);

    auto f = make_shared<Function>(add, op::ParameterVector{A, B});

    shared_ptr<runtime::Backend> backend = runtime::Backend::create("CPU");

    shared_ptr<runtime::Tensor> a = backend->create_tensor(element::i32, shape);
    shared_ptr<runtime::Tensor> b = backend->create_tensor(element::i32, shape);
    shared_ptr<runtime::Tensor> result = backend->create_tensor(element::i32, shape);

    auto cf =
        std::dynamic_pointer_cast<ngraph::runtime::cpu::CPU_Backend>(backend)->get_call_frame(f);

    ngraph::runtime::cpu::CPU_Debugger dbg(*cf);

    const size_t num_iterations = 10;
    const size_t offset = 5;

    // Sum seen on the final iteration: offset + (num_iterations - 1).
    const int expected_value = static_cast<int>(num_iterations - 1 + offset);
    int countdown = static_cast<int>(num_iterations);
    bool value_checked = false;

    auto add_tracer = [expected_value, &countdown, &value_checked](void** values,
                                                                   const std::string& /*name*/) {
        // Pre-decrement so the countdown reaches zero on the num_iterations-th
        // call; a post-decrement compares 10..1 and never reaches the check.
        if (--countdown == 0)
        {
            value_checked = true;
            ASSERT_EQ(static_cast<int*>(values[0])[0], expected_value);
        }
    };

    // Install once: a second add_tracepoint on the same node is rejected anyway.
    ASSERT_TRUE(dbg.add_tracepoint(add, add_tracer));

    for (size_t i = 0; i < num_iterations; i++)
    {
        vector<int> dataA{static_cast<int>(offset)};
        vector<int> dataB{static_cast<int>(i)};
        copy_data(a, dataA);
        copy_data(b, dataB);
        dbg.call({result}, {a, b});
    }

    // Guard against the conditional branch silently never being taken.
    ASSERT_TRUE(value_checked);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment