Commit fc5842d9 authored by Nick Korovaiko's avatar Nick Korovaiko Committed by Robert Kimball

DEX Debugger (#1798)

* gdb-like interface + tests

* fix not being able to run call twice without call

* fix continue bug

* fix enables; rename kontinue to resume

* switch from lists of functors,enables to vector

* address Scott's feedback

* adding a debugger object

* address Jayaram's feedback
parent 8a041166
......@@ -27,6 +27,7 @@ set(SRC
cpu_tensor_view.cpp
cpu_tracing.cpp
cpu_visualize_tree.cpp
cpu_debugger.cpp
builder/add.cpp
builder/allreduce.cpp
builder/avg_pool.cpp
......
......@@ -85,6 +85,24 @@ bool runtime::cpu::CPU_Backend::compile(shared_ptr<Function> func)
return true;
}
std::shared_ptr<ngraph::runtime::cpu::CPU_CallFrame>
    runtime::cpu::CPU_Backend::get_call_frame(std::shared_ptr<Function> func)
{
    // Lazily compile the function the first time its call frame is requested;
    // subsequent calls reuse the cached FunctionInstance.
    FunctionInstance& instance = m_function_map[func];
    if (instance.m_external_function == nullptr && !compile(func))
    {
        throw ngraph_error("couldn't compile a function");
    }
    return instance.m_call_frame;
}
bool runtime::cpu::CPU_Backend::call(shared_ptr<Function> func,
const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
......
......@@ -52,6 +52,7 @@ namespace ngraph
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) override;
void remove_compiled_function(std::shared_ptr<Function> func) override;
std::shared_ptr<CPU_CallFrame> get_call_frame(std::shared_ptr<Function> func);
void enable_performance_data(std::shared_ptr<Function> func, bool enable) override;
std::vector<PerformanceCounter>
......
......@@ -38,15 +38,13 @@ runtime::cpu::CPU_CallFrame::~CPU_CallFrame()
cleanup_runtime_context();
}
void runtime::cpu::CPU_CallFrame::call(
void runtime::cpu::CPU_CallFrame::inner_call(
const std::vector<std::shared_ptr<runtime::Tensor>>& output_tvs,
const std::vector<std::shared_ptr<runtime::Tensor>>& input_tvs)
{
vector<void*> inputs;
vector<void*> outputs;
propagate_layouts(output_tvs, m_external_function->get_result_layout_descriptors());
for (size_t i = 0; i < input_tvs.size(); i++)
{
shared_ptr<runtime::cpu::CPUTensorView> tv =
......@@ -79,6 +77,15 @@ void runtime::cpu::CPU_CallFrame::call(
}
}
// Top-level entry point: resets the program counter and runs the whole
// function. Debugger re-entry goes through inner_call() directly, which
// resumes from the current pc instead.
void runtime::cpu::CPU_CallFrame::call(
const std::vector<std::shared_ptr<runtime::Tensor>>& output_tvs,
const std::vector<std::shared_ptr<runtime::Tensor>>& input_tvs)
{
// Always start a fresh run from the first op.
ctx->pc = 0;
// Push the compiled result layouts onto the output tensors before executing.
propagate_layouts(output_tvs, m_external_function->get_result_layout_descriptors());
inner_call(output_tvs, input_tvs);
}
void runtime::cpu::CPU_CallFrame::propagate_layouts(
const std::vector<std::shared_ptr<runtime::Tensor>>& tvs,
const LayoutDescriptorPtrs& layouts) const
......@@ -103,6 +110,7 @@ void runtime::cpu::CPU_CallFrame::setup_runtime_context()
{
ctx = new CPURuntimeContext;
ctx->pc = 0;
ctx->op_durations = nullptr;
if (runtime::cpu::IsTracingEnabled())
{
......
......@@ -18,6 +18,7 @@
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include "ngraph/function.hpp"
......@@ -33,6 +34,7 @@ namespace ngraph
{
class CPU_CallFrame;
class CPU_ExternalFunction;
class CPU_Debugger;
using EntryPoint_t = void(void** inputs, void** outputs, CPURuntimeContext* ctx);
......@@ -42,6 +44,8 @@ namespace ngraph
class CPU_CallFrame
{
public:
friend class CPU_Debugger;
CPU_CallFrame(std::shared_ptr<CPU_ExternalFunction> external_function,
EntryPoint compiled_function);
~CPU_CallFrame();
......@@ -63,6 +67,9 @@ namespace ngraph
CPU_CallFrame(CPU_CallFrame&&) = delete;
CPU_CallFrame& operator=(const CPU_CallFrame&) = delete;
void inner_call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs);
std::shared_ptr<CPU_ExternalFunction> m_external_function;
EntryPoint m_compiled_function;
CPURuntimeContext* ctx;
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <algorithm>
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_debugger.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/cpu/cpu_tracing.hpp"
using namespace std;
using namespace ngraph;
// Attach the debugger to an existing call frame. The debugger only holds a
// reference; the call frame's lifetime is managed by the caller and must
// outlive this object.
runtime::cpu::CPU_Debugger::CPU_Debugger(ngraph::runtime::cpu::CPU_CallFrame& callframe)
: m_callframe(callframe)
{
}
// No resources are owned directly; the referenced call frame cleans up its
// own runtime context.
runtime::cpu::CPU_Debugger::~CPU_Debugger()
{
}
bool runtime::cpu::CPU_Debugger::step()
{
auto ctx = m_callframe.ctx;
if (ctx->pc >= m_callframe.m_external_function->op_names.size())
{
return false;
}
bool is_set = ctx->breakpoints.count(ctx->pc + 1) != 0;
ctx->breakpoints.insert(ctx->pc + 1);
m_callframe.inner_call(m_outputs, m_inputs);
if (!is_set)
{
ctx->breakpoints.erase(ctx->pc);
}
return true;
}
void runtime::cpu::CPU_Debugger::resume()
{
auto ctx = m_callframe.ctx;
if (ctx->pc >= m_callframe.m_external_function->op_names.size())
{
return;
}
m_callframe.inner_call(m_outputs, m_inputs);
return;
}
// Start (or restart) a debugged run of the function. The tensor lists are
// copied so that later step()/resume() calls can re-enter with the same
// arguments.
void runtime::cpu::CPU_Debugger::call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
                                      const std::vector<std::shared_ptr<runtime::Tensor>>& inputs)
{
    m_outputs = outputs;
    m_inputs = inputs;
    // Reset the program counter so execution begins at the first op.
    m_callframe.ctx->pc = 0;
    m_callframe.inner_call(m_outputs, m_inputs);
}
// Set a breakpoint on the given node. Returns false when the node's name is
// not in the compiled op list (e.g. it was fused away or belongs to another
// function).
bool runtime::cpu::CPU_Debugger::add_breakpoint(std::shared_ptr<Node> op)
{
    auto& names = m_callframe.m_external_function->op_names;
    // Translate the node into its position in the execution order.
    auto where = std::find(names.begin(), names.end(), op->get_name());
    if (where == names.end())
    {
        return false;
    }
    const auto pc = static_cast<size_t>(std::distance(names.begin(), where));
    m_callframe.ctx->breakpoints.insert(pc);
    return true;
}
// Clear a breakpoint previously set on the given node. Returns false when
// the node's name is not in the compiled op list; erasing a pc with no
// breakpoint set is harmless.
bool runtime::cpu::CPU_Debugger::delete_breakpoint(std::shared_ptr<Node> op)
{
    auto& names = m_callframe.m_external_function->op_names;
    // Translate the node into its position in the execution order.
    auto where = std::find(names.begin(), names.end(), op->get_name());
    if (where == names.end())
    {
        return false;
    }
    const auto pc = static_cast<size_t>(std::distance(names.begin(), where));
    m_callframe.ctx->breakpoints.erase(pc);
    return true;
}
// Return a raw pointer to the buffer backing one output of the given node.
// Tensors are registered under the key "<node-name>_<output-index>"; at()
// throws std::out_of_range for an unknown node/output pair.
void* runtime::cpu::CPU_Debugger::inspect(std::shared_ptr<Node> op, size_t output_index)
{
    const std::string key = op->get_name() + "_" + to_string(output_index);
    return m_callframe.m_external_function->tensor_data.at(key);
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include "ngraph/function.hpp"
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/tensor.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
/// \brief gdb-like stepping debugger for DEX-compiled CPU functions.
///
/// Wraps an existing CPU_CallFrame (held by reference — the frame must
/// outlive the debugger) and drives it one op at a time via breakpoints
/// stored in the frame's runtime context.
class CPU_Debugger
{
public:
/// \brief Attach to an existing call frame; does not take ownership.
CPU_Debugger(CPU_CallFrame& callframe);
~CPU_Debugger();
/// \brief Invoke the function with values matching the signature of the function.
///
/// Tuples will be expanded into their tensor views to build the call frame.
void call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs);
/// \brief Execute a single operation
bool step();
/// \brief Continue to execute from the current PC
void resume();
/// \brief Add a breakpoint to a node
bool add_breakpoint(std::shared_ptr<Node> op);
/// \brief Remove a breakpoint from a node
bool delete_breakpoint(std::shared_ptr<Node> op);
/// \brief Return a raw pointer to the buffer holding the given output of a
/// node; throws if the node/output pair is unknown.
void* inspect(std::shared_ptr<Node> op, size_t output_index = 0);
protected:
// Non-copyable and non-movable: the debugger identity is tied to one frame.
CPU_Debugger(const CPU_Debugger&) = delete;
CPU_Debugger(CPU_Debugger&&) = delete;
CPU_Debugger& operator=(const CPU_Debugger&) = delete;
// The frame being debugged (not owned).
CPU_CallFrame& m_callframe;
// Argument tensors captured by call() so step()/resume() can re-enter.
std::vector<std::shared_ptr<runtime::Tensor>> m_inputs;
std::vector<std::shared_ptr<runtime::Tensor>> m_outputs;
};
}
}
}
......@@ -1480,7 +1480,7 @@ void runtime::cpu::CPU_ExternalFunction::build()
}
m_op_attrs.emplace_back(node->description(), out_names, in_names);
op_names.push_back(node->get_name());
handler->second(this, node.get(), in, out);
bool disable_caching = computes_result(node.get()) || possibly_overwritten(node.get());
......@@ -1735,10 +1735,10 @@ void runtime::cpu::CPU_ExternalFunction::build()
}
else
{
for (const auto& p : enables)
for (; ctx->pc < functors.size(); ctx->pc++)
{
auto index = profiler_count++;
if (p(ctx) || ctx->first_iteration)
if ((enables.at(ctx->pc))(ctx) || ctx->first_iteration)
{
// Each Op will have exactly one functor, start the clock before the exceution of functor
// and collect the profiler_count once the execution complets
......@@ -1746,7 +1746,15 @@ void runtime::cpu::CPU_ExternalFunction::build()
{
start_ts = cpu::Clock::now();
}
(*functor)(ctx);
(functors.at(ctx->pc))(ctx);
if (ctx->breakpoints.count(ctx->pc + 1))
{
ctx->pc++;
break;
}
if (runtime::cpu::IsTracingEnabled() || m_emit_timing)
{
end_ts = cpu::Clock::now();
......@@ -1778,11 +1786,9 @@ void runtime::cpu::CPU_ExternalFunction::build()
m_perf_counters[index].m_call_count++;
}
}
std::advance(functor, 1);
}
}
ctx->first_iteration = false;
if (runtime::cpu::IsTracingEnabled())
{
assert(m_op_attrs.size() == profiler_count);
......
......@@ -57,6 +57,7 @@ namespace ngraph
class CPU_ExternalFunction;
class CPU_Emitter;
class CPU_CallFrame;
class CPU_Debugger;
#if !defined(NGRAPH_DEX_ONLY)
......@@ -87,6 +88,8 @@ namespace ngraph
class CPU_ExternalFunction : public std::enable_shared_from_this<CPU_ExternalFunction>
{
friend class CPU_Backend;
friend class CPU_CallFrame;
friend class CPU_Debugger;
public:
enum class CPUTensorRole
......@@ -119,7 +122,7 @@ namespace ngraph
// Temporary Memory Pool alignment
static constexpr size_t s_memory_pool_alignment = 4096;
std::list<std::function<void(CPURuntimeContext*)>>& get_functors()
std::vector<std::function<void(CPURuntimeContext*)>>& get_functors()
{
return functors;
}
......@@ -253,8 +256,9 @@ namespace ngraph
std::string m_function_name;
std::list<std::function<void(CPURuntimeContext*)>> functors;
std::list<std::function<bool(CPURuntimeContext*)>> enables;
std::vector<std::function<void(CPURuntimeContext*)>> functors;
std::vector<std::string> op_names;
std::vector<std::function<bool(CPURuntimeContext*)>> enables;
std::list<std::pair<std::function<bool(CPURuntimeContext*)>, std::string>>
enable_nodename_list;
std::function<void(CPURuntimeContext*, std::vector<void*>&, std::vector<void*>&)>
......
......@@ -18,6 +18,7 @@
#include <chrono>
#include <cstdint>
#include <set>
#define TBB_PREVIEW_GLOBAL_CONTROL 1
#include <tbb/flow_graph.h>
......@@ -59,6 +60,8 @@ namespace ngraph
tbb::flow::graph* G;
tbb::global_control* c;
tbb::task_scheduler_init* init;
std::set<size_t> breakpoints;
size_t pc;
};
}
}
......
......@@ -71,7 +71,7 @@ endif()
if (NGRAPH_CPU_ENABLE)
list(APPEND SRC core_fusion.cpp quantize_cpu.cpp)
list(APPEND SRC backend_performance.cpp cpu_fusion.cpp cpu_test.cpp cpu_reshape_sinking.cpp)
list(APPEND SRC backend_performance.cpp cpu_fusion.cpp cpu_test.cpp cpu_reshape_sinking.cpp cpu_debugger.cpp)
if (NGRAPH_HALIDE)
list(APPEND SRC halide.cpp)
endif()
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <algorithm>
#include <cstdio>
#include <iostream>
#include <list>
#include <memory>
#include "gtest/gtest.h"
#include "ngraph/autodiff/adjoints.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/runtime/cpu/cpu_backend.hpp"
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_debugger.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/cpu/op/sigmoid_mul.hpp"
#include "ngraph/util.hpp"
#include "util/test_tools.hpp"
using namespace ngraph;
using namespace std;
TEST(debugger, add_breakpoint)
{
    // Tiny scalar graph: neg(abs(A + B)) over i32.
    Shape shape{};
    auto A = make_shared<op::Parameter>(element::i32, shape);
    auto B = make_shared<op::Parameter>(element::i32, shape);
    auto add = make_shared<op::Add>(A, B);
    auto absn = make_shared<op::Abs>(add);
    auto neg = make_shared<op::Negative>(absn);
    auto f = make_shared<Function>(neg, op::ParameterVector{A, B});

    auto backend = runtime::Backend::create("CPU");
    auto a = backend->create_tensor(element::i32, shape);
    auto b = backend->create_tensor(element::i32, shape);
    auto result = backend->create_tensor(element::i32, shape);
    copy_data(a, vector<int>{-1});
    copy_data(b, vector<int>{-776});

    auto cf =
        std::dynamic_pointer_cast<ngraph::runtime::cpu::CPU_Backend>(backend)->get_call_frame(f);
    ngraph::runtime::cpu::CPU_Debugger dbg(*cf);

    // Halt just before the Negative op runs: Add and Abs have executed.
    dbg.add_breakpoint(neg);
    dbg.call({result}, {a, b});
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(add)), -777);
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(absn)), 777);

    // One step executes the Negative op.
    dbg.step();
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(neg)), -777);
}
TEST(debugger, stepping)
{
    // Tiny scalar graph: neg(abs(A + B)) over i32.
    Shape shape{};
    auto A = make_shared<op::Parameter>(element::i32, shape);
    auto B = make_shared<op::Parameter>(element::i32, shape);
    auto add = make_shared<op::Add>(A, B);
    auto absn = make_shared<op::Abs>(add);
    auto neg = make_shared<op::Negative>(absn);
    auto f = make_shared<Function>(neg, op::ParameterVector{A, B});

    auto backend = runtime::Backend::create("CPU");
    auto a = backend->create_tensor(element::i32, shape);
    auto b = backend->create_tensor(element::i32, shape);
    auto result = backend->create_tensor(element::i32, shape);
    copy_data(a, vector<int>{-1});
    copy_data(b, vector<int>{-776});

    auto cf =
        std::dynamic_pointer_cast<ngraph::runtime::cpu::CPU_Backend>(backend)->get_call_frame(f);
    ngraph::runtime::cpu::CPU_Debugger dbg(*cf);

    // Break on the very first op, then single-step through the rest,
    // checking each intermediate value as it is produced.
    dbg.add_breakpoint(add);
    dbg.call({result}, {a, b});
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(add)), -777);
    dbg.step();
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(absn)), 777);
    dbg.step();
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(neg)), -777);
}
TEST(debugger, delete_breakpoint)
{
    // Tiny scalar graph: neg(abs(A + B)) over i32.
    Shape shape{};
    auto A = make_shared<op::Parameter>(element::i32, shape);
    auto B = make_shared<op::Parameter>(element::i32, shape);
    auto add = make_shared<op::Add>(A, B);
    auto absn = make_shared<op::Abs>(add);
    auto neg = make_shared<op::Negative>(absn);
    auto f = make_shared<Function>(neg, op::ParameterVector{A, B});

    auto backend = runtime::Backend::create("CPU");
    auto a = backend->create_tensor(element::i32, shape);
    auto b = backend->create_tensor(element::i32, shape);
    auto result = backend->create_tensor(element::i32, shape);
    copy_data(a, vector<int>{-1});
    copy_data(b, vector<int>{-776});

    auto cf =
        std::dynamic_pointer_cast<ngraph::runtime::cpu::CPU_Backend>(backend)->get_call_frame(f);
    ngraph::runtime::cpu::CPU_Debugger dbg(*cf);

    // Set breakpoints on every op, then remove them all; the call should
    // run straight through to completion without stopping.
    dbg.add_breakpoint(add);
    dbg.add_breakpoint(absn);
    dbg.add_breakpoint(neg);
    dbg.delete_breakpoint(add);
    dbg.delete_breakpoint(absn);
    dbg.delete_breakpoint(neg);
    dbg.call({result}, {a, b});
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(add)), -777);
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(absn)), 777);
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(neg)), -777);
}
TEST(debugger, while_stepping)
{
    // Tiny scalar graph: neg(abs(A + B)) over i32.
    Shape shape{};
    auto A = make_shared<op::Parameter>(element::i32, shape);
    auto B = make_shared<op::Parameter>(element::i32, shape);
    auto add = make_shared<op::Add>(A, B);
    auto absn = make_shared<op::Abs>(add);
    auto neg = make_shared<op::Negative>(absn);
    auto f = make_shared<Function>(neg, op::ParameterVector{A, B});

    auto backend = runtime::Backend::create("CPU");
    auto a = backend->create_tensor(element::i32, shape);
    auto b = backend->create_tensor(element::i32, shape);
    auto result = backend->create_tensor(element::i32, shape);
    copy_data(a, vector<int>{-1});
    copy_data(b, vector<int>{-776});

    auto cf =
        std::dynamic_pointer_cast<ngraph::runtime::cpu::CPU_Backend>(backend)->get_call_frame(f);
    ngraph::runtime::cpu::CPU_Debugger dbg(*cf);

    // Run to completion, then keep stepping until step() reports that there
    // is nothing left to execute.
    dbg.call({result}, {a, b});
    dbg.add_breakpoint(add);
    while (dbg.step())
    {
    }
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(add)), -777);
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(absn)), 777);
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(neg)), -777);
}
TEST(debugger, resume)
{
    // Tiny scalar graph: neg(abs(A + B)) over i32.
    Shape shape{};
    auto A = make_shared<op::Parameter>(element::i32, shape);
    auto B = make_shared<op::Parameter>(element::i32, shape);
    auto add = make_shared<op::Add>(A, B);
    auto absn = make_shared<op::Abs>(add);
    auto neg = make_shared<op::Negative>(absn);
    auto f = make_shared<Function>(neg, op::ParameterVector{A, B});

    auto backend = runtime::Backend::create("CPU");
    auto a = backend->create_tensor(element::i32, shape);
    auto b = backend->create_tensor(element::i32, shape);
    auto result = backend->create_tensor(element::i32, shape);
    copy_data(a, vector<int>{-1});
    copy_data(b, vector<int>{-776});

    auto cf =
        std::dynamic_pointer_cast<ngraph::runtime::cpu::CPU_Backend>(backend)->get_call_frame(f);
    ngraph::runtime::cpu::CPU_Debugger dbg(*cf);

    // Stop at Abs, then resume to run the remaining ops to completion.
    dbg.add_breakpoint(absn);
    dbg.call({result}, {a, b});
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(add)), -777);
    dbg.resume();
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(absn)), 777);
    ASSERT_EQ(*static_cast<int*>(dbg.inspect(neg)), -777);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment