Commit e9162eb5 authored by Diego Caballero, committed by Sang Ik Lee

[Standalone] Introduce CPURuntimeContextCG for standalone codegen generation. (#2421)

* [CPUCodegen] Remove unnecessary forward declaration.

* [CPUCodegen] Introduce CPURuntimeContextCG for standalone codegen generation.

This patch introduces CPURuntimeContextCG. This class is aimed at
removing the dependency between nGraph and the generated code in
codegen mode. It will be used to hold the runtime context in
codegen mode and it will be emitted in the generated code. For now,
CPURuntimeContextCG only contains TBB's graph and global context.
Follow-up patches will migrate more members in CPURuntimeContext to
CPURuntimeContextCG for codegen mode.

Testing results:
  - Before: NGRAPH_CODEGEN=1 test/unit-test
    [----------] Global test environment tear-down
    [==========] 2503 tests from 54 test cases ran. (290406 ms total)
    [  PASSED  ] 2490 tests.

  - After: NGRAPH_CODEGEN=1 test/unit-test
    [----------] Global test environment tear-down
    [==========] 2503 tests from 54 test cases ran. (412616 ms total)
    [  PASSED  ] 2490 tests.

* [CPUCodegen] Refactor function parameters string

* Fix bug in CPU_CallFrame destructor impacting DEX

* [Standalone] Replace assert with NGRAPH_ASSERT
parent a444f7a9
...@@ -30,16 +30,30 @@ using namespace std; ...@@ -30,16 +30,30 @@ using namespace std;
using namespace ngraph; using namespace ngraph;
runtime::cpu::CPU_CallFrame::CPU_CallFrame(std::shared_ptr<CPU_ExternalFunction> external_function, runtime::cpu::CPU_CallFrame::CPU_CallFrame(std::shared_ptr<CPU_ExternalFunction> external_function,
InitContextFuncCG compiled_init_ctx_func,
DestroyContextFuncCG compiled_destroy_ctx_func,
EntryPoint compiled_function) EntryPoint compiled_function)
: m_external_function(external_function) : m_external_function(external_function)
, m_compiled_init_ctx_func(compiled_init_ctx_func)
, m_compiled_destroy_ctx_func(compiled_destroy_ctx_func)
, m_compiled_function(compiled_function) , m_compiled_function(compiled_function)
{ {
setup_runtime_context(); setup_runtime_context();
if (!m_external_function->is_direct_execution())
{
// Invoke codegen runtime context initialization function.
NGRAPH_ASSERT(m_compiled_init_ctx_func) << "compiled_init_ctx_func cannot be null.";
cg_ctx = m_compiled_init_ctx_func();
}
} }
runtime::cpu::CPU_CallFrame::~CPU_CallFrame() runtime::cpu::CPU_CallFrame::~CPU_CallFrame()
{ {
cleanup_runtime_context(); if (!m_external_function->is_direct_execution())
{
NGRAPH_ASSERT(m_compiled_destroy_ctx_func) << "compiled_destroy_ctx_func cannot be null.";
m_compiled_destroy_ctx_func(cg_ctx);
}
} }
void runtime::cpu::CPU_CallFrame::inner_call( void runtime::cpu::CPU_CallFrame::inner_call(
...@@ -66,7 +80,7 @@ void runtime::cpu::CPU_CallFrame::inner_call( ...@@ -66,7 +80,7 @@ void runtime::cpu::CPU_CallFrame::inner_call(
// Invoke compiled computation // Invoke compiled computation
if (!m_external_function->is_direct_execution()) if (!m_external_function->is_direct_execution())
{ {
m_compiled_function(inputs.data(), outputs.data(), ctx); m_compiled_function(inputs.data(), outputs.data(), ctx, cg_ctx);
} }
else else
{ {
...@@ -136,8 +150,10 @@ void runtime::cpu::CPU_CallFrame::setup_runtime_context() ...@@ -136,8 +150,10 @@ void runtime::cpu::CPU_CallFrame::setup_runtime_context()
ctx->mkldnn_workspaces = mkldnn_emitter->get_mkldnn_workspaces().data(); ctx->mkldnn_workspaces = mkldnn_emitter->get_mkldnn_workspaces().data();
ctx->states = m_external_function->m_states.data(); ctx->states = m_external_function->m_states.data();
if (std::getenv("NGRAPH_CPU_USE_TBB") != nullptr) if (m_external_function->is_direct_execution() && std::getenv("NGRAPH_CPU_USE_TBB") != nullptr)
{ {
// For codegen mode, graph and global control are now part of the code generated
// CPURuntimeContextCG class.
ctx->G = new tbb::flow::graph; ctx->G = new tbb::flow::graph;
const auto envParallelism = std::getenv("NGRAPH_INTER_OP_PARALLELISM"); const auto envParallelism = std::getenv("NGRAPH_INTER_OP_PARALLELISM");
const auto parallelism = envParallelism == nullptr ? 1 : std::atoi(envParallelism); const auto parallelism = envParallelism == nullptr ? 1 : std::atoi(envParallelism);
...@@ -161,8 +177,11 @@ void runtime::cpu::CPU_CallFrame::cleanup_runtime_context() ...@@ -161,8 +177,11 @@ void runtime::cpu::CPU_CallFrame::cleanup_runtime_context()
{ {
delete buffer; delete buffer;
} }
if (std::getenv("NGRAPH_CPU_USE_TBB") != nullptr) if (m_external_function->is_direct_execution() && std::getenv("NGRAPH_CPU_USE_TBB") != nullptr)
{ {
// For codegen mode, graph and global control are now part of the code generated
// CPURuntimeContextCG class.
// delete graph G and nodes in G // delete graph G and nodes in G
ctx->G->wait_for_all(); ctx->G->wait_for_all();
std::vector<tbb::flow::graph_node*> to_be_deleted; std::vector<tbb::flow::graph_node*> to_be_deleted;
......
...@@ -26,19 +26,27 @@ ...@@ -26,19 +26,27 @@
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp" #include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/tensor.hpp" #include "ngraph/runtime/tensor.hpp"
class CPURuntimeContextCG;
namespace ngraph namespace ngraph
{ {
namespace runtime namespace runtime
{ {
namespace cpu namespace cpu
{ {
class CPU_CallFrame;
class CPU_ExternalFunction; class CPU_ExternalFunction;
class CPU_Debugger; class CPU_Debugger;
using EntryPoint_t = void(void** inputs, void** outputs, CPURuntimeContext* ctx); using InitContextFuncTy = CPURuntimeContextCG*();
using DestroyContextFuncTy = void(CPURuntimeContextCG*);
using EntryPointTy = void(void** inputs,
void** outputs,
CPURuntimeContext* ctx,
CPURuntimeContextCG* cg_ctx);
using EntryPoint = std::function<EntryPoint_t>; using InitContextFuncCG = std::function<InitContextFuncTy>;
using DestroyContextFuncCG = std::function<DestroyContextFuncTy>;
using EntryPoint = std::function<EntryPointTy>;
// Compile and execute graphs // Compile and execute graphs
class CPU_CallFrame class CPU_CallFrame
...@@ -47,6 +55,8 @@ namespace ngraph ...@@ -47,6 +55,8 @@ namespace ngraph
friend class CPU_Debugger; friend class CPU_Debugger;
CPU_CallFrame(std::shared_ptr<CPU_ExternalFunction> external_function, CPU_CallFrame(std::shared_ptr<CPU_ExternalFunction> external_function,
InitContextFuncCG compiled_init_ctx_func,
DestroyContextFuncCG compiled_destroy_ctx_func,
EntryPoint compiled_function); EntryPoint compiled_function);
~CPU_CallFrame(); ~CPU_CallFrame();
...@@ -60,6 +70,7 @@ namespace ngraph ...@@ -60,6 +70,7 @@ namespace ngraph
const LayoutDescriptorPtrs& layouts) const; const LayoutDescriptorPtrs& layouts) const;
void setup_runtime_context(); void setup_runtime_context();
void setup_cg_runtime_context();
void cleanup_runtime_context(); void cleanup_runtime_context();
protected: protected:
...@@ -71,8 +82,21 @@ namespace ngraph ...@@ -71,8 +82,21 @@ namespace ngraph
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs); const std::vector<std::shared_ptr<runtime::Tensor>>& inputs);
std::shared_ptr<CPU_ExternalFunction> m_external_function; std::shared_ptr<CPU_ExternalFunction> m_external_function;
CPURuntimeContext* ctx = nullptr;
/* Codegen specific */
/// Function that initializes the context used in codegen mode.
InitContextFuncCG m_compiled_init_ctx_func;
/// Function that destroys the context used in codegen mode.
DestroyContextFuncCG m_compiled_destroy_ctx_func;
EntryPoint m_compiled_function; EntryPoint m_compiled_function;
CPURuntimeContext* ctx;
/// Execution context used in codegen mode.
CPURuntimeContextCG* cg_ctx = nullptr;
}; };
} }
} }
......
...@@ -428,6 +428,19 @@ static void ...@@ -428,6 +428,19 @@ static void
writer << "}\n"; writer << "}\n";
} }
/// Writes the forward declarations needed by the generated source to \p writer:
/// a banner comment followed by a forward declaration of CPURuntimeContextCG.
static void generate_class_declarations(codegen::CodeWriter& writer)
{
    // Both pieces are streamed in one chained statement; the emitted text is unchanged.
    writer << "// Declare all classes\n"
           << "struct CPURuntimeContextCG;\n";
}
// Streams the contents of the pre-generated runtime-context header into `writer`,
// followed by a newline.
// The #include sits where an operand of `<<` is expected, so the included file has to
// expand to a single expression (a string literal) rather than to declarations.
// A preprocessor directive must occupy its own line, which is why the statement is
// split across three lines.
static void generate_runtime_context_class(codegen::CodeWriter& writer)
{
writer <<
#include "ngraph/runtime/cpu/pregenerated_src/cpu_cg_runtime_context.hpp"
<< "\n";
}
void runtime::cpu::CPU_ExternalFunction::compile() void runtime::cpu::CPU_ExternalFunction::compile()
{ {
if (m_is_compiled) if (m_is_compiled)
...@@ -618,14 +631,20 @@ using namespace ngraph::runtime; ...@@ -618,14 +631,20 @@ using namespace ngraph::runtime;
} }
} }
generate_class_declarations(writer);
const char* func_params =
"(void** inputs, void** outputs, cpu::CPURuntimeContext* ctx, CPURuntimeContextCG* cg_ctx)";
writer << "// Declare all functions\n"; writer << "// Declare all functions\n";
for (shared_ptr<Function> f : pass_manager.get_state().get_functions()) for (shared_ptr<Function> f : pass_manager.get_state().get_functions())
{ {
writer << "extern \"C\" void " << f->get_name() writer << "extern \"C\" void " << f->get_name() << func_params << ";\n";
<< "(void** inputs, void** outputs, cpu::CPURuntimeContext* ctx);\n";
} }
writer << "\n"; writer << "\n";
generate_runtime_context_class(writer);
writer << common_function_string << "\n"; writer << common_function_string << "\n";
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions()) for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
...@@ -687,8 +706,7 @@ using namespace ngraph::runtime; ...@@ -687,8 +706,7 @@ using namespace ngraph::runtime;
writer << "bool " << current_function->get_name() << "_t_en[" << tensor_index << "];\n"; writer << "bool " << current_function->get_name() << "_t_en[" << tensor_index << "];\n";
writer << "extern \"C\" void " << current_function->get_name(); writer << "extern \"C\" void " << current_function->get_name() << func_params << "\n";
writer << "(void** inputs, void** outputs, cpu::CPURuntimeContext* ctx)\n";
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
...@@ -716,7 +734,7 @@ using namespace ngraph::runtime; ...@@ -716,7 +734,7 @@ using namespace ngraph::runtime;
writer << "tbb::flow::continue_node<tbb::flow::continue_msg>* " writer << "tbb::flow::continue_node<tbb::flow::continue_msg>* "
"flowgraph_node_start" "flowgraph_node_start"
<< " = new tbb::flow::continue_node<tbb::flow::continue_msg> " << " = new tbb::flow::continue_node<tbb::flow::continue_msg> "
"(*(ctx->G), [&](const tbb::flow::continue_msg &msg)\n{});\n"; "(*(cg_ctx->tbb_graph), [&](const tbb::flow::continue_msg &msg)\n{});\n";
} }
for (shared_ptr<Node> node : ordered_ops) for (shared_ptr<Node> node : ordered_ops)
...@@ -850,7 +868,7 @@ using namespace ngraph::runtime; ...@@ -850,7 +868,7 @@ using namespace ngraph::runtime;
"flowgraph_node_" "flowgraph_node_"
<< node->get_name() << node->get_name()
<< " = new tbb::flow::continue_node<tbb::flow::continue_msg> " << " = new tbb::flow::continue_node<tbb::flow::continue_msg> "
"(*(ctx->G), [&](const tbb::flow::continue_msg &msg)\n{\n"; "(*(cg_ctx->tbb_graph), [&](const tbb::flow::continue_msg &msg)\n{\n";
writer.indent++; writer.indent++;
} }
if (runtime::cpu::IsTracingEnabled() && if (runtime::cpu::IsTracingEnabled() &&
...@@ -924,7 +942,7 @@ using namespace ngraph::runtime; ...@@ -924,7 +942,7 @@ using namespace ngraph::runtime;
{ {
names.push_back(tv.get_name()); names.push_back(tv.get_name());
} }
writer << func_name << "(" << join(names) << ", ctx);\n"; writer << func_name << "(" << join(names) << ", ctx, cg_ctx);\n";
} }
// skip multi-output nodes since they would be covered by GetOutputElement // skip multi-output nodes since they would be covered by GetOutputElement
...@@ -1012,9 +1030,9 @@ using namespace ngraph::runtime; ...@@ -1012,9 +1030,9 @@ using namespace ngraph::runtime;
// Execute the flow graph // Execute the flow graph
writer << "(static_cast<tbb::flow::continue_node<tbb::flow::continue_msg>*>" writer << "(static_cast<tbb::flow::continue_node<tbb::flow::continue_msg>*>"
"(&(*(ctx->G->begin()))))" "(&(*(cg_ctx->tbb_graph->begin()))))"
<< "->try_put(tbb::flow::continue_msg());\n"; << "->try_put(tbb::flow::continue_msg());\n";
writer << "try { ctx->G->wait_for_all(); } catch(...) { throw; }\n"; writer << "try { cg_ctx->tbb_graph->wait_for_all(); } catch(...) { throw; }\n";
} }
writer << "ctx->first_iteration = false;\n"; writer << "ctx->first_iteration = false;\n";
...@@ -1041,7 +1059,23 @@ using namespace ngraph::runtime; ...@@ -1041,7 +1059,23 @@ using namespace ngraph::runtime;
} }
m_execution_engine->add_module(codegen_module); m_execution_engine->add_module(codegen_module);
m_execution_engine->finalize(); m_execution_engine->finalize();
m_compiled_function = m_execution_engine->find_function<EntryPoint_t>(m_function_name);
m_compiled_init_ctx_func = m_execution_engine->find_function<InitContextFuncTy>("init_cg_ctx");
if (m_compiled_init_ctx_func == nullptr)
{
throw runtime_error("could not find compiled init context function");
}
m_compiled_destroy_ctx_func =
m_execution_engine->find_function<DestroyContextFuncTy>("destroy_cg_ctx");
if (m_compiled_destroy_ctx_func == nullptr)
{
throw runtime_error("could not find compiled destroy context function");
}
m_compiled_function = m_execution_engine->find_function<EntryPointTy>(m_function_name);
if (m_compiled_function == nullptr) if (m_compiled_function == nullptr)
{ {
...@@ -2094,6 +2128,8 @@ shared_ptr<ngraph::runtime::cpu::CPU_CallFrame> ...@@ -2094,6 +2128,8 @@ shared_ptr<ngraph::runtime::cpu::CPU_CallFrame>
} }
return make_shared<ngraph::runtime::cpu::CPU_CallFrame>(shared_from_this(), return make_shared<ngraph::runtime::cpu::CPU_CallFrame>(shared_from_this(),
m_compiled_init_ctx_func,
m_compiled_destroy_ctx_func,
m_compiled_function); m_compiled_function);
} }
...@@ -2238,7 +2274,7 @@ string runtime::cpu::CPU_ExternalFunction::emit_op_as_function(const Node& node, ...@@ -2238,7 +2274,7 @@ string runtime::cpu::CPU_ExternalFunction::emit_op_as_function(const Node& node,
writer << tvw.get_type() << "* " << tvw.get_name(); writer << tvw.get_type() << "* " << tvw.get_name();
out.push_back(tvw); out.push_back(tvw);
} }
writer << ",\ncpu::CPURuntimeContext* ctx"; writer << ",\ncpu::CPURuntimeContext* ctx, CPURuntimeContextCG* cg_ctx";
writer.indent--; writer.indent--;
writer << "\n)\n"; writer << "\n)\n";
writer << "{\n"; writer << "{\n";
......
...@@ -258,6 +258,13 @@ namespace ngraph ...@@ -258,6 +258,13 @@ namespace ngraph
bool m_is_compiled; bool m_is_compiled;
#endif #endif
bool m_direct_execution; bool m_direct_execution;
/// Function that initializes the context used in codegen mode.
InitContextFuncCG m_compiled_init_ctx_func;
/// Function that destroys the context used in codegen mode.
DestroyContextFuncCG m_compiled_destroy_ctx_func;
EntryPoint m_compiled_function; EntryPoint m_compiled_function;
std::unordered_map<std::string, std::string> m_variable_name_map; std::unordered_map<std::string, std::string> m_variable_name_map;
std::unordered_map<std::string, std::pair<std::size_t, std::size_t>> std::unordered_map<std::string, std::pair<std::size_t, std::size_t>>
......
//*****************************************************************************
// Copyright 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
/// \file
/// This file contains the pre-generated source code for CPURuntimeContextCG. This class is used
/// to hold runtime information of the execution of kernels in codegen mode.
///
#pragma once
R"(
// Runtime context used by generated code in codegen mode.
// Owns the TBB flow graph and the TBB global control object. Both are created in the
// constructor only when the NGRAPH_CPU_USE_TBB environment variable is set; otherwise
// both pointers stay null.
struct CPURuntimeContextCG
{
    std::unique_ptr<tbb::flow::graph> tbb_graph;
    std::unique_ptr<tbb::global_control> tbb_gcontrol;

    CPURuntimeContextCG() { init_tbb(); }
    ~CPURuntimeContextCG() { cleanup_tbb(); }

private:
    // Creates the flow graph and limits TBB parallelism to the value of
    // NGRAPH_INTER_OP_PARALLELISM (1 when that variable is not set).
    inline void init_tbb()
    {
        if (std::getenv("NGRAPH_CPU_USE_TBB"))
        {
            tbb_graph.reset(new tbb::flow::graph);

            const char* env_parallelism = std::getenv("NGRAPH_INTER_OP_PARALLELISM");
            const int parallelism = env_parallelism == nullptr ? 1 : std::atoi(env_parallelism);
            tbb_gcontrol.reset(
                new tbb::global_control(tbb::global_control::max_allowed_parallelism, parallelism));
        }
    }

    // Waits for the graph to drain and deletes every node registered in it.
    inline void cleanup_tbb()
    {
        // Decide on the graph pointer itself instead of reading the environment again:
        // the variable may have changed since construction, which would either
        // dereference a null pointer here or skip the node cleanup.
        if (tbb_graph)
        {
            tbb_graph->wait_for_all();

            // Collect the nodes first so nothing is deleted while the graph is iterated.
            std::vector<tbb::flow::graph_node*> to_be_deleted;
            for (auto it = tbb_graph->begin(); it != tbb_graph->end(); ++it)
            {
                to_be_deleted.push_back(&*it);
            }
            for (auto* node : to_be_deleted)
            {
                delete node;
            }
        }
    }
};
// Allocates a new CPURuntimeContextCG and returns it as a raw pointer.
// The matching release function is destroy_cg_ctx.
extern "C" CPURuntimeContextCG* init_cg_ctx()
{
    CPURuntimeContextCG* const fresh_ctx = new CPURuntimeContextCG();
    return fresh_ctx;
}
// Destroys a context previously returned by init_cg_ctx. Passing null is a no-op.
extern "C" void destroy_cg_ctx(CPURuntimeContextCG* cg_ctx)
{
    // Adopt the pointer; the context is deleted when the owner goes out of scope.
    const std::unique_ptr<CPURuntimeContextCG> scoped_owner(cg_ctx);
}
)"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment