Commit 669532d8 authored by Sang Ik Lee, committed by Scott Cyphers

Allow building nGraph CPU without TBB - r0.25.1 (#3652)

* Allow TBB to be excluded from CPU build.

* Change some CMake scope.

* m_use_tbb can be removed if TBB is not enabled.

* Fix some rebase errors

* Some files do not get the NGRAPH_TBB_ENABLE definition.

* CPU codegen: Use different pregenerated code if TBB is not built.

* Fix codegen issues.

* Fix bug.

* Use NGRAPH_TBB_ENABLE only for CPU.
parent 0f8ea0eb
......@@ -189,6 +189,9 @@ option(NGRAPH_STATIC_LIB_ENABLE "Enable build nGraph static library" FALSE)
option(NGRAPH_INTERPRETER_STATIC_LIB_ENABLE "Enable build INTERPRETER backend static library" FALSE)
option(NGRAPH_CPU_STATIC_LIB_ENABLE "Enable build CPU backend static library" FALSE)
option(NGRAPH_DYNAMIC_COMPONENTS_ENABLE "Enable dynamic loading of components" TRUE)
# The TBB switch is only declared when the CPU backend is being built; it defaults to TRUE.
if (NGRAPH_CPU_ENABLE)
option(NGRAPH_TBB_ENABLE "Control usage of TBB for CPU backend" TRUE)
endif()
if (NGRAPH_CPU_ENABLE
AND
......@@ -268,6 +271,9 @@ NORMALIZE_BOOL(NGRAPH_STATIC_LIB_ENABLE)
NORMALIZE_BOOL(NGRAPH_INTERPRETER_STATIC_LIB_ENABLE)
NORMALIZE_BOOL(NGRAPH_CPU_STATIC_LIB_ENABLE)
NORMALIZE_BOOL(NGRAPH_DYNAMIC_COMPONENTS_ENABLE)
# NGRAPH_TBB_ENABLE is only passed through NORMALIZE_BOOL for CPU builds.
# NOTE(review): NORMALIZE_BOOL is defined elsewhere; presumably it canonicalizes the value - confirm.
if (NGRAPH_CPU_ENABLE)
NORMALIZE_BOOL(NGRAPH_TBB_ENABLE)
endif()
# Turn off GPU build in 0.25.1
set(NGRAPH_GPU_ENABLE FALSE)
......@@ -298,6 +304,9 @@ message(STATUS "NGRAPH_STATIC_LIB_ENABLE: ${NGRAPH_STATIC_LIB_ENABLE
message(STATUS "NGRAPH_INTERPRETER_STATIC_LIB_ENABLE: ${NGRAPH_INTERPRETER_STATIC_LIB_ENABLE}")
message(STATUS "NGRAPH_CPU_STATIC_LIB_ENABLE: ${NGRAPH_CPU_STATIC_LIB_ENABLE}")
message(STATUS "NGRAPH_DYNAMIC_COMPONENTS_ENABLE: ${NGRAPH_DYNAMIC_COMPONENTS_ENABLE}")
# Report the TBB setting in the configure output only when the CPU backend is enabled.
if (NGRAPH_CPU_ENABLE)
message(STATUS "NGRAPH_TBB_ENABLE: ${NGRAPH_TBB_ENABLE}")
endif()
#-----------------------------------------------------------------------------------------------
# Installation logic...
......@@ -433,10 +442,6 @@ if (NGRAPH_DISTRIBUTED_ENABLE)
endif()
endif()
if (NOT DEFINED NGRAPH_TBB_ENABLE)
set(NGRAPH_TBB_ENABLE ${NGRAPH_CPU_ENABLE})
endif()
# Since UNIX and APPLE support Bash we can use a Bash script to do the clang-format functions
# This is much faster than the cmake method
if (UNIX OR APPLE)
......
......@@ -60,6 +60,7 @@ endif()
if(NGRAPH_TBB_ENABLE)
    # Tell the codegen compiler where the TBB headers live.
    get_target_property(TBB_INCLUDE_DIR libtbb INTERFACE_INCLUDE_DIRECTORIES)
    list(APPEND HEADER_SEARCH_DEFINES TBB_HEADERS_PATH="${TBB_INCLUDE_DIR}")
    # Add the flag to HEADER_SEARCH_DEFINES instead of setting COMPILE_DEFINITIONS here:
    # set_source_files_properties() replaces the property value, so the later call that
    # assigns "${HEADER_SEARCH_DEFINES}" to compiler.cpp would discard a definition set
    # at this point and compiler.cpp would never see NGRAPH_TBB_ENABLE.
    list(APPEND HEADER_SEARCH_DEFINES NGRAPH_TBB_ENABLE)
endif()
set_source_files_properties(compiler.cpp PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
......
......@@ -190,6 +190,11 @@ void codegen::CompilerCore::initialize()
// Prevent Eigen from using any LGPL3 code
args.push_back("-DEIGEN_MPL2_ONLY");
#if defined(NGRAPH_TBB_ENABLE)
// Enable TBB
args.push_back("-DNGRAPH_TBB_ENABLE");
#endif
// Prepare DiagnosticEngine
IntrusiveRefCntPtr<DiagnosticOptions> diag_options = new DiagnosticOptions();
diag_options->ErrorLimit = 20;
......
......@@ -178,8 +178,7 @@ if (NGRAPH_CPU_ENABLE)
target_compile_definitions(cpu_backend PRIVATE "NGRAPH_ENABLE_CPU_CONV_AUTO")
endif()
if(NGRAPH_TBB_ENABLE)
set_source_files_properties(cpu_external_function.cpp
PROPERTIES COMPILE_DEFINITIONS "NGRAPH_TBB_ENABLE")
target_compile_definitions(cpu_backend PRIVATE "NGRAPH_TBB_ENABLE")
endif()
if (NGRAPH_HALIDE)
target_compile_definitions(cpu_backend PRIVATE "NGRAPH_HALIDE")
......@@ -210,14 +209,16 @@ if (NGRAPH_CPU_ENABLE)
target_compile_definitions(cpu_backend PRIVATE CPU_BACKEND_DLL_EXPORTS)
add_dependencies(cpu_backend libmkldnn ext_eigen)
target_link_libraries(cpu_backend PUBLIC ngraph libmkldnn libmkl libeigen libtbb)
target_link_libraries(cpu_backend PUBLIC ngraph libmkldnn libmkl libeigen)
if (NGRAPH_JSON_ENABLE)
target_link_libraries(cpu_backend PUBLIC libjson)
endif()
# libtbb is linked into cpu_backend only when TBB support is switched on.
if (NGRAPH_TBB_ENABLE)
target_link_libraries(cpu_backend PUBLIC libtbb)
endif()
if (NOT NGRAPH_DEX_ONLY)
target_link_libraries(cpu_backend PUBLIC codegen)
endif()
target_include_directories(cpu_backend SYSTEM PUBLIC libmkldnn)
if (NOT APPLE AND NOT MSVS)
# CPU backend uses third-party libraries like Eigen that might be linked in and
......
......@@ -14,7 +14,9 @@
// limitations under the License.
//*****************************************************************************
#if defined(NGRAPH_TBB_ENABLE)
#include <tbb/tbb_stddef.h>
#endif
#include "cpu_backend_visibility.h"
#include "ngraph/graph_util.hpp"
......@@ -40,8 +42,10 @@ runtime::BackendConstructor* runtime::cpu::get_backend_constructor_pointer()
public:
// Creates a new CPU_Backend instance. The configuration string parameter is
// unnamed and therefore unused.
std::shared_ptr<runtime::Backend> create(const std::string& /* config */) override
{
#if defined(NGRAPH_TBB_ENABLE)
// Force TBB to link to the backend
// (the call's result is discarded; it only exists to reference a TBB symbol).
tbb::TBB_runtime_interface_version();
#endif
return make_shared<runtime::cpu::CPU_Backend>();
}
};
......
......@@ -221,6 +221,7 @@ void runtime::cpu::CPU_CallFrame::setup_runtime_context(Allocator* allocator)
ctx->states = m_external_function->m_states.data();
#if defined(NGRAPH_TBB_ENABLE)
if (m_external_function->is_direct_execution() &&
std::getenv("NGRAPH_CPU_USE_TBB") != nullptr)
{
......@@ -232,6 +233,7 @@ void runtime::cpu::CPU_CallFrame::setup_runtime_context(Allocator* allocator)
ctx->c =
new tbb::global_control(tbb::global_control::max_allowed_parallelism, parallelism);
}
#endif
}
m_num_ctx_available = m_num_ctx;
}
......@@ -253,6 +255,7 @@ void runtime::cpu::CPU_CallFrame::cleanup_runtime_context()
{
delete buffer;
}
#if defined(NGRAPH_TBB_ENABLE)
if (m_external_function->is_direct_execution() &&
std::getenv("NGRAPH_CPU_USE_TBB") != nullptr)
{
......@@ -273,6 +276,7 @@ void runtime::cpu::CPU_CallFrame::cleanup_runtime_context()
}
delete ctx->c;
}
#endif
delete ctx;
}
m_num_ctx_available = 0;
......
......@@ -109,10 +109,13 @@ namespace ngraph
m_thread_pool_devices.push_back(
std::unique_ptr<Eigen::ThreadPoolDevice>(new Eigen::ThreadPoolDevice(
m_thread_pools[i].get(), num_threads_per_pool)));
#if defined(NGRAPH_TBB_ENABLE)
m_tbb_arenas.emplace_back(1);
#endif
}
}
#if defined(NGRAPH_TBB_ENABLE)
void CPUExecutor::execute(CPUKernelFunctor& f,
CPURuntimeContext* ctx,
CPUExecutionContext* ectx,
......@@ -128,6 +131,14 @@ namespace ngraph
f(ctx, ectx);
}
}
#else
// Variant of execute() compiled when NGRAPH_TBB_ENABLE is not defined:
// it takes no use_tbb flag and simply invokes the kernel functor directly
// with the given runtime and execution contexts.
void CPUExecutor::execute(CPUKernelFunctor& f,
CPURuntimeContext* ctx,
CPUExecutionContext* ectx)
{
f(ctx, ectx);
}
#endif
CPUExecutor& GetCPUExecutor()
{
......
......@@ -26,7 +26,9 @@
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#if defined(NGRAPH_TBB_ENABLE)
#include "tbb/task_arena.h"
#endif
namespace ngraph
{
......@@ -49,16 +51,24 @@ namespace ngraph
return *m_thread_pool_devices[id].get();
}
#if defined(NGRAPH_TBB_ENABLE)
void execute(CPUKernelFunctor& f,
CPURuntimeContext* ctx,
CPUExecutionContext* ectx,
bool use_tbb = false);
#else
void execute(CPUKernelFunctor& f,
CPURuntimeContext* ctx,
CPUExecutionContext* ectx);
#endif
// Returns the stored thread-pool count (m_num_thread_pools).
int get_num_thread_pools() { return m_num_thread_pools; }
// Returns the stored core count (m_num_cores).
int get_num_cores() { return m_num_cores; }
private:
std::vector<std::unique_ptr<Eigen::ThreadPool>> m_thread_pools;
std::vector<std::unique_ptr<Eigen::ThreadPoolDevice>> m_thread_pool_devices;
#if defined(NGRAPH_TBB_ENABLE)
std::vector<tbb::task_arena> m_tbb_arenas;
#endif
int m_num_thread_pools;
int m_num_cores;
};
......
......@@ -23,9 +23,11 @@
#include <typeinfo>
#include <unordered_map>
#if defined(NGRAPH_TBB_ENABLE)
#define TBB_PREVIEW_FLOW_GRAPH_TRACE 1
#include <tbb/flow_graph.h>
#endif
#if !defined(NGRAPH_DEX_ONLY)
#include "ngraph/code_writer.hpp"
......@@ -236,7 +238,9 @@ runtime::cpu::CPU_ExternalFunction::CPU_ExternalFunction(
: m_function(function)
, m_release_function(release_function)
, m_emit_timing(false)
#if defined(NGRAPH_TBB_ENABLE)
, m_use_tbb(std::getenv("NGRAPH_CPU_USE_TBB") != nullptr)
#endif
#if !defined(NGRAPH_DEX_ONLY)
, m_is_compiled(false)
, m_direct_execution((std::getenv("NGRAPH_CODEGEN") == nullptr) ||
......@@ -508,6 +512,7 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_
CodeWriter writer;
writer << "// Generated by the nGraph CPU backend\n";
#if defined(NGRAPH_TBB_ENABLE)
if (m_use_tbb)
{
if (runtime::cpu::IsTracingEnabled() || m_emit_timing)
......@@ -518,6 +523,8 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_
}
writer << "#include <tbb/flow_graph.h>";
}
#endif
writer +=
R"(
#include <cmath>
......@@ -769,6 +776,7 @@ using namespace ngraph::runtime;
writer << "bool* t_en = (bool*)" << m_function->get_name() << "_t_en;\n";
#if defined(NGRAPH_TBB_ENABLE)
if (m_use_tbb)
{
writer << "\n";
......@@ -779,6 +787,7 @@ using namespace ngraph::runtime;
<< " = new tbb::flow::continue_node<tbb::flow::continue_msg> "
"(*(cg_ctx->tbb_graph), [&](const tbb::flow::continue_msg &msg)\n{});\n";
}
#endif
// Add inputs to the variable name map
size_t arg_index = 0;
......@@ -887,6 +896,7 @@ using namespace ngraph::runtime;
t_out_attrs,
t_in_attrs);
}
#if defined(NGRAPH_TBB_ENABLE)
if (m_use_tbb)
{
writer << "tbb::flow::continue_node<tbb::flow::continue_msg>* "
......@@ -896,6 +906,7 @@ using namespace ngraph::runtime;
"(*(cg_ctx->tbb_graph), [&](const tbb::flow::continue_msg &msg)\n{\n";
writer.indent++;
}
#endif
if (runtime::cpu::IsTracingEnabled() && m_function->get_name() == m_function_name)
{
writer << "start_ts = cpu::Clock::now();\n";
......@@ -1015,14 +1026,17 @@ using namespace ngraph::runtime;
<< "(std::chrono::duration_cast<cpu::Timescale>(cpu::Clock::now() - "
"start_ts)).count();\n";
}
#if defined(NGRAPH_TBB_ENABLE)
if (m_use_tbb)
{
writer.indent--;
writer << "});\n";
}
#endif
}
}
#if defined(NGRAPH_TBB_ENABLE)
if (m_use_tbb)
{
writer << "\n";
......@@ -1058,6 +1072,7 @@ using namespace ngraph::runtime;
<< "->try_put(tbb::flow::continue_msg());\n";
writer << "try { cg_ctx->tbb_graph->wait_for_all(); } catch(...) { throw; }\n";
}
#endif
writer << "ctx->first_iteration = false;\n";
writer.indent--;
......@@ -1350,12 +1365,14 @@ void runtime::cpu::CPU_ExternalFunction::build(ngraph::pass::PassConfig& pass_co
return;
}
#if defined(NGRAPH_TBB_ENABLE)
if (m_use_tbb && (runtime::cpu::IsTracingEnabled() || m_emit_timing))
{
throw ngraph_error(
"CPU Backend: Tracing and performance breakdowns might not be accurate with TBB "
"enabled due to concurrent graph execution");
}
#endif
// reference all the builders for static library
#ifdef NGRAPH_CPU_STATIC_LIB_ENABLE
......@@ -1717,6 +1734,7 @@ void runtime::cpu::CPU_ExternalFunction::build(ngraph::pass::PassConfig& pass_co
}
auto functor = functors.begin();
#if defined(NGRAPH_TBB_ENABLE)
if (m_use_tbb)
{
// Build the flow graph
......@@ -1824,6 +1842,7 @@ void runtime::cpu::CPU_ExternalFunction::build(ngraph::pass::PassConfig& pass_co
}
}
else
#endif
{
static const auto ddebug = std::getenv("NGRAPH_DEX_DEBUG");
if (ddebug != nullptr)
......@@ -1930,7 +1949,11 @@ void runtime::cpu::CPU_ExternalFunction::build(ngraph::pass::PassConfig& pass_co
m_is_built = true;
#if defined(NGRAPH_TBB_ENABLE)
if (m_release_function && !m_use_tbb)
#else
if (m_release_function)
#endif
{
release_function();
}
......
......@@ -262,7 +262,9 @@ namespace ngraph
bool m_release_function;
bool m_emit_timing;
#if defined(NGRAPH_TBB_ENABLE)
bool m_use_tbb;
#endif
#if !defined(NGRAPH_DEX_ONLY)
bool m_is_compiled;
#endif
......
......@@ -20,11 +20,13 @@
#include <cstdint>
#include <set>
#if defined(NGRAPH_TBB_ENABLE)
#define TBB_PREVIEW_GLOBAL_CONTROL 1
#define TBB_PREVIEW_FLOW_GRAPH_TRACE 1
#include <tbb/flow_graph.h>
#include <tbb/global_control.h>
#include <tbb/task_scheduler_init.h>
#endif
#include "ngraph/op/experimental/compiled_kernel.hpp"
#ifdef NGRAPH_MLIR_ENABLE
......@@ -66,8 +68,10 @@ namespace ngraph
std::vector<mkldnn::primitive*> mkldnn_primitives;
std::vector<AlignedBuffer*> memory_buffers;
std::vector<char*> mkldnn_workspaces;
#if defined(NGRAPH_TBB_ENABLE)
tbb::flow::graph* G;
tbb::global_control* c;
#endif
State* const* states;
std::set<size_t> breakpoints;
size_t pc;
......
......@@ -23,11 +23,16 @@
R"(
struct CPURuntimeContextCG
{
#if defined(NGRAPH_TBB_ENABLE)
std::unique_ptr<tbb::flow::graph> tbb_graph;
std::unique_ptr<tbb::global_control> tbb_gcontrol;
CPURuntimeContextCG() { init_tbb(); init_mkldnn_primitives();}
~CPURuntimeContextCG() { cleanup_tbb(); cleanup_mkldnn_primitives();}
#else
CPURuntimeContextCG() { init_mkldnn_primitives();}
~CPURuntimeContextCG() { cleanup_mkldnn_primitives();}
#endif
std::vector<mkldnn::primitive*> mkldnn_primitives;
std::vector<char*> mkldnn_workspaces;
......@@ -57,6 +62,7 @@ struct CPURuntimeContextCG
private:
#if defined(NGRAPH_TBB_ENABLE)
inline void init_tbb()
{
if (std::getenv("NGRAPH_CPU_USE_TBB"))
......@@ -86,6 +92,7 @@ private:
}
}
}
#endif
void init_mkldnn_primitives();
......
......@@ -382,7 +382,7 @@ if (NGRAPH_PLAIDML_ENABLE)
endif()
if (NGRAPH_TBB_ENABLE)
target_compile_definitions(unit-test PRIVATE NGRAPH_TBB_ENABLE)
target_compile_definitions(unit-test PRIVATE "NGRAPH_TBB_ENABLE")
endif()
if (NGRAPH_HALIDE)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment