Commit c2c33748 authored by Jai Menon's avatar Jai Menon Committed by Scott Cyphers

Build and execute TBB flow graphs in the CPU backend (#304)

* CMake: TBB integration placeholder

* CMake: Integrate TBB

* CMake: Indent

* CMake: Rewrite TBB integration

* CMake: More TBB integration changes

* CMake: Install TBB headers and DSOs

* CMake: Don't install the TBB debug DSO

* CMake: Propagate ngraph's configured compiler setting over to MKL-DNN

* CMake: Restore TBB debug DSO installation

* CMake: Add installed headers to search path.
This needs to be cleaned up along with other header search cleanup

* CPU: Build and execute TBB flow graphs

* CPU: TBB fixes

* CPU: More TBB fixes

* CPU: Allow both TBB and serial codegen for now

* TBB: get_arguments -> get_input_ops

* CPU: Use node methods

* CPU: Add TBB headers in the build directory to the search path

* TBB: Incorporate various changes from master

* CMake: Indentation fix

* CMake: Indentation fix

* CMake: TBB is mandatory so remove additional predicates

* TBB: Add a test

* CMake: Fix linker flags with GCC
parent bc63f7bb
......@@ -32,7 +32,10 @@ if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
UPDATE_COMMAND ""
# Uncomment below with any in-flight MKL-DNN patches
# PATCH_COMMAND patch -p1 < ${CMAKE_SOURCE_DIR}/third-party/patches/mkldnn-cmake-openmp.patch
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
CMAKE_ARGS
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
)
else()
ExternalProject_Add(
......@@ -42,7 +45,10 @@ if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
UPDATE_COMMAND ""
# Uncomment below with any in-flight MKL-DNN patches
# PATCH_COMMAND patch -p1 < ${CMAKE_SOURCE_DIR}/third-party/patches/mkldnn-cmake-openmp.patch
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
CMAKE_ARGS
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
BUILD_BYPRODUCTS "${MKLDNN_INSTALL_DIR}/include/mkldnn.hpp"
)
endif()
......
# Copyright 2017 Nervana Systems Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#----------------------------------------------------------------------------------------------------------
# Fetch and configure TBB
#----------------------------------------------------------------------------------------------------------
# Where to fetch TBB from. The tag must stay in sync with the TBB version
# the CPU backend expects when it runs TBBBuild.cmake from ${TBB_ROOT}.
set(TBB_GIT_REPO_URL https://github.com/01org/tbb)
set(TBB_GIT_TAG "tbb_2018")
# TBB is only needed by the (non-macOS) CPU backend, so skip the fetch on Darwin.
# NOTE: test the variable directly instead of the fragile double-dereference
# `if(NOT ${CMAKE_SYSTEM_NAME} ...)`, which re-dereferences the expanded value.
if(NOT CMAKE_SYSTEM_NAME MATCHES "Darwin")
    # Instantiate a tiny download-only super-build project into the build tree...
    configure_file(${CMAKE_SOURCE_DIR}/cmake/tbb_fetch.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/tbb/CMakeLists.txt)
    # ...then configure and build it at *configure* time so the TBB sources are
    # on disk before the main build starts. Fail loudly if either step breaks;
    # previously a failed fetch was silently ignored and surfaced later as a
    # confusing missing-file error.
    execute_process(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
                    WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/tbb"
                    RESULT_VARIABLE TBB_FETCH_CONFIG_RESULT)
    if(TBB_FETCH_CONFIG_RESULT)
        message(FATAL_ERROR "Configuring the TBB fetch project failed: ${TBB_FETCH_CONFIG_RESULT}")
    endif()
    execute_process(COMMAND "${CMAKE_COMMAND}" --build .
                    WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/tbb"
                    RESULT_VARIABLE TBB_FETCH_BUILD_RESULT)
    if(TBB_FETCH_BUILD_RESULT)
        message(FATAL_ERROR "Fetching the TBB sources failed: ${TBB_FETCH_BUILD_RESULT}")
    endif()
    # Export the checkout location to the enclosing scope; consumers include
    # ${TBB_ROOT}/cmake/TBBBuild.cmake to build TBB proper.
    set(TBB_ROOT ${CMAKE_CURRENT_BINARY_DIR}/tbb/tbb-src PARENT_SCOPE)
endif()
# Copyright 2017 Nervana Systems Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Minimal bootstrap project. This file is a configure_file() template: it is
# copied into the build tree (tbb/CMakeLists.txt) with the repo URL and git
# tag substituted, then configured and built at configure time purely to
# clone the TBB sources. See cmake/external_tbb.cmake for the driver.
cmake_minimum_required(VERSION 3.1)
include(ExternalProject)
# NONE: no compiled languages are needed — this project only downloads sources.
project(tbb-fetch NONE)
# Download-only external project: configure/build/update/install are all
# disabled, so the only step that runs is the git clone at the given tag.
ExternalProject_Add(
ext_tbb
GIT_REPOSITORY ${TBB_GIT_REPO_URL}
GIT_TAG ${TBB_GIT_TAG}
SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/tbb/tbb-src"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND ""
)
......@@ -137,6 +137,16 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
MKLDNN_INCLUDE_DIR)
find_package(ZLIB REQUIRED)
include(${TBB_ROOT}/cmake/TBBBuild.cmake)
tbb_build(TBB_ROOT ${TBB_ROOT} MAKE_ARGS compiler=clang tbb_build_dir=${CMAKE_CURRENT_BINARY_DIR}/tbb_build
tbb_build_prefix=tbb CONFIG_DIR TBB_DIR)
find_package(TBB REQUIRED tbb)
if (NOT TBB_FOUND)
message(FATAL_ERROR "TBB is needed by the CPU backend and was not found")
else()
message(STATUS "Found TBB and imported target ${TBB_IMPORTED_TARGETS}")
endif()
include_directories(SYSTEM ${LLVM_INCLUDE_DIR} ${MKLDNN_INCLUDE_DIR})
link_directories(${LLVM_LIB_DIR} ${MKLDNN_LIB_DIR})
......@@ -162,11 +172,10 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
# This must be kept in sync with the LLVM + Clang version in use
set_source_files_properties(codegen/compiler.cpp PROPERTIES COMPILE_FLAGS "-fno-rtti")
set_source_files_properties(codegen/compiler.cpp PROPERTIES COMPILE_DEFINITIONS
"EIGEN_HEADERS_PATH=\"${EIGEN_INCLUDE_DIR}\";CLANG_BUILTIN_HEADERS_PATH=\"${LLVM_LIB_DIR}/clang/5.0.0/include\";NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\";")
set(NGRAPH_CPU_PCH_ENABLE 0 CACHE STRING "Enable pre-compiled headers in the CPU backend")
"EIGEN_HEADERS_PATH=\"${EIGEN_INCLUDE_DIR}\";CLANG_BUILTIN_HEADERS_PATH=\"${LLVM_LIB_DIR}/clang/5.0.0/include\";TBB_HEADERS_PATH=\"${TBB_ROOT}/include\";NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\";INSTALLED_HEADERS_PATH=\"${CMAKE_INSTALL_PREFIX}/include\";")
set(NGRAPH_CPU_DEBUGINFO_ENABLE 0 CACHE STRING "Enable debuginfo in the CPU backend")
set_property(SOURCE codegen/compiler.cpp APPEND_STRING PROPERTY COMPILE_DEFINITIONS
"NGCPU_PCH=${NGRAPH_CPU_PCH_ENABLE};NGCPU_DEBUGINFO=${NGRAPH_CPU_DEBUGINFO_ENABLE}")
"NGCPU_DEBUGINFO=${NGRAPH_CPU_DEBUGINFO_ENABLE}")
endif()
add_library(ngraph SHARED ${SRC})
......@@ -187,6 +196,16 @@ if (NOT APPLE)
# option so making this portable is still an open issue. As a note for the future,
# this is not an issue on Windows and LLVM's lld does support --exclude-libs.
set_target_properties(ngraph PROPERTIES LINK_FLAGS "-Wl,--exclude-libs,ALL")
# GCC invokes the linker with --as-needed by default which doesn't work for us
# because generated code needs to find symbols in these DSOs at runtime.
# The fix below is temporary and will be removed once we find a better way
# to do this because certain dependencies like the OpenMP runtime libraries
# _do_ need to be linked with --as-needed with a higher priority for the
# Intel OpenMP runtime so we don't mix libgomp and libiomp5
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set_property(TARGET ngraph APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--no-as-needed")
endif()
endif()
# Colon separated string for specified runtime plugin loading, this is made explicit s.t. if a
......@@ -227,6 +246,10 @@ if(NGRAPH_CPU_ENABLE AND MKLDNN_LIB_DIR)
target_link_libraries(ngraph PRIVATE mkldnn)
endif()
if(NGRAPH_CPU_ENABLE)
target_link_libraries(ngraph PRIVATE ${TBB_IMPORTED_TARGETS})
endif()
if(NGRAPH_GPU_ENABLE AND CUDA_LIBRARIES)
target_link_libraries(ngraph PRIVATE ${CUDA_LIBRARIES} ${CUDNN_LIBRARIES})
endif()
......@@ -245,8 +268,19 @@ install(DIRECTORY
)
if (NOT APPLE)
install(DIRECTORY
${MKLDNN_LIB_DIR}/
DESTINATION "${NGRAPH_INSTALL_LIB}"
)
install(DIRECTORY
${MKLDNN_LIB_DIR}/
DESTINATION "${NGRAPH_INSTALL_LIB}"
)
if (NGRAPH_CPU_ENABLE)
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tbb_build/tbb_release/
DESTINATION ${NGRAPH_INSTALL_LIB}
FILES_MATCHING PATTERN "libtbb.so.*"
)
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tbb_build/tbb_debug/
DESTINATION ${NGRAPH_INSTALL_LIB}
FILES_MATCHING PATTERN "libtbb_debug.so.*"
)
endif()
endif()
......@@ -334,7 +334,9 @@ void StaticCompiler::configure_search_path()
});
add_header_search_path(EIGEN_HEADERS_PATH);
add_header_search_path(TBB_HEADERS_PATH);
add_header_search_path(NGRAPH_HEADERS_PATH);
add_header_search_path(INSTALLED_HEADERS_PATH);
#endif
}
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include <cmath>
#include <cstdio>
#include "ngraph/log.hpp"
......@@ -25,11 +26,11 @@ template <typename T>
std::string to_cpp_string(T value)
{
string rc;
if (isnan(value))
if (std::isnan(value))
{
rc = "NAN";
}
else if (isinf(value))
else if (std::isinf(value))
{
if (value > 0)
{
......
......@@ -161,6 +161,7 @@ runtime::cpu::CPU_ExternalFunction::CPU_ExternalFunction(
: ngraph::runtime::ExternalFunction(function, release_function)
, m_compiled_function(nullptr)
, m_emit_timing(std::getenv("NGRAPH_CPU_EMIT_TIMING") != nullptr)
, m_use_tbb(std::getenv("NGRAPH_CPU_USE_TBB") != nullptr)
{
}
......@@ -190,6 +191,8 @@ void runtime::cpu::CPU_ExternalFunction::compile()
R"(// Generated by the NGraph CPU backend
#include <cmath>
#include <tbb/flow_graph.h>
#include <Eigen/Dense>
#include "ngraph/runtime/aligned_buffer.hpp"
......@@ -228,7 +231,7 @@ using namespace ngraph::runtime;
{
for (shared_ptr<Node> node : current_function->get_ordered_ops())
{
if (!dynamic_pointer_cast<op::Parameter>(node))
if (!node->is_parameter() && !node->is_constant())
{
names.push_back(node->get_name());
}
......@@ -303,6 +306,12 @@ using namespace ngraph::runtime;
writer << "{\n";
writer.indent++;
if (m_use_tbb)
{
// TODO: This should be static but we don't codegen statics correctly yet
writer << "tbb::flow::graph G;\n\n";
}
bool temporaries_used = false;
for (shared_ptr<Node> node : current_function->get_ordered_ops())
{
......@@ -439,21 +448,84 @@ using namespace ngraph::runtime;
shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
out.push_back(TensorViewWrapper(tv));
}
if (m_emit_timing)
// Emit operation prologue
if (!node->is_parameter() && !node->is_constant())
{
emit_debug_function_entry(writer, node.get(), in, out);
if (m_use_tbb)
{
writer << "tbb::flow::continue_node<tbb::flow::continue_msg> flowgraph_node_"
<< node->get_name() << "(G, [&](const tbb::flow::continue_msg &msg) {\n";
writer.indent++;
}
if (m_emit_timing)
{
emit_debug_function_entry(writer, node.get(), in, out);
}
}
// Emit operation body
handler->second(&emitter, node.get(), in, out);
handle_output_alias(writer, *node, output_alias_map);
if (m_emit_timing)
// Emit operation epilogue
if (!node->is_parameter() && !node->is_constant())
{
emit_debug_function_exit(writer, node.get(), in, out);
handle_output_alias(writer, *node, output_alias_map);
if (m_emit_timing)
{
emit_debug_function_exit(writer, node.get(), in, out);
}
if (m_use_tbb)
{
writer.indent--;
writer << "});\n";
}
}
}
writer.indent--;
if (m_use_tbb)
{
writer << "\n";
// Build the flow graph
vector<Node*> dependence_graph_heads;
// End writer
traverse_nodes(
current_function, [&writer, &dependence_graph_heads](shared_ptr<Node> n) {
if (!n->is_parameter() && !n->is_constant())
{
bool is_head = true;
for (auto arg : n->get_input_ops())
{
if (!arg->is_parameter() && !arg->is_constant())
{
is_head = false;
writer << "tbb::flow::make_edge(flowgraph_node_" << arg->get_name()
<< ", flowgraph_node_" << n->get_name() << ");\n";
}
}
if (is_head)
{
dependence_graph_heads.emplace_back(n.get());
}
}
});
writer << "\n";
// Execute the flow graph
if (!dependence_graph_heads.empty())
{
for (Node* n : dependence_graph_heads)
{
writer << "flowgraph_node_" << n->get_name()
<< ".try_put(tbb::flow::continue_msg());\n";
}
writer << "try { G.wait_for_all(); } catch(...) { throw; }\n";
}
}
writer.indent--;
// End generated function
writer += "}\n\n";
}
......@@ -535,10 +607,7 @@ void runtime::cpu::CPU_ExternalFunction::emit_debug_function_entry(
const std::vector<TensorViewWrapper>& in,
const std::vector<TensorViewWrapper>& out)
{
if (!dynamic_cast<op::Parameter*>(node))
{
writer << "timer_" << node->get_name() << ".start();\n";
}
writer << "timer_" << node->get_name() << ".start();\n";
}
void runtime::cpu::CPU_ExternalFunction::emit_debug_function_exit(
......@@ -547,8 +616,5 @@ void runtime::cpu::CPU_ExternalFunction::emit_debug_function_exit(
const std::vector<TensorViewWrapper>& in,
const std::vector<TensorViewWrapper>& out)
{
if (!dynamic_cast<op::Parameter*>(node))
{
writer << "timer_" << node->get_name() << ".stop();\n\n";
}
writer << "timer_" << node->get_name() << ".stop();\n\n";
}
......@@ -77,6 +77,7 @@ namespace ngraph
std::unique_ptr<codegen::Compiler> m_compiler;
std::unique_ptr<codegen::ExecutionEngine> m_execution_engine;
bool m_emit_timing;
bool m_use_tbb;
};
}
}
......
......@@ -21,4 +21,4 @@ set (SRC
add_executable(resource_generator ${SRC})
set_source_files_properties(main.cpp PROPERTIES COMPILE_DEFINITIONS
"EIGEN_HEADERS_PATH=\"${EIGEN_INCLUDE_DIR}\";CLANG_BUILTIN_HEADERS_PATH=\"${LLVM_LIB_DIR}/clang/5.0.0/include\";NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\";")
"EIGEN_HEADERS_PATH=\"${EIGEN_INCLUDE_DIR}\";CLANG_BUILTIN_HEADERS_PATH=\"${LLVM_LIB_DIR}/clang/5.0.0/include\";TBB_HEADERS_PATH=\"${TBB_ROOT}/include\";NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\";")
......@@ -92,6 +92,7 @@ int main(int argc, char** argv)
include_paths.push_back({cpp1, {"bits", "ext", "debug", "backward"}});
include_paths.push_back({EIGEN_HEADERS_PATH, {}, true});
include_paths.push_back({NGRAPH_HEADERS_PATH, {}, true});
include_paths.push_back({TBB_HEADERS_PATH, {}, true});
if (output_path.empty())
{
......
......@@ -15,6 +15,8 @@
#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <cstdlib>
#include <string>
#include "gtest/gtest.h"
......@@ -3748,7 +3750,18 @@ TEST(${BACKEND_NAME}, one_hot_scalar_fp_nonint_in_3)
copy_data(a, vector<float>{1.1f});
auto result = backend->make_primary_tensor_view(element::f32, shape_r);
EXPECT_THROW({ cf->call({a}, {result}); }, std::range_error);
try
{
cf->call({a}, {result});
}
catch (const std::exception& e)
{
EXPECT_EQ(e.what(), std::string("One-hot: non-integral value in input"));
}
catch (...)
{
FAIL() << "Expected a std::out_of_range exception";
}
}
TEST(${BACKEND_NAME}, one_hot_scalar_oob_in_3)
......@@ -3770,7 +3783,18 @@ TEST(${BACKEND_NAME}, one_hot_scalar_oob_in_3)
copy_data(a, vector<int32_t>{3000000});
auto result = backend->make_primary_tensor_view(element::i32, shape_r);
EXPECT_THROW({ cf->call({a}, {result}); }, std::range_error);
try
{
cf->call({a}, {result});
}
catch (const std::exception& e)
{
EXPECT_EQ(e.what(), std::string("One-hot: value is out of category range"));
}
catch (...)
{
FAIL() << "Expected a std::out_of_range exception";
}
}
TEST(${BACKEND_NAME}, one_hot_vector_0)
......@@ -3842,7 +3866,18 @@ TEST(${BACKEND_NAME}, one_hot_vector_1_barely_oob)
copy_data(a, vector<int32_t>{2, 1, 0, 0, 3, 2, 1, 0});
auto result = backend->make_primary_tensor_view(element::i32, shape_r);
EXPECT_THROW({ cf->call({a}, {result}); }, std::range_error);
try
{
cf->call({a}, {result});
}
catch (const std::exception& e)
{
EXPECT_EQ(e.what(), std::string("One-hot: value is out of category range"));
}
catch (...)
{
FAIL() << "Expected a std::out_of_range exception";
}
}
TEST(${BACKEND_NAME}, one_hot_vector_1_far_oob)
......@@ -3864,7 +3899,18 @@ TEST(${BACKEND_NAME}, one_hot_vector_1_far_oob)
copy_data(a, vector<int32_t>{2, 1, 0, 0, 3000000, 2, 1, 0});
auto result = backend->make_primary_tensor_view(element::i32, shape_r);
EXPECT_THROW({ cf->call({a}, {result}); }, std::range_error);
try
{
cf->call({a}, {result});
}
catch (const std::exception& e)
{
EXPECT_EQ(e.what(), std::string("One-hot: value is out of category range"));
}
catch (...)
{
FAIL() << "Expected a std::out_of_range exception";
}
}
TEST(${BACKEND_NAME}, one_hot_matrix_0)
......@@ -3942,7 +3988,18 @@ TEST(${BACKEND_NAME}, one_hot_vector_1_fp_nonint)
copy_data(a, vector<float>{2, 1, 0, 0, 2, 2, 1.01f, 0});
auto result = backend->make_primary_tensor_view(element::f32, shape_r);
EXPECT_THROW({ cf->call({a}, {result}); }, std::range_error);
try
{
cf->call({a}, {result});
}
catch (const std::exception& e)
{
EXPECT_EQ(e.what(), std::string("One-hot: non-integral value in input"));
}
catch (...)
{
FAIL() << "Expected a std::out_of_range exception";
}
}
TEST(${BACKEND_NAME}, replace_slice_3d)
......@@ -4507,3 +4564,53 @@ TEST(${BACKEND_NAME}, numeric_double_inf)
cf->call({}, {result});
EXPECT_EQ((vector<char>{false, false, true, false, false}), result->get_vector<char>());
}
// Exercise the TBB flow-graph codegen path by compiling and running a small
// (A + B) * C function, including argument-order permutations.
TEST(${BACKEND_NAME}, abc_tbb)
{
// Force TBB flow graph generation in the CPU backend
// This has no effect on other backends
bool use_tbb = (getenv("NGRAPH_CPU_USE_TBB") != nullptr);
if (!use_tbb)
{
// Must be set before compile() below: the CPU backend reads this env var
// when the external function is constructed to choose TBB vs. serial codegen.
setenv("NGRAPH_CPU_USE_TBB", "1", 1);
}
auto shape = Shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto C = make_shared<op::Parameter>(element::f32, shape);
auto rt = make_shared<TensorViewType>(element::f32, shape);
auto f = make_shared<Function>((A + B) * C, rt, op::Parameters{A, B, C});
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
// Create some tensors for input/output
shared_ptr<runtime::TensorView> a = backend->make_primary_tensor_view(element::f32, shape);
shared_ptr<runtime::TensorView> b = backend->make_primary_tensor_view(element::f32, shape);
shared_ptr<runtime::TensorView> c = backend->make_primary_tensor_view(element::f32, shape);
shared_ptr<runtime::TensorView> result = backend->make_primary_tensor_view(element::f32, shape);
copy_data(a, test::NDArray<float, 2>({{1, 2}, {3, 4}}).get_vector());
copy_data(b, test::NDArray<float, 2>({{5, 6}, {7, 8}}).get_vector());
copy_data(c, test::NDArray<float, 2>({{9, 10}, {11, 12}}).get_vector());
// (a+b)*c elementwise: e.g. (1+5)*9 = 54.
cf->call({a, b, c}, {result});
EXPECT_EQ(result->get_vector<float>(),
(test::NDArray<float, 2>({{54, 80}, {110, 144}})).get_vector());
// Addition is commutative, so swapping a and b gives the same result.
cf->call({b, a, c}, {result});
EXPECT_EQ(result->get_vector<float>(),
(test::NDArray<float, 2>({{54, 80}, {110, 144}})).get_vector());
// Swapping c and b changes the computation to (a+c)*b: e.g. (1+9)*5 = 50.
cf->call({a, c, b}, {result});
EXPECT_EQ(result->get_vector<float>(),
(test::NDArray<float, 2>({{50, 72}, {98, 128}})).get_vector());
// Restore the environment only if this test was the one that set the flag,
// so an externally-set NGRAPH_CPU_USE_TBB is left untouched.
if (!use_tbb)
{
unsetenv("NGRAPH_CPU_USE_TBB");
}
}
......@@ -15,3 +15,4 @@ include( ../cmake/external_gtest.cmake )
# Third-party dependencies fetched/configured at CMake time.
include( ../cmake/external_eigen.cmake )
include( ../cmake/external_mkldnn.cmake )
include( ../cmake/external_llvm.cmake )
# TBB: used by the CPU backend to build and run flow graphs.
include( ../cmake/external_tbb.cmake )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment