Commit 3fa332b6 authored by Jaikrishnan Menon

Merge branch 'master' into mem_leaks

parents 225b12bb ffe657df
......@@ -43,6 +43,9 @@ endif()
set(CMAKE_DISABLE_SOURCE_CHANGES ON)
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
# Create compilation database compile_commands.json
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Set the directory where the custom CMake finder modules live
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
......@@ -87,6 +90,11 @@ endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
# Build the CPU backend by default unless NGRAPH_CPU_ENABLE is set externally
if (NOT DEFINED NGRAPH_CPU_ENABLE)
set(NGRAPH_CPU_ENABLE TRUE)
endif()
#-----------------------------------------------------------------------------------------------
# External projects install directory
#-----------------------------------------------------------------------------------------------
......
......@@ -42,6 +42,7 @@
#include <clang/Basic/TargetInfo.h>
#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Frontend/FrontendActions.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <llvm/Support/TargetSelect.h>
......@@ -73,6 +74,11 @@ Compiler::~Compiler()
{
}
void Compiler::set_precompiled_header_source(const std::string& source)
{
s_static_compiler.set_precompiled_header_source(source);
}
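A usage sketch of the new API (the include path and function name are illustrative assumptions; the call pattern mirrors ExternalFunction::compile later in this diff):

#include <memory>
#include <string>
#include "ngraph/codegen/compiler.hpp" // assumed header path

void pch_usage_sketch()
{
    ngraph::codegen::Compiler compiler;
    // Hand over the header text once; the PCH is built lazily on the first
    // compile() and reused afterwards via ImplicitPCHInclude.
    compiler.set_precompiled_header_source("#include <cmath>\n#include <vector>\n");
    auto module = compiler.compile("extern \"C\" void f(void** in, void** out) {}\n");
    (void)module; // unique_ptr<llvm::Module>; nullptr on compile failure
}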
std::unique_ptr<llvm::Module> Compiler::compile(const std::string& source)
{
lock_guard<mutex> lock(m_mutex);
......@@ -88,14 +94,10 @@ static std::string GetExecutablePath(const char* Argv0)
}
StaticCompiler::StaticCompiler()
: m_precompiled_headers_enabled(false)
: m_precompiled_header_valid(false)
, m_debuginfo_enabled(false)
, m_source_name("code.cpp")
{
#if NGCPU_PCH
m_precompiled_headers_enabled = true;
#endif
#if NGCPU_DEBUGINFO
m_debuginfo_enabled = true;
#endif
......@@ -213,14 +215,6 @@ StaticCompiler::StaticCompiler()
CGO.setDebugInfo(codegenoptions::FullDebugInfo);
}
if (m_precompiled_headers_enabled)
{
// Preprocessor options
auto& PPO = m_compiler->getInvocation().getPreprocessorOpts();
PPO.ImplicitPCHInclude = "ngcpu.pch";
PPO.DisablePCHValidation = 1;
}
// Enable various target features
// Most of these are for Eigen
auto& TO = m_compiler->getInvocation().getTargetOpts();
......@@ -314,6 +308,18 @@ std::unique_ptr<llvm::Module>
StaticCompiler::compile(std::unique_ptr<clang::CodeGenAction>& compiler_action,
const string& source)
{
if (!m_precompiled_header_valid && !m_precompiled_header_source.empty())
{
generate_pch(m_precompiled_header_source);
}
if (m_precompiled_header_valid)
{
// Preprocessor options
auto& PPO = m_compiler->getInvocation().getPreprocessorOpts();
PPO.ImplicitPCHInclude = m_pch_path;
PPO.DisablePCHValidation = 0;
}
// Map the code filename to a MemoryBuffer
StringRef source_ref(source);
unique_ptr<MemoryBuffer> buffer = MemoryBuffer::getMemBufferCopy(source_ref);
......@@ -334,24 +340,24 @@ std::unique_ptr<llvm::Module>
return rc;
}
// std::unique_ptr<llvm::Module> StaticCompiler::generate_pch(const string& source)
// {
// // Map code filename to a memoryBuffer
// StringRef source_ref(source);
// unique_ptr<MemoryBuffer> buffer = MemoryBuffer::getMemBufferCopy(source_ref);
// m_compiler->getInvocation().getPreprocessorOpts().addRemappedFile(m_source_name, buffer.get());
void StaticCompiler::generate_pch(const string& source)
{
m_pch_path = file_util::tmp_filename();
m_compiler->getFrontendOpts().OutputFile = m_pch_path;
// // Create and execute action
// CodeGenAction* compilerAction = new GeneratePCHAction();
// std::unique_ptr<llvm::Module> rc;
// if (m_compiler->ExecuteAction(*compilerAction) == true)
// {
// rc = compilerAction->takeModule();
// }
// Map the code filename to a MemoryBuffer
StringRef source_ref(source);
unique_ptr<MemoryBuffer> buffer = MemoryBuffer::getMemBufferCopy(source_ref);
m_compiler->getInvocation().getPreprocessorOpts().addRemappedFile(m_source_name, buffer.get());
// buffer.release();
// Create and execute action
unique_ptr<clang::GeneratePCHAction> compilerAction(new clang::GeneratePCHAction());
if (m_compiler->ExecuteAction(*compilerAction))
{
m_precompiled_header_valid = true;
}
// m_compiler->getInvocation().getPreprocessorOpts().clearRemappedFiles();
buffer.release();
// return rc;
// }
m_compiler->getInvocation().getPreprocessorOpts().clearRemappedFiles();
}
......@@ -53,6 +53,7 @@ class ngraph::codegen::Compiler
public:
Compiler();
~Compiler();
void set_precompiled_header_source(const std::string& source);
std::unique_ptr<llvm::Module> compile(const std::string& source);
std::unique_ptr<clang::CodeGenAction>& get_compiler_action() { return compiler_action; }
private:
......@@ -65,20 +66,26 @@ public:
StaticCompiler();
~StaticCompiler();
void set_precompiled_headers_enabled(bool state) { m_precompiled_headers_enabled = state; }
bool is_precompiled_headers_enabled() { return m_precompiled_headers_enabled; }
void set_debuginfo_enabled(bool state) { m_debuginfo_enabled = state; }
bool is_debuginfo_enabled() { return m_debuginfo_enabled; }
void set_precompiled_header_source(const std::string& source)
{
m_precompiled_header_source = source;
}
void add_header_search_path(const std::string& path);
std::unique_ptr<llvm::Module> compile(std::unique_ptr<clang::CodeGenAction>& compiler_action,
const std::string& source);
void generate_pch(const std::string& source);
private:
std::unique_ptr<clang::CompilerInstance> m_compiler;
bool m_precompiled_headers_enabled;
bool m_precompiled_header_valid;
bool m_debuginfo_enabled;
std::string m_source_name;
std::vector<std::string> m_extra_search_path_list;
std::string m_pch_path;
std::string m_precompiled_header_source;
bool is_version_number(const std::string& path);
void use_cached_files();
......
......@@ -31,6 +31,7 @@ Function::Function(const std::shared_ptr<Node>& result,
, m_name(name)
, m_result_type(result_type)
, m_ordered_ops_valid(false)
, m_temporary_pool_size(0)
, m_instance_id(m_next_instance_id.fetch_add(1))
{
m_result->set_is_output();
......@@ -97,6 +98,16 @@ void Function::set_name(const string& name)
}
}
size_t Function::get_temporary_pool_size()
{
return m_temporary_pool_size;
}
void Function::set_temporary_pool_size(size_t size)
{
m_temporary_pool_size = size;
}
std::ostream& ngraph::operator<<(std::ostream& out, const Function& f)
{
out << "Function(" << f.get_name() << ")";
......
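Since the pool size now lives on the Function itself, the write/read round trip looks like the following sketch (register_pass is assumed to follow ngraph's usual pass-registration pattern, and liveness is assumed to have been computed by earlier passes, elided here):

ngraph::pass::Manager pass_manager;
pass_manager.register_pass<ngraph::pass::MemoryLayout>(64); // 64-byte alignment
pass_manager.run_passes(f);                       // f: std::shared_ptr<ngraph::Function>
size_t pool_bytes = f->get_temporary_pool_size(); // written by MemoryLayout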
......@@ -56,6 +56,9 @@ namespace ngraph
void clear_ordered_ops_valid() { m_ordered_ops_valid = false; }
friend std::ostream& operator<<(std::ostream&, const Function&);
size_t get_instance_id() { return m_instance_id; }
size_t get_temporary_pool_size();
void set_temporary_pool_size(size_t);
protected:
std::shared_ptr<Node> m_result;
std::vector<std::shared_ptr<ngraph::op::Parameter>> m_parameters;
......@@ -64,6 +67,7 @@ namespace ngraph
bool m_ordered_ops_valid;
std::list<std::shared_ptr<Node>> m_ordered_ops;
std::list<std::shared_ptr<Node>> m_ops;
size_t m_temporary_pool_size;
private:
Function(const Function&) = delete;
......
......@@ -27,13 +27,3 @@ const vector<shared_ptr<Function>>& ngraph::pass::ManagerState::get_functions()
{
return m_function_list;
}
size_t ngraph::pass::ManagerState::get_temporary_pool_size()
{
return m_temporary_pool_size;
}
void ngraph::pass::ManagerState::set_temporary_pool_size(size_t size)
{
m_temporary_pool_size = size;
}
......@@ -39,10 +39,6 @@ public:
m_function_list.insert(m_function_list.begin(), collection.begin(), collection.end());
}
size_t get_temporary_pool_size();
void set_temporary_pool_size(size_t);
private:
size_t m_temporary_pool_size = 0;
std::vector<std::shared_ptr<Function>> m_function_list;
};
......@@ -31,10 +31,10 @@ pass::MemoryLayout::MemoryLayout(size_t alignment)
{
}
bool pass::MemoryLayout::run_on_call_graph(std::list<std::shared_ptr<Node>>& node_list)
bool pass::MemoryLayout::run_on_function(std::shared_ptr<ngraph::Function> function)
{
MemoryManager mm(m_alignment);
for (shared_ptr<Node> node : node_list)
for (shared_ptr<Node> node : function->get_ordered_ops())
{
for (Tensor* tensor : node->liveness_new_list)
{
......@@ -46,7 +46,7 @@ bool pass::MemoryLayout::run_on_call_graph(std::list<std::shared_ptr<Node>>& nod
mm.free(tensor->get_pool_offset());
}
}
get_state().set_temporary_pool_size(mm.max_allocated());
function->set_temporary_pool_size(mm.max_allocated());
return false;
}
......
......@@ -30,11 +30,11 @@ namespace ngraph
}
}
class ngraph::pass::MemoryLayout : public CallGraphPass
class ngraph::pass::MemoryLayout : public FunctionPass
{
public:
MemoryLayout(size_t alignment = 1);
virtual bool run_on_call_graph(std::list<std::shared_ptr<Node>>&) override;
bool run_on_function(std::shared_ptr<ngraph::Function>) override;
private:
size_t m_alignment;
......
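For reference, a minimal skeleton of the FunctionPass contract that MemoryLayout now implements (a sketch; the boolean return is read as "function was modified", consistent with the false returned by the pass above):

class MyAnnotationPass : public ngraph::pass::FunctionPass
{
public:
    bool run_on_function(std::shared_ptr<ngraph::Function> f) override
    {
        // Walk ops in topological order, as MemoryLayout does.
        for (const std::shared_ptr<ngraph::Node>& node : f->get_ordered_ops())
        {
            (void)node; // inspect or annotate each op here
        }
        return false; // graph structure untouched
    }
};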
......@@ -47,7 +47,7 @@ void CallFrame::tensor_call(
}
// Invoke compiled computation
m_compiled_function(inputs, outputs);
m_compiled_function(inputs.data(), outputs.data());
}
void CallFrame::operator()(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& arguments,
......
......@@ -33,8 +33,7 @@ namespace ngraph
class CallFrame;
class ExternalFunction;
using EntryPoint_t = void(const std::vector<void*>& inputs,
const std::vector<void*>& outputs);
using EntryPoint_t = void(void** inputs, void** outputs);
using EntryPoint = std::function<EntryPoint_t>;
......
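The raw-pointer form keeps std::vector out of the JIT'd symbol's signature, giving the generated extern "C" functions a plain C ABI; the host side simply passes .data(), as CallFrame::tensor_call does above. A sketch of the host-side call, assuming buffers have already been gathered (the namespace nesting of EntryPoint is an assumption):

#include <vector>

void invoke(const ngraph::runtime::cpu::EntryPoint& entry,
            std::vector<void*>& inputs,
            std::vector<void*>& outputs)
{
    // Only raw pointer arrays cross the JIT boundary.
    entry(inputs.data(), outputs.data());
}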
......@@ -24,7 +24,6 @@
#define EMITTER_DECL(E) \
E(const ngraph::Node* n, \
ExternalFunction* ef, \
const std::vector<TensorViewInfo>& inputs, \
const std::vector<TensorViewInfo>& outputs)
......
......@@ -183,15 +183,9 @@ void ExternalFunction::compile()
Emitter emitter;
codegen::CodeWriter& TU = emitter.get_code_writer();
// The "dso_handle" symbol below is required by __cxa_atexit()
// which is enabled because the JIT uses it as the default mechanism
// to register cleanup handlers. We use it, and not atexit(), because
// atexit() happens too late, when the JIT is no longer alive
TU +=
R"(// Generated by the NGraph CPU backend
#include <cmath>
#include <vector>
#include <Eigen/Dense>
......@@ -201,24 +195,27 @@ void ExternalFunction::compile()
using namespace ngraph::runtime::cpu::eigen;
void *__dso_handle = 0;
)";
string pch_header_source = TU.get_code();
// The "dso_handle" symbol is required by __cxa_atexit()
// which is enabled because the JIT uses it as the default mechanism
// to register cleanup handlers. We use it, and not atexit(), because
// atexit() happens too late, when the JIT is no longer alive
TU << "void *__dso_handle = 0;\n\n";
TU << "// Declare all functions\n";
for (shared_ptr<Function> f : pass_manager.get_state().get_functions())
{
TU << "extern \"C\" void " << f->get_name() << "(\n";
TU << " const std::vector<void*>& inputs,\n";
TU << " const std::vector<void*>& outputs);\n";
TU << "extern \"C\" void " << f->get_name() << "(void** inputs, void** outputs);\n";
}
TU << "\n";
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
{
TU << "extern \"C\" void " << current_function->get_name() << "(\n";
TU << " const std::vector<void*>& inputs,\n";
TU << " const std::vector<void*>& outputs)\n";
TU << "extern \"C\" void " << current_function->get_name();
TU << "(void** inputs, void** outputs)\n";
TU << "{\n";
TU.indent++;
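For context on the __cxa_atexit comment above: statics in the generated code register their destructors through the Itanium C++ ABI hook, which takes a DSO handle, so the emitted translation unit must define __dso_handle for the reference to resolve at JIT link time:

// Itanium C++ ABI hook referenced by the generated code (standard signature):
extern "C" int __cxa_atexit(void (*destructor)(void*), void* arg, void* dso_handle);
// Destructors registered against the emitted __dso_handle run when the JIT'd
// module is torn down, not at process atexit(), which would fire after the
// JIT itself is gone.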
......@@ -233,7 +230,7 @@ void *__dso_handle = 0;
}
if (temporaries_used)
{
size_t temp_pool_size = pass_manager.get_state().get_temporary_pool_size();
size_t temp_pool_size = current_function->get_temporary_pool_size();
TU << "// Allocate the memory pool\n";
TU << "ngraph::runtime::AlignedBuffer memory_handler(" << temp_pool_size << ", "
<< ngraph::runtime::cpu::alignment << ");\n";
......@@ -303,7 +300,7 @@ void *__dso_handle = 0;
auto tv = output.get_tensor_view();
out.push_back({0, tv});
}
handler->second(&emitter, node.get(), this, in, out);
handler->second(&emitter, node.get(), in, out);
}
TU.indent--;
......@@ -325,7 +322,10 @@ void *__dso_handle = 0;
compiler.reset(new codegen::Compiler());
execution_engine.reset(new codegen::ExecutionEngine());
compiler->set_precompiled_header_source(pch_header_source);
auto llvm_module = compiler->compile(code);
if (llvm_module == nullptr)
{
throw runtime_error("function failed to compile");
......
......@@ -39,7 +39,6 @@ namespace ngraph
using OpFunction = std::function<void(Emitter*,
const ngraph::Node*,
ExternalFunction*,
const std::vector<TensorViewInfo>& inputs,
const std::vector<TensorViewInfo>& outputs)>;
......
......@@ -57,7 +57,7 @@ if(MKLDNN_INCLUDE_DIR)
set(SRC ${SRC} mkldnn.cpp)
endif()
if(LLVM_INCLUDE_DIR)
if(NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR)
include_directories(SYSTEM ${LLVM_INCLUDE_DIR})
link_directories(${LLVM_LIB_DIR})
set(SRC ${SRC} codegen.cpp)
......
......@@ -2708,3 +2708,30 @@ TEST(${BACKEND_NAME}, sign)
(*cf)({a}, {result});
ASSERT_EQ((vector<float>{1, -1, 0, -1, 1, 0}), result->get_vector<float>());
}
TEST(${BACKEND_NAME}, constant_equality_bool)
{
auto shape = Shape{4};
// auto A = make_shared<op::Parameter>(element::Bool::element_type(), shape);
// auto B = make_shared<op::Parameter>(element::Bool::element_type(), shape);
// auto result_type = make_shared<TensorViewType>(element::Bool::element_type(), shape);
// auto f = make_shared<Function>(make_shared<op::Equal>(A, B), result_type, op::Parameters{A, B});
auto a = runtime::make_tensor<element::Bool>(shape, {true, false, true, false});
auto A = make_shared<op::ParameterizedConstant<element::Bool>>(shape, a);
auto b = runtime::make_tensor<element::Bool>(shape, {true, true, true, true});
auto B = make_shared<op::ParameterizedConstant<element::Bool>>(shape, b);
auto rt = make_shared<TensorViewType>(element::Bool::element_type(), shape);
auto f = make_shared<Function>(make_shared<op::Equal>(A, B), rt, op::Parameters{});
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
// Create some tensors for input/output
auto result = backend->make_primary_tensor_view(element::Bool::element_type(), shape);
(*cf)({}, {result});
ASSERT_EQ((vector<char>{true, false, true, false}), result->get_vector<char>());
}
......@@ -215,7 +215,7 @@ TEST(memory_layout, basic)
auto graph = make_test_graph();
pass_manager.run_passes(graph);
auto sorted = graph->get_ordered_ops();
size_t temporary_pool_size = pass_manager.get_state().get_temporary_pool_size();
size_t temporary_pool_size = graph->get_temporary_pool_size();
EXPECT_EQ(12, temporary_pool_size);
}
......@@ -235,6 +235,6 @@ TEST(memory_layout, constant)
pass_manager.run_passes(f);
auto sorted = f->get_ordered_ops();
size_t temporary_pool_size = pass_manager.get_state().get_temporary_pool_size();
size_t temporary_pool_size = f->get_temporary_pool_size();
EXPECT_EQ(0, temporary_pool_size);
}