Unverified Commit 45fba7b1 authored by Robert Kimball, committed by GitHub

Add a real HybridBackend (#1998)

* wip

* wip

* wip

* move hybrid wrapper to hybrid backend dir

* move hybrid wrapper to correct namespace

* wip

* sorta working

* remove debug from sorta working homogeneous hybrid backend

* is_supported is supported for GPU

* cleanup debug

* more progress

* remove debug

* cleanup

* turn off hybrid by default

* revert change

* revert

* rename wrapper to backend

* revert

* address review comments

* style
parent ba73e2b8
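This commit replaces the stub HYBRIDBackend with a working HybridBackend: a runtime::Backend that wraps an ordered list of named member backends, lets the AssignPlacement pass pin each node to the first member that reports support, splits the function into per-placement sub-functions, and dispatches each sub-function to the backend that owns it. The sketch below shows how such a wrapper is constructed, mirroring the factory the GPU plugin gains in this diff; the header paths and the helper name make_gpu_hybrid are illustrative assumptions, not part of the commit.

// Minimal construction sketch (illustrative only).
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "ngraph/runtime/gpu/gpu_backend.hpp"        // assumed header for GPU_Backend
#include "ngraph/runtime/hybrid/hybrid_backend.hpp"

std::shared_ptr<ngraph::runtime::Backend> make_gpu_hybrid()
{
    using namespace ngraph;
    // The hybrid backend takes an ordered list of named member backends.
    // Placement 1 maps to the first entry, placement 2 to the second, and so on;
    // placement 0 is reserved for the default placement.
    std::vector<std::pair<std::string, std::shared_ptr<runtime::Backend>>> backend_list{
        {"GPU", std::make_shared<runtime::gpu::GPU_Backend>()}};
    return std::make_shared<runtime::hybrid::HybridBackend>(backend_list);
}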
......@@ -112,6 +112,10 @@ message(STATUS "NGRAPH_ONNX_IMPORT_ENABLE: ${NGRAPH_ONNX_IMPORT_ENABLE}")
message(STATUS "NGRAPH_DEX_ONLY: ${NGRAPH_DEX_ONLY}")
message(STATUS "NGRAPH_CODE_COVERAGE_ENABLE: ${NGRAPH_CODE_COVERAGE_ENABLE}")
if (NGRAPH_HYBRID_ENABLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNGRAPH_HYBRID_ENABLE")
endif()
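Note that, per the commit history above ("turn off hybrid by default"), NGRAPH_HYBRID_ENABLE is off unless set at configure time; when it is on, this block adds the NGRAPH_HYBRID_ENABLE compile definition that gates the hybrid code paths, such as the GPU plugin's new_backend factory further down in this diff.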
if (NGRAPH_ONNX_IMPORT_ENABLE)
option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system provided Protobuf shared object" FALSE)
option(NGRAPH_ONNXIFI_ENABLE "Enable ONNX Interface for Framework Integration" TRUE)
......
......@@ -44,10 +44,10 @@ bool pass::AssignPlacement::run_on_node(shared_ptr<Node> node)
if (backend->is_supported(*node))
{
node->set_placement(backend_index);
return false;
}
}
throw runtime_error("Node " + node->get_name() + " not supported by any backend");
}
else
{
......
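With this change the placement pass tries each member backend in list order and assigns the node to the first one whose is_supported returns true, throwing if no backend claims it. Placement indices are 1-based because 0 denotes the default placement, which is why HybridBackend::compile below indexes m_backend_list with placement - 1.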
......@@ -20,7 +20,6 @@ if (NGRAPH_HYBRID_ENABLE)
add_subdirectory(hybrid)
endif()
if (NGRAPH_CPU_ENABLE)
add_subdirectory(cpu)
endif()
......
......@@ -110,7 +110,7 @@ void runtime::Backend::validate_call(shared_ptr<const Function> function,
bool runtime::Backend::is_supported(const Node& node) const
{
// The default behavior is that a backend fully supports all ops. If this is not the case
// The default behavior is that a backend does not support any ops. If this is not the case
// then override this method and enhance.
return false;
}
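Flipping the base-class default from "supports every op" to "supports none" means each concrete backend must now override is_supported to opt in to the ops and element types it can actually execute. The GPU override added later in this diff restricts support to float32-typed nodes, while another backend header near the end of the diff restores blanket support by returning true.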
......@@ -144,13 +144,19 @@ if (NGRAPH_GPU_ENABLE)
find_package(CUDNN 7 REQUIRED)
target_include_directories(gpu_backend SYSTEM PUBLIC ${CUDA_INCLUDE_DIRS} ${CUDNN_INCLUDE_DIR})
target_link_libraries(gpu_backend PUBLIC
${CUDA_cuda_LIBRARY}
${CUDA_nvrtc_LIBRARY}
${CUDA_cudart_LIBRARY}
${CUDA_LIBRARIES}
${CUDA_CUBLAS_LIBRARIES}
${CUDNN_LIBRARIES})
target_link_libraries(gpu_backend
PUBLIC
${CUDA_cuda_LIBRARY}
${CUDA_nvrtc_LIBRARY}
${CUDA_cudart_LIBRARY}
${CUDA_LIBRARIES}
${CUDA_CUBLAS_LIBRARIES}
${CUDNN_LIBRARIES})
if (NGRAPH_HYBRID_ENABLE)
target_link_libraries(gpu_backend
PRIVATE
hybrid_backend)
endif()
set_target_properties(gpu_backend PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${NGRAPH_BUILD_DIR})
install(TARGETS gpu_backend LIBRARY DESTINATION ${NGRAPH_INSTALL_LIB})
......
......@@ -24,6 +24,7 @@
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/runtime/gpu/gpu_primitive_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_tensor.hpp"
#include "ngraph/runtime/hybrid/hybrid_backend.hpp"
#include "ngraph/util.hpp"
using namespace ngraph;
......@@ -36,7 +37,15 @@ extern "C" const char* get_ngraph_version_string()
extern "C" runtime::Backend* new_backend(const char* configuration_string)
{
#ifdef NGRAPH_HYBRID_ENABLE
vector<pair<string, shared_ptr<runtime::Backend>>> backend_list{
{"GPU", make_shared<runtime::gpu::GPU_Backend>()}};
auto wrapper = new runtime::hybrid::HybridBackend(backend_list);
return wrapper;
#else
return new runtime::gpu::GPU_Backend();
#endif
}
extern "C" void delete_backend(runtime::Backend* backend)
......@@ -218,3 +227,34 @@ vector<runtime::PerformanceCounter>
}
return rc;
}
bool runtime::gpu::GPU_Backend::is_supported(const Node& node) const
{
bool rc = true;
// determine the element type to check for this op
element::Type type;
if (node.description() == "Select")
{
type = node.get_input_element_type(1);
}
else if (node.description() == "Constant")
{
type = node.get_outputs().at(0).get_element_type();
}
else if (node.description() == "Parameter")
{
type = node.get_outputs().at(0).get_element_type();
}
else
{
type = node.get_input_element_type(0);
}
if (type != element::f32)
{
rc = false;
}
return rc;
}
......@@ -62,6 +62,8 @@ namespace ngraph
std::vector<PerformanceCounter>
get_performance_data(std::shared_ptr<Function> func) const override;
bool is_supported(const Node& node) const override;
class BackendContext
{
public:
......
......@@ -14,44 +14,14 @@
// limitations under the License.
//*****************************************************************************
#include <memory>
#include <sstream>
#include <string>
#include <typeindex>
#include <typeinfo>
#include <vector>
#include "ngraph/descriptor/layout/dense_tensor_layout.hpp"
#include "ngraph/except.hpp"
#include "ngraph/runtime/hybrid/hybrid_backend.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/assign_placement.hpp"
#include "ngraph/pass/like_replacement.hpp"
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/hybrid/hybrid_backend.hpp"
#include "ngraph/util.hpp"
#include "ngraph/runtime/tensor.hpp"
using namespace std;
using namespace ngraph;
using descriptor::layout::DenseTensorLayout;
extern "C" const char* get_ngraph_version_string()
{
return NGRAPH_VERSION;
}
extern "C" runtime::Backend* new_backend(const char* configuration_string)
{
return new runtime::hybrid::HYBRIDBackend();
}
extern "C" void delete_backend(runtime::Backend* backend)
{
delete backend;
}
using namespace std;
template <typename T>
void copy_data(std::shared_ptr<ngraph::runtime::Tensor> tv, const std::vector<T>& data)
......@@ -74,49 +44,142 @@ std::vector<T> read_vector(std::shared_ptr<ngraph::runtime::Tensor> tv)
return rc;
}
shared_ptr<runtime::Backend> runtime::hybrid::HYBRIDBackend::get_cached_backend(Placement placement)
runtime::hybrid::HybridBackend::HybridBackend(
const std::vector<std::pair<std::string, std::shared_ptr<runtime::Backend>>>& backend_list)
: m_backend_list{backend_list}
{
if (m_cached_backends.find(placement) == m_cached_backends.end())
{
m_cached_backends[placement] = runtime::Backend::create(placement_to_string(placement));
}
return m_cached_backends.at(placement);
}
shared_ptr<runtime::Tensor> runtime::hybrid::HYBRIDBackend::create_tensor(const element::Type& type,
const Shape& shape)
shared_ptr<runtime::Tensor>
runtime::hybrid::HybridBackend::create_tensor(const element::Type& element_type,
const Shape& shape)
{
return make_shared<runtime::HostTensor>(type, shape, "external");
auto it = m_backend_list.begin();
return it->second->create_tensor(element_type, shape);
}
shared_ptr<runtime::Tensor> runtime::hybrid::HYBRIDBackend::create_tensor(const element::Type& type,
const Shape& shape,
void* memory_pointer)
shared_ptr<runtime::Tensor> runtime::hybrid::HybridBackend::create_tensor(
const element::Type& element_type, const Shape& shape, void* memory_pointer)
{
return make_shared<runtime::HostTensor>(type, shape, memory_pointer, "external");
auto it = m_backend_list.begin();
return it->second->create_tensor(element_type, shape, memory_pointer);
}
bool runtime::hybrid::HYBRIDBackend::compile(shared_ptr<Function> function)
bool runtime::hybrid::HybridBackend::compile(shared_ptr<Function> func)
{
if (m_function_map.find(function) == m_function_map.end())
if (m_function_map.find(func) == m_function_map.end())
{
vector<shared_ptr<runtime::Backend>> backend_list;
for (auto p : m_backend_list)
{
backend_list.push_back(p.second);
}
// Clone function
FunctionInstance instance;
instance.m_function = clone_function(*function);
instance.m_function = clone_function(*func);
// Run placement pass
pass::Manager pass_manager;
pass_manager.register_pass<pass::AssignPlacement>(backend_list);
pass_manager.run_passes(instance.m_function);
// Split function to sub_functions
tie(instance.m_sub_functions, instance.m_map_parameter_to_result) =
split_function_by_placement_size(instance.m_function);
m_function_map.insert({func, instance});
// Compile subfunctions in corresponding backends
for (shared_ptr<Function>& sub_function : instance.m_sub_functions)
{
size_t placement = get_colocated_function_placement_size(sub_function);
auto backend =
m_backend_list[(placement - 1)]; // (placement-1) as 0 is default placement
backend.second->compile(sub_function);
}
}
return true;
}
bool runtime::hybrid::HYBRIDBackend::call(shared_ptr<Function> function,
bool runtime::hybrid::HybridBackend::call(shared_ptr<Function> func,
const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
{
validate_call(function, outputs, inputs);
// Get FunctionInstance
bool rc = true;
compile(func);
auto it = m_function_map.find(func);
if (it == m_function_map.end())
{
throw runtime_error("Unable to compile hybrid backend");
}
FunctionInstance& instance = it->second;
// Parameter and result node in sub_function maps to one Tensor
unordered_map<shared_ptr<Node>, shared_ptr<runtime::Tensor>> map_node_to_tensor_view;
for (size_t i = 0; i < inputs.size(); ++i)
{
map_node_to_tensor_view[instance.m_function->get_parameters()[i]] = inputs[i];
}
for (size_t i = 0; i < outputs.size(); ++i)
{
map_node_to_tensor_view[instance.m_function->get_results()[i]] = outputs[i];
}
compile(function);
// Call subfunctions
for (shared_ptr<Function>& sub_function : instance.m_sub_functions)
{
// Init backend
size_t placement = get_colocated_function_placement_size(sub_function);
// (placement-1) as 0 is default placement
auto backend = m_backend_list[(placement - 1)].second;
// Prepare parameter TensorViews
vector<shared_ptr<runtime::Tensor>> parameter_tvs;
for (auto parameter_node : sub_function->get_parameters())
{
if (map_node_to_tensor_view.find(parameter_node) != map_node_to_tensor_view.end())
{
parameter_tvs.push_back(map_node_to_tensor_view.at(parameter_node));
}
else
{
auto result_node = instance.m_map_parameter_to_result.at(parameter_node);
auto result_tv = map_node_to_tensor_view.at(result_node);
auto parameter_tv = backend->create_tensor(parameter_node->get_element_type(),
parameter_node->get_shape());
copy_data(parameter_tv, read_vector<float>(result_tv));
map_node_to_tensor_view[parameter_node] = parameter_tv;
parameter_tvs.push_back(parameter_tv);
}
}
// Prepare result TensorViews
vector<shared_ptr<runtime::Tensor>> result_tvs;
for (auto result_node : sub_function->get_results())
{
if (map_node_to_tensor_view.find(result_node) != map_node_to_tensor_view.end())
{
result_tvs.push_back(map_node_to_tensor_view.at(result_node));
}
else
{
auto result_tv = backend->create_tensor(result_node->get_element_type(),
result_node->get_shape());
map_node_to_tensor_view[result_node] = result_tv;
result_tvs.push_back(result_tv);
}
}
// Call
backend->call_with_validate(sub_function, result_tvs, parameter_tvs);
}
return rc;
}
bool runtime::hybrid::HybridBackend::is_supported(const Node& node) const
{
return true;
}
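To summarize the flow above: compile clones the function, runs AssignPlacement, splits the clone into per-placement sub-functions, and compiles each on its member backend; call then executes the sub-functions in order, bridging tensors between backends by reading a producer's result tensor and copying it into a freshly created parameter tensor on the consumer's backend. That bridge currently goes through read_vector<float>, so intermediate values are assumed to be float32, consistent with the GPU is_supported override above. The sketch below illustrates the end-to-end usage this enables; it assumes a float32 Function f built elsewhere and reuses the copy_data/read_vector test helpers kept in this file, plus the headers from the earlier construction sketch.

// Hedged end-to-end sketch; `f` is assumed to be a float32 Function with one
// 4-element input and one 4-element output.
void run_through_hybrid(const std::shared_ptr<ngraph::Function>& f)
{
    using namespace ngraph;

    std::vector<std::pair<std::string, std::shared_ptr<runtime::Backend>>> backend_list{
        {"GPU", std::make_shared<runtime::gpu::GPU_Backend>()}};
    auto backend = std::make_shared<runtime::hybrid::HybridBackend>(backend_list);

    // Tensors are created on the first member backend (see create_tensor above).
    auto input = backend->create_tensor(element::f32, Shape{4});
    auto output = backend->create_tensor(element::f32, Shape{4});
    copy_data(input, std::vector<float>{1, 2, 3, 4});

    backend->compile(f);                 // clone, place, split, compile sub-functions
    backend->call(f, {output}, {input}); // run each sub-function on its member backend

    auto result = read_vector<float>(output); // host copy of the output values
}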
......@@ -16,14 +16,12 @@
#pragma once
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/tensor.hpp"
namespace ngraph
{
......@@ -31,37 +29,45 @@ namespace ngraph
{
namespace hybrid
{
class HYBRIDBackend : public runtime::Backend
{
public:
std::shared_ptr<Tensor> create_tensor(const element::Type& type,
const Shape& shape,
void* memory_pointer) override;
class HybridBackend;
}
}
}
std::shared_ptr<Tensor> create_tensor(const element::Type& type,
const Shape& shape) override;
class ngraph::runtime::hybrid::HybridBackend : public ngraph::runtime::Backend
{
public:
HybridBackend(
const std::vector<std::pair<std::string, std::shared_ptr<runtime::Backend>>>& backend_list);
bool compile(std::shared_ptr<Function> function) override;
std::shared_ptr<ngraph::runtime::Tensor>
create_tensor(const ngraph::element::Type& element_type,
const ngraph::Shape& shape) override;
bool call(std::shared_ptr<Function> function,
const std::vector<std::shared_ptr<Tensor>>& outputs,
const std::vector<std::shared_ptr<Tensor>>& intputs) override;
std::shared_ptr<ngraph::runtime::Tensor>
create_tensor(const ngraph::element::Type& element_type,
const ngraph::Shape& shape,
void* memory_pointer) override;
private:
class FunctionInstance
{
public:
std::shared_ptr<Function> m_function;
std::vector<std::shared_ptr<Function>> m_sub_functions;
std::unordered_map<std::shared_ptr<op::Parameter>, std::shared_ptr<op::Result>>
m_map_parameter_to_result;
};
bool compile(std::shared_ptr<ngraph::Function> func) override;
std::shared_ptr<runtime::Backend> get_cached_backend(Placement placement);
bool call(std::shared_ptr<ngraph::Function> func,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& inputs) override;
std::map<Placement, std::shared_ptr<runtime::Backend>> m_cached_backends;
std::map<std::shared_ptr<Function>, FunctionInstance> m_function_map;
};
}
}
}
bool is_supported(const ngraph::Node& node) const override;
private:
class FunctionInstance
{
public:
std::shared_ptr<ngraph::Function> m_function;
std::vector<std::shared_ptr<ngraph::Function>> m_sub_functions;
std::unordered_map<std::shared_ptr<ngraph::op::Parameter>,
std::shared_ptr<ngraph::op::Result>>
m_map_parameter_to_result;
};
std::map<std::shared_ptr<ngraph::Function>, FunctionInstance> m_function_map;
std::vector<std::pair<std::string, std::shared_ptr<runtime::Backend>>> m_backend_list;
};
......@@ -163,6 +163,7 @@ public:
std::vector<PerformanceCounter>
get_performance_data(std::shared_ptr<Function> func) const override;
bool is_supported(const Node& node) const override { return true; }
private:
class FunctionInstance
{
......
......@@ -19,6 +19,7 @@
#include <memory>
#include <vector>
#include "ngraph/descriptor/layout/tensor_layout.hpp"
#include "ngraph/descriptor/tensor.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/strides.hpp"
......
......@@ -23,6 +23,7 @@
#include "ngraph/ngraph.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/backend_manager.hpp"
#include "ngraph/runtime/hybrid/hybrid_backend.hpp"
#include "util/all_close.hpp"
#include "util/all_close_f.hpp"
#include "util/ndarray.hpp"
......