Commit 91219e40 authored by Jaikrishnan Menon's avatar Jaikrishnan Menon Committed by Robert Kimball

Basic infrastructure for simple halide subgraphs (#1854)

* Basic infrastructure for simple halide subgraphs

* Always build the op since it has no dependencies

* minor cleanup

* Incorporate feedback
parent b079f266
......@@ -81,6 +81,7 @@ set(SRC
op/batch_norm_relu.cpp
op/bounded_relu.cpp
op/group_conv.cpp
op/halide_op.cpp
op/conv_bias.cpp
op/conv_relu.cpp
op/convert_layout.cpp
......@@ -114,6 +115,14 @@ if (NOT NGRAPH_DEX_ONLY)
)
endif()
if (NGRAPH_HALIDE)
set(SRC
${SRC}
builder/halide_op.cpp
pass/halide_subgraph_extraction.cpp
)
endif()
if (NGRAPH_TBB_ENABLE)
include(${TBB_ROOT}/cmake/TBBBuild.cmake)
tbb_build(TBB_ROOT ${TBB_ROOT} MAKE_ARGS tbb_build_dir=${CMAKE_CURRENT_BINARY_DIR}/tbb_build
......@@ -151,6 +160,12 @@ if (NGRAPH_CPU_ENABLE)
if (NGRAPH_DEX_ONLY)
target_compile_definitions(cpu_backend PRIVATE "NGRAPH_DEX_ONLY")
endif()
if (NGRAPH_HALIDE)
target_compile_definitions(cpu_backend PRIVATE "NGRAPH_HALIDE")
ExternalProject_Get_Property(ext_halide BINARY_DIR)
target_include_directories(cpu_backend SYSTEM PRIVATE ${BINARY_DIR}/include)
target_link_libraries(cpu_backend PRIVATE ${BINARY_DIR}/lib/libHalide.so)
endif()
if(OPENMP_FOUND)
target_compile_options(cpu_backend PRIVATE "${OpenMP_CXX_FLAGS}")
......
//*****************************************************************************
// Copyright 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <Halide.h>
#include <HalideBuffer.h>
#include <functional>
#include <string>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
#include "ngraph/op/add.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/op/halide_op.hpp"
using namespace std;
using namespace ngraph;
#define TI(x) type_index(typeid(x))
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace halide
{
// Maps an ngraph op's type_index to a generator that emits the
// equivalent Halide::Func given the op's already-built input Funcs
// (in argument order). All kernels are elementwise over a single
// flattened (rank-1) iteration domain, matching the rank-1 f32
// ImageParams created by the HalideOp builder below.
static const std::unordered_map<std::type_index,
std::function<Halide::Func(vector<Halide::Func>)>>
generators{{TI(ngraph::op::Add),
[](vector<Halide::Func> in) {
Halide::Var x;
Halide::Func func;
// out[x] = in0[x] + in1[x]
func(x) = in[0](x) + in[1](x);
return func;
}},
{TI(ngraph::op::Multiply),
[](vector<Halide::Func> in) {
Halide::Var x;
Halide::Func func;
// out[x] = in0[x] * in1[x]
func(x) = in[0](x) * in[1](x);
return func;
}},
{TI(ngraph::op::Relu), [](vector<Halide::Func> in) {
Halide::Var x;
Halide::Func func;
// out[x] = max(in0[x], 0)
func(x) = Halide::max(in[0](x), 0);
return func;
}}};
}
template <>
void Builder::BUILDER_DECL(ngraph::runtime::cpu::op::HalideOp)
{
// Builder for a fused HalideOp: translates each wrapped op into a
// Halide::Func using the generator table above, then emits a single
// runtime functor that realizes the terminal Func into the output tensor.
const ngraph::runtime::cpu::op::HalideOp* hs =
static_cast<const ngraph::runtime::cpu::op::HalideOp*>(node);
// Per-external-function state, keyed by tensor name:
//   halide_functions    : tensor -> Halide::Func already built for it
//   subgraph_params     : tensor -> ImageParam for live-ins from outside the subgraph
//   subgraph_param_sizes: tensor -> element count of that live-in
//   subgraph_param_ptrs : tensor -> reference to its runtime data pointer
auto& halide_functions = external_function->get_halide_functions();
auto& subgraph_params = external_function->get_subgraph_params();
auto& subgraph_param_sizes = external_function->get_subgraph_param_sizes();
auto& subgraph_param_ptrs = external_function->get_subgraph_param_ptrs();
// hs->get_ops() is in execution order, so every op's inputs are either
// already in halide_functions or are live-ins from outside the subgraph.
for (const auto& op : hs->get_ops())
{
if (!halide::generators.count(TI(*op)))
{
throw ngraph_error("Invalid op in halide subgraph");
}
vector<Halide::Func> inputs;
for (const auto& input : op->get_inputs())
{
auto tensor_name = input.get_output().get_tensor_ptr()->get_name();
if (halide_functions.count(tensor_name))
{
// Produced by an earlier op in this subgraph: chain the Funcs.
inputs.emplace_back(halide_functions[tensor_name]);
}
else
{
// Live-in tensor: expose it to Halide as a rank-1 f32 image.
// NOTE(review): Float(32)/rank-1 is hard-coded here, consistent
// with the f32-only check in the extraction pass.
subgraph_params[tensor_name] = Halide::ImageParam(Halide::Float(32), 1);
subgraph_param_sizes[tensor_name] =
shape_size(input.get_output().get_tensor_ptr()->get_shape());
subgraph_param_ptrs.emplace(
tensor_name, external_function->get_tensor_data(tensor_name));
inputs.emplace_back(subgraph_params[tensor_name]);
}
}
halide_functions[op->get_output_tensor_ptr()->get_name()] =
halide::generators.at(TI(*op))(inputs);
}
// The last op's output is the subgraph's (single) output.
auto out_tensor_name = hs->get_ops().back()->get_output_tensor_ptr()->get_name();
auto& functors = external_function->get_functors();
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& terminal_func = halide_functions[out_tensor_name];
auto out_size = out[0].get_size();
// The functor captures the maps above by reference -- assumes
// external_function outlives the emitted functor (true for the CPU
// backend's execution model; confirm if lifetimes change).
auto functor = [&, out_size](CPURuntimeContext* ctx) {
// Rebind every live-in ImageParam to the tensor's current data
// pointer each call, since tensor storage may move between calls.
for (auto& param : subgraph_params)
{
Halide::Buffer<float> param_buffer(
static_cast<float*>(subgraph_param_ptrs.at(param.first).get()),
subgraph_param_sizes.at(param.first));
param.second.set(param_buffer);
}
Halide::Buffer<float> out_buffer(static_cast<float*>(out_tensor), out_size);
terminal_func.realize(out_buffer);
};
functors.emplace_back(functor);
}
}
}
}
......@@ -98,6 +98,7 @@
#include "ngraph/runtime/cpu/kernel/tan.hpp"
#include "ngraph/runtime/cpu/kernel/tanh.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/halide_op.hpp"
#include "ngraph/type/element_type.hpp"
#include "ngraph/util.hpp"
......@@ -367,7 +368,9 @@ namespace ngraph
static BuildOpMap build_dispatcher{
{TI(ngraph::op::Parameter), &runtime::cpu::Builder::nop},
{TI(ngraph::runtime::cpu::op::ConvertLayout),
&runtime::cpu::Builder::build<ngraph::runtime::cpu::op::ConvertLayout>}};
&runtime::cpu::Builder::build<ngraph::runtime::cpu::op::ConvertLayout>},
{TI(ngraph::runtime::cpu::op::HalideOp),
&runtime::cpu::Builder::build<ngraph::runtime::cpu::op::HalideOp>}};
return build_dispatcher;
}
......
......@@ -169,6 +169,7 @@
#include "ngraph/runtime/cpu/pass/cpu_post_layout_optimizations.hpp"
#include "ngraph/runtime/cpu/pass/cpu_rnn_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_workspace_insertion.hpp"
#include "ngraph/runtime/cpu/pass/halide_subgraph_extraction.hpp"
#ifdef NGRAPH_DISTRIBUTED
#include "ngraph/op/allreduce.hpp"
......@@ -1019,6 +1020,10 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
// pass_manager.register_pass<runtime::cpu::pass::CPUHorizontalFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUCollapseDims>();
#if defined(NGRAPH_HALIDE)
pass_manager.register_pass<ngraph::runtime::cpu::pass::HalideSubgraphExtraction>();
#endif
NodeVector nv_cwi; // We dont need CPUWorkspaceInsertion to return list of indices
pass_manager.register_pass<runtime::cpu::pass::CPUWorkspaceInsertion>(nv_cwi, false);
pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this);
......
......@@ -27,6 +27,10 @@
#include <utility>
#include <vector>
#if defined(NGRAPH_HALIDE)
#include <Halide.h>
#endif
#if !defined(NGRAPH_DEX_ONLY)
#include "ngraph/codegen/code_writer.hpp"
......@@ -134,6 +138,26 @@ namespace ngraph
const std::string& directory,
const std::string& filename);
#if defined(NGRAPH_HALIDE)
std::unordered_map<std::string, Halide::Func>& get_halide_functions()
{
return halide_functions;
}
std::unordered_map<std::string, Halide::ImageParam>& get_subgraph_params()
{
return subgraph_params;
}
std::unordered_map<std::string, int>& get_subgraph_param_sizes()
{
return subgraph_param_sizes;
}
std::unordered_map<std::string, std::reference_wrapper<void*>>&
get_subgraph_param_ptrs()
{
return subgraph_param_ptrs;
}
#endif
protected:
void build();
......@@ -233,6 +257,13 @@ namespace ngraph
std::unordered_map<std::string, std::shared_ptr<CPU_ExternalFunction>> callees;
bool m_is_built;
bool m_direct_execution;
#if defined(NGRAPH_HALIDE)
std::unordered_map<std::string, Halide::Func> halide_functions;
std::unordered_map<std::string, Halide::ImageParam> subgraph_params;
std::unordered_map<std::string, int> subgraph_param_sizes;
std::unordered_map<std::string, std::reference_wrapper<void*>> subgraph_param_ptrs;
#endif
};
}
}
......
//*****************************************************************************
// Copyright 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/op/halide_op.hpp"
using namespace std;
using namespace ngraph;
// Clone this HalideOp over a new set of input nodes, preserving the wrapped
// op list and the recorded output type/shape.
// NOTE(review): new_args.size() is not validated against the original
// argument count -- presumably guaranteed by callers; confirm.
shared_ptr<Node> runtime::cpu::op::HalideOp::copy_with_new_args(const NodeVector& new_args) const
{
return make_shared<HalideOp>(new_args, ops, output_type, output_shape);
}
// Fused-subgraph op: wraps an ordered list of nodes to be compiled and run
// as a single Halide pipeline.
//   args      - live-in nodes feeding the subgraph (become this op's inputs)
//   ops       - the wrapped nodes, in execution order
//   out_type  - element type of the single output
//   out_shape - shape of the single output
runtime::cpu::op::HalideOp::HalideOp(const NodeVector& args,
const std::list<std::shared_ptr<Node>>& ops,
const element::Type& out_type,
const Shape& out_shape)
: Op("HalideOp", check_single_output_args(args))
, ops(ops)
, output_type(out_type)
, output_shape(out_shape)
{
constructor_validate_and_infer_types();
}
// The output signature is fixed at construction time; just publish the
// recorded type/shape as the op's single output.
void runtime::cpu::op::HalideOp::validate_and_infer_types()
{
set_output_type(0, output_type, output_shape);
}
//*****************************************************************************
// Copyright 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <list>
#include <vector>
#include "ngraph/op/op.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace op
{
/// \brief CPU-backend op wrapping a subgraph of nodes that is
///        compiled and executed as a single Halide pipeline.
///
/// Created by the HalideSubgraphExtraction pass; the wrapped
/// nodes are kept in execution order so the builder can
/// translate each one to a Halide::Func.
class HalideOp : public ngraph::op::Op
{
public:
HalideOp(const NodeVector& args,
const std::list<std::shared_ptr<Node>>& ops,
const element::Type& out_type,
const Shape& out_shape);
virtual void validate_and_infer_types() override;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
/// Wrapped nodes, in execution order.
const std::list<std::shared_ptr<Node>>& get_ops() const { return ops; }
private:
std::list<std::shared_ptr<Node>> ops; // subgraph nodes, execution order
element::Type output_type; // single-output element type
Shape output_shape; // single-output shape
};
}
}
}
}
//*****************************************************************************
// Copyright 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <iostream>
#include <list>
#include <typeindex>
#include <typeinfo>
#include <unordered_set>
#include "ngraph/op/add.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/runtime/cpu/op/halide_op.hpp"
#include "ngraph/runtime/cpu/pass/halide_subgraph_extraction.hpp"
using namespace std;
using namespace ngraph;
#define TI(x) type_index(typeid(x))
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace halide
{
// Ops that can be lowered to Halide. Must stay in sync with the
// generator table in builder/halide_op.cpp.
static const std::unordered_set<std::type_index> whitelist{
TI(ngraph::op::Add), TI(ngraph::op::Multiply), TI(ngraph::op::Relu)};
// Graph plumbing nodes: traversed through during extraction but
// never included in the extracted subgraph.
static const std::unordered_set<std::type_index> skiplist{TI(ngraph::op::Parameter),
TI(ngraph::op::Result)};
}
}
}
}
// Carves a contiguous chain of Halide-supported elementwise ops feeding the
// function's single result out of the graph and replaces it with one
// cpu::op::HalideOp.
//
// Support for multiple results, multiple outputs and GetOutputElement, and
// multiple subgraphs in a single pipeline is not implemented since this
// should go away in favor of the "hybrid" transformer approach of carving
// out subgraphs in core ngraph.
//
// Returns true iff the graph was modified.
bool runtime::cpu::pass::HalideSubgraphExtraction::run_on_function(
    std::shared_ptr<ngraph::Function> function)
{
    list<shared_ptr<Node>> worklist;
    auto results = function->get_results();

    // Artificial limitation: only single-result functions are handled.
    if (results.size() > 1)
    {
        return false;
    }

    // The Halide kernels emitted by the builder are f32-only.
    if (function->get_result()->get_element_type() != element::f32)
    {
        return false;
    }

    for (const auto& result : results)
    {
        worklist.emplace_back(result);
    }

    unordered_set<shared_ptr<Node>> ops;
    list<shared_ptr<Node>> ordered_ops;

    // Walk upward from the result, collecting whitelisted ops until the
    // first unsupported node is reached (single-subgraph limitation).
    while (!worklist.empty())
    {
        const auto& node = worklist.front();
        if (!halide::skiplist.count(TI(*node)))
        {
            if (halide::whitelist.count(TI(*node)))
            {
                // A node can be reached through multiple users (e.g. A + A);
                // only record it the first time so ordered_ops stays
                // duplicate-free.
                if (ops.emplace(node).second)
                {
                    ordered_ops.emplace_back(node);
                }
            }
            else
            {
                break;
            }
        }
        const auto& args = node->get_arguments();
        for (const auto& arg : args)
        {
            worklist.emplace_back(arg);
        }
        worklist.pop_front();
    }

    // Nothing extractable (e.g. the op feeding the result is unsupported):
    // leave the function untouched rather than creating an empty HalideOp.
    if (ordered_ops.empty())
    {
        return false;
    }

    // Live-ins: arguments produced outside the extracted set. They become
    // the inputs of the fused HalideOp.
    NodeVector liveins;
    for (const auto& op : ops)
    {
        const auto& args = op->get_arguments();
        for (const auto& arg : args)
        {
            if (!ops.count(arg))
            {
                liveins.emplace_back(arg);
            }
        }
    }

    // Traversal was output-to-input; reverse into execution order for the
    // builder.
    ordered_ops.reverse();

    auto subgraph = make_shared<cpu::op::HalideOp>(liveins,
                                                   ordered_ops,
                                                   function->get_result()->get_element_type(),
                                                   function->get_result()->get_shape());
    replace_node(function->get_result()->get_argument(0), subgraph);
    return true;
}
//*****************************************************************************
// Copyright 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/pass/pass.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace pass
{
/// \brief Function pass that extracts a chain of Halide-supported
///        elementwise ops from the graph and replaces it with a
///        single cpu::op::HalideOp node, which the CPU backend
///        then compiles as one Halide pipeline.
class HalideSubgraphExtraction : public ngraph::pass::FunctionPass
{
public:
HalideSubgraphExtraction() {}
bool run_on_function(std::shared_ptr<ngraph::Function> function) override;
};
}
}
}
}
......@@ -68,6 +68,9 @@ endif()
if (NGRAPH_CPU_ENABLE)
list(APPEND SRC core_fusion.cpp quantize_cpu.cpp)
list(APPEND SRC backend_performance.cpp cpu_fusion.cpp cpu_test.cpp cpu_reshape_sinking.cpp)
if (NGRAPH_HALIDE)
list(APPEND SRC halide.cpp)
endif()
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} CPU)
endif()
......
//*****************************************************************************
// Copyright 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <algorithm>
#include <cstdio>
#include <iostream>
#include <list>
#include <memory>
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "ngraph/util.hpp"
#include "util/all_close.hpp"
#include "util/test_tools.hpp"
using namespace ngraph;
using namespace std;
// End-to-end check of the Halide subgraph path on the CPU backend:
// computes relu((p0 + p1) * p2) + p3 elementwise over f32[8] and compares
// against precomputed reference values. With every input equal to x the
// expression reduces to relu(2*x^2) + x = 2*x^2 + x.
TEST(halide, halide_subgraph)
{
    Shape shape{8};
    auto p0 = make_shared<op::Parameter>(element::f32, shape);
    auto p1 = make_shared<op::Parameter>(element::f32, shape);
    auto p2 = make_shared<op::Parameter>(element::f32, shape);
    auto p3 = make_shared<op::Parameter>(element::f32, shape);

    auto activation = make_shared<op::Relu>((p0 + p1) * p2);
    auto f = make_shared<Function>(activation + p3, op::ParameterVector{p0, p1, p2, p3});

    auto backend = runtime::Backend::create("CPU");

    vector<float> data{-1, 4, -2, 5, 1, 5, 7, 9};

    // All four inputs carry the same data vector.
    vector<shared_ptr<runtime::Tensor>> args;
    for (size_t i = 0; i < 4; ++i)
    {
        auto arg = backend->create_tensor(element::f32, shape);
        copy_data(arg, data);
        args.push_back(arg);
    }
    shared_ptr<runtime::Tensor> result = backend->create_tensor(element::f32, shape);

    vector<float> expected{1, 36, 6, 55, 3, 55, 105, 171};

    backend->call_with_validate(f, {result}, args);

    EXPECT_TRUE(test::all_close(read_vector<float>(result), expected, 1.0e-4f, 1.0e-4f));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment