Commit 91219e40 authored by Jaikrishnan Menon's avatar Jaikrishnan Menon Committed by Robert Kimball

Basic infrastructure for simple halide subgraphs (#1854)

* Basic infrastructure for simple halide subgraphs

* Always build the op since it has no dependencies

* minor cleanup

* Incorporate feedback
parent b079f266
......@@ -81,6 +81,7 @@ set(SRC
op/batch_norm_relu.cpp
op/bounded_relu.cpp
op/group_conv.cpp
op/halide_op.cpp
op/conv_bias.cpp
op/conv_relu.cpp
op/convert_layout.cpp
......@@ -114,6 +115,14 @@ if (NOT NGRAPH_DEX_ONLY)
)
endif()
if (NGRAPH_HALIDE)
set(SRC
${SRC}
builder/halide_op.cpp
pass/halide_subgraph_extraction.cpp
)
endif()
if (NGRAPH_TBB_ENABLE)
include(${TBB_ROOT}/cmake/TBBBuild.cmake)
tbb_build(TBB_ROOT ${TBB_ROOT} MAKE_ARGS tbb_build_dir=${CMAKE_CURRENT_BINARY_DIR}/tbb_build
......@@ -151,6 +160,12 @@ if (NGRAPH_CPU_ENABLE)
if (NGRAPH_DEX_ONLY)
target_compile_definitions(cpu_backend PRIVATE "NGRAPH_DEX_ONLY")
endif()
if (NGRAPH_HALIDE)
target_compile_definitions(cpu_backend PRIVATE "NGRAPH_HALIDE")
ExternalProject_Get_Property(ext_halide BINARY_DIR)
target_include_directories(cpu_backend SYSTEM PRIVATE ${BINARY_DIR}/include)
target_link_libraries(cpu_backend PRIVATE ${BINARY_DIR}/lib/libHalide.so)
endif()
if(OPENMP_FOUND)
target_compile_options(cpu_backend PRIVATE "${OpenMP_CXX_FLAGS}")
......
//*****************************************************************************
// Copyright 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <Halide.h>
#include <HalideBuffer.h>
#include <functional>
#include <string>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
#include "ngraph/op/add.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/op/halide_op.hpp"
using namespace std;
using namespace ngraph;
#define TI(x) type_index(typeid(x))
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace halide
{
// Maps an ngraph op's type_index to a generator that emits the
// equivalent Halide::Func given the op's already-built input Funcs
// (in argument order). All kernels are elementwise over a single
// flattened (rank-1) iteration domain, matching the rank-1 f32
// ImageParams created by the HalideOp builder below.
static const std::unordered_map<std::type_index,
std::function<Halide::Func(vector<Halide::Func>)>>
generators{{TI(ngraph::op::Add),
[](vector<Halide::Func> in) {
Halide::Var x;
Halide::Func func;
// out[x] = in0[x] + in1[x]
func(x) = in[0](x) + in[1](x);
return func;
}},
{TI(ngraph::op::Multiply),
[](vector<Halide::Func> in) {
Halide::Var x;
Halide::Func func;
// out[x] = in0[x] * in1[x]
func(x) = in[0](x) * in[1](x);
return func;
}},
{TI(ngraph::op::Relu), [](vector<Halide::Func> in) {
Halide::Var x;
Halide::Func func;
// out[x] = max(in0[x], 0)
func(x) = Halide::max(in[0](x), 0);
return func;
}}};
}
template <>
void Builder::BUILDER_DECL(ngraph::runtime::cpu::op::HalideOp)
{
// Builder for a fused HalideOp: translates each wrapped op into a
// Halide::Func using the generator table above, then emits a single
// runtime functor that realizes the terminal Func into the output tensor.
const ngraph::runtime::cpu::op::HalideOp* hs =
static_cast<const ngraph::runtime::cpu::op::HalideOp*>(node);
// Per-external-function state, keyed by tensor name:
//   halide_functions    : tensor -> Halide::Func already built for it
//   subgraph_params     : tensor -> ImageParam for live-ins from outside the subgraph
//   subgraph_param_sizes: tensor -> element count of that live-in
//   subgraph_param_ptrs : tensor -> reference to its runtime data pointer
auto& halide_functions = external_function->get_halide_functions();
auto& subgraph_params = external_function->get_subgraph_params();
auto& subgraph_param_sizes = external_function->get_subgraph_param_sizes();
auto& subgraph_param_ptrs = external_function->get_subgraph_param_ptrs();
// hs->get_ops() is in execution order, so every op's inputs are either
// already in halide_functions or are live-ins from outside the subgraph.
for (const auto& op : hs->get_ops())
{
if (!halide::generators.count(TI(*op)))
{
throw ngraph_error("Invalid op in halide subgraph");
}
vector<Halide::Func> inputs;
for (const auto& input : op->get_inputs())
{
auto tensor_name = input.get_output().get_tensor_ptr()->get_name();
if (halide_functions.count(tensor_name))
{
// Produced by an earlier op in this subgraph: chain the Funcs.
inputs.emplace_back(halide_functions[tensor_name]);
}
else
{
// Live-in tensor: expose it to Halide as a rank-1 f32 image.
// NOTE(review): Float(32)/rank-1 is hard-coded here, consistent
// with the f32-only check in the extraction pass.
subgraph_params[tensor_name] = Halide::ImageParam(Halide::Float(32), 1);
subgraph_param_sizes[tensor_name] =
shape_size(input.get_output().get_tensor_ptr()->get_shape());
subgraph_param_ptrs.emplace(
tensor_name, external_function->get_tensor_data(tensor_name));
inputs.emplace_back(subgraph_params[tensor_name]);
}
}
halide_functions[op->get_output_tensor_ptr()->get_name()] =
halide::generators.at(TI(*op))(inputs);
}
// The last op's output is the subgraph's (single) output.
auto out_tensor_name = hs->get_ops().back()->get_output_tensor_ptr()->get_name();
auto& functors = external_function->get_functors();
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& terminal_func = halide_functions[out_tensor_name];
auto out_size = out[0].get_size();
// The functor captures the maps above by reference -- assumes
// external_function outlives the emitted functor (true for the CPU
// backend's execution model; confirm if lifetimes change).
auto functor = [&, out_size](CPURuntimeContext* ctx) {
// Rebind every live-in ImageParam to the tensor's current data
// pointer each call, since tensor storage may move between calls.
for (auto& param : subgraph_params)
{
Halide::Buffer<float> param_buffer(
static_cast<float*>(subgraph_param_ptrs.at(param.first).get()),
subgraph_param_sizes.at(param.first));
param.second.set(param_buffer);
}
Halide::Buffer<float> out_buffer(static_cast<float*>(out_tensor), out_size);
terminal_func.realize(out_buffer);
};
functors.emplace_back(functor);
}
}
}
}
......@@ -98,6 +98,7 @@
#include "ngraph/runtime/cpu/kernel/tan.hpp"
#include "ngraph/runtime/cpu/kernel/tanh.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/halide_op.hpp"
#include "ngraph/type/element_type.hpp"
#include "ngraph/util.hpp"
......@@ -367,7 +368,9 @@ namespace ngraph
static BuildOpMap build_dispatcher{
{TI(ngraph::op::Parameter), &runtime::cpu::Builder::nop},
{TI(ngraph::runtime::cpu::op::ConvertLayout),
&runtime::cpu::Builder::build<ngraph::runtime::cpu::op::ConvertLayout>}};
&runtime::cpu::Builder::build<ngraph::runtime::cpu::op::ConvertLayout>},
{TI(ngraph::runtime::cpu::op::HalideOp),
&runtime::cpu::Builder::build<ngraph::runtime::cpu::op::HalideOp>}};
return build_dispatcher;
}
......
......@@ -169,6 +169,7 @@
#include "ngraph/runtime/cpu/pass/cpu_post_layout_optimizations.hpp"
#include "ngraph/runtime/cpu/pass/cpu_rnn_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_workspace_insertion.hpp"
#include "ngraph/runtime/cpu/pass/halide_subgraph_extraction.hpp"
#ifdef NGRAPH_DISTRIBUTED
#include "ngraph/op/allreduce.hpp"
......@@ -1019,6 +1020,10 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
// pass_manager.register_pass<runtime::cpu::pass::CPUHorizontalFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUCollapseDims>();
#if defined(NGRAPH_HALIDE)
pass_manager.register_pass<ngraph::runtime::cpu::pass::HalideSubgraphExtraction>();
#endif
NodeVector nv_cwi; // We dont need CPUWorkspaceInsertion to return list of indices
pass_manager.register_pass<runtime::cpu::pass::CPUWorkspaceInsertion>(nv_cwi, false);
pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this);
......
......@@ -27,6 +27,10 @@
#include <utility>
#include <vector>
#if defined(NGRAPH_HALIDE)
#include <Halide.h>
#endif
#if !defined(NGRAPH_DEX_ONLY)
#include "ngraph/codegen/code_writer.hpp"
......@@ -134,6 +138,26 @@ namespace ngraph
const std::string& directory,
const std::string& filename);
#if defined(NGRAPH_HALIDE)
std::unordered_map<std::string, Halide::Func>& get_halide_functions()
{
return halide_functions;
}
std::unordered_map<std::string, Halide::ImageParam>& get_subgraph_params()
{
return subgraph_params;
}
std::unordered_map<std::string, int>& get_subgraph_param_sizes()
{
return subgraph_param_sizes;
}
std::unordered_map<std::string, std::reference_wrapper<void*>>&
get_subgraph_param_ptrs()
{
return subgraph_param_ptrs;
}
#endif
protected:
void build();
......@@ -233,6 +257,13 @@ namespace ngraph
std::unordered_map<std::string, std::shared_ptr<CPU_ExternalFunction>> callees;
bool m_is_built;
bool m_direct_execution;
#if defined(NGRAPH_HALIDE)
std::unordered_map<std::string, Halide::Func> halide_functions;
std::unordered_map<std::string, Halide::ImageParam> subgraph_params;
std::unordered_map<std::string, int> subgraph_param_sizes;
std::unordered_map<std::string, std::reference_wrapper<void*>> subgraph_param_ptrs;
#endif
};
}
}
......
//*****************************************************************************
// Copyright 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/op/halide_op.hpp"
using namespace std;
using namespace ngraph;
// Clone this HalideOp over a new set of input nodes, preserving the wrapped
// op list and the recorded output type/shape.
// NOTE(review): new_args.size() is not validated against the original
// argument count -- presumably guaranteed by callers; confirm.
shared_ptr<Node> runtime::cpu::op::HalideOp::copy_with_new_args(const NodeVector& new_args) const
{
return make_shared<HalideOp>(new_args, ops, output_type, output_shape);
}
// Fused-subgraph op: wraps an ordered list of nodes to be compiled and run
// as a single Halide pipeline.
//   args      - live-in nodes feeding the subgraph (become this op's inputs)
//   ops       - the wrapped nodes, in execution order
//   out_type  - element type of the single output
//   out_shape - shape of the single output
runtime::cpu::op::HalideOp::HalideOp(const NodeVector& args,
const std::list<std::shared_ptr<Node>>& ops,
const element::Type& out_type,
const Shape& out_shape)
: Op("HalideOp", check_single_output_args(args))
, ops(ops)
, output_type(out_type)
, output_shape(out_shape)
{
constructor_validate_and_infer_types();
}
// The output signature is fixed at construction time; just publish the
// recorded type/shape as the op's single output.
void runtime::cpu::op::HalideOp::validate_and_infer_types()
{
set_output_type(0, output_type, output_shape);
}
//*****************************************************************************
// Copyright 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <list>
#include <vector>
#include "ngraph/op/op.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace op
{
/// \brief CPU-backend op wrapping a subgraph of nodes that is
///        compiled and executed as a single Halide pipeline.
///
/// Created by the HalideSubgraphExtraction pass; the wrapped
/// nodes are kept in execution order so the builder can
/// translate each one to a Halide::Func.
class HalideOp : public ngraph::op::Op
{
public:
HalideOp(const NodeVector& args,
const std::list<std::shared_ptr<Node>>& ops,
const element::Type& out_type,
const Shape& out_shape);
virtual void validate_and_infer_types() override;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
/// Wrapped nodes, in execution order.
const std::list<std::shared_ptr<Node>>& get_ops() const { return ops; }
private:
std::list<std::shared_ptr<Node>> ops; // subgraph nodes, execution order
element::Type output_type; // single-output element type
Shape output_shape; // single-output shape
};
}
}
}
}
//*****************************************************************************
// Copyright 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <iostream>
#include <list>
#include <typeindex>
#include <typeinfo>
#include <unordered_set>
#include "ngraph/op/add.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/runtime/cpu/op/halide_op.hpp"
#include "ngraph/runtime/cpu/pass/halide_subgraph_extraction.hpp"
using namespace std;
using namespace ngraph;
#define TI(x) type_index(typeid(x))
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace halide
{
// Ops that can be lowered to Halide. Must stay in sync with the
// generator table in builder/halide_op.cpp.
static const std::unordered_set<std::type_index> whitelist{
TI(ngraph::op::Add), TI(ngraph::op::Multiply), TI(ngraph::op::Relu)};
// Graph plumbing nodes: traversed through during extraction but
// never included in the extracted subgraph.
static const std::unordered_set<std::type_index> skiplist{TI(ngraph::op::Parameter),
TI(ngraph::op::Result)};
}
}
}
}
// Carves a contiguous chain of Halide-supported elementwise ops feeding the
// function's single result out of the graph and replaces it with one
// cpu::op::HalideOp.
//
// Support for multiple results, multiple outputs and GetOutputElement, and
// multiple subgraphs in a single pipeline is not implemented since this
// should go away in favor of the "hybrid" transformer approach of carving
// out subgraphs in core ngraph.
//
// Returns true iff the graph was modified.
bool runtime::cpu::pass::HalideSubgraphExtraction::run_on_function(
    std::shared_ptr<ngraph::Function> function)
{
    list<shared_ptr<Node>> worklist;
    auto results = function->get_results();

    // Artificial limitation: only single-result functions are handled.
    if (results.size() > 1)
    {
        return false;
    }

    // The Halide kernels emitted by the builder are f32-only.
    if (function->get_result()->get_element_type() != element::f32)
    {
        return false;
    }

    for (const auto& result : results)
    {
        worklist.emplace_back(result);
    }

    unordered_set<shared_ptr<Node>> ops;
    list<shared_ptr<Node>> ordered_ops;

    // Walk upward from the result, collecting whitelisted ops until the
    // first unsupported node is reached (single-subgraph limitation).
    while (!worklist.empty())
    {
        const auto& node = worklist.front();
        if (!halide::skiplist.count(TI(*node)))
        {
            if (halide::whitelist.count(TI(*node)))
            {
                // A node can be reached through multiple users (e.g. A + A);
                // only record it the first time so ordered_ops stays
                // duplicate-free.
                if (ops.emplace(node).second)
                {
                    ordered_ops.emplace_back(node);
                }
            }
            else
            {
                break;
            }
        }
        const auto& args = node->get_arguments();
        for (const auto& arg : args)
        {
            worklist.emplace_back(arg);
        }
        worklist.pop_front();
    }

    // Nothing extractable (e.g. the op feeding the result is unsupported):
    // leave the function untouched rather than creating an empty HalideOp.
    if (ordered_ops.empty())
    {
        return false;
    }

    // Live-ins: arguments produced outside the extracted set. They become
    // the inputs of the fused HalideOp.
    NodeVector liveins;
    for (const auto& op : ops)
    {
        const auto& args = op->get_arguments();
        for (const auto& arg : args)
        {
            if (!ops.count(arg))
            {
                liveins.emplace_back(arg);
            }
        }
    }

    // Traversal was output-to-input; reverse into execution order for the
    // builder.
    ordered_ops.reverse();

    auto subgraph = make_shared<cpu::op::HalideOp>(liveins,
                                                   ordered_ops,
                                                   function->get_result()->get_element_type(),
                                                   function->get_result()->get_shape());
    replace_node(function->get_result()->get_argument(0), subgraph);
    return true;
}
//*****************************************************************************
// Copyright 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/pass/pass.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace pass
{
/// \brief Function pass that extracts a chain of Halide-supported
///        elementwise ops from the graph and replaces it with a
///        single cpu::op::HalideOp node, which the CPU backend
///        then compiles as one Halide pipeline.
class HalideSubgraphExtraction : public ngraph::pass::FunctionPass
{
public:
HalideSubgraphExtraction() {}
bool run_on_function(std::shared_ptr<ngraph::Function> function) override;
};
}
}
}
}
......@@ -68,6 +68,9 @@ endif()
if (NGRAPH_CPU_ENABLE)
list(APPEND SRC core_fusion.cpp quantize_cpu.cpp)
list(APPEND SRC backend_performance.cpp cpu_fusion.cpp cpu_test.cpp cpu_reshape_sinking.cpp)
if (NGRAPH_HALIDE)
list(APPEND SRC halide.cpp)
endif()
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} CPU)
endif()
......
//*****************************************************************************
// Copyright 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <algorithm>
#include <cstdio>
#include <iostream>
#include <list>
#include <memory>
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "ngraph/util.hpp"
#include "util/all_close.hpp"
#include "util/test_tools.hpp"
using namespace ngraph;
using namespace std;
// End-to-end check of the Halide subgraph path on the CPU backend:
// computes relu((p0 + p1) * p2) + p3 elementwise over f32[8] and compares
// against precomputed reference values. With every input equal to x the
// expression reduces to relu(2*x^2) + x = 2*x^2 + x.
TEST(halide, halide_subgraph)
{
    Shape shape{8};
    auto p0 = make_shared<op::Parameter>(element::f32, shape);
    auto p1 = make_shared<op::Parameter>(element::f32, shape);
    auto p2 = make_shared<op::Parameter>(element::f32, shape);
    auto p3 = make_shared<op::Parameter>(element::f32, shape);

    auto activation = make_shared<op::Relu>((p0 + p1) * p2);
    auto f = make_shared<Function>(activation + p3, op::ParameterVector{p0, p1, p2, p3});

    auto backend = runtime::Backend::create("CPU");

    vector<float> data{-1, 4, -2, 5, 1, 5, 7, 9};

    // All four inputs carry the same data vector.
    vector<shared_ptr<runtime::Tensor>> args;
    for (size_t i = 0; i < 4; ++i)
    {
        auto arg = backend->create_tensor(element::f32, shape);
        copy_data(arg, data);
        args.push_back(arg);
    }
    shared_ptr<runtime::Tensor> result = backend->create_tensor(element::f32, shape);

    vector<float> expected{1, 36, 6, 55, 3, 55, 105, 171};

    backend->call_with_validate(f, {result}, args);

    EXPECT_TRUE(test::all_close(read_vector<float>(result), expected, 1.0e-4f, 1.0e-4f));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment