Commit f4b487a4 authored by Nagy Mostafa, committed by Scott Cyphers

[MLIR] Add sub-graph extraction support (#3101)

* Initial sub-graph extraction

* Works without detaching input edges from sub-graph

* Added removing input edges to graph

* Works with whole func sub-graphs. Inputs edges to sub-graph are still there

* Works on 2 exclusive sub-graphs. Still not on merged sub-graphs

* Revert removing inputs to sub-graph. nGraph validation crashes

* Added 3 sub-graph test. Remove compiled_kernel fusion pass. Comments

* Revert some changes

* Added cycle detection. Moved unit-tests to backend_mlir.in.cpp. Still not fully functional

* Construct CK nodes after finding outputs to preserve the graph.

* Fix topological sort. UTs pass.

* Minor fixes

* PR fixes

* Enable mlir tests only when building with MLIR on
parent 4b009f09
......@@ -17,6 +17,7 @@
#include "mlir_subgraph_extraction.hpp"
#include "ngraph/assertion.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/argmax.hpp"
#include "ngraph/op/argmin.hpp"
......@@ -31,43 +32,209 @@ using namespace ngraph::pass;
#define TI(x) std::type_index(typeid(x))
int MLIRSubgraphExtractionPass::MLIRSubgraph::m_curr_graph_id = 0;
template <typename T>
void MLIRSubgraphExtractionPass::MLIRSubgraph::add_inputs(T& inputs)
{
// input lists are not exclusive; avoid duplication
for (auto node : inputs)
{
if (m_input_nodes.find(node) == m_input_nodes.end())
{
m_input_nodes.insert(node);
}
}
}
template <typename T>
void MLIRSubgraphExtractionPass::MLIRSubgraph::add_outputs(T& outputs)
{
m_output_nodes.insert(outputs.begin(), outputs.end());
}
void MLIRSubgraphExtractionPass::MLIRSubgraph::add_node(std::shared_ptr<Node> node)
{
NGRAPH_CHECK(m_nodes.find(node) == m_nodes.end(), "node added to graph before");
m_nodes.insert(node);
m_pass.m_node_to_graph[node] = get_id();
}
void MLIRSubgraphExtractionPass::MLIRSubgraph::merge(MLIRSubgraph& sg2)
{
NGRAPH_CHECK(&sg2 != this, "Cannot merge a sub-graph into itself");
// Associate nodes of second sub-graph to first one
auto sg_nodes = sg2.get_nodes();
auto& node_map = m_pass.m_node_to_graph;
for (auto node : sg_nodes)
{
NGRAPH_DEBUG << *node;
NGRAPH_CHECK(m_pass.get_subgraph_id(node) == sg2.get_id(),
"Node does not belong to sub-graph");
m_pass.m_node_to_graph[node] = get_id();
}
// nodes of sub-graphs are exclusive
m_nodes.insert(sg2.get_nodes().begin(), sg2.get_nodes().end());
// merge inputs
add_inputs(sg2.get_inputs());
// Remove sub-graph from map
m_pass.m_id_to_graph.erase(sg2.get_id());
}
// The sub-graph construction algorithm is as follows
// For each node, check its predecessors:
// - If all predecessors that are already in sub-graphs belong to the same sub-graph (graph ID), extend that sub-graph to include the current node.
//   Predecessors outside any sub-graph are marked as inputs to the sub-graph.
// - If the predecessors belong to different sub-graphs, merge all of those sub-graphs into one and add the current node to it.
//   Predecessors outside any sub-graph are marked as inputs to the sub-graph.
//
// If the node has any external inputs, such an input may itself transitively depend on one of the predecessor sub-graphs; adding the node would then create a cycle.
// If a cycle is found, always start a new sub-graph.
//
// For each sub-graph found, build a CompiledKernel (CK) node around it as follows:
// - all input edges to the sub-graph are cloned as inputs to the CK node as well.
// - all output edges from the sub-graph are removed and added as outputs of the CK node instead.
// - the CK node internally keeps lists of the sub-graph nodes and of the sub-graph output nodes.
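// A small worked example of the rules above (illustration only, not from the original source):
//
//      P1  P2     P3  P4
//       \  /       \  /
//       Add1       Add2      Both adds are supported and have no predecessors inside any
//         \         /        sub-graph, so each starts its own sub-graph (IDs 1 and 2);
//          \       /         P1..P4 are recorded as sub-graph inputs.
//            Add3            Its predecessors belong to sub-graphs {1, 2}; no cycle is
//                            found, so the two sub-graphs are merged and Add3 is added
//                            to the merged sub-graph.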
bool MLIRSubgraphExtractionPass::run_on_function(std::shared_ptr<Function> func)
{
// Build maximal sub-graphs of MLIR-supported ops and wrap each one in a CompiledKernel.
NGRAPH_DEBUG << "[CK Extract] Construct sub-graphs" << std::endl;
for (auto op : func->get_ordered_ops())
{
// All ops must be supported by MLIR compiler
NodeVector inputs;
int first_graph_id = -1;
std::unordered_set<int> subgraph_ids;
// unsupported ops, skip
if (!is_supported_mlir_op(op))
{
continue;
}
if (TI(Parameter) == TI(*op) || TI(Result) == TI(*op))
{
continue;
}
NGRAPH_DEBUG << "[CK Extract] Processing " << *op << std::endl;
// supported op
for (auto pred : op->get_arguments())
{
int pred_subgraph_id = get_subgraph_id(pred);
if (pred_subgraph_id == -1)
{
// predecessor doesn't belong to any sub-graph, it is an input
inputs.push_back(pred);
}
else
{
// record sub-graph id of the predecessor
subgraph_ids.insert(pred_subgraph_id);
}
}
if (subgraph_ids.size() == 0)
{
NGRAPH_DEBUG << "[CK Extract] Start new sub-graph " << std::endl;
// we couldn't find any predecessor sub-graphs to extend with this node
// create a new sub-graph
MLIRSubgraph sg = MLIRSubgraph::create(this);
sg.add_inputs(inputs);
sg.add_node(op);
add_subgraph(sg);
}
else
{
// we have sub-graphs.
// check if adding this node to the sub-graph will create a cycle in the DAG
NGRAPH_DEBUG << "[CK Extract] Extending sub-graph. Check for cycles " << std::endl;
if (!check_cycles(inputs, subgraph_ids))
{
NGRAPH_DEBUG << "[CK Extract] Merging subgraphs";
// merge sub-graphs if needed
std::unordered_set<int>::iterator it = subgraph_ids.begin();
int sg_id = *it;
MLIRSubgraph& first_subgraph = get_subgraph(sg_id);
NGRAPH_CHECK(first_subgraph.get_id() == sg_id);
while (++it != subgraph_ids.end())
{
sg_id = *it;
MLIRSubgraph& subgraph = get_subgraph(sg_id);
NGRAPH_CHECK(subgraph.get_id() == sg_id);
first_subgraph.merge(subgraph);
}
first_subgraph.add_node(op);
first_subgraph.add_inputs(inputs);
}
else
{
// we have a cycle, start a new sub-graph
MLIRSubgraph sg = MLIRSubgraph::create(this);
NGRAPH_DEBUG << "[CK Extract] Cycle found. Start a new subgraph";
// use all predecessors as graph inputs
NodeVector inputs = op->get_arguments();
sg.add_inputs(inputs);
sg.add_node(op);
add_subgraph(sg);
}
}
NGRAPH_DEBUG << "[CK Extract] Node Processed " << *op << std::endl;
}
NGRAPH_DEBUG << "[CK Extract] Get subgraphs output nodes" << std::endl;
// get output nodes for each sub-graph. Do this before attaching CK nodes since we will
// remove output edges from the sub-graphs.
for (IDGraphMap::iterator it = m_id_to_graph.begin(); it != m_id_to_graph.end(); it++)
{
MLIRSubgraph& sg = it->second;
auto& nodes = sg.get_nodes();
NodeVector outputs = std::move(get_subgraph_outputs(NodeVector(nodes.begin(), nodes.end()),
{} /*exclusions*/,
false /* ignore unused */,
false /* ignore output duplicates */));
sg.add_outputs(outputs);
}
NGRAPH_DEBUG << "[CK Extract] Construct CK nodes" << std::endl;
// attach CK node to each sub-graph.
for (auto it : m_id_to_graph)
{
MLIRSubgraph sg = it.second;
auto& inputs = sg.get_inputs();
auto& outputs = sg.get_outputs();
auto& nodes = sg.get_nodes();
NodeVector inputs_vector(inputs.begin(), inputs.end());
NodeVector outputs_vector(outputs.begin(), outputs.end());
// must store nodes in topological order
auto nodes_list = subgraph_topological_sort(nodes);
NodeVector nodes_vector(nodes_list.begin(), nodes_list.end());
auto ck = std::make_shared<CompiledKernel>(nodes_vector, outputs_vector, inputs_vector);
NGRAPH_DEBUG << "[CK Extract] Graph ID = " << sg.get_id() << std::endl;
NGRAPH_DEBUG << "[CK Extract] Graph Nodes: " << std::endl;
for (auto node : nodes)
{
NGRAPH_DEBUG << "[CK Extract] " << *node << std::endl;
}
NGRAPH_DEBUG << "[CK Extract] Input Nodes: " << std::endl;
for (auto node : inputs)
{
NGRAPH_DEBUG << "[CK Extract] " << *node << std::endl;
}
NGRAPH_DEBUG << "[CK Extract] Output Nodes: " << std::endl;
for (auto node : outputs)
{
NGRAPH_DEBUG << "[CK Extract] " << *node << std::endl;
}
// Connect CompiledKernel to output nodes by replacing the output descriptors of the output
// nodes.
for (size_t i = 0, end = outputs_vector.size(); i < end; ++i)
{
auto& output_descs = outputs_vector[i]->get_outputs();
NGRAPH_CHECK(output_descs.size() == 1, "Unexpected multiple output descriptors");
auto& out_desc = output_descs[0];
......@@ -79,6 +246,7 @@ bool MLIRSubgraphExtractionPass::run_on_function(std::shared_ptr<Function> func)
in_desc->replace_output(ck, i);
}
}
}
return true;
}
......@@ -127,6 +295,30 @@ bool MLIRSubgraphExtractionPass::is_supported_mlir_op(std::shared_ptr<Node> node
return true;
}
bool MLIRSubgraphExtractionPass::check_cycles(NodeVector& inputs,
std::unordered_set<int>& subgraph_ids)
{
NodeVector work_list;
NGRAPH_DEBUG << "[CK Extract] Inputs size: " << inputs.size() << std::endl;
work_list.insert(work_list.end(), inputs.begin(), inputs.end());
while (!work_list.empty())
{
auto node = work_list.back();
work_list.pop_back();
if (subgraph_ids.find(get_subgraph_id(node)) != subgraph_ids.end())
{
// we hit one of the sub-graphs we want to extend. we have a cycle.
NGRAPH_DEBUG << "[CK Extract] Cycle found when trying to add node" << std::endl;
return true;
}
for (auto pred : node->get_arguments())
{
work_list.push_back(pred);
}
}
return false;
}
const std::set<std::type_index> MLIRSubgraphExtractionPass::m_supported_ops{
#define MLIR_OP(OP) TI(ngraph::op::OP),
#include "contrib/mlir/ops_supported.inc"
......
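For context, m_supported_ops is populated with an X-macro over ops_supported.inc; a sketch of how the expansion works (the actual entries live in that file and are not shown in this diff):
// Hypothetical excerpt of ops_supported.inc:
//   MLIR_OP(Add)
//   MLIR_OP(Dot)
// With "#define MLIR_OP(OP) TI(ngraph::op::OP)," each entry expands to a std::type_index,
// so the set effectively becomes:
//   const std::set<std::type_index> MLIRSubgraphExtractionPass::m_supported_ops{
//       TI(ngraph::op::Add), TI(ngraph::op::Dot),
//   };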
......@@ -16,26 +16,114 @@
#pragma once
#include <mutex>
#include "ngraph/pass/pass.hpp"
namespace ngraph
{
namespace pass
{
/// This pass creates CompiledKernel ops enclosing maximal sub-graphs of ops that are
/// supported by MLIR; these sub-graphs are then compiled and executed by MLIR.
class MLIRSubgraphExtractionPass : public ngraph::pass::FunctionPass
{
using NodeSet = std::unordered_set<std::shared_ptr<Node>>;
class MLIRSubgraph
{
private:
static int get_new_graph_id() { return m_curr_graph_id++; }
/// Create a sub-graph with a new ID.
MLIRSubgraph(MLIRSubgraphExtractionPass* pass)
: m_graph_id(MLIRSubgraph::get_new_graph_id())
, m_pass(*pass)
{
}
public:
/// Factory method to create a new sub-graph with a unique ID
static MLIRSubgraph create(MLIRSubgraphExtractionPass* pass)
{
// mutex on global graph ID
std::lock_guard<std::mutex> lock(pass->m_subgraph_mutex);
return MLIRSubgraph(pass);
}
/// Get sub-graph id
int get_id() const { return m_graph_id; }
/// Get all nodes in the sub-graph.
NodeSet& get_nodes() { return m_nodes; }
/// Get input nodes. Predecessors to head nodes.
NodeSet& get_inputs() { return m_input_nodes; }
/// Get output nodes. Nodes in the sub-graph with edges to external nodes.
NodeSet& get_outputs() { return m_output_nodes; }
/// Add a list of input nodes to the sub-graph.
template <typename T>
void add_inputs(T& inputs);
/// Add a list of output nodes to the sub-graph.
template <typename T>
void add_outputs(T& outputs);
/// Merges sub-graph (other) into this sub-graph. other will be destroyed.
void merge(MLIRSubgraph& other);
/// Add one node to the sub-graph.
void add_node(std::shared_ptr<Node> node);
private:
// Unique ID for this sub-graph.
int m_graph_id;
// Actual nodes of the sub-graph
NodeSet m_nodes;
// Predecessor to head nodes in the sub-graph.
NodeSet m_input_nodes;
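// Nodes in the sub-graph with edges to external nodes.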
NodeSet m_output_nodes;
MLIRSubgraphExtractionPass& m_pass;
static int m_curr_graph_id;
};
friend class MLIRSubgraph;
public:
MLIRSubgraphExtractionPass() {}
bool run_on_function(std::shared_ptr<Function> func) override;
/// Checks if an ngraph node is supported by MLIR backend
bool is_supported_mlir_op(std::shared_ptr<Node> node);
/// Get the sub-graph ID that a node belongs to
int get_subgraph_id(std::shared_ptr<Node> node)
{
auto it = m_node_to_graph.find(node);
return (it == m_node_to_graph.end()) ? -1 : it->second;
}
/// Get sub-graph by ID
MLIRSubgraph& get_subgraph(int id)
{
auto it = m_id_to_graph.find(id);
NGRAPH_CHECK(it != m_id_to_graph.end(), "Cannot find subgraph with ID: ", id);
return it->second;
}
/// Stores a sub-graph in the map
void add_subgraph(MLIRSubgraph& sg) { m_id_to_graph.emplace(sg.get_id(), sg); }
/// Checks if adding a node to an extracted sub-graph will cause a DAG cycle
/// inputs: the node's predecessors that are outside any sub-graph.
/// subgraph_ids: the sub-graphs that the node's other predecessors belong to.
/// It traverses backwards from all input nodes and checks if we reach any node that already
/// belongs to one of the sub-graph ids. If so, we have a cycle.
///
/// Example:
/// A(1)
/// | \
/// B(1) C
/// | /
/// D
/// We want to add D to sub-graph 1. C is an input to D, and subgraph_ids is {1}.
/// Traversing backwards from C we reach A, which belongs to sub-graph 1, so adding D would form a cycle.
bool check_cycles(NodeVector& inputs, std::unordered_set<int>& subgraph_ids);
private:
static const std::set<std::type_index> m_supported_ops;
private:
using IDGraphMap = std::unordered_map<int, MLIRSubgraph>;
using NodeGraphMap = std::unordered_map<std::shared_ptr<Node>, int>;
IDGraphMap m_id_to_graph;
NodeGraphMap m_node_to_graph;
// Mutex over sub-graph IDs
std::mutex m_subgraph_mutex;
};
}
}
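A rough usage sketch (not part of this change; the include path, function name, and call site are assumptions): the pass would typically be scheduled through nGraph's pass manager before the function is handed to the MLIR compiler.

#include <memory>

#include "mlir_subgraph_extraction.hpp" // assumed relative include path
#include "ngraph/pass/manager.hpp"

// Minimal sketch: wrap MLIR-supported regions of `func` in CompiledKernel nodes.
static void extract_mlir_subgraphs(std::shared_ptr<ngraph::Function> func)
{
    ngraph::pass::Manager pass_manager;
    pass_manager.register_pass<ngraph::pass::MLIRSubgraphExtractionPass>();
    pass_manager.run_passes(func);
}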
......@@ -463,7 +463,8 @@ bool ngraph::is_one(std::shared_ptr<Node> reduce_constant)
NodeVector ngraph::get_subgraph_outputs(const NodeVector& nodes,
const NodeVector& exclusions,
bool ignore_unused,
bool ignore_output_duplicates)
{
std::set<shared_ptr<Node>> exclusions_set(exclusions.begin(), exclusions.end());
std::set<shared_ptr<Node>> nodes_set(nodes.begin(), nodes.end());
......@@ -479,7 +480,11 @@ NodeVector ngraph::get_subgraph_outputs(const NodeVector& nodes,
for (const auto& u : n->get_users())
{
bool add_output = nodes_set.count(u) == 0 && (!ignore_unused || is_used(u.get()));
// check if output is already captured
add_output &= (ignore_output_duplicates ||
std::find(outputs.begin(), outputs.end(), n) == outputs.end());
if (add_output)
{
outputs.push_back(n);
}
......
......@@ -275,7 +275,8 @@ namespace ngraph
NodeVector get_subgraph_outputs(const NodeVector& nodes,
const NodeVector& exclusions,
bool ignore_unused = false,
bool ignore_output_duplicates = true);
// Extract sub-graph computing the `results`. Stops backward traversal at either a Parameter node
// or a node that belongs to args
......
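A note on the new flag (a sketch under an assumed variable name, not part of this diff): with the default ignore_output_duplicates = true, a node with several users outside the sub-graph may appear more than once in the returned vector; passing false records each output node at most once, which is how the extraction pass calls it.

// `sg_nodes` is a hypothetical NodeVector holding one sub-graph's nodes.
NodeVector outputs = ngraph::get_subgraph_outputs(sg_nodes,
                                                  NodeVector{} /* exclusions */,
                                                  false /* ignore_unused */,
                                                  false /* ignore_output_duplicates */);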
......@@ -114,7 +114,6 @@ set(SRC
op/update_slice.cpp
pass/cpu_assignment.cpp
pass/cpu_collapse_dims.cpp
pass/cpu_compiled_kernel_fusion.cpp
pass/cpu_fusion.cpp
pass/cpu_horizontal_fusion.cpp
pass/cpu_layout.cpp
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <algorithm>
#include <iostream>
#include <map>
#include <memory>
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/op/abs.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/experimental/compiled_kernel.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/negative.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/op/util/binary_elementwise_arithmetic.hpp"
#include "ngraph/op/util/unary_elementwise_arithmetic.hpp"
#include "ngraph/runtime/cpu/pass/cpu_compiled_kernel_fusion.hpp"
#define TI(x) std::type_index(typeid(x))
using namespace ngraph;
struct LKGraph
{
LKGraph(const NodeVector& ns, const NodeVector& ins)
: m_inputs(ins)
, m_nodes(ns)
{
}
NodeVector m_inputs;
NodeVector m_nodes;
};
class CompiledKernelCollector
{
public:
CompiledKernelCollector(std::shared_ptr<Function> f, size_t min_nodes_to_fuse)
{
for (auto n : f->get_ordered_ops())
{
if (is_fusible(n))
{
auto arg_from_fusible_group = collect_fusible_args(n);
// create a new group
if (!arg_from_fusible_group)
{
m_heads.insert(std::make_pair(n, n));
m_graphs.insert(std::make_pair(n, LKGraph{{n}, n->get_arguments()}));
NGRAPH_DEBUG << "Created a new group for " << n->get_name();
log_group(n);
}
else
{
auto smallest_head = m_heads.at(arg_from_fusible_group);
auto& ckgraph = m_graphs.at(smallest_head);
ckgraph.m_nodes.push_back(n);
for (auto arg : n->get_arguments())
{
if (is_leaf(arg))
{
ckgraph.m_inputs.push_back(arg);
}
}
m_heads.insert(std::make_pair(n, smallest_head));
log_group(smallest_head);
}
}
}
prune_graphs(min_nodes_to_fuse);
}
const std::vector<std::shared_ptr<op::CompiledKernel>> get_compiled_kernels() const
{
std::vector<std::shared_ptr<op::CompiledKernel>> cks;
for (auto e : m_graphs)
{
auto& ckg = e.second;
NodeVector member_outputs = ngraph::get_subgraph_outputs(ckg.m_nodes, NodeVector{});
auto ck =
std::make_shared<op::CompiledKernel>(ckg.m_nodes, member_outputs, ckg.m_inputs);
cks.push_back(ck);
}
return cks;
}
private:
static bool is_fusible(std::shared_ptr<Node> n)
{
static const std::set<std::type_index> fusible_ops_set{TI(ngraph::op::Abs),
TI(ngraph::op::Add),
TI(ngraph::op::Negative),
TI(ngraph::op::Subtract),
TI(ngraph::op::Relu),
TI(ngraph::op::Minimum),
TI(ngraph::op::Maximum)};
const Node& node = *n;
return fusible_ops_set.count(TI(node)) != 0;
// return (std::dynamic_pointer_cast<op::util::BinaryElementwiseArithmetic>(n) ||
// std::dynamic_pointer_cast<op::util::UnaryElementwiseArithmetic>(n));
}
bool is_leaf(std::shared_ptr<Node> src) { return src->is_parameter() || src->is_constant(); }
void prune_graphs(size_t min_nodes_to_fuse)
{
for (auto it = m_graphs.begin(); it != m_graphs.end();)
{
if (it->second.m_nodes.size() < min_nodes_to_fuse)
{
it = m_graphs.erase(it);
}
else
{
it++;
}
}
}
void log_group(std::shared_ptr<Node> head) const
{
NGRAPH_DEBUG << "Group leader : " << head->get_name() << std::endl;
NGRAPH_DEBUG << "Group members : " << m_graphs.at(head).m_nodes << std::endl;
NGRAPH_DEBUG << "Inputs: " << m_graphs.at(head).m_inputs << std::endl;
}
std::shared_ptr<Node> collect_fusible_args(std::shared_ptr<Node> n)
{
std::shared_ptr<Node> arg_from_fusible_group;
for (auto arg : n->get_arguments())
{
// an argument is fusible and a part of some group
NGRAPH_DEBUG << "Considering " << arg->get_name();
if (m_heads.count(arg) != 0)
{
if (!arg_from_fusible_group)
{
arg_from_fusible_group = arg;
}
else
{
if (!is_leaf(arg) && m_heads.at(arg) != m_heads.at(arg_from_fusible_group))
{
return {nullptr};
}
}
}
}
return arg_from_fusible_group;
}
std::unordered_map<std::shared_ptr<Node>, LKGraph> m_graphs;
std::unordered_map<std::shared_ptr<Node>, std::shared_ptr<Node>> m_heads;
};
bool ngraph::runtime::cpu::pass::CPUCompiledKernelFusion::run_on_function(
std::shared_ptr<ngraph::Function> function)
{
CompiledKernelCollector ckc(function, m_min_kernel_size);
auto compiled_kernels = ckc.get_compiled_kernels();
for (auto ck : compiled_kernels)
{
auto outputs = ck->get_kernel_outputs();
std::set<std::shared_ptr<Node>> ck_nodes_set(ck->get_node_list().begin(),
ck->get_node_list().end());
for (size_t i = 0; i < outputs.size(); i++)
{
auto ith_goe = std::make_shared<ngraph::op::GetOutputElement>(ck, i);
auto& ith_output = ith_goe->get_outputs().at(0);
if (outputs.at(i)->get_outputs().size() > 1)
{
throw ngraph_error(
"support for fusing multi-output nodes in loop kernels isn't yet implemented");
}
// TODO: revisit when we need support for multi-output nodes
auto& orig_output = outputs.at(i)->get_outputs().at(0);
// this is needed since replace_output modifies orig_output.get_inputs()
std::set<ngraph::descriptor::Input*> inputs_copy{begin(orig_output.get_inputs()),
end(orig_output.get_inputs())};
for (auto input : inputs_copy)
{
// this user is NOT internal to this loop kernel
// so it needs to be replaced with corresponding ck's GOE
if (ck_nodes_set.count(input->get_node()) == 0)
{
input->replace_output(ith_output);
}
}
}
}
return !compiled_kernels.empty();
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/pass/pass.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace pass
{
class CPUCompiledKernelFusion : public ngraph::pass::FunctionPass
{
public:
CPUCompiledKernelFusion(size_t min_kernel_size = 2)
: FunctionPass()
, m_min_kernel_size(min_kernel_size)
{
}
bool run_on_function(std::shared_ptr<ngraph::Function> function) override;
protected:
size_t m_min_kernel_size;
};
}
}
}
}
......@@ -172,6 +172,10 @@ set(MULTI_TEST_SRC
dynamic.in.cpp
)
if (NGRAPH_MLIR_ENABLE)
list(APPEND MULTI_TEST_SRC backend_mlir.in.cpp)
endif()
if(NGRAPH_DISTRIBUTED_ENABLE)
list(APPEND MULTI_TEST_SRC distributed.in.cpp)
endif()
......
......@@ -67,36 +67,6 @@ NGRAPH_TEST(${BACKEND_NAME}, add)
(test::NDArray<float, 2>({{6, 8}, {10, 12}})).get_vector()));
}
NGRAPH_TEST(${BACKEND_NAME}, dot_add)
{
Shape shape_in1{2, 3};
Shape shape_in2{3, 3};
Shape shape_out{2, 3};
auto A = make_shared<op::Parameter>(element::f32, shape_in1);
auto B = make_shared<op::Parameter>(element::f32, shape_in2);
auto dot = make_shared<op::Dot>(A, B);
auto C = make_shared<op::Parameter>(element::f32, shape_out);
auto add = make_shared<op::Add>(dot, C);
auto f = make_shared<Function>(add, ParameterVector{A, B, C});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
shared_ptr<runtime::Tensor> a = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> b = backend->create_tensor(element::f32, shape_in2);
shared_ptr<runtime::Tensor> c = backend->create_tensor(element::f32, shape_out);
shared_ptr<runtime::Tensor> result = backend->create_tensor(element::f32, shape_out);
copy_data(a, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
copy_data(b, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f});
copy_data(c, vector<float>{5.f, 4.f, 3.f, 2.f, 1.f, 0.f});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b, c});
EXPECT_TRUE(test::all_close_f(read_vector<float>(result),
vector<float>{35.f, 40.f, 45.f, 68.f, 82.f, 96.f}));
}
NGRAPH_TEST(${BACKEND_NAME}, add_overload)
{
Shape shape{2, 2};
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// End to end test for MLIR. Add tests here that are specific to test MLIR functionality
// MLIR is implicitly tested during other unit-tests as well.
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "util/all_close.hpp"
#include "util/all_close_f.hpp"
#include "util/ndarray.hpp"
#include "util/test_control.hpp"
#include "util/test_tools.hpp"
using namespace std;
using namespace ngraph;
static string s_manifest = "${MANIFEST}";
// Combined ops test
NGRAPH_TEST(${BACKEND_NAME}, mlir_dot_add)
{
Shape shape_in1{2, 3};
Shape shape_in2{3, 3};
Shape shape_out{2, 3};
auto A = make_shared<op::Parameter>(element::f32, shape_in1);
auto B = make_shared<op::Parameter>(element::f32, shape_in2);
auto dot = make_shared<op::Dot>(A, B);
auto C = make_shared<op::Parameter>(element::f32, shape_in1);
auto add = make_shared<op::Add>(dot, C);
auto f = make_shared<Function>(add, ParameterVector{A, B, C});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
shared_ptr<runtime::Tensor> a = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> b = backend->create_tensor(element::f32, shape_in2);
shared_ptr<runtime::Tensor> c = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> result = backend->create_tensor(element::f32, shape_out);
copy_data(a, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
copy_data(b, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f});
copy_data(c, vector<float>{5.f, 4.f, 3.f, 2.f, 1.f, 0.f});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b, c});
EXPECT_TRUE(test::all_close_f(read_vector<float>(result),
vector<float>{35.f, 40.f, 45.f, 68.f, 82.f, 96.f}));
}
// Sub-graph extraction tests
NGRAPH_TEST(${BACKEND_NAME}, mlir_subgraphs_dot_add)
{
Shape shape_in1{2, 3};
Shape shape_in2{3, 3};
Shape shape_out{2, 3};
// sub-graph 1
auto P1 = make_shared<op::Parameter>(element::f32, shape_in1);
auto P2 = make_shared<op::Parameter>(element::f32, shape_in2);
auto P3 = make_shared<op::Parameter>(element::f32, shape_in1);
auto dot = make_shared<op::Dot>(P1, P2);
auto sg1_output = make_shared<op::Add>(dot, P3);
// sub-graph 2
auto P4 = make_shared<op::Parameter>(element::f32, shape_in1);
auto P5 = make_shared<op::Parameter>(element::f32, shape_in1);
auto P6 = make_shared<op::Parameter>(element::f32, shape_in1);
auto add = make_shared<op::Add>(P4, P5);
auto sg2_output = make_shared<op::Add>(add, P6);
auto out = make_shared<op::Maximum>(sg1_output, sg2_output);
auto f = make_shared<Function>(out, ParameterVector{P1, P2, P3, P4, P5, P6});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
shared_ptr<runtime::Tensor> p1 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> p2 = backend->create_tensor(element::f32, shape_in2);
shared_ptr<runtime::Tensor> p3 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> p4 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> p5 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> p6 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> result = backend->create_tensor(element::f32, shape_out);
copy_data(p1, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
copy_data(p2, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f});
copy_data(p3, vector<float>{5.f, 4.f, 3.f, 2.f, 1.f, 0.f});
copy_data(p4, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
copy_data(p5, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
copy_data(p6, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {p1, p2, p3, p4, p5, p6});
EXPECT_TRUE(
test::all_close_f(read_vector<float>(result), vector<float>{35, 40, 45, 68, 82, 96}));
}
NGRAPH_TEST(${BACKEND_NAME}, mlir_subgraphs_dot_add_2)
{
// Tests 2 sub-graphs merged at a join point into one.
Shape shape_in1{2, 3};
Shape shape_in2{3, 3};
Shape shape_out{2, 3};
// sub-graph 1
auto P1 = make_shared<op::Parameter>(element::f32, shape_in1);
auto P2 = make_shared<op::Parameter>(element::f32, shape_in2);
auto P3 = make_shared<op::Parameter>(element::f32, shape_in1);
auto dot = make_shared<op::Dot>(P1, P2);
auto sg1_output = make_shared<op::Add>(dot, P3);
// sub-graph 2
auto P4 = make_shared<op::Parameter>(element::f32, shape_in1);
auto P5 = make_shared<op::Parameter>(element::f32, shape_in1);
auto P6 = make_shared<op::Parameter>(element::f32, shape_in1);
auto add = make_shared<op::Add>(P4, P5);
auto sg2_output = make_shared<op::Add>(add, P6);
auto add2 = make_shared<op::Add>(sg1_output, sg2_output);
auto abs = make_shared<op::Abs>(add2);
auto f = make_shared<Function>(abs, ParameterVector{P1, P2, P3, P4, P5, P6});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
shared_ptr<runtime::Tensor> p1 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> p2 = backend->create_tensor(element::f32, shape_in2);
shared_ptr<runtime::Tensor> p3 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> p4 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> p5 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> p6 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> result = backend->create_tensor(element::f32, shape_out);
copy_data(p1, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
copy_data(p2, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f});
copy_data(p3, vector<float>{5.f, 4.f, 3.f, 2.f, 1.f, 0.f});
copy_data(p4, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
copy_data(p5, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
copy_data(p6, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {p1, p2, p3, p4, p5, p6});
EXPECT_TRUE(
test::all_close_f(read_vector<float>(result), vector<float>{38, 46, 54, 80, 97, 114}));
}
NGRAPH_TEST(${BACKEND_NAME}, mlir_subgraphs_dot_add_3)
{
// Tests 3 distinct sub-graphs
Shape shape_in1{2, 3};
Shape shape_in2{3, 3};
Shape shape_out{2, 3};
// sub-graph 1
auto P1 = make_shared<op::Parameter>(element::f32, shape_in1);
auto P2 = make_shared<op::Parameter>(element::f32, shape_in2);
auto P3 = make_shared<op::Parameter>(element::f32, shape_in1);
auto dot = make_shared<op::Dot>(P1, P2);
auto sg1_output = make_shared<op::Add>(dot, P3);
// sub-graph 2
auto P4 = make_shared<op::Parameter>(element::f32, shape_in1);
auto P5 = make_shared<op::Parameter>(element::f32, shape_in1);
auto P6 = make_shared<op::Parameter>(element::f32, shape_in1);
auto add = make_shared<op::Add>(P4, P5);
auto sg2_output = make_shared<op::Add>(add, P6);
auto max = make_shared<op::Maximum>(sg1_output, sg2_output);
auto add2 = make_shared<op::Add>(max, max);
auto f = make_shared<Function>(add2, ParameterVector{P1, P2, P3, P4, P5, P6});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
shared_ptr<runtime::Tensor> p1 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> p2 = backend->create_tensor(element::f32, shape_in2);
shared_ptr<runtime::Tensor> p3 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> p4 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> p5 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> p6 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> result = backend->create_tensor(element::f32, shape_out);
copy_data(p1, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
copy_data(p2, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f});
copy_data(p3, vector<float>{5.f, 4.f, 3.f, 2.f, 1.f, 0.f});
copy_data(p4, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
copy_data(p5, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
copy_data(p6, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {p1, p2, p3, p4, p5, p6});
EXPECT_TRUE(
test::all_close_f(read_vector<float>(result), vector<float>{70, 80, 90, 136, 164, 192}));
}
NGRAPH_TEST(${BACKEND_NAME}, mlir_subgraphs_cycle)
{
// Tests cycle detection: extending the sub-graph with add2 would create a cycle, so a new sub-graph is started.
Shape shape_in1{2, 3};
Shape shape_in2{3, 3};
Shape shape_out{2, 3};
// sub-graph 1
auto P1 = make_shared<op::Parameter>(element::f32, shape_in1);
auto P2 = make_shared<op::Parameter>(element::f32, shape_in2);
auto P3 = make_shared<op::Parameter>(element::f32, shape_in1);
auto dot = make_shared<op::Dot>(P1, P2);
auto add = make_shared<op::Add>(dot, P3);
auto abs = make_shared<op::Abs>(add);
auto add2 = make_shared<op::Add>(add, abs);
auto f = make_shared<Function>(add2, ParameterVector{P1, P2, P3});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
shared_ptr<runtime::Tensor> p1 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> p2 = backend->create_tensor(element::f32, shape_in2);
shared_ptr<runtime::Tensor> p3 = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> result = backend->create_tensor(element::f32, shape_out);
copy_data(p1, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
copy_data(p2, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f});
copy_data(p3, vector<float>{5.f, 4.f, 3.f, 2.f, 1.f, 0.f});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {p1, p2, p3});
EXPECT_TRUE(
test::all_close_f(read_vector<float>(result), vector<float>{70, 80, 90, 136, 164, 192}));
}