Commit 1efd0bfd authored by Robert Kimball, committed by Scott Cyphers

The General Purpose graph splitting is no longer used (#2391)

* remove general splitting code. New code in hybrid transformer.

* more cleanup
parent c9a9c154
...@@ -123,7 +123,6 @@ set (SRC ...@@ -123,7 +123,6 @@ set (SRC
op/util/logical_reduction.cpp op/util/logical_reduction.cpp
op/util/unary_elementwise_arithmetic.cpp op/util/unary_elementwise_arithmetic.cpp
partial_shape.cpp partial_shape.cpp
pass/assign_placement.cpp
pass/algebraic_simplification.cpp pass/algebraic_simplification.cpp
pass/common_function_collection.cpp pass/common_function_collection.cpp
pass/constant_folding.cpp pass/constant_folding.cpp
...@@ -139,8 +138,8 @@ set (SRC ...@@ -139,8 +138,8 @@ set (SRC
pass/memory_visualize.cpp pass/memory_visualize.cpp
pass/nop_elimination.cpp pass/nop_elimination.cpp
pass/pass.cpp pass/pass.cpp
pass/pass_config.cpp pass/pass_config.cpp
pass/prefix_reshape_elimination.cpp pass/prefix_reshape_elimination.cpp
pass/propagate_cacheability.cpp pass/propagate_cacheability.cpp
pass/reshape_elimination.cpp pass/reshape_elimination.cpp
pass/reshape_sinking.cpp pass/reshape_sinking.cpp
......
...@@ -31,7 +31,6 @@ ...@@ -31,7 +31,6 @@
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/op/parameter.hpp" #include "ngraph/op/parameter.hpp"
#include "ngraph/op/result.hpp" #include "ngraph/op/result.hpp"
#include "ngraph/placement.hpp"
#include "ngraph/result_vector.hpp" #include "ngraph/result_vector.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
...@@ -406,29 +405,6 @@ void ngraph::insert_new_node_between(const shared_ptr<Node>& src_node, ...@@ -406,29 +405,6 @@ void ngraph::insert_new_node_between(const shared_ptr<Node>& src_node,
dst_input->replace_output(new_node, 0); // Remove [0] (again), add [8], remove [1], add [9] dst_input->replace_output(new_node, 0); // Remove [0] (again), add [8], remove [1], add [9]
} }
// Check that all nodes of `func` share one non-default placement and return that placement.
//
// Throws ngraph_error when a node still carries Placement::DEFAULT or when two nodes disagree.
// If the traversal visits no node at all, Placement::DEFAULT is returned.
Placement ngraph::get_colocated_function_placement(shared_ptr<Function> func)
{
    Placement common_placement = Placement::DEFAULT;
    traverse_nodes(func, [&common_placement](shared_ptr<Node> node) {
        const Placement current = node->get_placement();
        if (current == Placement::DEFAULT)
        {
            throw ngraph_error("Node should have a device placement, not Placement::DEFAULT");
        }
        if (common_placement == Placement::DEFAULT)
        {
            // The first visited node fixes the placement every later node must match
            common_placement = current;
            return;
        }
        if (common_placement != current)
        {
            throw ngraph_error("Function contains nodes of two different placements");
        }
    });
    return common_placement;
}
std::shared_ptr<Node> ngraph::make_zero(const element::Type& element_type, const Shape& shape) std::shared_ptr<Node> ngraph::make_zero(const element::Type& element_type, const Shape& shape)
{ {
std::shared_ptr<Node> zero = op::Constant::create(element_type, Shape{}, {0.0}); std::shared_ptr<Node> zero = op::Constant::create(element_type, Shape{}, {0.0});
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/pass/assign_placement.hpp"
#include "ngraph/log.hpp"
#include "ngraph/node.hpp"
#include "ngraph/placement.hpp"
#include "ngraph/runtime/backend.hpp"
using namespace ngraph;
using namespace std;
pass::AssignPlacement::AssignPlacement(function<Placement(shared_ptr<Node>)> placement_policy)
: m_placement_policy(placement_policy)
{
}
// Ask the placement policy where `node` should run and store the answer on the node.
// Always returns false (presumably meaning "graph not modified" -- confirm against NodePass).
bool pass::AssignPlacement::run_on_node(shared_ptr<Node> node)
{
    const Placement chosen_placement = m_placement_policy(node);
    node->set_placement(chosen_placement);
    return false;
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <exception>
#include <functional>
#include <sstream>
#include "ngraph/pass/pass.hpp"
#include "ngraph/placement.hpp"
// Declaration of the AssignPlacement graph pass.
namespace ngraph
{
namespace pass
{
// Node pass that stores on each visited node the Placement chosen by a
// caller-supplied policy callback.
class AssignPlacement : public NodePass
{
public:
// TODO: make policy a class
// placement_policy: callable that maps a node to the Placement it should receive.
AssignPlacement(std::function<Placement(std::shared_ptr<Node>)> placement_policy);
private:
// Per-node hook overriding the base-class run_on_node.
bool run_on_node(std::shared_ptr<Node> node) override;
// Policy callback supplied at construction.
std::function<Placement(std::shared_ptr<Node>)> m_placement_policy;
};
}
}
...@@ -39,188 +39,3 @@ std::string ngraph::placement_to_string(Placement placement) ...@@ -39,188 +39,3 @@ std::string ngraph::placement_to_string(Placement placement)
} }
throw runtime_error("unhandled placement type"); throw runtime_error("unhandled placement type");
} }
// Pop and return the next dependency-free node, preferring the queue that belongs to
// `placement`. When that queue is missing or empty, the first non-empty queue in map order is
// used instead. Returns nullptr when every queue is empty.
static Node* take_independent_node_with_placement_priority(
    map<Placement, deque<Node*>>& independent_nodes_by_placement, Placement placement)
{
    // Preferred placement first
    auto preferred = independent_nodes_by_placement.find(placement);
    if (preferred != independent_nodes_by_placement.end() && !preferred->second.empty())
    {
        Node* picked = preferred->second.front();
        preferred->second.pop_front();
        return picked;
    }

    // Otherwise fall back to any placement that still has a ready node
    for (auto& entry : independent_nodes_by_placement)
    {
        deque<Node*>& pending = entry.second;
        if (!pending.empty())
        {
            Node* picked = pending.front();
            pending.pop_front();
            return picked;
        }
    }
    return nullptr;
}
// Group the ops of `f` into clusters of consecutive, equally placed nodes.
//
// The ops are first sorted topologically; among the nodes whose dependencies are all satisfied,
// one with the same placement as the previously picked node is preferred, so that runs of equal
// placement become as long as possible. Every maximal run of equal placement in that order
// becomes one cluster.
//
// Throws ngraph_error when the sort does not cover every op, when a node ends up in two
// clusters, or when the clustered nodes differ from f->get_ordered_ops().
static vector<unordered_set<shared_ptr<Node>>>
    group_function_nodes_to_clusters(const shared_ptr<Function>& f)
{
    // Topologically sort nodes by picking independent node with the same placement as the
    // previously picked node greedily
    map<Placement, deque<Node*>> independent_nodes_by_placement;
    unordered_map<Node*, size_t> node_dependency_count;
    unordered_map<ngraph::Node*, shared_ptr<ngraph::Node>> node_map;

    // Query the ops once; the graph is not modified anywhere in this function
    const auto ops = f->get_ops();
    for (const shared_ptr<Node>& node : ops)
    {
        const size_t dependency_count = node->get_arguments().size();
        node_map[node.get()] = node;
        node_dependency_count[node.get()] = dependency_count;
        if (dependency_count == 0)
        {
            independent_nodes_by_placement[node->get_placement()].push_back(node.get());
        }
    }

    list<shared_ptr<Node>> sorted_nodes;
    Placement previous_placement = Placement::DEFAULT;
    while (Node* independent_node = take_independent_node_with_placement_priority(
               independent_nodes_by_placement, previous_placement))
    {
        previous_placement = independent_node->get_placement();
        sorted_nodes.push_back(node_map.at(independent_node));

        for (const auto& user : independent_node->get_users())
        {
            Node* user_node = user.get();
            // A user becomes schedulable once its last outstanding dependency is emitted
            size_t& remaining = node_dependency_count.at(user_node);
            remaining -= 1;
            if (remaining == 0)
            {
                independent_nodes_by_placement[user_node->get_placement()].push_back(user_node);
            }
        }
    }

    if (sorted_nodes.size() != ops.size())
    {
        throw ngraph_error("sorted_nodes.size()== " + to_string(sorted_nodes.size()) +
                           " != f->get_ops().size()== " + to_string(ops.size()) +
                           ". Internal error with topological sort.");
    }

    // Build clusters from the sorted_nodes
    previous_placement = Placement::DEFAULT;
    vector<unordered_set<shared_ptr<Node>>> clusters;
    for (const shared_ptr<Node>& node : sorted_nodes)
    {
        const Placement node_placement = node->get_placement();
        // The empty() test guarantees that a cluster exists before back() is used, even when the
        // very first node carries Placement::DEFAULT and therefore equals the initial
        // previous_placement.
        if (clusters.empty() || node_placement != previous_placement)
        {
            clusters.emplace_back();
        }
        clusters.back().insert(node);
        previous_placement = node_placement;
    }

    // Sanity check for node duplication and full node coverage
    unordered_set<shared_ptr<Node>> cluster_nodes;
    for (const auto& cluster : clusters)
    {
        for (const auto& node : cluster)
        {
            if (cluster_nodes.find(node) != cluster_nodes.end())
            {
                throw ngraph_error("Node " + node->get_name() + " is duplicated in clusters");
            }
            cluster_nodes.insert(node);
        }
    }

    unordered_set<shared_ptr<Node>> f_nodes;
    for (const auto& node : f->get_ordered_ops())
    {
        f_nodes.insert(node);
    }
    if (cluster_nodes != f_nodes)
    {
        throw ngraph_error(
            "Cluster's nodes are not the same as function's nodes. cluster_nodes.size()=" +
            to_string(cluster_nodes.size()) + ", f_nodes.size()=" + to_string(f_nodes.size()));
    }

    return clusters;
}
// Split a function by placement, maximizing the span of each subgraph. Each subgraph will be
// placed on a single device.
//
// For nested functions, only the ops in the main function that represent calls to the nested
// functions are considered.
//
// Returns the sub-functions together with a map from every inserted (intermediate) parameter to
// the result node that feeds it; the map guides data copies among devices.
pair<vector<shared_ptr<Function>>, unordered_map<shared_ptr<op::Parameter>, shared_ptr<op::Result>>>
    ngraph::split_function_by_placement(const shared_ptr<Function>& f)
{
    using Cluster = unordered_set<shared_ptr<Node>>;

    // Clusters of nodes that can be computed together
    vector<Cluster> clusters = group_function_nodes_to_clusters(f);

    // Reverse index from node to owning cluster. The pointers refer to elements of `clusters`,
    // which is not resized below.
    unordered_map<shared_ptr<Node>, Cluster*> cluster_of;
    for (auto& cluster : clusters)
    {
        for (auto member : cluster)
        {
            cluster_of[member] = &cluster;
        }
    }

    // Cut every edge whose endpoints live in different clusters
    // TODO: optimization to group multiple result node from the same source,
    //       and to group the parameter node in the same cluster with the same result node source
    unordered_map<shared_ptr<op::Parameter>, shared_ptr<op::Result>> parameter_to_result;
    for (auto dst_node : f->get_ordered_ops())
    {
        for (auto src_node : dst_node->get_arguments())
        {
            Cluster* src_cluster = cluster_of.at(src_node);
            Cluster* dst_cluster = cluster_of.at(dst_node);
            if (src_cluster == dst_cluster)
            {
                continue;
            }

            // Replace the edge by a result on the source side and a parameter on the
            // destination side, and remember which result feeds which parameter
            pair<shared_ptr<op::Result>, shared_ptr<op::Parameter>> split =
                insert_result_parameter_split(src_node, dst_node);
            parameter_to_result[split.second] = split.first;

            // The new nodes join the cluster of the side they were attached to
            src_cluster->insert(split.first);
            dst_cluster->insert(split.second);
        }
    }

    // Turn each cluster into a function built from the results and parameters it contains
    // NOTE(review): the loop iterates by value, copying each cluster; a const reference would
    // probably suffice -- confirm nothing relies on the copy's iteration order.
    vector<shared_ptr<Function>> sub_functions;
    for (auto cluster : clusters)
    {
        ResultVector results;
        ParameterVector parameters;
        for (auto member : cluster)
        {
            if (auto as_result = dynamic_pointer_cast<op::Result>(member))
            {
                results.push_back(as_result);
                continue;
            }
            if (auto as_parameter = dynamic_pointer_cast<op::Parameter>(member))
            {
                parameters.push_back(as_parameter);
            }
        }
        sub_functions.push_back(make_shared<Function>(results, parameters));
    }

    return make_pair(sub_functions, parameter_to_result);
}
...@@ -26,15 +26,6 @@ ...@@ -26,15 +26,6 @@
namespace ngraph namespace ngraph
{ {
class Function;
class Node;
namespace op
{
class Parameter;
class Result;
}
enum class Placement enum class Placement
{ {
DEFAULT, DEFAULT,
...@@ -46,9 +37,4 @@ namespace ngraph ...@@ -46,9 +37,4 @@ namespace ngraph
}; };
std::string placement_to_string(Placement placement); std::string placement_to_string(Placement placement);
// Split function to function(s) with unique placement
std::pair<std::vector<std::shared_ptr<Function>>,
std::unordered_map<std::shared_ptr<op::Parameter>, std::shared_ptr<op::Result>>>
split_function_by_placement(const std::shared_ptr<Function>& f);
} }
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
#include "ngraph/runtime/gpuh/gpuh_backend.hpp" #include "ngraph/runtime/gpuh/gpuh_backend.hpp"
#include "ngraph/graph_util.hpp" #include "ngraph/graph_util.hpp"
#include "ngraph/pass/assign_placement.hpp"
#include "ngraph/pass/manager.hpp" #include "ngraph/pass/manager.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp" #include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/interpreter/int_backend.hpp" #include "ngraph/runtime/interpreter/int_backend.hpp"
......
...@@ -37,7 +37,6 @@ set(SRC ...@@ -37,7 +37,6 @@ set(SRC
cse.cpp cse.cpp
element_type.cpp element_type.cpp
file_util.cpp file_util.cpp
graph_partition.cpp
includes.cpp includes.cpp
input_output_assign.cpp input_output_assign.cpp
main.cpp main.cpp
...@@ -200,7 +199,7 @@ if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "^(Apple)?Clang$") ...@@ -200,7 +199,7 @@ if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "^(Apple)?Clang$")
endif() endif()
if (NGRAPH_CPU_ENABLE) if (NGRAPH_CPU_ENABLE)
# The INTERPRETER backend is required for graph_partition, convolution, and backwards unit tests # The INTERPRETER backend is required for convolution, and backwards unit tests
target_link_libraries(unit-test PRIVATE cpu_backend interpreter_backend) target_link_libraries(unit-test PRIVATE cpu_backend interpreter_backend)
target_link_libraries(unit-test PRIVATE libmkldnn) target_link_libraries(unit-test PRIVATE libmkldnn)
endif() endif()
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include <sstream>
#include <string>
#include <typeindex>
#include <typeinfo>
#include <vector>
#include "gtest/gtest.h"
#include "ngraph/graph_util.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/pass/assign_placement.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/util.hpp"
#include "util/ndarray.hpp"
#include "util/test_tools.hpp"
using namespace std;
using namespace ngraph;
// Placement policy: Multiply nodes fall back to CPU, every other node runs on INTERPRETER
static function<Placement(shared_ptr<Node>)> int_with_cpu_mul_policy = [](shared_ptr<Node> node) {
    const bool is_multiply = (node->description() == "Multiply");
    return is_multiply ? Placement::CPU : Placement::INTERPRETER;
};
// HybridCallFrame servers 2 purposes:
// 1. HybridBackend's main use case is to test device placement and graph partition routines.
// 2. It also shows how glued-hybrid runtime can be built by combining different runtimes.
//
// By default, HybridBackend operates on INTERPRETER (for example, the tensor view is
// INTERPRETER tensor view). It falls back to CPU when requested by placement.
class HybridBackend
{
public:
HybridBackend(const function<Placement(shared_ptr<Node>)>& placement_policy)
: m_placement_policy(placement_policy)
{
}
~HybridBackend() {}
shared_ptr<runtime::Tensor> create_tensor(const element::Type& element_type, const Shape& shape)
{
return get_cached_backend(Placement::INTERPRETER)->create_tensor(element_type, shape);
}
bool compile(const shared_ptr<Function>& func)
{
if (m_function_map.find(func) == m_function_map.end())
{
// Clone function
FunctionInstance instance;
instance.m_function = clone_function(*func);
// Run placement pass
pass::Manager pass_manager;
pass_manager.register_pass<pass::AssignPlacement>(int_with_cpu_mul_policy);
pass_manager.run_passes(instance.m_function);
// Split function to sub_functions
tie(instance.m_sub_functions, instance.m_map_parameter_to_result) =
split_function_by_placement(instance.m_function);
m_function_map.insert({func, instance});
// Compile subfunctions in corresponding backends
for (shared_ptr<Function>& sub_function : instance.m_sub_functions)
{
Placement placement = get_colocated_function_placement(sub_function);
auto backend = get_cached_backend(placement);
backend->compile(sub_function);
}
}
return true;
}
bool call_with_validate(const shared_ptr<Function>& func,
const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
{
// Get FunctionInstance
bool rc = true;
auto it = m_function_map.find(func);
if (it == m_function_map.end())
{
compile(func);
it = m_function_map.find(func);
}
if (it == m_function_map.end())
{
throw runtime_error("Error constructing backend.");
}
FunctionInstance& instance = it->second;
// Parameter and result node in sub_function maps to one Tensor
unordered_map<shared_ptr<Node>, shared_ptr<runtime::Tensor>> map_node_to_tensor_view;
for (size_t i = 0; i < inputs.size(); ++i)
{
map_node_to_tensor_view[instance.m_function->get_parameters()[i]] = inputs[i];
}
for (size_t i = 0; i < outputs.size(); ++i)
{
map_node_to_tensor_view[instance.m_function->get_results()[i]] = outputs[i];
}
// Call subfunctions
for (shared_ptr<Function>& sub_function : instance.m_sub_functions)
{
// Init backend
Placement placement = get_colocated_function_placement(sub_function);
auto backend = get_cached_backend(placement);
// Prepare parameter TensorViews
vector<shared_ptr<runtime::Tensor>> parameter_tvs;
for (auto parameter_node : sub_function->get_parameters())
{
if (map_node_to_tensor_view.find(parameter_node) != map_node_to_tensor_view.end())
{
parameter_tvs.push_back(map_node_to_tensor_view.at(parameter_node));
}
else
{
auto result_node = instance.m_map_parameter_to_result.at(parameter_node);
auto result_tv = map_node_to_tensor_view.at(result_node);
auto parameter_tv = backend->create_tensor(parameter_node->get_element_type(),
parameter_node->get_shape());
copy_data(parameter_tv, read_vector<float>(result_tv));
map_node_to_tensor_view[parameter_node] = parameter_tv;
parameter_tvs.push_back(parameter_tv);
}
}
// Prepare result TensorViews
vector<shared_ptr<runtime::Tensor>> result_tvs;
for (auto result_node : sub_function->get_results())
{
if (map_node_to_tensor_view.find(result_node) != map_node_to_tensor_view.end())
{
result_tvs.push_back(map_node_to_tensor_view.at(result_node));
}
else
{
auto result_tv = backend->create_tensor(result_node->get_element_type(),
result_node->get_shape());
map_node_to_tensor_view[result_node] = result_tv;
result_tvs.push_back(result_tv);
}
}
// Call
backend->call_with_validate(sub_function, result_tvs, parameter_tvs);
}
return rc;
}
protected:
class FunctionInstance
{
public:
shared_ptr<Function> m_function;
vector<shared_ptr<Function>> m_sub_functions;
unordered_map<shared_ptr<op::Parameter>, shared_ptr<op::Result>> m_map_parameter_to_result;
};
shared_ptr<runtime::Backend> get_cached_backend(Placement placement)
{
if (m_cached_backends.find(placement) == m_cached_backends.end())
{
m_cached_backends[placement] = runtime::Backend::create(placement_to_string(placement));
}
return m_cached_backends.at(placement);
}
map<Placement, shared_ptr<runtime::Backend>> m_cached_backends;
map<shared_ptr<Function>, FunctionInstance> m_function_map;
function<Placement(shared_ptr<Node>)> m_placement_policy;
};
// A constant policy must move every node of (A + B) * C from DEFAULT placement to CPU
TEST(graph_partition, placement_all_cpu_policy)
{
    Shape shape{2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::Parameter>(element::f32, shape);
    auto C = make_shared<op::Parameter>(element::f32, shape);
    shared_ptr<Node> AplusB = A + B;
    shared_ptr<Node> AplusBtimesC = AplusB * C;
    auto f = make_shared<Function>(AplusBtimesC, ParameterVector{A, B, C});

    // Nothing has been placed before the pass runs
    for (const auto& node : f->get_ordered_ops())
    {
        EXPECT_EQ(node->get_placement(), Placement::DEFAULT);
    }

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::AssignPlacement>(
        [](shared_ptr<Node> node) { return Placement::CPU; });
    pass_manager.run_passes(f);

    // Afterwards every node is on CPU
    for (const auto& node : f->get_ordered_ops())
    {
        EXPECT_EQ(node->get_placement(), Placement::CPU);
    }
}
#ifdef NGRAPH_CPU_ENABLE
// int_with_cpu_mul_policy must place Multiply on CPU and everything else on INTERPRETER
TEST(graph_partition, placement_int_with_cpu_mul_policy)
{
    Shape shape{2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::Parameter>(element::f32, shape);
    auto C = make_shared<op::Parameter>(element::f32, shape);
    shared_ptr<Node> AplusB = A + B;
    shared_ptr<Node> AplusBtimesC = AplusB * C;
    auto f = make_shared<Function>(AplusBtimesC, ParameterVector{A, B, C});

    // Nothing has been placed before the pass runs
    for (const auto& node : f->get_ordered_ops())
    {
        EXPECT_EQ(node->get_placement(), Placement::DEFAULT);
    }

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::AssignPlacement>(int_with_cpu_mul_policy);
    pass_manager.run_passes(f);

    for (const auto& node : f->get_ordered_ops())
    {
        if (node->description() != "Multiply")
        {
            EXPECT_EQ(node->get_placement(), Placement::INTERPRETER);
        }
        else
        {
            EXPECT_EQ(node->get_placement(), Placement::CPU);
        }
    }
}
// Split R = (A + B) * C by hand into three functions and run them on alternating backends:
//   f0 (INT): A, B, C -> R0 (= D = A + B), R1 (= C)
//   f1 (CPU): P0, P1  -> R2 (= E = P0 * P1)
//   f2 (INT): P2      -> R
TEST(graph_partition, hybrid_abc_manual)
{
    Shape shape{2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::Parameter>(element::f32, shape);
    auto C = make_shared<op::Parameter>(element::f32, shape);
    auto D = A + B;
    auto E = D * C;
    auto R = make_shared<op::Result>(E);
    auto f = make_shared<Function>(ResultVector{R}, ParameterVector{A, B, C});

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::AssignPlacement>(int_with_cpu_mul_policy);
    pass_manager.run_passes(f);

    // Cut the edges D->E, C->E and E->R; each cut yields a (result, parameter) pair
    shared_ptr<op::Result> R0, R1, R2;
    shared_ptr<op::Parameter> P0, P1, P2;
    tie(R0, P0) = insert_result_parameter_split(D, E);
    tie(R1, P1) = insert_result_parameter_split(C, E);
    tie(R2, P2) = insert_result_parameter_split(E, R);

    // Backends
    auto int_backend = runtime::Backend::create(placement_to_string(Placement::INTERPRETER));
    auto cpu_backend = runtime::Backend::create(placement_to_string(Placement::CPU));

    // f0 on INT
    auto a = int_backend->create_tensor(element::f32, shape);
    auto b = int_backend->create_tensor(element::f32, shape);
    auto c = int_backend->create_tensor(element::f32, shape);
    auto r0 = int_backend->create_tensor(element::f32, shape);
    auto r1 = int_backend->create_tensor(element::f32, shape);
    copy_data(a, test::NDArray<float, 2>({{1, 2}, {3, 4}}).get_vector());
    copy_data(b, test::NDArray<float, 2>({{5, 6}, {7, 8}}).get_vector());
    copy_data(c, test::NDArray<float, 2>({{9, 10}, {11, 12}}).get_vector());
    auto f0 = make_shared<Function>(ResultVector{R0, R1}, ParameterVector{A, B, C});
    int_backend->compile(f0);
    int_backend->call_with_validate(f0, {r0, r1}, {a, b, c});

    // f1 on CPU, fed with the outputs of f0
    auto p0 = cpu_backend->create_tensor(element::f32, shape);
    auto p1 = cpu_backend->create_tensor(element::f32, shape);
    auto r2 = cpu_backend->create_tensor(element::f32, shape);
    copy_data(p0, read_vector<float>(r0));
    copy_data(p1, read_vector<float>(r1));
    auto f1 = make_shared<Function>(ResultVector{R2}, ParameterVector{P0, P1});
    cpu_backend->compile(f1);
    cpu_backend->call_with_validate(f1, {r2}, {p0, p1});

    // f2 on INT, fed with the output of f1
    auto p2 = int_backend->create_tensor(element::f32, shape);
    auto r = int_backend->create_tensor(element::f32, shape);
    copy_data(p2, read_vector<float>(r2));
    auto f2 = make_shared<Function>(ResultVector{R}, ParameterVector{P2});
    int_backend->compile(f2);
    int_backend->call_with_validate(f2, {r}, {p2});

    // Check final result on INT
    EXPECT_EQ(read_vector<float>(r),
              (test::NDArray<float, 2>({{54, 80}, {110, 144}})).get_vector());
}
// Same graph as hybrid_abc_manual, R = (A + B) * C, but split and executed by the test
// HybridBackend:
//   f0 (INT): A, B, C -> R0 (= A + B), R1 (= C)
//   f1 (CPU): P0, P1  -> R2 (= P0 * P1)
//   f2 (INT): P2      -> R
TEST(graph_partition, hybrid_abc)
{
    Shape shape{2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::Parameter>(element::f32, shape);
    auto C = make_shared<op::Parameter>(element::f32, shape);
    auto D = A + B;
    auto E = D * C;
    auto R = make_shared<op::Result>(E);
    auto f = make_shared<Function>(ResultVector{R}, ParameterVector{A, B, C});

    HybridBackend backend(int_with_cpu_mul_policy);
    auto a = backend.create_tensor(element::f32, shape);
    auto b = backend.create_tensor(element::f32, shape);
    auto c = backend.create_tensor(element::f32, shape);
    auto r = backend.create_tensor(element::f32, shape);

    copy_data(a, test::NDArray<float, 2>({{1, 2}, {3, 4}}).get_vector());
    copy_data(b, test::NDArray<float, 2>({{5, 6}, {7, 8}}).get_vector());
    copy_data(c, test::NDArray<float, 2>({{9, 10}, {11, 12}}).get_vector());

    // No explicit compile: call_with_validate compiles on first use
    backend.call_with_validate(f, {r}, {a, b, c});
    EXPECT_EQ(read_vector<float>(r),
              (test::NDArray<float, 2>({{54, 80}, {110, 144}})).get_vector());
}
// Graph under test: E = A * B, F = C + E, G = E + D, H = F + G
// (one Multiply whose output feeds two Adds)
TEST(graph_partition, hybrid_abcd)
{
    Shape shape{2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::Parameter>(element::f32, shape);
    auto C = make_shared<op::Parameter>(element::f32, shape);
    auto D = make_shared<op::Parameter>(element::f32, shape);
    shared_ptr<Node> E = A * B;
    shared_ptr<Node> F = C + E;
    shared_ptr<Node> G = E + D;
    shared_ptr<Node> H = F + G;
    auto f = make_shared<Function>(H, ParameterVector{A, B, C, D});

    HybridBackend backend(int_with_cpu_mul_policy);
    backend.compile(f);

    auto a = backend.create_tensor(element::f32, shape);
    auto b = backend.create_tensor(element::f32, shape);
    auto c = backend.create_tensor(element::f32, shape);
    auto d = backend.create_tensor(element::f32, shape);
    auto r = backend.create_tensor(element::f32, shape);

    copy_data(a, test::NDArray<float, 2>({{1, 2}, {3, 4}}).get_vector());
    copy_data(b, test::NDArray<float, 2>({{5, 6}, {7, 8}}).get_vector());
    copy_data(c, test::NDArray<float, 2>({{9, 10}, {11, 12}}).get_vector());
    copy_data(d, test::NDArray<float, 2>({{13, 14}, {15, 16}}).get_vector());

    backend.call_with_validate(f, {r}, {a, b, c, d});
    EXPECT_EQ(read_vector<float>(r), (test::NDArray<float, 2>({{32, 48}, {68, 92}})).get_vector());
}
// Graph under test: D = A * B, E = D + B, F = E * C
// (Multiply -> Add -> Multiply, so execution alternates between placements)
TEST(graph_partition, hybrid_back_and_forth)
{
    Shape shape{2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::Parameter>(element::f32, shape);
    auto C = make_shared<op::Parameter>(element::f32, shape);
    shared_ptr<Node> D = A * B;
    shared_ptr<Node> E = D + B;
    shared_ptr<Node> F = E * C;
    auto f = make_shared<Function>(F, ParameterVector{A, B, C});

    HybridBackend backend(int_with_cpu_mul_policy);
    backend.compile(f);

    auto a = backend.create_tensor(element::f32, shape);
    auto b = backend.create_tensor(element::f32, shape);
    auto c = backend.create_tensor(element::f32, shape);
    auto r = backend.create_tensor(element::f32, shape);

    copy_data(a, test::NDArray<float, 2>({{1, 2}, {3, 4}}).get_vector());
    copy_data(b, test::NDArray<float, 2>({{5, 6}, {7, 8}}).get_vector());
    copy_data(c, test::NDArray<float, 2>({{9, 10}, {11, 12}}).get_vector());

    backend.call_with_validate(f, {r}, {a, b, c});
    EXPECT_EQ(read_vector<float>(r),
              (test::NDArray<float, 2>({{90, 180}, {308, 480}})).get_vector());
}
// Graph under test: D = A + B, E = B + C, F = D * E, G = E * C, H = F + G
// (two Multiply nodes sit between the Add layers)
TEST(graph_partition, hybrid_multi_middle_nodes)
{
    Shape shape{2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::Parameter>(element::f32, shape);
    auto C = make_shared<op::Parameter>(element::f32, shape);
    shared_ptr<Node> D = A + B;
    shared_ptr<Node> E = B + C;
    shared_ptr<Node> F = D * E;
    shared_ptr<Node> G = E * C;
    shared_ptr<Node> H = F + G;
    auto f = make_shared<Function>(H, ParameterVector{A, B, C});

    HybridBackend backend(int_with_cpu_mul_policy);
    backend.compile(f);

    auto a = backend.create_tensor(element::f32, shape);
    auto b = backend.create_tensor(element::f32, shape);
    auto c = backend.create_tensor(element::f32, shape);
    auto r = backend.create_tensor(element::f32, shape);

    copy_data(a, test::NDArray<float, 2>({{1, 2}, {3, 4}}).get_vector());
    copy_data(b, test::NDArray<float, 2>({{5, 6}, {7, 8}}).get_vector());
    copy_data(c, test::NDArray<float, 2>({{9, 10}, {11, 12}}).get_vector());

    backend.call_with_validate(f, {r}, {a, b, c});
    EXPECT_EQ(read_vector<float>(r),
              (test::NDArray<float, 2>({{210, 288}, {378, 480}})).get_vector());
}
// Graph under test: C = A + B (no Multiply, so the policy places every node on INTERPRETER)
TEST(graph_partition, hybrid_no_split)
{
    Shape shape{2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::Parameter>(element::f32, shape);
    shared_ptr<Node> C = A + B;
    auto f = make_shared<Function>(C, ParameterVector{A, B});

    HybridBackend backend(int_with_cpu_mul_policy);
    backend.compile(f);

    auto a = backend.create_tensor(element::f32, shape);
    auto b = backend.create_tensor(element::f32, shape);
    auto c = backend.create_tensor(element::f32, shape);

    copy_data(a, test::NDArray<float, 2>({{1, 2}, {3, 4}}).get_vector());
    copy_data(b, test::NDArray<float, 2>({{5, 6}, {7, 8}}).get_vector());

    backend.call_with_validate(f, {c}, {a, b});
    EXPECT_EQ(read_vector<float>(c), (test::NDArray<float, 2>({{6, 8}, {10, 12}})).get_vector());
}
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment