Commit af2c4c7d authored by Robert Kimball, committed by Scott Cyphers

Framework for Hybrid GPU backend (#2196)

* add empty framework for hybrid GPU, or GPUH

* move placement to the runtime directory

* wip

* skeleton for hybrid GPU backend. most unit tests pass.

* cleanup

* move hybrid code into hybrid dir/namespace

* move hybrid functions

* move more hybrid functions to hybrid directory

* fix placement after compile. All unit tests passing

* fix gpu backend ctor
parent 9234cc69
......@@ -71,14 +71,15 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
include(var_functions)
set(NGRAPH_HYBRID_ENABLE TRUE)
option(NGRAPH_UNIT_TEST_ENABLE "Control the building of unit tests" TRUE)
option(NGRAPH_TOOLS_ENABLE "Control the building of tool" TRUE)
option(NGRAPH_CPU_ENABLE "Control the building of the CPU backend" TRUE)
option(NGRAPH_INTELGPU_ENABLE "Control the building of the Intel GPU backend with clDNN" FALSE)
option(NGRAPH_GPU_ENABLE "Control the building of the GPU backend" FALSE)
option(NGRAPH_INTERPRETER_ENABLE "Control the building of the INTERPRETER backend" TRUE)
option(NGRAPH_HYBRID_ENABLE "Control the building of the HYBRID backend" FALSE)
option(NGRAPH_NOP_ENABLE "Control the building of the NOP backend" TRUE)
option(NGRAPH_GPUH_ENABLE "Control the building of the Hybrid GPU backend" FALSE)
option(NGRAPH_DISTRIBUTED_ENABLE "Add distributed mode to the CPU backend" FALSE)
option(NGRAPH_DEBUG_ENABLE "Enable output for NGRAPH_DEBUG statements" FALSE)
option(NGRAPH_ONNX_IMPORT_ENABLE "Enable ONNX importer" FALSE)
......@@ -93,8 +94,8 @@ message(STATUS "NGRAPH_CPU_ENABLE: ${NGRAPH_CPU_ENABLE}")
message(STATUS "NGRAPH_INTELGPU_ENABLE: ${NGRAPH_INTELGPU_ENABLE}")
message(STATUS "NGRAPH_GPU_ENABLE: ${NGRAPH_GPU_ENABLE}")
message(STATUS "NGRAPH_INTERPRETER_ENABLE: ${NGRAPH_INTERPRETER_ENABLE}")
message(STATUS "NGRAPH_HYBRID_ENABLE: ${NGRAPH_HYBRID_ENABLE}")
message(STATUS "NGRAPH_NOP_ENABLE: ${NGRAPH_NOP_ENABLE}")
message(STATUS "NGRAPH_GPUH_ENABLE: ${NGRAPH_GPUH_ENABLE}")
message(STATUS "NGRAPH_DISTRIBUTED_ENABLE: ${NGRAPH_DISTRIBUTED_ENABLE}")
message(STATUS "NGRAPH_DEBUG_ENABLE: ${NGRAPH_DEBUG_ENABLE}")
message(STATUS "NGRAPH_ONNX_IMPORT_ENABLE: ${NGRAPH_ONNX_IMPORT_ENABLE}")
......
......@@ -355,35 +355,6 @@ pair<shared_ptr<op::Result>, shared_ptr<op::Parameter>>
return make_pair(res_node, par_node);
}
// The *_size suffix in the function name is temporary; it will be dropped
// once the backends move to the latest Hybrid backend.
//
// Cuts the edge src_node -> dst_node. dst_node is rewired to read from a newly
// created Parameter, and a newly created Result consumes src_node.
// Returns the pair (Result, Parameter).
// Throws ngraph_error when src_node does not have exactly one output.
pair<shared_ptr<op::Result>, shared_ptr<op::Parameter>>
    ngraph::insert_result_parameter_split_size(const shared_ptr<Node>& src_node,
                                               const shared_ptr<Node>& dst_node)
{
    const bool has_single_output = (src_node->get_output_size() == 1);
    if (!has_single_output)
    {
        throw ngraph_error("Multiple output per op not supported in graph partition yet.");
    }

    // The parameter mirrors element type and shape of src's output 0 and takes
    // the placement of the consumer.
    auto parameter = make_shared<op::Parameter>(src_node->get_output_element_type(0),
                                                src_node->get_output_shape(0));
    parameter->set_placement(dst_node->get_placement_size());

    // Detach the consumer's input from the producer's output, then hook that
    // input up to output 0 of the new parameter.
    descriptor::Input* consumer_input = dst_node->get_input_from(src_node);
    descriptor::Output* producer_output = src_node->get_output_to(dst_node);
    producer_output->remove_input(consumer_input);
    consumer_input->replace_output(parameter, 0);

    // The result node consumes the producer and takes the producer's placement.
    auto result = make_shared<op::Result>(src_node);
    result->set_placement(src_node->get_placement_size());

    return make_pair(result, parameter);
}
// Insert unary node between two nodes like S->D => S->N->D
// Before: | After:
// +-----+---+ +---+-----+ | +-----+---+ +---+-----+---+ +---+-----+
......@@ -458,31 +429,6 @@ Placement ngraph::get_colocated_function_placement(shared_ptr<Function> func)
return function_placement;
}
// The *_size suffix in the function name is temporary; it will be dropped
// once the backends move to the latest Hybrid backend.
//
// Assert that all nodes in the function are colocated and return that placement.
// Throws ngraph_error when the function has no ops, when any node still has
// placement 0, or when two nodes disagree on their placement.
size_t ngraph::get_colocated_function_placement_size(shared_ptr<Function> func)
{
    const auto ops = func->get_ops();
    // front() on an empty container is undefined behaviour, so reject it explicitly.
    if (ops.empty())
    {
        throw ngraph_error("Function has no ops, cannot determine its placement");
    }

    // No separate default-placement check is needed for the first node: it is
    // visited again by the loop below.
    const size_t function_placement = ops.front()->get_placement_size();
    for (const auto& op : ops)
    {
        const size_t node_placement = op->get_placement_size();
        if (node_placement == 0)
        {
            throw ngraph_error("Node should have a device placement, not Placement::DEFAULT");
        }
        if (function_placement != node_placement)
        {
            throw ngraph_error("Function contains nodes of two different placements");
        }
    }
    return function_placement;
}
std::shared_ptr<Node> ngraph::make_zero(const element::Type& element_type, const Shape& shape)
{
std::shared_ptr<Node> zero = op::Constant::create(element_type, Shape{}, {0.0});
......
......@@ -286,16 +286,11 @@ namespace ngraph
// Assert that the nodes in the function are colocated and return that placement
Placement get_colocated_function_placement(std::shared_ptr<Function> func);
size_t get_colocated_function_placement_size(std::shared_ptr<Function> func);
std::pair<std::shared_ptr<op::Result>, std::shared_ptr<op::Parameter>>
insert_result_parameter_split(const std::shared_ptr<Node>& src_node,
const std::shared_ptr<Node>& dst_node);
std::pair<std::shared_ptr<op::Result>, std::shared_ptr<op::Parameter>>
insert_result_parameter_split_size(const std::shared_ptr<Node>& src_node,
const std::shared_ptr<Node>& dst_node);
void insert_new_node_between(const std::shared_ptr<Node>& src_node,
const std::shared_ptr<Node>& dst_node,
const std::shared_ptr<Node>& new_node);
......
......@@ -150,14 +150,14 @@ void Node::set_placement(Placement placement)
m_placement = placement;
}
size_t Node::get_placement_size() const
size_t Node::get_placement_index() const
{
return m_placement_size;
return m_placement_index;
}
void Node::set_placement(size_t placement)
void Node::set_placement_index(size_t placement)
{
m_placement_size = placement;
m_placement_index = placement;
}
std::shared_ptr<Node> Node::get_argument(size_t index) const
......
......@@ -234,10 +234,10 @@ namespace ngraph
void set_placement(Placement placement);
/// Get device placement
size_t get_placement_size() const;
size_t get_placement_index() const;
/// Set device placement
void set_placement(size_t placement);
void set_placement_index(size_t placement);
/// Get input descriptor that is connected to src
descriptor::Input* get_input_from(const std::shared_ptr<Node>& src);
......@@ -251,6 +251,8 @@ namespace ngraph
virtual std::shared_ptr<Node> get_default_value() const { return nullptr; }
/// Use instance ids for comparison instead of memory addresses to improve determinism
bool operator<(const Node& other) const { return m_instance_id < other.m_instance_id; }
static const size_t placement_invalid = -1;
protected:
std::set<std::shared_ptr<Node>> m_control_dependencies;
void set_output_size(size_t n);
......@@ -264,7 +266,7 @@ namespace ngraph
std::deque<descriptor::Output> m_outputs;
std::unordered_map<Node*, autodiff::Adjoints> m_adjoint_map;
Placement m_placement = Placement::DEFAULT;
size_t m_placement_size = 0;
size_t m_placement_index = placement_invalid;
};
class NodeValidationError : public AssertionFailure
......
......@@ -36,7 +36,7 @@ void op::Result::validate_and_infer_types()
<< " outputs (1 expected).";
// always borrow the placement conf even the default one
set_placement(get_argument(0)->get_placement_size());
set_placement_index(get_argument(0)->get_placement_index());
set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
}
......
......@@ -28,31 +28,9 @@ pass::AssignPlacement::AssignPlacement(function<Placement(shared_ptr<Node>)> pla
{
}
// Construct the pass from an ordered list of backends. The list is stored by copy
// in m_placement_backends and consulted by run_on_node.
pass::AssignPlacement::AssignPlacement(vector<shared_ptr<runtime::Backend>> placement_backends)
: m_placement_backends(placement_backends)
{
}
// Assign a placement to `node`.
//
// Without a backend list, the placement comes from the policy callback.
// With a backend list, the node is given the 1-based position of the first
// backend that reports it as supported; std::runtime_error is thrown when no
// backend supports it. Always returns false.
bool pass::AssignPlacement::run_on_node(shared_ptr<Node> node)
{
    // Policy-driven mode: no backend list was supplied.
    if (m_placement_backends.empty())
    {
        node->set_placement(m_placement_policy(node));
        return false;
    }

    // Backend-driven mode: first supporting backend wins.
    for (size_t position = 0; position < m_placement_backends.size(); ++position)
    {
        auto candidate = m_placement_backends[position];
        if (candidate->is_supported(*node))
        {
            // Placements are 1-based here.
            const size_t placement = position + 1;
            node->set_placement(placement);
            return false;
        }
    }
    throw runtime_error("Node " + node->get_name() + " not supported by any backend");
}
......@@ -32,14 +32,10 @@ namespace ngraph
public:
// TODO: make policy a class
AssignPlacement(std::function<Placement(std::shared_ptr<Node>)> placement_policy);
AssignPlacement(
std::vector<std::shared_ptr<ngraph::runtime::Backend>> placement_backends);
private:
bool run_on_node(std::shared_ptr<Node> node) override;
std::vector<std::shared_ptr<ngraph::runtime::Backend>> m_placement_backends;
std::function<Placement(std::shared_ptr<Node>)> m_placement_policy;
};
}
......
......@@ -224,186 +224,3 @@ pair<vector<shared_ptr<Function>>, unordered_map<shared_ptr<op::Parameter>, shar
return make_pair(sub_functions, map_parameter_to_result);
}
// Pop and return one ready node, preferring the queue keyed by `placement`.
// If that queue is missing or empty, the front of the first non-empty queue
// (in map key order) is taken instead. Returns nullptr when every queue is empty.
static Node* take_independent_node_with_placement_priority_size(
    map<size_t, deque<Node*>>& independent_nodes_by_placement, size_t placement)
{
    // Preferred queue first.
    auto preferred = independent_nodes_by_placement.find(placement);
    if (preferred != independent_nodes_by_placement.end() && !preferred->second.empty())
    {
        Node* picked = preferred->second.front();
        preferred->second.pop_front();
        return picked;
    }

    // Fall back to any queue that still has a node.
    for (auto& entry : independent_nodes_by_placement)
    {
        deque<Node*>& ready = entry.second;
        if (!ready.empty())
        {
            Node* picked = ready.front();
            ready.pop_front();
            return picked;
        }
    }
    return nullptr;
}
// Group the ops of `f` into clusters of nodes with the same placement.
//
// The ops are first ordered topologically, greedily preferring a ready node
// whose placement matches the previously picked one. Consecutive nodes of that
// order with equal placement then form one cluster.
//
// Throws ngraph_error if the topological sort does not cover every op, if a
// node shows up in more than one cluster, or if the clusters do not cover
// exactly the function's ordered ops.
static vector<unordered_set<shared_ptr<Node>>>
    group_function_nodes_to_clusters_size(const shared_ptr<Function>& f)
{
    // Fetch the op list once instead of once per use.
    const auto ops = f->get_ops();

    // Topologically sort nodes by picking independent node with the same placement as the
    // previously picked node greedily
    map<size_t, deque<Node*>> independent_nodes_by_placement;
    unordered_map<Node*, size_t> node_dependency_count;
    unordered_map<ngraph::Node*, shared_ptr<ngraph::Node>> node_map;

    for (const shared_ptr<Node>& node : ops)
    {
        size_t dependency_count = node->get_arguments().size();
        node_map[node.get()] = node;
        node_dependency_count[node.get()] = dependency_count;
        if (dependency_count == 0)
        {
            independent_nodes_by_placement[node->get_placement_size()].push_back(node.get());
        }
    }

    list<shared_ptr<Node>> sorted_nodes;
    size_t previous_placement = 0; // Placement::DEFAULT
    while (Node* independent_node = take_independent_node_with_placement_priority_size(
               independent_nodes_by_placement, previous_placement))
    {
        previous_placement = independent_node->get_placement_size();
        sorted_nodes.push_back(node_map.at(independent_node));

        for (const auto& user : independent_node->get_users())
        {
            Node* user_node = user.get();
            // One lookup; a user becomes ready once all its dependencies were emitted.
            size_t& remaining = node_dependency_count.at(user_node);
            remaining -= 1;
            if (remaining == 0)
            {
                independent_nodes_by_placement[user_node->get_placement_size()].push_back(
                    user_node);
            }
        }
    }

    if (sorted_nodes.size() != ops.size())
    {
        throw ngraph_error("sorted_nodes.size()== " + to_string(sorted_nodes.size()) +
                           " != f->get_ops().size()== " + to_string(ops.size()) +
                           ". Internal error with topological sort.");
    }

    // Build clusters from the sorted_nodes
    previous_placement = 0; // Placement::DEFAULT;
    vector<unordered_set<shared_ptr<Node>>> clusters;
    for (const shared_ptr<Node>& node : sorted_nodes)
    {
        size_t node_placement = node->get_placement_size();
        // The first node must always open a cluster: its placement may equal the
        // initial value 0, and calling back() on an empty vector is undefined.
        if (clusters.empty() || node_placement != previous_placement)
        {
            clusters.emplace_back();
        }
        clusters.back().insert(node);
        previous_placement = node_placement;
    }

    // Sanity check for node duplication and full node coverage
    unordered_set<shared_ptr<Node>> cluster_nodes;
    for (const auto& cluster : clusters)
    {
        for (const auto& node : cluster)
        {
            // insert() reports whether the node was already present.
            if (!cluster_nodes.insert(node).second)
            {
                throw ngraph_error("Node " + node->get_name() + " is duplicated in clusters");
            }
        }
    }

    unordered_set<shared_ptr<Node>> f_nodes;
    for (const auto& node : f->get_ordered_ops())
    {
        f_nodes.insert(node);
    }
    if (cluster_nodes != f_nodes)
    {
        throw ngraph_error(
            "Cluster's nodes are not the same as function's nodes. cluster_nodes.size()=" +
            to_string(cluster_nodes.size()) + ", f_nodes.size()=" + to_string(f_nodes.size()));
    }

    return clusters;
}
// The *_size suffix in the function name is temporary; it will be dropped
// once the backends move to the latest Hybrid backend.
//
// Split `f` into sub-functions, one per cluster returned by
// group_function_nodes_to_clusters_size. Every edge that crosses a cluster
// boundary is cut by inserting a Result on the producer side and a Parameter on
// the consumer side.
//
// Returns the sub-functions together with a map from each inserted Parameter to
// the Result it was paired with.
pair<vector<shared_ptr<Function>>, unordered_map<shared_ptr<op::Parameter>, shared_ptr<op::Result>>>
    ngraph::split_function_by_placement_size(const shared_ptr<Function>& f)
{
    // Split functions to clusters of nodes that can be computed together
    vector<unordered_set<shared_ptr<Node>>> clusters = group_function_nodes_to_clusters_size(f);

    // Map from (intermediate) parameter to result node, for guiding data copy among devices
    unordered_map<shared_ptr<op::Parameter>, shared_ptr<op::Result>> map_parameter_to_result;

    // Split neighboring nodes if they belong to different clusters
    // TODO: optimization to group multiple result node from the same source,
    // and to group the parameter node in the same cluster with the same result node source
    //
    // The stored pointers refer to elements of `clusters`; the vector is not
    // resized after this point, so they stay valid.
    unordered_map<shared_ptr<Node>, unordered_set<shared_ptr<Node>>*> map_node_to_cluster;
    for (auto& cluster : clusters)
    {
        for (const auto& node : cluster)
        {
            map_node_to_cluster[node] = &cluster;
        }
    }

    // Nodes are deliberately held by value in these two loops: the graph is
    // rewired inside the body.
    for (auto dst_node : f->get_ordered_ops())
    {
        for (auto src_node : dst_node->get_arguments())
        {
            auto src_cluster = map_node_to_cluster.at(src_node);
            auto dst_cluster = map_node_to_cluster.at(dst_node);
            if (src_cluster != dst_cluster)
            {
                // Split src_node and dst_node
                pair<shared_ptr<op::Result>, shared_ptr<op::Parameter>> res_par_pair =
                    insert_result_parameter_split_size(src_node, dst_node);
                shared_ptr<op::Result> res_node = res_par_pair.first;
                shared_ptr<op::Parameter> par_node = res_par_pair.second;
                map_parameter_to_result[par_node] = res_node;

                // Insert newly created nodes into clusters
                src_cluster->insert(res_node);
                dst_cluster->insert(par_node);
            }
        }
    }

    // Create functions from clusters
    vector<shared_ptr<Function>> sub_functions;
    sub_functions.reserve(clusters.size());
    // Iterate by reference: copying a whole cluster per iteration is wasted work.
    for (const auto& cluster : clusters)
    {
        ParameterVector par_vector;
        ResultVector res_vector;
        for (const auto& node : cluster)
        {
            if (auto res_node = dynamic_pointer_cast<op::Result>(node))
            {
                res_vector.push_back(res_node);
            }
            else if (auto par_node = dynamic_pointer_cast<op::Parameter>(node))
            {
                par_vector.push_back(par_node);
            }
        }
        sub_functions.push_back(make_shared<Function>(res_vector, par_vector));
    }

    // Move the locals into the returned pair instead of copying them.
    return make_pair(std::move(sub_functions), std::move(map_parameter_to_result));
}
......@@ -51,9 +51,4 @@ namespace ngraph
std::pair<std::vector<std::shared_ptr<Function>>,
std::unordered_map<std::shared_ptr<op::Parameter>, std::shared_ptr<op::Result>>>
split_function_by_placement(const std::shared_ptr<Function>& f);
// Split function to function(s) with unique placement
std::pair<std::vector<std::shared_ptr<Function>>,
std::unordered_map<std::shared_ptr<op::Parameter>, std::shared_ptr<op::Result>>>
split_function_by_placement_size(const std::shared_ptr<Function>& f);
}
......@@ -36,4 +36,8 @@ if (NGRAPH_NOP_ENABLE)
add_subdirectory(nop)
endif()
if (NGRAPH_GPUH_ENABLE)
add_subdirectory(gpuh)
endif()
add_subdirectory(plaidml)
......@@ -37,15 +37,7 @@ extern "C" const char* get_ngraph_version_string()
// C-linkage factory for the GPU backend. configuration_string is not used.
// When NGRAPH_HYBRID_ENABLE is defined, the GPU backend is wrapped in a
// HybridBackend whose backend list holds the single entry "GPU"; otherwise a
// plain GPU_Backend is returned.
// NOTE(review): the result is a raw pointer from `new` — presumably released by
// the delete_backend entry point that follows; confirm.
extern "C" runtime::Backend* new_backend(const char* configuration_string)
{
#ifdef NGRAPH_HYBRID_ENABLE
vector<pair<string, shared_ptr<runtime::Backend>>> backend_list{
{"GPU", make_shared<runtime::gpu::GPU_Backend>()}};
auto wrapper = new runtime::hybrid::HybridBackend(backend_list);
return wrapper;
#else
return new runtime::gpu::GPU_Backend();
#endif
}
extern "C" void delete_backend(runtime::Backend* backend)
......
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
# Build and install the Hybrid GPU (GPUH) backend as a shared library.
# Everything below is skipped unless NGRAPH_GPUH_ENABLE is set.
if (NGRAPH_GPUH_ENABLE)
add_library(gpuh_backend SHARED gpuh_backend.cpp)
# Stamp the library with the nGraph version / API version when versioning is on.
if(NGRAPH_LIB_VERSIONING_ENABLE)
set_target_properties(gpuh_backend PROPERTIES
VERSION ${NGRAPH_VERSION}
SOVERSION ${NGRAPH_API_VERSION})
endif()
# NOTE(review): only `ngraph` is linked, while gpuh_backend.cpp also uses the
# hybrid and interpreter backend classes — confirm those symbols resolve.
target_link_libraries(gpuh_backend PUBLIC ngraph)
# Emit the library into the common nGraph build directory.
set_target_properties(gpuh_backend PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${NGRAPH_BUILD_DIR})
install(TARGETS gpuh_backend
LIBRARY DESTINATION "${NGRAPH_INSTALL_LIB}"
ARCHIVE DESTINATION "${NGRAPH_INSTALL_LIB}"
)
endif()
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/gpuh/gpuh_backend.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/pass/assign_placement.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/runtime/interpreter/int_backend.hpp"
#include "ngraph/runtime/tensor.hpp"
using namespace ngraph;
using namespace std;
// C-linkage entry point that returns the NGRAPH_VERSION string this library
// was compiled with.
extern "C" const char* get_ngraph_version_string()
{
return NGRAPH_VERSION;
}
// C-linkage factory for the Hybrid GPU backend. Returns a newly allocated
// GPUHBackend; configuration_string is accepted but not used.
// NOTE(review): the caller receives a raw owning pointer — presumably it is
// released through a matching delete entry point; confirm.
extern "C" runtime::Backend* new_backend(const char* configuration_string)
{
return new runtime::gpuh::GPUHBackend();
}
// Initializes the HybridBackend base with a one-element backend list: an
// interpreter INTBackend registered under the name "INTERPRETER".
// NOTE(review): no GPU backend is part of the list — presumably a placeholder
// while this backend is only a skeleton; confirm.
runtime::gpuh::GPUHBackend::GPUHBackend()
: HybridBackend({{"INTERPRETER", make_shared<ngraph::runtime::interpreter::INTBackend>()}})
{
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "ngraph/runtime/hybrid/hybrid_backend.hpp"
// Forward declaration so the class can be defined below with its fully
// qualified name.
namespace ngraph
{
namespace runtime
{
namespace gpuh
{
class GPUHBackend;
}
}
}
/// Hybrid GPU ("GPUH") backend. It is a HybridBackend and adds nothing beyond
/// its own default constructor.
class ngraph::runtime::gpuh::GPUHBackend : public ngraph::runtime::hybrid::HybridBackend
{
public:
/// Default constructor; defined in gpuh_backend.cpp.
GPUHBackend();
};
......@@ -15,7 +15,10 @@
# ******************************************************************************
if (NGRAPH_HYBRID_ENABLE)
add_library(hybrid_backend SHARED hybrid_backend.cpp)
add_library(hybrid_backend SHARED
hybrid_backend.cpp
hybrid_util.cpp
pass/assign_placement.cpp)
if(NGRAPH_LIB_VERSIONING_ENABLE)
set_target_properties(hybrid_backend PROPERTIES
VERSION ${NGRAPH_VERSION}
......
......@@ -16,8 +16,9 @@
#include "ngraph/runtime/hybrid/hybrid_backend.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/pass/assign_placement.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/runtime/hybrid/hybrid_util.hpp"
#include "ngraph/runtime/hybrid/pass/assign_placement.hpp"
#include "ngraph/runtime/tensor.hpp"
using namespace ngraph;
......@@ -80,8 +81,8 @@ runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> fun
instance.m_function = clone_function(*func);
// Run placement pass
pass::Manager pass_manager;
pass_manager.register_pass<pass::AssignPlacement>(backend_list);
ngraph::pass::Manager pass_manager;
pass_manager.register_pass<runtime::hybrid::pass::AssignPlacement>(backend_list);
pass_manager.run_passes(instance.m_function);
// Split function to sub_functions
......@@ -93,9 +94,15 @@ runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> fun
for (shared_ptr<Function>& sub_function : instance.m_sub_functions)
{
size_t placement = get_colocated_function_placement_size(sub_function);
auto backend =
m_backend_list[(placement - 1)]; // (placement-1) as 0 is default placement
auto backend = m_backend_list[placement];
backend.second->compile(sub_function);
// Compile will replace nodes so we need to make one more pass through all
// ops to reset placement
for (auto op : sub_function->get_ops())
{
op->set_placement_index(placement);
}
}
}
......@@ -132,8 +139,7 @@ bool runtime::hybrid::HybridBackend::call(shared_ptr<Function> func,
{
// Init backend
size_t placement = get_colocated_function_placement_size(sub_function);
// (placement-1) as 0 is default placement
auto backend = m_backend_list[(placement - 1)].second;
auto backend = m_backend_list[placement].second;
// Prepare parameter TensorViews
vector<shared_ptr<runtime::Tensor>> parameter_tvs;
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/hybrid/hybrid_util.hpp"
using namespace ngraph;
using namespace std;
// Pop and return one ready node, preferring the queue keyed by `placement`.
// If that queue is missing or empty, the front of the first non-empty queue
// (in map key order) is taken instead. Returns nullptr when every queue is empty.
static Node* take_independent_node_with_placement_priority_size(
    map<size_t, deque<Node*>>& independent_nodes_by_placement, size_t placement)
{
    // Preferred queue first.
    auto preferred = independent_nodes_by_placement.find(placement);
    if (preferred != independent_nodes_by_placement.end() && !preferred->second.empty())
    {
        Node* picked = preferred->second.front();
        preferred->second.pop_front();
        return picked;
    }

    // Fall back to any queue that still has a node.
    for (auto& entry : independent_nodes_by_placement)
    {
        deque<Node*>& ready = entry.second;
        if (!ready.empty())
        {
            Node* picked = ready.front();
            ready.pop_front();
            return picked;
        }
    }
    return nullptr;
}
// Group the ops of `f` into clusters of nodes with the same placement index.
//
// The ops are first ordered topologically, greedily preferring a ready node
// whose placement index matches the previously picked one. Consecutive nodes of
// that order with equal placement index then form one cluster.
//
// Throws ngraph_error if the topological sort does not cover every op, if a
// node shows up in more than one cluster, or if the clusters do not cover
// exactly the function's ordered ops.
static vector<unordered_set<shared_ptr<Node>>>
    group_function_nodes_to_clusters_size(const shared_ptr<Function>& f)
{
    // Fetch the op list once instead of once per use.
    const auto ops = f->get_ops();

    // Topologically sort nodes by picking independent node with the same placement as the
    // previously picked node greedily
    map<size_t, deque<Node*>> independent_nodes_by_placement;
    unordered_map<Node*, size_t> node_dependency_count;
    unordered_map<ngraph::Node*, shared_ptr<ngraph::Node>> node_map;

    for (const shared_ptr<Node>& node : ops)
    {
        size_t dependency_count = node->get_arguments().size();
        node_map[node.get()] = node;
        node_dependency_count[node.get()] = dependency_count;
        if (dependency_count == 0)
        {
            independent_nodes_by_placement[node->get_placement_index()].push_back(node.get());
        }
    }

    list<shared_ptr<Node>> sorted_nodes;
    size_t previous_placement = 0;
    while (Node* independent_node = take_independent_node_with_placement_priority_size(
               independent_nodes_by_placement, previous_placement))
    {
        previous_placement = independent_node->get_placement_index();
        sorted_nodes.push_back(node_map.at(independent_node));

        for (const auto& user : independent_node->get_users())
        {
            Node* user_node = user.get();
            // One lookup; a user becomes ready once all its dependencies were emitted.
            size_t& remaining = node_dependency_count.at(user_node);
            remaining -= 1;
            if (remaining == 0)
            {
                independent_nodes_by_placement[user_node->get_placement_index()].push_back(
                    user_node);
            }
        }
    }

    if (sorted_nodes.size() != ops.size())
    {
        throw ngraph_error("sorted_nodes.size()== " + to_string(sorted_nodes.size()) +
                           " != f->get_ops().size()== " + to_string(ops.size()) +
                           ". Internal error with topological sort.");
    }

    // Build clusters from the sorted_nodes
    previous_placement = Node::placement_invalid;
    vector<unordered_set<shared_ptr<Node>>> clusters;
    for (const shared_ptr<Node>& node : sorted_nodes)
    {
        size_t node_placement = node->get_placement_index();
        // The first node must always open a cluster: its index may equal
        // Node::placement_invalid, and calling back() on an empty vector is
        // undefined.
        if (clusters.empty() || node_placement != previous_placement)
        {
            clusters.emplace_back();
        }
        clusters.back().insert(node);
        previous_placement = node_placement;
    }

    // Sanity check for node duplication and full node coverage
    unordered_set<shared_ptr<Node>> cluster_nodes;
    for (const auto& cluster : clusters)
    {
        for (const auto& node : cluster)
        {
            // insert() reports whether the node was already present.
            if (!cluster_nodes.insert(node).second)
            {
                throw ngraph_error("Node " + node->get_name() + " is duplicated in clusters");
            }
        }
    }

    unordered_set<shared_ptr<Node>> f_nodes;
    for (const auto& node : f->get_ordered_ops())
    {
        f_nodes.insert(node);
    }
    if (cluster_nodes != f_nodes)
    {
        throw ngraph_error(
            "Cluster's nodes are not the same as function's nodes. cluster_nodes.size()=" +
            to_string(cluster_nodes.size()) + ", f_nodes.size()=" + to_string(f_nodes.size()));
    }

    return clusters;
}
// Insert result and parameter node between src_node and dst_node by splitting the graph
//
// Before: | After:
// (Device:0) (Device:1) | (Device:0) (Device:0) (Device:1) (Device:1)
// +-----+---+ +---+-----+ | +-----+---+ +---+-----+ +-----+---+ +---+-----+
// | | | | | | | | | | | | | | | | | | |
// | | o +--[0]--> i | | | | | o +--[4]--> i | | | | o +--[8]--> i | |
// | | <--[1]--+ | | | | | <--[5]--+ | | | | <--[9]--+ | |
// | src +---+ +---+ dst | | | src +---+ +---+ res | | par +---+ +---+ dst |
// | | | | | | | | | | | | |
// | +------[2]------> | | | +------[6]------> | | +------[10]-----> |
// | <------[3]------+ | | | <------[7]------+ | | <------[11]-----+ |
// +-----+ +-----+ | +-----+ +-----+ +-----+ +-----+
// The *_size suffix in the function name is temporary; it will be dropped
// once the backends move to the latest Hybrid backend.
//
// Cuts the edge src_node -> dst_node. dst_node is rewired to read from a newly
// created Parameter, and a newly created Result consumes src_node.
// Returns the pair (Result, Parameter).
// Throws ngraph_error when src_node does not have exactly one output.
// NOTE(review): unlike the other helpers in this file this function is not
// declared `static` — confirm whether external linkage is intended.
pair<shared_ptr<op::Result>, shared_ptr<op::Parameter>>
    insert_result_parameter_split_size(const shared_ptr<Node>& src_node,
                                       const shared_ptr<Node>& dst_node)
{
    const bool has_single_output = (src_node->get_output_size() == 1);
    if (!has_single_output)
    {
        throw ngraph_error("Multiple output per op not supported in graph partition yet.");
    }

    // The parameter mirrors element type and shape of src's output 0 and takes
    // the placement index of the consumer.
    auto parameter = make_shared<op::Parameter>(src_node->get_output_element_type(0),
                                                src_node->get_output_shape(0));
    parameter->set_placement_index(dst_node->get_placement_index());

    // Detach the consumer's input from the producer's output, then hook that
    // input up to output 0 of the new parameter.
    descriptor::Input* consumer_input = dst_node->get_input_from(src_node);
    descriptor::Output* producer_output = src_node->get_output_to(dst_node);
    producer_output->remove_input(consumer_input);
    consumer_input->replace_output(parameter, 0);

    // The result node consumes the producer and takes the producer's placement index.
    auto result = make_shared<op::Result>(src_node);
    result->set_placement_index(src_node->get_placement_index());

    return make_pair(result, parameter);
}
// The *_size suffix in the function name is temporary; it will be dropped
// once the backends move to the latest Hybrid backend.
//
// Split `f` into sub-functions, one per cluster returned by
// group_function_nodes_to_clusters_size. Every edge that crosses a cluster
// boundary is cut by inserting a Result on the producer side and a Parameter on
// the consumer side.
//
// Returns the sub-functions together with a map from each inserted Parameter to
// the Result it was paired with.
pair<vector<shared_ptr<Function>>, unordered_map<shared_ptr<op::Parameter>, shared_ptr<op::Result>>>
    runtime::hybrid::split_function_by_placement_size(const shared_ptr<Function>& f)
{
    // Split functions to clusters of nodes that can be computed together
    vector<unordered_set<shared_ptr<Node>>> clusters = group_function_nodes_to_clusters_size(f);

    // Map from (intermediate) parameter to result node, for guiding data copy among devices
    unordered_map<shared_ptr<op::Parameter>, shared_ptr<op::Result>> map_parameter_to_result;

    // Split neighboring nodes if they belong to different clusters
    // TODO: optimization to group multiple result node from the same source,
    // and to group the parameter node in the same cluster with the same result node source
    //
    // The stored pointers refer to elements of `clusters`; the vector is not
    // resized after this point, so they stay valid.
    unordered_map<shared_ptr<Node>, unordered_set<shared_ptr<Node>>*> map_node_to_cluster;
    for (auto& cluster : clusters)
    {
        for (const auto& node : cluster)
        {
            map_node_to_cluster[node] = &cluster;
        }
    }

    // Nodes are deliberately held by value in these two loops: the graph is
    // rewired inside the body.
    for (auto dst_node : f->get_ordered_ops())
    {
        for (auto src_node : dst_node->get_arguments())
        {
            auto src_cluster = map_node_to_cluster.at(src_node);
            auto dst_cluster = map_node_to_cluster.at(dst_node);
            if (src_cluster != dst_cluster)
            {
                // Split src_node and dst_node
                pair<shared_ptr<op::Result>, shared_ptr<op::Parameter>> res_par_pair =
                    insert_result_parameter_split_size(src_node, dst_node);
                shared_ptr<op::Result> res_node = res_par_pair.first;
                shared_ptr<op::Parameter> par_node = res_par_pair.second;
                map_parameter_to_result[par_node] = res_node;

                // Insert newly created nodes into clusters
                src_cluster->insert(res_node);
                dst_cluster->insert(par_node);
            }
        }
    }

    // Create functions from clusters
    vector<shared_ptr<Function>> sub_functions;
    sub_functions.reserve(clusters.size());
    // Iterate by reference: copying a whole cluster per iteration is wasted work.
    for (const auto& cluster : clusters)
    {
        ParameterVector par_vector;
        ResultVector res_vector;
        for (const auto& node : cluster)
        {
            if (auto res_node = dynamic_pointer_cast<op::Result>(node))
            {
                res_vector.push_back(res_node);
            }
            else if (auto par_node = dynamic_pointer_cast<op::Parameter>(node))
            {
                par_vector.push_back(par_node);
            }
        }
        sub_functions.push_back(make_shared<Function>(res_vector, par_vector));
    }

    // Move the locals into the returned pair instead of copying them.
    return make_pair(std::move(sub_functions), std::move(map_parameter_to_result));
}
// The *_size suffix in the function name is temporary; it will be dropped
// once the backends move to the latest Hybrid backend.
//
// Assert that all nodes in the function are colocated and return that
// placement index.
// Throws ngraph_error when the function has no ops, when any node's placement
// index is Node::placement_invalid, or when two nodes disagree on their index.
size_t runtime::hybrid::get_colocated_function_placement_size(shared_ptr<Function> func)
{
    const auto ops = func->get_ops();
    // front() on an empty container is undefined behaviour, so reject it explicitly.
    if (ops.empty())
    {
        throw ngraph_error("Function has no ops, cannot determine its placement");
    }

    // No separate validity check is needed for the first node: it is visited
    // again by the loop below.
    const size_t function_placement = ops.front()->get_placement_index();
    for (const auto& op : ops)
    {
        const size_t node_placement = op->get_placement_index();
        if (node_placement == Node::placement_invalid)
        {
            throw ngraph_error("Node should have a device placement");
        }
        if (function_placement != node_placement)
        {
            throw ngraph_error("Function contains nodes of two different placements");
        }
    }
    return function_placement;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <memory>
#include <unordered_map>
#include <vector>
#include "ngraph/function.hpp"
#include "ngraph/op/parameter.hpp"
#include "ngraph/op/result.hpp"
namespace ngraph
{
namespace runtime
{
namespace hybrid
{
// Split a function into sub-function(s), each with a unique placement.
// Returns the sub-functions plus a map from every Parameter inserted at a
// split point to the Result node it was paired with.
std::pair<
std::vector<std::shared_ptr<Function>>,
std::unordered_map<std::shared_ptr<op::Parameter>, std::shared_ptr<op::Result>>>
split_function_by_placement_size(const std::shared_ptr<Function>& f);
// Assert that all nodes in the function are colocated and return that
// placement index. Throws ngraph_error if they are not.
size_t get_colocated_function_placement_size(std::shared_ptr<Function> func);
}
}
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <utility>

#include "ngraph/runtime/hybrid/pass/assign_placement.hpp"
#include "ngraph/log.hpp"
#include "ngraph/node.hpp"
#include "ngraph/placement.hpp"
#include "ngraph/runtime/backend.hpp"
using namespace ngraph;
using namespace std;
// Construct the pass from the ordered list of candidate backends.
//
// placement_backends: backends tried for each node, in order; run_on_node uses the
//     position of a backend in this list as the placement index it assigns.
runtime::hybrid::pass::AssignPlacement::AssignPlacement(
    vector<shared_ptr<runtime::Backend>> placement_backends)
    // The parameter is taken by value, so it is already a private copy: move it into
    // the member rather than copying the vector (and every shared_ptr in it) again.
    : m_placement_backends(std::move(placement_backends))
{
}
// Place `node` on the first backend that supports it.
//
// Backends are tried in the order they were given at construction; the index of the
// first one whose is_supported() accepts the node becomes the node's placement index.
// Returns false once a placement index has been assigned.
// Throws runtime_error if none of the backends supports the node.
bool runtime::hybrid::pass::AssignPlacement::run_on_node(shared_ptr<Node> node)
{
    const size_t backend_count = m_placement_backends.size();
    for (size_t index = 0; index < backend_count; ++index)
    {
        if (!m_placement_backends[index]->is_supported(*node))
        {
            // this backend cannot run the node; try the next one
            continue;
        }
        node->set_placement_index(index);
        return false;
    }
    throw runtime_error("Node " + node->get_name() + " not supported by any backend");
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <exception>
#include <functional>
#include <sstream>
#include "ngraph/pass/pass.hpp"
// Forward declaration of ngraph::runtime::hybrid::pass::AssignPlacement, so that the
// class can be defined below through its fully qualified name rather than inside the
// nested namespace blocks.
namespace ngraph
{
namespace runtime
{
namespace hybrid
{
namespace pass
{
class AssignPlacement;
}
}
}
}
// Node pass that assigns a placement index to each node it is run on.
// The index assigned is the position, within the backend list supplied at
// construction, of the first backend whose is_supported() accepts the node.
class ngraph::runtime::hybrid::pass::AssignPlacement : public ngraph::pass::NodePass
{
public:
// TODO: make policy a class
// placement_backends: candidate backends; earlier entries are tried first.
AssignPlacement(std::vector<std::shared_ptr<ngraph::runtime::Backend>> placement_backends);
private:
// Sets the node's placement index to the index of the first backend that supports it
// and returns false; throws std::runtime_error if no backend supports the node.
bool run_on_node(std::shared_ptr<Node> node) override;
// Candidate backends in the order they are tried; a node's placement index is an
// index into this vector.
std::vector<std::shared_ptr<ngraph::runtime::Backend>> m_placement_backends;
};
......@@ -15,7 +15,10 @@
//*****************************************************************************
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/assertion.hpp"
#include "ngraph/descriptor/layout/tensor_layout.hpp"
#include "ngraph/log.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/type/element_type.hpp"
using namespace ngraph;
......@@ -70,3 +73,21 @@ void runtime::Tensor::set_stale(bool val)
{
m_stale = val;
}
// Copy the contents of `source` into this tensor.
//
// Both tensors must hold the same number of elements of the same element type;
// otherwise invalid_argument is thrown and nothing is copied.
void runtime::Tensor::copy_from(const ngraph::runtime::Tensor& source)
{
    // Guard clauses: element count is checked first, then element type.
    if (get_element_count() != source.get_element_count())
    {
        throw invalid_argument("runtime::Tensor::copy_from element count must match");
    }
    if (get_element_type() != source.get_element_type())
    {
        throw invalid_argument("runtime::Tensor::copy_from element types must match");
    }

    // This is potentially inefficient but is supplied only to get things going.
    // It is to be replaced with more optimal implementations in later PRs.
    // The data is staged through a temporary 64-byte-aligned buffer: read everything
    // out of the source, then write it into this tensor.
    const auto byte_count = get_size_in_bytes();
    AlignedBuffer staging{byte_count, 64};
    auto staging_ptr = staging.get_ptr();
    source.read(staging_ptr, 0, byte_count);
    write(staging_ptr, 0, byte_count);
}
......@@ -100,6 +100,10 @@ namespace ngraph
/// \param n Number of bytes to read, must be integral number of elements.
virtual void read(void* p, size_t offset, size_t n) const = 0;
/// \brief copy bytes directly from source to this tensor
///
/// The default implementation reads the whole source tensor into a temporary
/// AlignedBuffer and then writes that buffer into this tensor.
/// \param source The source tensor; its element count and element type must match
///     those of this tensor
/// \throws invalid_argument (default implementation) if the element counts or the
///     element types of the two tensors differ
virtual void copy_from(const ngraph::runtime::Tensor& source);
protected:
std::shared_ptr<ngraph::descriptor::Tensor> m_descriptor;
bool m_stale;
......
......@@ -76,12 +76,6 @@ if (NGRAPH_INTERPRETER_ENABLE)
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} INTERPRETER)
endif()
if (NGRAPH_HYBRID_ENABLE)
list(APPEND SRC
hybrid_backend.cpp
hybrid_utils.cpp)
endif()
if (NGRAPH_CPU_ENABLE)
list(APPEND SRC core_fusion.cpp builder_quantization.cpp)
list(APPEND SRC backend_performance.cpp cpu_fusion.cpp cpu_test.cpp cpu_reshape_sinking.cpp cpu_debugger.cpp)
......@@ -100,8 +94,8 @@ if (NGRAPH_INTELGPU_ENABLE)
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} INTELGPU)
endif()
if (NGRAPH_HYBRID_ENABLE)
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} HYBRID)
if (NGRAPH_GPUH_ENABLE)
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} GPUH)
endif()
if (NGRAPH_PLAIDML_ENABLE)
......@@ -232,6 +226,10 @@ if (NGRAPH_NOP_ENABLE)
target_link_libraries(unit-test PRIVATE nop_backend)
endif()
if (NGRAPH_GPUH_ENABLE)
target_link_libraries(unit-test PRIVATE gpuh_backend)
endif()
if (NGRAPH_ONNXIFI_ENABLE)
target_include_directories(unit-test SYSTEM PUBLIC ${ONNX_INCLUDE_DIR})
target_link_libraries(unit-test PRIVATE onnxifi-ngraph)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment