Unverified Commit 90503652 authored by Robert Kimball, committed by GitHub

Hybrid GPU Backend (#2240)

* Add GPUH hybrid backend

* update manifests

* update node operator<<

* fix GOE

* remove debug

* remove debug

* more cleanup

* add parent support to cpu and intel gpu backend tensors

* cleanup

* fix odd failure when printing node during construction

* fix node output

* address review comments

* style
parent 42f16035
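For orientation, a minimal sketch of how the pieces added in this commit fit together: the GPUH backend is a HybridBackend built from an ordered list of concrete backends, and tensors now record the backend that created them. Only names that appear in this diff are used below; headers are omitted, and reading the backend list as a placement priority order is an assumption.

// Sketch only -- not part of the diff; composition of the hybrid backend added here.
auto gpu = std::make_shared<ngraph::runtime::gpu::GPU_Backend>();
auto interpreter = std::make_shared<ngraph::runtime::interpreter::INTBackend>();
// Assumed: earlier backends in the list take placement priority over later ones.
ngraph::runtime::hybrid::HybridBackend hybrid({gpu, interpreter});
// Tensors now carry a pointer to the backend that created them.
auto t = gpu->create_tensor(ngraph::element::f32, ngraph::Shape{2, 3});
const ngraph::runtime::Backend* owner = t->get_parent(); // == gpu.get()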
......@@ -75,7 +75,6 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
include(var_functions)
set(NGRAPH_HYBRID_ENABLE TRUE)
option(NGRAPH_UNIT_TEST_ENABLE "Control the building of unit tests" TRUE)
option(NGRAPH_TOOLS_ENABLE "Control the building of tools" TRUE)
option(NGRAPH_CPU_ENABLE "Control the building of the CPU backend" TRUE)
......@@ -92,6 +91,10 @@ option(NGRAPH_CODE_COVERAGE_ENABLE "Enable code coverage data collection" FALSE)
option(NGRAPH_LIB_VERSIONING_ENABLE "Enable shared library versioning" FALSE)
option(NGRAPH_PYTHON_BUILD_ENABLE "Enable build nGraph python package wheel" FALSE)
if (NGRAPH_GPUH_ENABLE)
set(NGRAPH_GPU_ENABLE TRUE)
endif()
message(STATUS "NGRAPH_UNIT_TEST_ENABLE: ${NGRAPH_UNIT_TEST_ENABLE}")
message(STATUS "NGRAPH_TOOLS_ENABLE: ${NGRAPH_TOOLS_ENABLE}")
message(STATUS "NGRAPH_CPU_ENABLE: ${NGRAPH_CPU_ENABLE}")
......@@ -108,10 +111,6 @@ message(STATUS "NGRAPH_CODE_COVERAGE_ENABLE: ${NGRAPH_CODE_COVERAGE_ENABLE}")
message(STATUS "NGRAPH_LIB_VERSIONING_ENABLE: ${NGRAPH_LIB_VERSIONING_ENABLE}")
message(STATUS "NGRAPH_PYTHON_BUILD_ENABLE: ${NGRAPH_PYTHON_BUILD_ENABLE}")
if (NGRAPH_HYBRID_ENABLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNGRAPH_HYBRID_ENABLE")
endif()
if (NGRAPH_ONNX_IMPORT_ENABLE)
option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system provided Protobuf shared object" FALSE)
option(NGRAPH_ONNXIFI_ENABLE "Enable ONNX Interface for Framework Integration" TRUE)
......
......@@ -238,7 +238,14 @@ std::ostream& Node::write_long_description(std::ostream& out) const
{
out << sep << NodeDescription(*arg, true) << ": "
<< pretty_element_type(arg->get_output_element_type(0))
<< arg->get_output_partial_shape(0) << "";
<< arg->get_output_partial_shape(0);
sep = ", ";
}
out << ") -> (";
sep = "";
for (const auto& o : get_outputs())
{
out << sep << pretty_element_type(o.get_element_type()) << o.get_partial_shape();
sep = ", ";
}
out << ")";
......
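With the extra loop above, a node's long description now lists its outputs as well as its inputs, roughly (hypothetical node names and shapes; exact formatting may differ): Add_5(Parameter_1: f32{2,3}, Parameter_2: f32{2,3}) -> (f32{2,3})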
......@@ -15,10 +15,7 @@
# ******************************************************************************
add_subdirectory(interpreter)
if (NGRAPH_HYBRID_ENABLE)
add_subdirectory(hybrid)
endif()
add_subdirectory(hybrid)
if (NGRAPH_CPU_ENABLE)
add_subdirectory(cpu)
......
......@@ -58,13 +58,13 @@ shared_ptr<runtime::cpu::CPU_CallFrame> runtime::cpu::CPU_Backend::make_call_fra
shared_ptr<runtime::Tensor>
runtime::cpu::CPU_Backend::create_tensor(const element::Type& element_type, const Shape& shape)
{
return make_shared<runtime::cpu::CPUTensorView>(element_type, shape);
return make_shared<runtime::cpu::CPUTensorView>(element_type, shape, this);
}
shared_ptr<runtime::Tensor> runtime::cpu::CPU_Backend::create_tensor(
const element::Type& element_type, const Shape& shape, void* memory_pointer)
{
return make_shared<runtime::cpu::CPUTensorView>(element_type, shape, memory_pointer);
return make_shared<runtime::cpu::CPUTensorView>(element_type, shape, memory_pointer, this);
}
runtime::Handle runtime::cpu::CPU_Backend::compile(shared_ptr<Function> func)
......
......@@ -35,8 +35,9 @@ using namespace std;
runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const string& name)
: runtime::Tensor(std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, name))
const runtime::Backend* parent)
: runtime::Tensor(std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, "external"),
parent)
, buffer(nullptr)
, aligned_buffer(nullptr)
{
......@@ -77,8 +78,8 @@ runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_
runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
const string& name)
: CPUTensorView(element_type, shape, nullptr, name)
const runtime::Backend* parent)
: CPUTensorView(element_type, shape, nullptr, parent)
{
}
......
......@@ -35,11 +35,11 @@ namespace ngraph
public:
CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
const std::string& name = "external");
const runtime::Backend* parent);
CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const std::string& name = "external");
const runtime::Backend* parent);
virtual ~CPUTensorView() override;
char* get_data_ptr();
......
......@@ -159,11 +159,6 @@ if (NGRAPH_GPU_ENABLE)
${CUDA_LIBRARIES}
${CUDA_CUBLAS_LIBRARIES}
${CUDNN_LIBRARIES})
if (NGRAPH_HYBRID_ENABLE)
target_link_libraries(gpu_backend
PRIVATE
hybrid_backend)
endif()
set_target_properties(gpu_backend PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${NGRAPH_BUILD_DIR})
......
......@@ -20,6 +20,7 @@
#include <cudnn.h>
#include "ngraph/graph_util.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/runtime/gpu/gpu_primitive_emitter.hpp"
......@@ -107,13 +108,13 @@ runtime::gpu::GPU_Backend::BackendContext::~BackendContext()
shared_ptr<runtime::Tensor>
runtime::gpu::GPU_Backend::create_tensor(const element::Type& element_type, const Shape& shape)
{
return make_shared<runtime::gpu::GPUTensor>(element_type, shape);
return make_shared<runtime::gpu::GPUTensor>(element_type, shape, this);
}
shared_ptr<runtime::Tensor> runtime::gpu::GPU_Backend::create_tensor(
const element::Type& element_type, const Shape& shape, void* memory_pointer)
{
return make_shared<runtime::gpu::GPUTensor>(element_type, shape, memory_pointer);
return make_shared<runtime::gpu::GPUTensor>(element_type, shape, memory_pointer, this);
}
runtime::Handle runtime::gpu::GPU_Backend::compile(shared_ptr<Function> func)
......@@ -222,33 +223,53 @@ vector<runtime::PerformanceCounter>
return rc;
}
bool runtime::gpu::GPU_Backend::is_supported(const Node& node) const
bool runtime::gpu::GPU_Backend::is_supported(const Node& op) const
{
bool rc = true;
set<string> unsupported_ops = {"Quantize",
"Dequantize",
"ShapeOf",
"All",
"Any",
"AllReduce",
"SelectAndScatter",
"StopGradient",
"EmbeddingLookup",
"GenerateMask"};
// get op type
element::Type type;
if (node.description() == "Select")
{
type = node.get_input_element_type(1);
}
else if (node.description() == "Constant")
set<string> float_only = {"MaxPoolBackprop", "AvgPoolBackprop", "MaxPool", "Dot"};
if (unsupported_ops.find(op.description()) != unsupported_ops.end())
{
type = node.get_outputs().at(0).get_element_type();
return false;
}
else if (node.description() == "Parameter")
if (float_only.find(op.description()) != float_only.end())
{
type = node.get_outputs().at(0).get_element_type();
if (op.get_output_element_type(0) != element::f32 &&
op.get_output_element_type(0) != element::f64)
{
return false;
}
}
else
if (op.description() == "BatchNormInference")
{
type = node.get_input_element_type(0);
const ngraph::op::BatchNormInference* bn =
static_cast<const ngraph::op::BatchNormInference*>(&op);
if (bn->get_eps_value() < CUDNN_BN_MIN_EPSILON)
{
return false;
}
}
if (type != element::f32)
else if (op.description() == "BatchNormTraining")
{
rc = false;
const ngraph::op::BatchNormTraining* bn =
static_cast<const ngraph::op::BatchNormTraining*>(&op);
if (bn->get_eps_value() < CUDNN_BN_MIN_EPSILON)
{
return false;
}
}
return rc;
return true;
}
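The reworked is_supported() replaces the old per-op element-type special cases with an explicit unsupported-op list, a float-only list, and a check against cuDNN's minimum batch-norm epsilon. A placement policy built on top of it can take the first backend that accepts a node; the helper below is a sketch of that idea under that assumption, not the actual AssignPlacement pass.

// Sketch only: first-match placement over an ordered backend list (assumed policy).
static size_t pick_placement(const ngraph::Node& node,
                             const std::vector<std::shared_ptr<ngraph::runtime::Backend>>& backends)
{
    for (size_t i = 0; i < backends.size(); ++i)
    {
        if (backends[i]->is_supported(node))
        {
            return i; // the index doubles as the node's placement index
        }
    }
    throw ngraph::ngraph_error("no backend supports op " + node.description());
}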
......@@ -29,8 +29,10 @@ using namespace std;
runtime::gpu::GPUTensor::GPUTensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer)
: runtime::Tensor(std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, "external"))
void* memory_pointer,
const Backend* backend)
: runtime::Tensor(std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, "external"),
backend)
, m_custom_memory(false)
{
m_descriptor->set_tensor_layout(
......@@ -48,8 +50,10 @@ runtime::gpu::GPUTensor::GPUTensor(const ngraph::element::Type& element_type,
}
}
runtime::gpu::GPUTensor::GPUTensor(const ngraph::element::Type& element_type, const Shape& shape)
: GPUTensor(element_type, shape, nullptr)
runtime::gpu::GPUTensor::GPUTensor(const ngraph::element::Type& element_type,
const Shape& shape,
const Backend* backend)
: GPUTensor(element_type, shape, nullptr, backend)
{
}
......
......@@ -16,9 +16,9 @@
#pragma once
#include <cuda.h>
#include <memory>
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/type/element_type.hpp"
......@@ -36,8 +36,11 @@ namespace ngraph
class ngraph::runtime::gpu::GPUTensor : public ngraph::runtime::Tensor
{
public:
GPUTensor(const ngraph::element::Type& element_type, const Shape& shape);
GPUTensor(const ngraph::element::Type& element_type, const Shape& shape, void* memory_pointer);
GPUTensor(const ngraph::element::Type& element_type, const Shape& shape, const Backend* parent);
GPUTensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const Backend* parent);
virtual ~GPUTensor() override;
/// \brief Write bytes directly into the tensor
......
#int64 is not supported by cuDNN
batch_norm_one_output
batch_norm_three_outputs
backwards_batch_norm_three_outputs
#need to check
# need to check
computation_reuse
#cuda does not support throw
# cuda does not support throw
divide_by_zero_int32
#int64 is not supported by cuDNN
# int64 is not supported by cuDNN
dot_matrix_vector_int64
generate_mask
#error throw is not the same on GPU, not supported yet
one_hot_scalar_fp_nonint_in_3
one_hot_scalar_oob_in_3
one_hot_vector_1_barely_oob
one_hot_vector_1_far_oob
one_hot_vector_1_fp_nonint
#select_and_scatter is deprecated
# select_and_scatter is deprecated
select_and_scatter_3d_without_overlap
select_and_scatter_with_overlap
select_and_scatter_without_overlap
#custom_mem is not implemented on GPU
# custom_mem is not implemented on GPU
tensorview_custom_mem
#integer is not supported by cuDNN on backward pooling
# integer is not supported by cuDNN on backward pooling
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_avgpool_n1_c1_hw2x2
......
......@@ -21,7 +21,7 @@ if (NGRAPH_GPUH_ENABLE)
VERSION ${NGRAPH_VERSION}
SOVERSION ${NGRAPH_API_VERSION})
endif()
target_link_libraries(gpuh_backend PUBLIC ngraph)
target_link_libraries(gpuh_backend PUBLIC ngraph hybrid_base gpu_backend)
set_target_properties(gpuh_backend PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${NGRAPH_BUILD_DIR})
install(TARGETS gpuh_backend
......
......@@ -18,6 +18,7 @@
#include "ngraph/graph_util.hpp"
#include "ngraph/pass/assign_placement.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/interpreter/int_backend.hpp"
#include "ngraph/runtime/tensor.hpp"
......@@ -34,7 +35,13 @@ extern "C" runtime::Backend* new_backend(const char* configuration_string)
return new runtime::gpuh::GPUHBackend();
}
vector<string> get_excludes()
{
return vector<string>{{"Not"}};
}
runtime::gpuh::GPUHBackend::GPUHBackend()
: HybridBackend({{"INTERPRETER", make_shared<ngraph::runtime::interpreter::INTBackend>()}})
: HybridBackend({make_shared<ngraph::runtime::gpu::GPU_Backend>(),
make_shared<ngraph::runtime::interpreter::INTBackend>()})
{
}
computation_reuse
tensorview_custom_mem
batch_norm_inference_f64
batch_norm_inference_f32
divide_by_zero_int32
......@@ -26,8 +26,10 @@ using namespace std;
runtime::HostTensor::HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const string& name)
: runtime::Tensor(std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, name))
const string& name,
const Backend* parent)
: runtime::Tensor(std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, name),
parent)
, m_allocated_buffer_pool(nullptr)
, m_aligned_buffer_pool(nullptr)
......@@ -56,8 +58,24 @@ runtime::HostTensor::HostTensor(const ngraph::element::Type& element_type,
runtime::HostTensor::HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
const string& name)
: HostTensor(element_type, shape, nullptr, name)
const string& name,
const Backend* parent)
: HostTensor(element_type, shape, nullptr, name, parent)
{
}
runtime::HostTensor::HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
const Backend* parent)
: HostTensor(element_type, shape, nullptr, "external", parent)
{
}
runtime::HostTensor::HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const Backend* parent)
: HostTensor(element_type, shape, memory_pointer, "external", parent)
{
}
......
......@@ -18,6 +18,7 @@
#include <memory>
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/type/element_type.hpp"
......@@ -36,11 +37,20 @@ class ngraph::runtime::HostTensor : public ngraph::runtime::Tensor
public:
HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
const std::string& name = "external");
const std::string& name = "external",
const Backend* parent = nullptr);
HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const std::string& name = "external");
const std::string& name = "external",
const Backend* parent = nullptr);
HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
const Backend* parent);
HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const Backend* parent);
virtual ~HostTensor() override;
char* get_data_ptr();
......
......@@ -14,21 +14,14 @@
# limitations under the License.
# ******************************************************************************
if (NGRAPH_HYBRID_ENABLE)
add_library(hybrid_backend SHARED
hybrid_backend.cpp
hybrid_util.cpp
pass/assign_placement.cpp)
if(NGRAPH_LIB_VERSIONING_ENABLE)
set_target_properties(hybrid_backend PROPERTIES
VERSION ${NGRAPH_VERSION}
SOVERSION ${NGRAPH_API_VERSION})
endif()
target_link_libraries(hybrid_backend PUBLIC ngraph)
set_target_properties(hybrid_backend PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${NGRAPH_BUILD_DIR})
add_library(hybrid_base STATIC
hybrid_backend.cpp
hybrid_util.cpp
pass/assign_placement.cpp
pass/fix_get_output_element.cpp)
target_link_libraries(hybrid_base PUBLIC ngraph)
set_target_properties(hybrid_base PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${NGRAPH_BUILD_DIR})
install(TARGETS hybrid_backend
LIBRARY DESTINATION "${NGRAPH_INSTALL_LIB}"
ARCHIVE DESTINATION "${NGRAPH_INSTALL_LIB}"
)
endif()
install(TARGETS hybrid_base
ARCHIVE DESTINATION "${NGRAPH_INSTALL_LIB}"
)
......@@ -37,8 +37,7 @@ namespace ngraph
class ngraph::runtime::hybrid::HybridBackend : public ngraph::runtime::Backend
{
public:
HybridBackend(
const std::vector<std::pair<std::string, std::shared_ptr<runtime::Backend>>>& backend_list);
HybridBackend(const std::vector<std::shared_ptr<runtime::Backend>>& backend_list);
std::shared_ptr<ngraph::runtime::Tensor>
create_tensor(const ngraph::element::Type& element_type,
......@@ -69,5 +68,9 @@ private:
};
std::map<std::shared_ptr<ngraph::Function>, FunctionInstance> m_function_map;
std::vector<std::pair<std::string, std::shared_ptr<runtime::Backend>>> m_backend_list;
std::vector<std::shared_ptr<runtime::Backend>> m_backend_list;
std::string get_placement_name(const runtime::Tensor* t);
std::string get_placement_name(const runtime::Backend* t);
size_t get_placement(const runtime::Tensor* t);
};
......@@ -15,11 +15,13 @@
//*****************************************************************************
#include "ngraph/runtime/hybrid/hybrid_util.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/visualize_tree.hpp"
using namespace ngraph;
using namespace std;
static Node* take_independent_node_with_placement_priority_size(
static Node* take_independent_node_with_placement_priority(
map<size_t, deque<Node*>>& independent_nodes_by_placement, size_t placement)
{
Node* selected_node = nullptr;
......@@ -45,7 +47,7 @@ static Node* take_independent_node_with_placement_priority_size(
}
static vector<unordered_set<shared_ptr<Node>>>
group_function_nodes_to_clusters_size(const shared_ptr<Function>& f)
group_function_nodes_to_clusters(const shared_ptr<Function>& f)
{
// Topologically sort nodes by picking independent node with the same placement as the
// previously picked node greedily
......@@ -66,7 +68,7 @@ static vector<unordered_set<shared_ptr<Node>>>
list<shared_ptr<Node>> sorted_nodes;
size_t previous_placement = 0;
while (Node* independent_node = take_independent_node_with_placement_priority_size(
while (Node* independent_node = ::take_independent_node_with_placement_priority(
independent_nodes_by_placement, previous_placement))
{
previous_placement = independent_node->get_placement_index();
......@@ -148,42 +150,48 @@ static vector<unordered_set<shared_ptr<Node>>>
// | <------[3]------+ | | | <------[7]------+ | | <------[11]-----+ |
// +-----+ +-----+ | +-----+ +-----+ +-----+ +-----+
// Suffix *_size as a part of function name is temporary, this suffix
// will be removed when the backends move to the latest Hybrid backend
pair<shared_ptr<op::Result>, shared_ptr<op::Parameter>>
insert_result_parameter_split_size(const shared_ptr<Node>& src_node,
const shared_ptr<Node>& dst_node)
static map<shared_ptr<op::Result>, shared_ptr<op::Parameter>>
insert_result_parameter_split(const shared_ptr<Node>& src_node,
const shared_ptr<Node>& dst_node)
{
if (src_node->get_output_size() != 1)
map<shared_ptr<op::Result>, shared_ptr<op::Parameter>> result_map;
for (descriptor::Input& input : dst_node->get_inputs())
{
throw ngraph_error("Multiple output per op not supported in graph partition yet.");
}
if (input.get_output().get_node() == src_node)
{
descriptor::Input* dst_input = &input;
descriptor::Output* src_output = &input.get_output();
// Make parameter node
shared_ptr<op::Parameter> par_node = make_shared<op::Parameter>(
src_node->get_output_element_type(0), src_node->get_output_shape(0));
par_node->set_placement_index(dst_node->get_placement_index());
// Make parameter node
shared_ptr<op::Parameter> par_node =
make_shared<op::Parameter>(src_output->get_element_type(), src_output->get_shape());
par_node->set_placement_index(dst_node->get_placement_index());
// Fix input / output among src, dst and par
descriptor::Input* dst_input = dst_node->get_input_from(src_node);
descriptor::Output* src_output = src_node->get_output_to(dst_node);
src_output->remove_input(dst_input); // Remove [0]
dst_input->replace_output(par_node, 0); // Remove [0] (again), add [8], remove [1], add [9]
// Fix input / output among src, dst and par
// Remove [0]
src_output->remove_input(dst_input);
// Add res node
shared_ptr<op::Result> res_node = make_shared<op::Result>(src_node); // Add [4], [5], [6], [7]
res_node->set_placement_index(src_node->get_placement_index());
// Remove [0] (again), add [8], remove [1], add [9]
dst_input->replace_output(par_node, 0);
return make_pair(res_node, par_node);
// Add res node
shared_ptr<op::Result> res_node =
make_shared<op::Result>(src_node); // Add [4], [5], [6], [7]
res_node->set_placement_index(src_node->get_placement_index());
result_map.insert({res_node, par_node});
}
}
return result_map;
}
// Suffix *_size as a part of function name is temporary, this suffix
// will be removed when the backends move to the latest Hybrid backend
pair<vector<shared_ptr<Function>>, unordered_map<shared_ptr<op::Parameter>, shared_ptr<op::Result>>>
runtime::hybrid::split_function_by_placement_size(const shared_ptr<Function>& f)
runtime::hybrid::split_function_by_placement(const shared_ptr<Function>& f)
{
// Split functions to clusters of nodes that can be computed together
vector<unordered_set<shared_ptr<Node>>> clusters = group_function_nodes_to_clusters_size(f);
vector<unordered_set<shared_ptr<Node>>> clusters = ::group_function_nodes_to_clusters(f);
// Map from (intermediate) parameter to result node, for guiding data copy among devices
unordered_map<shared_ptr<op::Parameter>, shared_ptr<op::Result>> map_parameter_to_result;
......@@ -208,15 +216,18 @@ pair<vector<shared_ptr<Function>>, unordered_map<shared_ptr<op::Parameter>, shar
if (src_cluster != dst_cluster)
{
// Split src_node and dst_node
pair<shared_ptr<op::Result>, shared_ptr<op::Parameter>> res_par_pair =
insert_result_parameter_split_size(src_node, dst_node);
shared_ptr<op::Result> res_node = res_par_pair.first;
shared_ptr<op::Parameter> par_node = res_par_pair.second;
map_parameter_to_result[par_node] = res_node;
map<shared_ptr<op::Result>, shared_ptr<op::Parameter>> res_par_pair_map =
::insert_result_parameter_split(src_node, dst_node);
for (const auto& res_par_pair : res_par_pair_map)
{
shared_ptr<op::Result> res_node = res_par_pair.first;
shared_ptr<op::Parameter> par_node = res_par_pair.second;
map_parameter_to_result[par_node] = res_node;
// Insert newly created nodes into clusters
src_cluster->insert(res_node);
dst_cluster->insert(par_node);
// Insert newly created nodes into clusters
src_cluster->insert(res_node);
dst_cluster->insert(par_node);
}
}
}
}
......@@ -240,15 +251,19 @@ pair<vector<shared_ptr<Function>>, unordered_map<shared_ptr<op::Parameter>, shar
}
auto sub_function = make_shared<Function>(res_vector, par_vector);
sub_functions.push_back(sub_function);
#ifdef HYBRID_DEBUG
ngraph::pass::Manager pass_manager;
pass_manager.register_pass<ngraph::pass::VisualizeTree>("subgraph_" + to_string(index++) +
".png");
pass_manager.run_passes(sub_function);
#endif
}
return make_pair(sub_functions, map_parameter_to_result);
}
// Suffix *_size as a part of function name is temporary, this suffix
// will be removed when the backends move to the latest Hybrid backend
// Assert that nodes in the function are colocated and return that placement
size_t runtime::hybrid::get_colocated_function_placement_size(shared_ptr<Function> func)
size_t runtime::hybrid::get_colocated_function_placement(shared_ptr<Function> func)
{
auto ops = func->get_ops();
......@@ -259,7 +274,7 @@ size_t runtime::hybrid::get_colocated_function_placement_size(shared_ptr<Functio
size_t node_placement = op->get_placement_index();
if (node_placement == Node::placement_invalid)
{
throw ngraph_error("Node should have a device placement");
throw ngraph_error("Node " + op->get_name() + " should have a device placement");
}
if (function_placement != node_placement)
{
......
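With the *_size suffixes gone, the hybrid compile path reads as: split the function into per-placement subfunctions, then use each subfunction's single placement index to pick the backend that compiles it. A rough sketch, assuming m_backend_list (the member declared in hybrid_backend.hpp) is indexed by placement:

// Sketch only: driving the two renamed helpers from a hybrid compile step.
auto split = ngraph::runtime::hybrid::split_function_by_placement(func);
// split.second maps each inserted Parameter to the Result that feeds it,
// which guides tensor copies between devices at call time.
for (const std::shared_ptr<ngraph::Function>& sub : split.first)
{
    size_t placement = ngraph::runtime::hybrid::get_colocated_function_placement(sub);
    m_backend_list.at(placement)->compile(sub); // assumed: placement index selects the backend
}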
......@@ -34,10 +34,10 @@ namespace ngraph
std::pair<
std::vector<std::shared_ptr<Function>>,
std::unordered_map<std::shared_ptr<op::Parameter>, std::shared_ptr<op::Result>>>
split_function_by_placement_size(const std::shared_ptr<Function>& f);
split_function_by_placement(const std::shared_ptr<Function>& f);
// Assert that nodes in the function are colocated and return that placement
size_t get_colocated_function_placement_size(std::shared_ptr<Function> func);
size_t get_colocated_function_placement(std::shared_ptr<Function> func);
}
}
}
......@@ -24,7 +24,7 @@ using namespace ngraph;
using namespace std;
runtime::hybrid::pass::AssignPlacement::AssignPlacement(
vector<shared_ptr<runtime::Backend>> placement_backends)
const vector<shared_ptr<runtime::Backend>>& placement_backends)
: m_placement_backends(placement_backends)
{
}
......
......@@ -39,8 +39,8 @@ namespace ngraph
class ngraph::runtime::hybrid::pass::AssignPlacement : public ngraph::pass::NodePass
{
public:
// TODO: make policy a class
AssignPlacement(std::vector<std::shared_ptr<ngraph::runtime::Backend>> placement_backends);
AssignPlacement(
const std::vector<std::shared_ptr<ngraph::runtime::Backend>>& placement_backends);
private:
bool run_on_node(std::shared_ptr<Node> node) override;
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/hybrid/pass/fix_get_output_element.hpp"
#include "ngraph/log.hpp"
#include "ngraph/node.hpp"
#include "ngraph/placement.hpp"
#include "ngraph/runtime/backend.hpp"
using namespace ngraph;
using namespace std;
runtime::hybrid::pass::FixGetOutputElement::FixGetOutputElement()
{
}
bool runtime::hybrid::pass::FixGetOutputElement::run_on_node(shared_ptr<Node> node)
{
if (node->description() == "GetOutputElement")
{
auto parent = node->get_arguments().at(0);
node->set_placement_index(parent->get_placement_index());
}
return false;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <exception>
#include <functional>
#include <sstream>
#include "ngraph/pass/pass.hpp"
namespace ngraph
{
namespace runtime
{
namespace hybrid
{
namespace pass
{
class FixGetOutputElement;
}
}
}
}
class ngraph::runtime::hybrid::pass::FixGetOutputElement : public ngraph::pass::NodePass
{
public:
FixGetOutputElement();
private:
bool run_on_node(std::shared_ptr<Node> node) override;
};
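The pass simply copies a GetOutputElement node's placement from its argument, so a GOE never lands on a different device than the op that produced it. A sketch of wiring it in with the pass::Manager API already used in this diff; running it after AssignPlacement is an assumption:

// Sketch only: backend_list is assumed to be a std::vector<std::shared_ptr<runtime::Backend>> in priority order.
ngraph::pass::Manager pass_manager;
pass_manager.register_pass<ngraph::runtime::hybrid::pass::AssignPlacement>(backend_list);
pass_manager.register_pass<ngraph::runtime::hybrid::pass::FixGetOutputElement>();
pass_manager.run_passes(func);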
......@@ -390,14 +390,15 @@ shared_ptr<runtime::Tensor>
runtime::intelgpu::IntelGPUBackend::create_tensor(const element::Type& element_type,
const Shape& shape)
{
return make_shared<runtime::intelgpu::IntelGPUTensorView>(element_type, shape, *ocl_engine);
return make_shared<runtime::intelgpu::IntelGPUTensorView>(
element_type, shape, *ocl_engine, nullptr, this);
}
shared_ptr<runtime::Tensor> runtime::intelgpu::IntelGPUBackend::create_tensor(
const element::Type& element_type, const Shape& shape, void* memory_pointer)
{
return make_shared<runtime::intelgpu::IntelGPUTensorView>(
element_type, shape, *ocl_engine, memory_pointer);
element_type, shape, *ocl_engine, memory_pointer, this);
}
runtime::Handle runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
......
......@@ -28,8 +28,9 @@ using namespace std;
runtime::intelgpu::IntelGPUTensorView::IntelGPUTensorView(const element::Type& element_type,
const Shape& shape,
const cldnn::engine& backend_engine,
void* memory_pointer)
: runtime::Tensor(make_shared<descriptor::Tensor>(element_type, shape, "external"))
void* memory_pointer,
const runtime::Backend* parent)
: runtime::Tensor(make_shared<descriptor::Tensor>(element_type, shape, "external"), parent)
{
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(element_type, shape);
......
......@@ -38,7 +38,8 @@ public:
IntelGPUTensorView(const element::Type& element_type,
const Shape& shape,
const cldnn::engine& backend_engine,
void* memory_pointer = nullptr);
void* memory_pointer,
const runtime::Backend* parent);
/// \brief Write bytes directly into the tensor
/// \param p Pointer to source of data
......
......@@ -43,16 +43,25 @@ extern "C" runtime::Backend* new_backend(const char* configuration_string)
return new runtime::interpreter::INTBackend();
}
runtime::interpreter::INTBackend::INTBackend()
{
}
runtime::interpreter::INTBackend::INTBackend(const vector<string>& unsupported_op_name_list)
: m_unsupported_op_name_list{unsupported_op_name_list.begin(), unsupported_op_name_list.end()}
{
}
shared_ptr<runtime::Tensor>
runtime::interpreter::INTBackend::create_tensor(const element::Type& type, const Shape& shape)
{
return make_shared<runtime::HostTensor>(type, shape, "external");
return make_shared<runtime::HostTensor>(type, shape, this);
}
shared_ptr<runtime::Tensor> runtime::interpreter::INTBackend::create_tensor(
const element::Type& type, const Shape& shape, void* memory_pointer)
{
return make_shared<runtime::HostTensor>(type, shape, memory_pointer, "external");
return make_shared<runtime::HostTensor>(type, shape, memory_pointer, this);
}
runtime::Handle runtime::interpreter::INTBackend::compile(shared_ptr<Function> function)
......@@ -336,3 +345,8 @@ void runtime::interpreter::INTBackend::perform_nan_check(
arg_number++;
}
}
bool runtime::interpreter::INTBackend::is_supported(const Node& node) const
{
return m_unsupported_op_name_list.find(node.description()) == m_unsupported_op_name_list.end();
}
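The interpreter can now be told to disown specific ops, which is how a hybrid wrapper steers those ops to another device; get_excludes() in the GPUH backend above returns {"Not"} for exactly this purpose, although these hunks define it without showing where it is consumed. A minimal sketch of the new constructor:

// Sketch: an interpreter that reports "Not" as unsupported, so a placement pass must put it elsewhere.
auto fallback = std::make_shared<ngraph::runtime::interpreter::INTBackend>(
    std::vector<std::string>{"Not"});
// fallback->is_supported(node) now returns false for any node whose description() is "Not".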
......@@ -16,6 +16,7 @@
#pragma once
#include <initializer_list>
#include <memory>
#include <sstream>
#include <string>
......@@ -156,6 +157,12 @@ namespace ngraph
class ngraph::runtime::interpreter::INTBackend : public Backend
{
public:
INTBackend();
INTBackend(const std::vector<std::string>& unsupported_op_name_list);
INTBackend(const INTBackend&) = delete;
INTBackend(INTBackend&&) = delete;
INTBackend& operator=(const INTBackend&) = delete;
std::shared_ptr<Tensor>
create_tensor(const element::Type& type, const Shape& shape, void* memory_pointer) override;
......@@ -173,7 +180,8 @@ public:
std::vector<PerformanceCounter>
get_performance_data(std::shared_ptr<Function> func) const override;
bool is_supported(const Node& node) const override { return true; }
bool is_supported(const Node& node) const override;
private:
int get_alignment() const { return 64; }
class FunctionInstance
......@@ -190,6 +198,7 @@ private:
void* get_temporary_pointer(size_t offset) { return m_temporary_memory->get_ptr(offset); }
};
std::map<std::shared_ptr<Function>, FunctionInstance> m_function_map;
std::set<std::string> m_unsupported_op_name_list;
static void perform_nan_check(const std::vector<std::shared_ptr<HostTensor>>&,
const Node* op = nullptr);
......
......@@ -21,6 +21,7 @@
#include "ngraph/descriptor/layout/tensor_layout.hpp"
#include "ngraph/descriptor/tensor.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/strides.hpp"
#include "ngraph/type/element_type.hpp"
......@@ -37,9 +38,11 @@ namespace ngraph
class Tensor
{
protected:
Tensor(const std::shared_ptr<ngraph::descriptor::Tensor>& descriptor)
Tensor(const std::shared_ptr<ngraph::descriptor::Tensor>& descriptor,
const Backend* parent)
: m_descriptor(descriptor)
, m_stale(true)
, m_parent(parent)
{
}
......@@ -104,9 +107,11 @@ namespace ngraph
/// \param source The source tensor
virtual void copy_from(const ngraph::runtime::Tensor& source);
const Backend* get_parent() const { return m_parent; }
protected:
std::shared_ptr<ngraph::descriptor::Tensor> m_descriptor;
bool m_stale;
const Backend* m_parent;
};
using TensorViewPtrs = std::vector<std::shared_ptr<Tensor>>;
......
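Storing a non-owning parent pointer in the base Tensor lets the hybrid call path ask whether a tensor already belongs to the backend that needs it. A sketch of that check, assuming pointer equality is the intended test:

// Sketch only: copy between devices only when the tensor was created by a different backend.
bool needs_copy(const ngraph::runtime::Tensor& t, const ngraph::runtime::Backend* target)
{
    return t.get_parent() != target;
}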
......@@ -225,10 +225,6 @@ if (NGRAPH_INTERPRETER_ENABLE)
target_link_libraries(unit-test PRIVATE interpreter_backend)
endif()
if (NGRAPH_HYBRID_ENABLE)
target_link_libraries(unit-test PRIVATE hybrid_backend)
endif()
if (NGRAPH_GPU_ENABLE)
target_link_libraries(unit-test PRIVATE gpu_backend)
endif()
......