Unverified Commit 90503652 authored by Robert Kimball, committed by GitHub

Hybrid GPU Backend (#2240)

* Add GPUH hybrid backend

* update manifests

* update node operator<<

* fix GOE

* remove debug

* remove debug

* more cleanup

* add parent support to cpu and intel gpu backend tensors

* cleanup

* fix odd failure when printing node during construction

* fix node output

* address review comments

* style
parent 42f16035
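For context, a minimal usage sketch of the new hybrid backend API (illustrative only, not part of this diff): the HybridBackend constructor introduced here takes a priority-ordered list of backends, and compile()/call() split the function across them. The helper run_hybrid and the caller-supplied Function f are assumptions for the example; only constructors and methods that appear in this change are used.

#include <memory>
#include <vector>
#include "ngraph/runtime/hybrid/hybrid_backend.hpp"
#include "ngraph/runtime/interpreter/int_backend.hpp"

using namespace ngraph;

// Sketch: run a pre-built Function on the hybrid backend.
void run_hybrid(const std::shared_ptr<Function>& f)
{
    // Backend priority list: earlier entries are preferred at placement time.
    std::vector<std::shared_ptr<runtime::Backend>> backend_list{
        std::make_shared<runtime::interpreter::INTBackend>()};
    auto backend = std::make_shared<runtime::hybrid::HybridBackend>(backend_list);

    // Tensors are created on the first backend in the list.
    auto input = backend->create_tensor(element::f32, Shape{2, 2});
    auto output = backend->create_tensor(element::f32, Shape{2, 2});

    backend->compile(f);                  // splits f into per-placement sub-functions
    backend->call(f, {output}, {input});  // copies tensors between devices where parents differ
}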
......@@ -75,7 +75,6 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
include(var_functions)
set(NGRAPH_HYBRID_ENABLE TRUE)
option(NGRAPH_UNIT_TEST_ENABLE "Control the building of unit tests" TRUE)
option(NGRAPH_TOOLS_ENABLE "Control the building of tools" TRUE)
option(NGRAPH_CPU_ENABLE "Control the building of the CPU backend" TRUE)
......@@ -92,6 +91,10 @@ option(NGRAPH_CODE_COVERAGE_ENABLE "Enable code coverage data collection" FALSE)
option(NGRAPH_LIB_VERSIONING_ENABLE "Enable shared library versioning" FALSE)
option(NGRAPH_PYTHON_BUILD_ENABLE "Enable build nGraph python package wheel" FALSE)
if (NGRAPH_GPUH_ENABLE)
set(NGRAPH_GPU_ENABLE TRUE)
endif()
message(STATUS "NGRAPH_UNIT_TEST_ENABLE: ${NGRAPH_UNIT_TEST_ENABLE}")
message(STATUS "NGRAPH_TOOLS_ENABLE: ${NGRAPH_TOOLS_ENABLE}")
message(STATUS "NGRAPH_CPU_ENABLE: ${NGRAPH_CPU_ENABLE}")
......@@ -108,10 +111,6 @@ message(STATUS "NGRAPH_CODE_COVERAGE_ENABLE: ${NGRAPH_CODE_COVERAGE_ENABLE}")
message(STATUS "NGRAPH_LIB_VERSIONING_ENABLE: ${NGRAPH_LIB_VERSIONING_ENABLE}")
message(STATUS "NGRAPH_PYTHON_BUILD_ENABLE: ${NGRAPH_PYTHON_BUILD_ENABLE}")
if (NGRAPH_HYBRID_ENABLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNGRAPH_HYBRID_ENABLE")
endif()
if (NGRAPH_ONNX_IMPORT_ENABLE)
option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system provided Protobuf shared object" FALSE)
option(NGRAPH_ONNXIFI_ENABLE "Enable ONNX Interface for Framework Integration" TRUE)
......
......@@ -238,7 +238,14 @@ std::ostream& Node::write_long_description(std::ostream& out) const
{
out << sep << NodeDescription(*arg, true) << ": "
<< pretty_element_type(arg->get_output_element_type(0))
<< arg->get_output_partial_shape(0) << "";
<< arg->get_output_partial_shape(0);
sep = ", ";
}
out << ") -> (";
sep = "";
for (const auto& o : get_outputs())
{
out << sep << pretty_element_type(o.get_element_type()) << o.get_partial_shape();
sep = ", ";
}
out << ")";
......
......@@ -15,10 +15,7 @@
# ******************************************************************************
add_subdirectory(interpreter)
if (NGRAPH_HYBRID_ENABLE)
add_subdirectory(hybrid)
endif()
add_subdirectory(hybrid)
if (NGRAPH_CPU_ENABLE)
add_subdirectory(cpu)
......
......@@ -58,13 +58,13 @@ shared_ptr<runtime::cpu::CPU_CallFrame> runtime::cpu::CPU_Backend::make_call_fra
shared_ptr<runtime::Tensor>
runtime::cpu::CPU_Backend::create_tensor(const element::Type& element_type, const Shape& shape)
{
return make_shared<runtime::cpu::CPUTensorView>(element_type, shape);
return make_shared<runtime::cpu::CPUTensorView>(element_type, shape, this);
}
shared_ptr<runtime::Tensor> runtime::cpu::CPU_Backend::create_tensor(
const element::Type& element_type, const Shape& shape, void* memory_pointer)
{
return make_shared<runtime::cpu::CPUTensorView>(element_type, shape, memory_pointer);
return make_shared<runtime::cpu::CPUTensorView>(element_type, shape, memory_pointer, this);
}
runtime::Handle runtime::cpu::CPU_Backend::compile(shared_ptr<Function> func)
......
......@@ -35,8 +35,9 @@ using namespace std;
runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const string& name)
: runtime::Tensor(std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, name))
const runtime::Backend* parent)
: runtime::Tensor(std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, "external"),
parent)
, buffer(nullptr)
, aligned_buffer(nullptr)
{
......@@ -77,8 +78,8 @@ runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_
runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
const string& name)
: CPUTensorView(element_type, shape, nullptr, name)
const runtime::Backend* parent)
: CPUTensorView(element_type, shape, nullptr, parent)
{
}
......
......@@ -35,11 +35,11 @@ namespace ngraph
public:
CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
const std::string& name = "external");
const runtime::Backend* parent);
CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const std::string& name = "external");
const runtime::Backend* parent);
virtual ~CPUTensorView() override;
char* get_data_ptr();
......
......@@ -159,11 +159,6 @@ if (NGRAPH_GPU_ENABLE)
${CUDA_LIBRARIES}
${CUDA_CUBLAS_LIBRARIES}
${CUDNN_LIBRARIES})
if (NGRAPH_HYBRID_ENABLE)
target_link_libraries(gpu_backend
PRIVATE
hybrid_backend)
endif()
set_target_properties(gpu_backend PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${NGRAPH_BUILD_DIR})
......
......@@ -20,6 +20,7 @@
#include <cudnn.h>
#include "ngraph/graph_util.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/runtime/gpu/gpu_primitive_emitter.hpp"
......@@ -107,13 +108,13 @@ runtime::gpu::GPU_Backend::BackendContext::~BackendContext()
shared_ptr<runtime::Tensor>
runtime::gpu::GPU_Backend::create_tensor(const element::Type& element_type, const Shape& shape)
{
return make_shared<runtime::gpu::GPUTensor>(element_type, shape);
return make_shared<runtime::gpu::GPUTensor>(element_type, shape, this);
}
shared_ptr<runtime::Tensor> runtime::gpu::GPU_Backend::create_tensor(
const element::Type& element_type, const Shape& shape, void* memory_pointer)
{
return make_shared<runtime::gpu::GPUTensor>(element_type, shape, memory_pointer);
return make_shared<runtime::gpu::GPUTensor>(element_type, shape, memory_pointer, this);
}
runtime::Handle runtime::gpu::GPU_Backend::compile(shared_ptr<Function> func)
......@@ -222,33 +223,53 @@ vector<runtime::PerformanceCounter>
return rc;
}
bool runtime::gpu::GPU_Backend::is_supported(const Node& node) const
bool runtime::gpu::GPU_Backend::is_supported(const Node& op) const
{
bool rc = true;
set<string> unsupported_ops = {"Quantize",
"Dequantize",
"ShapeOf",
"All",
"Any",
"AllReduce",
"SelectAndScatter",
"StopGradient",
"EmbeddingLookup",
"GenerateMask"};
// get op type
element::Type type;
if (node.description() == "Select")
set<string> float_only = {"MaxPoolBackprop", "AvgPoolBackprop", "MaxPool", "Dot"};
if (unsupported_ops.find(op.description()) != unsupported_ops.end())
{
type = node.get_input_element_type(1);
return false;
}
else if (node.description() == "Constant")
if (float_only.find(op.description()) != float_only.end())
{
type = node.get_outputs().at(0).get_element_type();
}
else if (node.description() == "Parameter")
if (op.get_output_element_type(0) != element::f32 &&
op.get_output_element_type(0) != element::f64)
{
type = node.get_outputs().at(0).get_element_type();
return false;
}
else
{
type = node.get_input_element_type(0);
}
if (type != element::f32)
if (op.description() == "BatchNormInference")
{
const ngraph::op::BatchNormInference* bn =
static_cast<const ngraph::op::BatchNormInference*>(&op);
if (bn->get_eps_value() < CUDNN_BN_MIN_EPSILON)
{
rc = false;
return false;
}
}
else if (op.description() == "BatchNormTraining")
{
const ngraph::op::BatchNormTraining* bn =
static_cast<const ngraph::op::BatchNormTraining*>(&op);
if (bn->get_eps_value() < CUDNN_BN_MIN_EPSILON)
{
return false;
}
}
return rc;
return true;
}
......@@ -29,8 +29,10 @@ using namespace std;
runtime::gpu::GPUTensor::GPUTensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer)
: runtime::Tensor(std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, "external"))
void* memory_pointer,
const Backend* backend)
: runtime::Tensor(std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, "external"),
backend)
, m_custom_memory(false)
{
m_descriptor->set_tensor_layout(
......@@ -48,8 +50,10 @@ runtime::gpu::GPUTensor::GPUTensor(const ngraph::element::Type& element_type,
}
}
runtime::gpu::GPUTensor::GPUTensor(const ngraph::element::Type& element_type, const Shape& shape)
: GPUTensor(element_type, shape, nullptr)
runtime::gpu::GPUTensor::GPUTensor(const ngraph::element::Type& element_type,
const Shape& shape,
const Backend* backend)
: GPUTensor(element_type, shape, nullptr, backend)
{
}
......
......@@ -16,9 +16,9 @@
#pragma once
#include <cuda.h>
#include <memory>
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/type/element_type.hpp"
......@@ -36,8 +36,11 @@ namespace ngraph
class ngraph::runtime::gpu::GPUTensor : public ngraph::runtime::Tensor
{
public:
GPUTensor(const ngraph::element::Type& element_type, const Shape& shape);
GPUTensor(const ngraph::element::Type& element_type, const Shape& shape, void* memory_pointer);
GPUTensor(const ngraph::element::Type& element_type, const Shape& shape, const Backend* parent);
GPUTensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const Backend* parent);
virtual ~GPUTensor() override;
/// \brief Write bytes directly into the tensor
......
#int64 is not supported by cuDNN
batch_norm_one_output
batch_norm_three_outputs
backwards_batch_norm_three_outputs
#need to check
# need to check
computation_reuse
#cuda does not support throw
# cuda does not support throw
divide_by_zero_int32
#int64 is not supported by cuDNN
# int64 is not supported by cuDNN
dot_matrix_vector_int64
generate_mask
#error throw is not the same on GPU, not supported yet
one_hot_scalar_fp_nonint_in_3
one_hot_scalar_oob_in_3
one_hot_vector_1_barely_oob
one_hot_vector_1_far_oob
one_hot_vector_1_fp_nonint
#select_and_scatter is deprecated
# select_and_scatter is deprecated
select_and_scatter_3d_without_overlap
select_and_scatter_with_overlap
select_and_scatter_without_overlap
#custom_mem is not implemented on GPU
# custom_mem is not implemented on GPU
tensorview_custom_mem
#integer is not supported by cuDNN on backward pooling
# integer is not supported by cuDNN on backward pooling
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_avgpool_n1_c1_hw2x2
......
......@@ -21,7 +21,7 @@ if (NGRAPH_GPUH_ENABLE)
VERSION ${NGRAPH_VERSION}
SOVERSION ${NGRAPH_API_VERSION})
endif()
target_link_libraries(gpuh_backend PUBLIC ngraph)
target_link_libraries(gpuh_backend PUBLIC ngraph hybrid_base gpu_backend)
set_target_properties(gpuh_backend PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${NGRAPH_BUILD_DIR})
install(TARGETS gpuh_backend
......
......@@ -18,6 +18,7 @@
#include "ngraph/graph_util.hpp"
#include "ngraph/pass/assign_placement.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/interpreter/int_backend.hpp"
#include "ngraph/runtime/tensor.hpp"
......@@ -34,7 +35,13 @@ extern "C" runtime::Backend* new_backend(const char* configuration_string)
return new runtime::gpuh::GPUHBackend();
}
vector<string> get_excludes()
{
return vector<string>{{"Not"}};
}
runtime::gpuh::GPUHBackend::GPUHBackend()
: HybridBackend({{"INTERPRETER", make_shared<ngraph::runtime::interpreter::INTBackend>()}})
: HybridBackend({make_shared<ngraph::runtime::gpu::GPU_Backend>(),
make_shared<ngraph::runtime::interpreter::INTBackend>()})
{
}
computation_reuse
tensorview_custom_mem
batch_norm_inference_f64
batch_norm_inference_f32
divide_by_zero_int32
......@@ -26,8 +26,10 @@ using namespace std;
runtime::HostTensor::HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const string& name)
: runtime::Tensor(std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, name))
const string& name,
const Backend* parent)
: runtime::Tensor(std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, name),
parent)
, m_allocated_buffer_pool(nullptr)
, m_aligned_buffer_pool(nullptr)
......@@ -56,8 +58,24 @@ runtime::HostTensor::HostTensor(const ngraph::element::Type& element_type,
runtime::HostTensor::HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
const string& name)
: HostTensor(element_type, shape, nullptr, name)
const string& name,
const Backend* parent)
: HostTensor(element_type, shape, nullptr, name, parent)
{
}
runtime::HostTensor::HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
const Backend* parent)
: HostTensor(element_type, shape, nullptr, "external", parent)
{
}
runtime::HostTensor::HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const Backend* parent)
: HostTensor(element_type, shape, memory_pointer, "external", parent)
{
}
......
......@@ -18,6 +18,7 @@
#include <memory>
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/type/element_type.hpp"
......@@ -36,11 +37,20 @@ class ngraph::runtime::HostTensor : public ngraph::runtime::Tensor
public:
HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
const std::string& name = "external");
const std::string& name = "external",
const Backend* parent = nullptr);
HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const std::string& name = "external");
const std::string& name = "external",
const Backend* parent = nullptr);
HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
const Backend* parent);
HostTensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const Backend* parent);
virtual ~HostTensor() override;
char* get_data_ptr();
......
......@@ -14,21 +14,14 @@
# limitations under the License.
# ******************************************************************************
if (NGRAPH_HYBRID_ENABLE)
add_library(hybrid_backend SHARED
add_library(hybrid_base STATIC
hybrid_backend.cpp
hybrid_util.cpp
pass/assign_placement.cpp)
if(NGRAPH_LIB_VERSIONING_ENABLE)
set_target_properties(hybrid_backend PROPERTIES
VERSION ${NGRAPH_VERSION}
SOVERSION ${NGRAPH_API_VERSION})
endif()
target_link_libraries(hybrid_backend PUBLIC ngraph)
set_target_properties(hybrid_backend PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${NGRAPH_BUILD_DIR})
pass/assign_placement.cpp
pass/fix_get_output_element.cpp)
target_link_libraries(hybrid_base PUBLIC ngraph)
set_target_properties(hybrid_base PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${NGRAPH_BUILD_DIR})
install(TARGETS hybrid_backend
LIBRARY DESTINATION "${NGRAPH_INSTALL_LIB}"
install(TARGETS hybrid_base
ARCHIVE DESTINATION "${NGRAPH_INSTALL_LIB}"
)
endif()
)
......@@ -17,36 +17,21 @@
#include "ngraph/runtime/hybrid/hybrid_backend.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/visualize_tree.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/gpu/gpu_tensor.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/hybrid/hybrid_util.hpp"
#include "ngraph/runtime/hybrid/pass/assign_placement.hpp"
#include "ngraph/runtime/hybrid/pass/fix_get_output_element.hpp"
#include "ngraph/runtime/interpreter/int_backend.hpp"
#include "ngraph/runtime/tensor.hpp"
using namespace ngraph;
using namespace std;
template <typename T>
void copy_data(std::shared_ptr<ngraph::runtime::Tensor> tv, const std::vector<T>& data)
{
size_t data_size = data.size() * sizeof(T);
tv->write(data.data(), 0, data_size);
}
template <typename T>
std::vector<T> read_vector(std::shared_ptr<ngraph::runtime::Tensor> tv)
{
if (ngraph::element::from<T>() != tv->get_tensor_layout()->get_element_type())
{
throw std::invalid_argument("read_vector type must match Tensor type");
}
size_t element_count = ngraph::shape_size(tv->get_shape());
size_t size = element_count * sizeof(T);
std::vector<T> rc(element_count);
tv->read(rc.data(), 0, size);
return rc;
}
runtime::hybrid::HybridBackend::HybridBackend(
const std::vector<std::pair<std::string, std::shared_ptr<runtime::Backend>>>& backend_list)
const std::vector<std::shared_ptr<runtime::Backend>>& backend_list)
: m_backend_list{backend_list}
{
}
......@@ -56,46 +41,44 @@ shared_ptr<runtime::Tensor>
const Shape& shape)
{
auto it = m_backend_list.begin();
return it->second->create_tensor(element_type, shape);
return (*it)->create_tensor(element_type, shape);
}
shared_ptr<runtime::Tensor> runtime::hybrid::HybridBackend::create_tensor(
const element::Type& element_type, const Shape& shape, void* memory_pointer)
{
auto it = m_backend_list.begin();
return it->second->create_tensor(element_type, shape, memory_pointer);
return (*it)->create_tensor(element_type, shape, memory_pointer);
}
runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> func)
{
if (m_function_map.find(func) == m_function_map.end())
{
vector<shared_ptr<runtime::Backend>> backend_list;
for (auto p : m_backend_list)
{
backend_list.push_back(p.second);
}
// Clone function
FunctionInstance instance;
instance.m_function = clone_function(*func);
// Run placement pass
ngraph::pass::Manager pass_manager;
pass_manager.register_pass<runtime::hybrid::pass::AssignPlacement>(backend_list);
pass_manager.register_pass<runtime::hybrid::pass::AssignPlacement>(m_backend_list);
pass_manager.register_pass<runtime::hybrid::pass::FixGetOutputElement>();
#ifdef GPUH_DEBUG
pass_manager.register_pass<ngraph::pass::VisualizeTree>("graph.png");
#endif
pass_manager.run_passes(instance.m_function);
// Split function to sub_functions
tie(instance.m_sub_functions, instance.m_map_parameter_to_result) =
split_function_by_placement_size(instance.m_function);
runtime::hybrid::split_function_by_placement(instance.m_function);
m_function_map.insert({func, instance});
// Compile subfunctions in corresponding backends
for (shared_ptr<Function>& sub_function : instance.m_sub_functions)
{
size_t placement = get_colocated_function_placement_size(sub_function);
size_t placement = runtime::hybrid::get_colocated_function_placement(sub_function);
auto backend = m_backend_list[placement];
backend.second->compile(sub_function);
backend->compile(sub_function);
// Compile will replace nodes so we need to make one more pass through all
// ops to reset placement
......@@ -116,70 +99,103 @@ bool runtime::hybrid::HybridBackend::call(shared_ptr<Function> func,
// Get FunctionInstance
bool rc = true;
auto it = m_function_map.find(func);
if (it == m_function_map.end())
using node_map_t = unordered_map<shared_ptr<Node>, shared_ptr<runtime::Tensor>>;
auto fit = m_function_map.find(func);
if (fit == m_function_map.end())
{
throw runtime_error("compile() must be called before call().");
}
FunctionInstance& instance = it->second;
FunctionInstance& instance = fit->second;
// Each parameter and result node in sub_function maps to one Tensor
unordered_map<shared_ptr<Node>, shared_ptr<runtime::Tensor>> map_node_to_tensor_view;
node_map_t map_node_to_tensor;
for (size_t i = 0; i < inputs.size(); ++i)
{
map_node_to_tensor_view[instance.m_function->get_parameters()[i]] = inputs[i];
map_node_to_tensor[instance.m_function->get_parameters()[i]] = inputs[i];
}
for (size_t i = 0; i < outputs.size(); ++i)
{
map_node_to_tensor_view[instance.m_function->get_results()[i]] = outputs[i];
map_node_to_tensor[instance.m_function->get_results()[i]] = outputs[i];
}
// Call subfunctions
for (shared_ptr<Function>& sub_function : instance.m_sub_functions)
for (const shared_ptr<Function>& sub_function : instance.m_sub_functions)
{
// Init backend
size_t placement = get_colocated_function_placement_size(sub_function);
auto backend = m_backend_list[placement].second;
size_t placement = runtime::hybrid::get_colocated_function_placement(sub_function);
auto backend = m_backend_list[placement];
// Prepare parameter TensorViews
vector<shared_ptr<runtime::Tensor>> parameter_tvs;
for (auto parameter_node : sub_function->get_parameters())
// Prepare parameter Tensors
vector<shared_ptr<runtime::Tensor>> parameters;
for (const shared_ptr<op::Parameter>& parameter_node : sub_function->get_parameters())
{
if (map_node_to_tensor_view.find(parameter_node) != map_node_to_tensor_view.end())
auto it = map_node_to_tensor.find(parameter_node);
if (it != map_node_to_tensor.end())
{
parameter_tvs.push_back(map_node_to_tensor_view.at(parameter_node));
if (it->second->get_parent() == backend.get())
{
parameters.push_back(it->second);
}
else
{
auto parameter = backend->create_tensor(parameter_node->get_element_type(),
parameter_node->get_shape());
parameter->copy_from(*(it->second));
parameters.push_back(parameter);
}
}
else
{
// Handle temporary tensors that go between subgraphs
auto result_node = instance.m_map_parameter_to_result.at(parameter_node);
auto result_tv = map_node_to_tensor_view.at(result_node);
auto parameter_tv = backend->create_tensor(parameter_node->get_element_type(),
auto result = map_node_to_tensor.at(result_node);
auto parameter = backend->create_tensor(parameter_node->get_element_type(),
parameter_node->get_shape());
copy_data(parameter_tv, read_vector<float>(result_tv));
map_node_to_tensor_view[parameter_node] = parameter_tv;
parameter_tvs.push_back(parameter_tv);
parameter->copy_from(*result);
map_node_to_tensor[parameter_node] = parameter;
parameters.push_back(parameter);
}
}
// Prepare result TensorViews
vector<shared_ptr<runtime::Tensor>> result_tvs;
for (auto result_node : sub_function->get_results())
// Prepare result Tensors
vector<shared_ptr<runtime::Tensor>> results;
map<runtime::Tensor*, runtime::Tensor*> copy_back;
for (const shared_ptr<op::Result>& result_node : sub_function->get_results())
{
auto it = map_node_to_tensor.find(result_node);
if (it != map_node_to_tensor.end())
{
if (map_node_to_tensor_view.find(result_node) != map_node_to_tensor_view.end())
if (it->second->get_parent() == backend.get())
{
result_tvs.push_back(map_node_to_tensor_view.at(result_node));
results.push_back(it->second);
}
else
{
auto result_tv = backend->create_tensor(result_node->get_element_type(),
auto result = backend->create_tensor(result_node->get_element_type(),
result_node->get_shape());
map_node_to_tensor_view[result_node] = result_tv;
result_tvs.push_back(result_tv);
results.push_back(result);
copy_back.insert({result.get(), it->second.get()});
}
}
else
{
// Handle temporary tensors that go between subgraphs
auto result = backend->create_tensor(result_node->get_element_type(),
result_node->get_shape());
map_node_to_tensor[result_node] = result;
results.push_back(result);
}
}
// Call
backend->call_with_validate(sub_function, result_tvs, parameter_tvs);
backend->call(sub_function, results, parameters);
// Need to copy any results to the correct device
for (const auto& p : copy_back)
{
p.second->copy_from(*p.first);
}
}
return rc;
}
......@@ -188,3 +204,43 @@ bool runtime::hybrid::HybridBackend::is_supported(const Node& node) const
{
return true;
}
string runtime::hybrid::HybridBackend::get_placement_name(const runtime::Tensor* t)
{
string rc;
if (dynamic_cast<const runtime::HostTensor*>(t) != nullptr)
{
rc = "HostTensor";
}
else if (dynamic_cast<const runtime::gpu::GPUTensor*>(t) != nullptr)
{
rc = "GPUTensor";
}
return rc;
}
string runtime::hybrid::HybridBackend::get_placement_name(const runtime::Backend* t)
{
string rc;
if (dynamic_cast<const runtime::interpreter::INTBackend*>(t) != nullptr)
{
rc = "INTBackend";
}
else if (dynamic_cast<const runtime::gpu::GPU_Backend*>(t) != nullptr)
{
rc = "GPU_Backend";
}
return rc;
}
size_t runtime::hybrid::HybridBackend::get_placement(const runtime::Tensor* t)
{
size_t index = 0;
for (const shared_ptr<ngraph::runtime::Backend>& be : m_backend_list)
{
if (t->get_parent() == be.get())
{
return index;
}
index++;
}
return -1;
}
......@@ -37,8 +37,7 @@ namespace ngraph
class ngraph::runtime::hybrid::HybridBackend : public ngraph::runtime::Backend
{
public:
HybridBackend(
const std::vector<std::pair<std::string, std::shared_ptr<runtime::Backend>>>& backend_list);
HybridBackend(const std::vector<std::shared_ptr<runtime::Backend>>& backend_list);
std::shared_ptr<ngraph::runtime::Tensor>
create_tensor(const ngraph::element::Type& element_type,
......@@ -69,5 +68,9 @@ private:
};
std::map<std::shared_ptr<ngraph::Function>, FunctionInstance> m_function_map;
std::vector<std::pair<std::string, std::shared_ptr<runtime::Backend>>> m_backend_list;
std::vector<std::shared_ptr<runtime::Backend>> m_backend_list;
std::string get_placement_name(const runtime::Tensor* t);
std::string get_placement_name(const runtime::Backend* t);
size_t get_placement(const runtime::Tensor* t);
};
......@@ -15,11 +15,13 @@
//*****************************************************************************
#include "ngraph/runtime/hybrid/hybrid_util.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/visualize_tree.hpp"
using namespace ngraph;
using namespace std;
static Node* take_independent_node_with_placement_priority_size(
static Node* take_independent_node_with_placement_priority(
map<size_t, deque<Node*>>& independent_nodes_by_placement, size_t placement)
{
Node* selected_node = nullptr;
......@@ -45,7 +47,7 @@ static Node* take_independent_node_with_placement_priority_size(
}
static vector<unordered_set<shared_ptr<Node>>>
group_function_nodes_to_clusters_size(const shared_ptr<Function>& f)
group_function_nodes_to_clusters(const shared_ptr<Function>& f)
{
// Topologically sort nodes by picking independent node with the same placement as the
// previously picked node greedily
......@@ -66,7 +68,7 @@ static vector<unordered_set<shared_ptr<Node>>>
list<shared_ptr<Node>> sorted_nodes;
size_t previous_placement = 0;
while (Node* independent_node = take_independent_node_with_placement_priority_size(
while (Node* independent_node = ::take_independent_node_with_placement_priority(
independent_nodes_by_placement, previous_placement))
{
previous_placement = independent_node->get_placement_index();
......@@ -148,42 +150,48 @@ static vector<unordered_set<shared_ptr<Node>>>
// | <------[3]------+ | | | <------[7]------+ | | <------[11]-----+ |
// +-----+ +-----+ | +-----+ +-----+ +-----+ +-----+
// Suffix *_size as a part of function name is temporary, this suffix
// will be removed when the backends move to the latest Hybrid backend
pair<shared_ptr<op::Result>, shared_ptr<op::Parameter>>
insert_result_parameter_split_size(const shared_ptr<Node>& src_node,
static map<shared_ptr<op::Result>, shared_ptr<op::Parameter>>
insert_result_parameter_split(const shared_ptr<Node>& src_node,
const shared_ptr<Node>& dst_node)
{
if (src_node->get_output_size() != 1)
map<shared_ptr<op::Result>, shared_ptr<op::Parameter>> result_map;
for (descriptor::Input& input : dst_node->get_inputs())
{
throw ngraph_error("Multiple output per op not supported in graph partition yet.");
}
if (input.get_output().get_node() == src_node)
{
descriptor::Input* dst_input = &input;
descriptor::Output* src_output = &input.get_output();
// Make parameter node
shared_ptr<op::Parameter> par_node = make_shared<op::Parameter>(
src_node->get_output_element_type(0), src_node->get_output_shape(0));
shared_ptr<op::Parameter> par_node =
make_shared<op::Parameter>(src_output->get_element_type(), src_output->get_shape());
par_node->set_placement_index(dst_node->get_placement_index());
// Fix input / output among src, dst and par
descriptor::Input* dst_input = dst_node->get_input_from(src_node);
descriptor::Output* src_output = src_node->get_output_to(dst_node);
src_output->remove_input(dst_input); // Remove [0]
dst_input->replace_output(par_node, 0); // Remove [0] (again), add [8], remove [1], add [9]
// Remove [0]
src_output->remove_input(dst_input);
// Remove [0] (again), add [8], remove [1], add [9]
dst_input->replace_output(par_node, 0);
// Add res node
shared_ptr<op::Result> res_node = make_shared<op::Result>(src_node); // Add [4], [5], [6], [7]
shared_ptr<op::Result> res_node =
make_shared<op::Result>(src_node); // Add [4], [5], [6], [7]
res_node->set_placement_index(src_node->get_placement_index());
return make_pair(res_node, par_node);
result_map.insert({res_node, par_node});
}
}
return result_map;
}
// Suffix *_size as a part of function name is temporary, this suffix
// will be removed when the backends move to the latest Hybrid backend
pair<vector<shared_ptr<Function>>, unordered_map<shared_ptr<op::Parameter>, shared_ptr<op::Result>>>
runtime::hybrid::split_function_by_placement_size(const shared_ptr<Function>& f)
runtime::hybrid::split_function_by_placement(const shared_ptr<Function>& f)
{
// Split functions to clusters of nodes that can be computed together
vector<unordered_set<shared_ptr<Node>>> clusters = group_function_nodes_to_clusters_size(f);
vector<unordered_set<shared_ptr<Node>>> clusters = ::group_function_nodes_to_clusters(f);
// Map from (intermediate) parameter to result node, for guiding data copy among devices
unordered_map<shared_ptr<op::Parameter>, shared_ptr<op::Result>> map_parameter_to_result;
......@@ -208,8 +216,10 @@ pair<vector<shared_ptr<Function>>, unordered_map<shared_ptr<op::Parameter>, shar
if (src_cluster != dst_cluster)
{
// Split src_node and dst_node
pair<shared_ptr<op::Result>, shared_ptr<op::Parameter>> res_par_pair =
insert_result_parameter_split_size(src_node, dst_node);
map<shared_ptr<op::Result>, shared_ptr<op::Parameter>> res_par_pair_map =
::insert_result_parameter_split(src_node, dst_node);
for (const auto& res_par_pair : res_par_pair_map)
{
shared_ptr<op::Result> res_node = res_par_pair.first;
shared_ptr<op::Parameter> par_node = res_par_pair.second;
map_parameter_to_result[par_node] = res_node;
......@@ -220,6 +230,7 @@ pair<vector<shared_ptr<Function>>, unordered_map<shared_ptr<op::Parameter>, shar
}
}
}
}
// Create functions from clusters
vector<shared_ptr<Function>> sub_functions;
......@@ -240,15 +251,19 @@ pair<vector<shared_ptr<Function>>, unordered_map<shared_ptr<op::Parameter>, shar
}
auto sub_function = make_shared<Function>(res_vector, par_vector);
sub_functions.push_back(sub_function);
#ifdef HYBRID_DEBUG
ngraph::pass::Manager pass_manager;
pass_manager.register_pass<ngraph::pass::VisualizeTree>("subgraph_" + to_string(index++) +
".png");
pass_manager.run_passes(sub_function);
#endif
}
return make_pair(sub_functions, map_parameter_to_result);
}
// Suffix *_size as a part of function name is temporary, this suffix
// will be removed when the backends move to the latest Hybrid backend
// Assert that all nodes in the function are colocated and return that placement
size_t runtime::hybrid::get_colocated_function_placement_size(shared_ptr<Function> func)
size_t runtime::hybrid::get_colocated_function_placement(shared_ptr<Function> func)
{
auto ops = func->get_ops();
......@@ -259,7 +274,7 @@ size_t runtime::hybrid::get_colocated_function_placement_size(shared_ptr<Functio
size_t node_placement = op->get_placement_index();
if (node_placement == Node::placement_invalid)
{
throw ngraph_error("Node should have a device placement");
throw ngraph_error("Node " + op->get_name() + " should have a device placement");
}
if (function_placement != node_placement)
{
......
......@@ -34,10 +34,10 @@ namespace ngraph
std::pair<
std::vector<std::shared_ptr<Function>>,
std::unordered_map<std::shared_ptr<op::Parameter>, std::shared_ptr<op::Result>>>
split_function_by_placement_size(const std::shared_ptr<Function>& f);
split_function_by_placement(const std::shared_ptr<Function>& f);
// Assert that all nodes in the function are colocated and return that placement
size_t get_colocated_function_placement_size(std::shared_ptr<Function> func);
size_t get_colocated_function_placement(std::shared_ptr<Function> func);
}
}
}
......@@ -24,7 +24,7 @@ using namespace ngraph;
using namespace std;
runtime::hybrid::pass::AssignPlacement::AssignPlacement(
vector<shared_ptr<runtime::Backend>> placement_backends)
const vector<shared_ptr<runtime::Backend>>& placement_backends)
: m_placement_backends(placement_backends)
{
}
......
......@@ -39,8 +39,8 @@ namespace ngraph
class ngraph::runtime::hybrid::pass::AssignPlacement : public ngraph::pass::NodePass
{
public:
// TODO: make policy a class
AssignPlacement(std::vector<std::shared_ptr<ngraph::runtime::Backend>> placement_backends);
AssignPlacement(
const std::vector<std::shared_ptr<ngraph::runtime::Backend>>& placement_backends);
private:
bool run_on_node(std::shared_ptr<Node> node) override;
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/hybrid/pass/fix_get_output_element.hpp"
#include "ngraph/log.hpp"
#include "ngraph/node.hpp"
#include "ngraph/placement.hpp"
#include "ngraph/runtime/backend.hpp"
using namespace ngraph;
using namespace std;
runtime::hybrid::pass::FixGetOutputElement::FixGetOutputElement()
{
}
bool runtime::hybrid::pass::FixGetOutputElement::run_on_node(shared_ptr<Node> node)
{
if (node->description() == "GetOutputElement")
{
auto parent = node->get_arguments().at(0);
node->set_placement_index(parent->get_placement_index());
}
return false;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <exception>
#include <functional>
#include <sstream>
#include "ngraph/pass/pass.hpp"
namespace ngraph
{
namespace runtime
{
namespace hybrid
{
namespace pass
{
class FixGetOutputElement;
}
}
}
}
class ngraph::runtime::hybrid::pass::FixGetOutputElement : public ngraph::pass::NodePass
{
public:
FixGetOutputElement();
private:
bool run_on_node(std::shared_ptr<Node> node) override;
};
abc
abc_int64
abs
acos
add
add_overload
aliased_output
argmax_3D_axis_0
argmax_3D_axis_1
argmax_3D_axis_2
argmax_4D_axis_3
argmax_trivial
argmin_4D_axis_3
argmin_trivial
asin
atan
avg_pool_1d_1channel_1image
avg_pool_1d_1channel_2image
avg_pool_1d_2channel_2image
avg_pool_2d_1channel_1image_padded_do_not_include_in_computation
avg_pool_2d_1channel_1image_padded_include_in_computation
avg_pool_2d_1channel_1image_strided
avg_pool_2d_2channel_2image
avg_pool_2d_2channel_2image_3x3_padded_do_not_include_in_computation
avg_pool_2d_2channel_2image_3x3_padded_include_in_computation
avg_pool_2d_2channel_2image_3x3_strided_padded_do_not_include_in_computation
avg_pool_2d_2channel_2image_3x3_strided_padded_include_in_computation
avg_pool_2d_2channel_2image_3x3_strided_uneven_padded_do_not_include_in_computation
avg_pool_2d_2channel_2image_3x3_strided_uneven_padded_include_in_computation
avg_pool_2d_2channel_2image_padded_do_not_include_in_computation
avg_pool_2d_2channel_2image_padded_include_in_computation
avg_pool_2d_2channel_2image_padded_only_above_do_not_include_in_computation
avg_pool_2d_2channel_2image_padded_only_above_include_in_computation
avg_pool_2d_2channel_2image_padded_only_below_do_not_include_in_computation
avg_pool_2d_2channel_2image_padded_only_below_include_in_computation
avg_pool_3d_strided_uneven_padded_do_not_include_in_computation
avg_pool_3d_uneven_strided_padded_include_in_computation
backwards_abc
backwards_abs
backwards_acos
backwards_add
backwards_add_nested
backwards_asin
backwards_atan
backwards_avgpool_n1_c1_hw2x2
backwards_avgpool_n1_c1_hw4x4
backwards_avgpool_n2_c2_hw2x2_win_2x2_str_1x1_padding_numeric
backwards_avgpool_n2_c2_hw4x4
backwards_avgpool_n2_c2_hw4x4_numeric
backwards_avgpool_n2_c2_hw4x4_win_2x2_str_1x1_numeric
backwards_batch_norm_three_outputs
backwards_broadcast0
backwards_broadcast1
backwards_ceiling
backwards_concat_axis_0
backwards_concat_axis_1
backwards_concat_vector
backwards_cos
backwards_cosh
backwards_divide
backwards_dot_scalar_scalar
backwards_dot_scalar_tensor
backwards_dot_tensor_scalar
backwards_dot_tensor_vector
backwards_dot_tensor2_tensor2
backwards_dot_tensor3_tensor3
backwards_dot_vector_vector
backwards_exp
backwards_floor
backwards_log
backwards_maximum
backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_maxpool_n2_c1_hw5_3x3_str2_max_pad1x2_2x3
backwards_maxpool_n2c1h5w5_kh3kw3_sh2sw2
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_maxpool_n4c1h4w4_kh2kw2_sh1sw1
backwards_minimum
backwards_multiply
backwards_negative
backwards_parameter
backwards_power
backwards_relu
backwards_replace_slice
backwards_reshape
backwards_reverse_3d_02
backwards_reverse_sequence_n3_c2_h3
backwards_reverse_sequence_n4d2c3h2w2
backwards_select
backwards_select_nested
backwards_sigmoid
backwards_sign
backwards_sin
backwards_sinh
backwards_slice
backwards_softmax_3d
backwards_softmax_all
backwards_softmax_axis
backwards_softmax_underflow
backwards_subtract
backwards_sum_m2s
backwards_sum_m2v_0
backwards_sum_m2v_1
backwards_sum_v2s
backwards_tan
backwards_tanh
batch_norm_one_output
batch_norm_three_outputs
batchnorm_bprop_n4c3h2w2
batchnorm_fprop_b1c2h2w2
batchnorm_fprop_b2c2h2w1
batchnorm_fprop_globalstats_b2c2w2h1
batchnorm_fprop_inference_b2c2h2w1
broadcast_algo_3d_backward
broadcast_algo_3d_stride_1
broadcast_algo_3d_stride_2
broadcast_algo_matrix_backward_4
broadcast_algo_matrix_stride_1
broadcast_algo_matrix_stride_2
broadcast_algo_matrix_stride_3
broadcast_algo_scalar
broadcast_algo_vector_backward_2
broadcast_algo_vector_backward_3
broadcast_algo_vector_backward_4
broadcast_algo_vector_forward_2
broadcast_algo_vector_forward_3
broadcast_algo_vector_forward_4
broadcast_algo_vector_middle
broadcast_matrix_0
broadcast_matrix_1
broadcast_matrix_2
broadcast_scalar_matrix
broadcast_scalar_tensor
broadcast_scalar_to_matrix_int32
broadcast_scalar_to_matrix_int64
broadcast_scalar_vector
broadcast_to_non_existent_axis
broadcast_trivial
broadcast_vector_colwise
broadcast_vector_rowwise
broadcast_vector_rowwise_int64
broadcast_vector_rowwise_reversed
ceiling
computation_reuse
concat_2d_tensor
concat_4d_tensor
concat_5d
concat_matrix_colwise
concat_matrix_int64
concat_matrix_rowwise
concat_vector
concat_zero_length_1d_last
concat_zero_length_1d_middle
concat_zero_length_4d_middle
constant_broadcast
constant_equality_bool
constant_multi_use
convert_float32_bool
convert_int32_bool
convert_int32_float32
convert_uint16_float32
convolution_2d_1item
convolution_2d_1item_1o1i_data_dilated
convolution_2d_1item_2o1i_data_dilated
convolution_2d_1item_2o2i_data_dilated
convolution_2d_1item_5o3i_data_dilated
convolution_2d_1item_padded_1_1x1_1
convolution_2d_1item_padded_2_3x4_5
convolution_2d_2item_5o3i_data_dilated
convolution_2d_2items
convolution_2d_2items_dilated
convolution_2d_2items_dilated_padded
convolution_2d_2items_strided
convolution_2d_2items_strided_padded
convolution_2d_2items_strided_padded_same
convolution_2d_8item_large_5o3i_data_dilated
convolution_2d_8item_large_5o3i_uneven_filter_data_dilated
convolution_2d_8item_large_5o3i_uneven_filter_uneven_data_dilation_data_dilated
convolution_3d_1item_large_5o3i_padded_uneven_filter_uneven_data_dilation_data_dilated
convolution_3d_2item_large_5o3i_padded_strided_uneven_filter_uneven_data_dilation_data_dilated
convolution_3d_2item_large_5o3i_padded_strided_uneven_filter_uneven_data_dilation_filter_dilated_data_dilated
convolution_3d_2item_large_5o3i_uneven_filter_uneven_data_dilation_data_dilated
convolution_3d_2items
convolution_4d_2items
convolution_4d_4items
convolution_4d_4items_dilated
convolution_4d_4items_padded_neg
convolution_4d_4items_strided
convolution_4d_4items_strided_dilated
convolution_4d_4items_strided_dilated_padded
convolution_4d_4items_strided_dilated_padded_neg
convolution_4d_4items_strided_dilated_padded_same
convolution_outlining
cos
cosh
dequantize
dequantize_axes
dequantize_int8
divide
divide_adjoint_stability
divide_by_zero_float32
divide_by_zero_int32
divide_overload
dot_0_0
dot_2x0_0
dot_3d_multi_axis
dot_3d_one_axis_arbitrary
dot_4d_5d_multi_axis
dot_4d_5d_multi_axis_more
dot_matrix_0x2_2x0
dot_matrix_2x0_0x2
dot_matrix_3x2_2x0
dot_matrix_vector
dot_matrix_vector_4_3
dot_matrix_vector_int64
dot_scalar_0x2
dot_scalar_scalar
dot_scalar_tensor_arg0
dot_scalar_tensor_arg1
dot1d
dot2d
dot3d_2d
dot3d_3d
equal
exp
floor
function_call
function_name
fuse_max_with_constant_zero_input_as_relu
greater
greatereq
generate_mask
kahan_sum_3d_to_vector
kahan_sum_to_scalar
less
lesseq
lesseq_bool
log
logical_and
logical_or
lrn
max_3d_eliminate_zero_dim
max_3d_to_matrix_least_sig
max_3d_to_matrix_most_sig
max_3d_to_scalar
max_3d_to_vector
max_matrix_cols_zero
max_matrix_columns
max_matrix_rows
max_matrix_rows_zero
max_matrix_to_scalar_zero_by_zero
max_pool_1d_1channel_1image
max_pool_1d_1channel_2image
max_pool_1d_2channel_2image
max_pool_2d_1channel_1image_overpadded
max_pool_2d_1channel_1image_padded
max_pool_2d_1channel_1image_padded_negative_values
max_pool_2d_1channel_1image_strided
max_pool_2d_2channel_2image
max_pool_2d_2channel_2image_asym_pad
max_pool_3d
max_to_scalar
max_trivial
max_trivial_5d
max_vector_zero
maximum
maximum_int32
maximum_int64
min_3d_eliminate_zero_dim
min_3d_to_matrix_least_sig
min_3d_to_matrix_most_sig
min_3d_to_scalar
min_3d_to_vector
min_matrix_cols_zero
min_matrix_columns
min_matrix_rows
min_matrix_rows_zero
min_matrix_to_scalar_zero_by_zero
min_to_scalar
min_trivial
min_trivial_5d
min_vector_zero
minimum
minimum_int32
minimum_int64
multiple_backends
multiple_result
multiply
multiply_overload
negative
node_name
not
notequal
numeric_double_inf
numeric_double_nan
numeric_float_inf
numeric_float_nan
one_hot_matrix_0
one_hot_scalar_0_in_3
one_hot_scalar_1_in_3
one_hot_scalar_2_in_3
one_hot_scalar_fp_nonint_in_3
one_hot_scalar_oob_in_3
one_hot_vector_0
one_hot_vector_1
one_hot_vector_1_barely_oob
one_hot_vector_1_far_oob
one_hot_vector_1_fp
one_hot_vector_1_fp_nonint
pad_2channel_2image_asym
pad_exterior_1d
pad_exterior_2d_0x0
pad_exterior_2d_0x3
pad_exterior_2d_3x0
pad_exterior_4d_1x2x2x2
pad_interior_1d
pad_interior_exterior_1d
pad_interior_exterior_2d
pad_interior_exterior_4d_2x0x3x2
parameter_as_output
power
product_3d_eliminate_zero_dim
product_3d_to_matrix_least_sig
product_3d_to_matrix_most_sig
product_3d_to_scalar
product_3d_to_vector
product_matrix_cols_zero
product_matrix_columns
product_matrix_rows
product_matrix_rows_zero
product_matrix_to_scalar_zero_by_zero
product_to_scalar
product_trivial
product_trivial_5d
product_vector_zero
quantize
quantize_axes
quantize_clamp
quantize_int8
reduce_3d_to_vector
reduce_matrix_cols_zero
reduce_matrix_columns
reduce_matrix_rows
reduce_matrix_rows_zero
reduce_matrix_to_scalar_zero_by_zero
reduce_to_scalar
reduce_trivial
reduce_vector_zero
reduce_window_emulating_max_pool_1d_1channel_1image
reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image
reduce_window_emulating_max_pool_2d_1channel_1image_strided
reduce_window_emulating_max_pool_2d_2channel_2image
relu_2Dbackprop
relu_2Dfprop
relu_4Dbackprop
relu_4Dfprop
replace_slice_3d
replace_slice_3d_strided
replace_slice_3d_strided_different_strides
replace_slice_matrix
replace_slice_matrix_inplace
replace_slice_scalar
replace_slice_vector
reshape_3d_transpose_021
reshape_3d_transpose_102
reshape_3d_transpose_120
reshape_3d_transpose_201
reshape_3d_transpose_210
reshape_4d_no_transpose
reshape_4d_transpose
reshape_6d
reshape_m2m_dim_change_transpose
reshape_m2m_same
reshape_m2m_transpose
reshape_s2t
reshape_s2t1
reshape_t2s_012
reshape_t2s_120
reshape_t2v_012
reshape_transposed_shape_change
reshape_v2m_col
reshape_v2m_row
reshape_v2t_middle
reverse_0d
reverse_1d_0
reverse_1d_nochange
reverse_2d_0
reverse_2d_01
reverse_2d_1
reverse_2d_nochange
reverse_3d_0
reverse_3d_01
reverse_3d_012
reverse_3d_02
reverse_3d_1
reverse_3d_12
reverse_3d_2
reverse_3d_nochange
reverse_sequence_n2c3h4w2
reverse_sequence_n4c3h2w2
reverse_sequence_n4d2c3h2w2
scalar_constant_float32
scalar_constant_int64
select
select_and_scatter_3d_without_overlap
select_and_scatter_with_overlap
select_and_scatter_without_overlap
sigmoid_bprop_n1c1h4
sigmoid_n1c1h2w2
sigmoid_n1c1h4
sign
sin
sinh
slice_3d
slice_3d_strided
slice_3d_strided_different_strides
slice_matrix
slice_matrix_strided
slice_scalar
slice_vector
softmax_all
softmax_axis
softmax_axis_2
softmax_axis_3d
softmax_axis_3d_trivial
softmax_underflow
sqrt
subtract
subtract_overload
sum_3d_eliminate_zero_dim
sum_3d_to_matrix_least_sig
sum_3d_to_matrix_most_sig
sum_3d_to_scalar
sum_3d_to_vector
sum_5d_to_scalar
sum_large_1d_to_scalar
sum_matrix_6d
sum_matrix_cols_zero
sum_matrix_columns
sum_matrix_rows
sum_matrix_rows_zero
sum_matrix_to_scalar_zero_by_zero
sum_to_scalar
sum_trivial
sum_trivial_5d
sum_vector_zero
tan
tanh
tensor_2constant
tensor_constant
tensor_constant_float32
tensor_constant_int64
tensor_constant_with_op
tensorview_custom_mem
topk_1d_max_all
topk_1d_max_one
topk_1d_max_partial
topk_1d_min_all
topk_1d_min_one
topk_1d_min_partial
topk_2d_max_all
topk_2d_max_one
topk_2d_max_partial
topk_2d_min_all
topk_2d_min_one
topk_2d_min_partial
topk_3d_max_all
topk_3d_max_one
topk_3d_max_partial
topk_3d_min_all
topk_3d_min_one
topk_3d_min_partial
unhandled_op
validate_call_input_count
validate_call_input_shape
validate_call_input_type
validate_call_output_count
validate_call_output_shape
validate_call_output_type
zero_sized_abs
zero_sized_acos
zero_sized_add
zero_sized_asin
zero_sized_atan
zero_sized_ceiling
zero_sized_cos
zero_sized_cosh
zero_sized_divide
zero_sized_eq
zero_sized_exp
zero_sized_floor
zero_sized_greater
zero_sized_greatereq
zero_sized_less
zero_sized_lesseq
zero_sized_log
zero_sized_maximum
zero_sized_minimum
zero_sized_multiply
zero_sized_negative
zero_sized_not
zero_sized_not_equal
zero_sized_power
zero_sized_sign
zero_sized_sin
zero_sized_sinh
zero_sized_sqrt
zero_sized_subtract
zero_sized_tan
zero_sized_tanh
shape_of_scalar
shape_of_vector
shape_of_matrix
shape_of_5d
......@@ -390,14 +390,15 @@ shared_ptr<runtime::Tensor>
runtime::intelgpu::IntelGPUBackend::create_tensor(const element::Type& element_type,
const Shape& shape)
{
return make_shared<runtime::intelgpu::IntelGPUTensorView>(element_type, shape, *ocl_engine);
return make_shared<runtime::intelgpu::IntelGPUTensorView>(
element_type, shape, *ocl_engine, nullptr, this);
}
shared_ptr<runtime::Tensor> runtime::intelgpu::IntelGPUBackend::create_tensor(
const element::Type& element_type, const Shape& shape, void* memory_pointer)
{
return make_shared<runtime::intelgpu::IntelGPUTensorView>(
element_type, shape, *ocl_engine, memory_pointer);
element_type, shape, *ocl_engine, memory_pointer, this);
}
runtime::Handle runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
......
......@@ -28,8 +28,9 @@ using namespace std;
runtime::intelgpu::IntelGPUTensorView::IntelGPUTensorView(const element::Type& element_type,
const Shape& shape,
const cldnn::engine& backend_engine,
void* memory_pointer)
: runtime::Tensor(make_shared<descriptor::Tensor>(element_type, shape, "external"))
void* memory_pointer,
const runtime::Backend* parent)
: runtime::Tensor(make_shared<descriptor::Tensor>(element_type, shape, "external"), parent)
{
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(element_type, shape);
......
......@@ -38,7 +38,8 @@ public:
IntelGPUTensorView(const element::Type& element_type,
const Shape& shape,
const cldnn::engine& backend_engine,
void* memory_pointer = nullptr);
void* memory_pointer,
const runtime::Backend* parent);
/// \brief Write bytes directly into the tensor
/// \param p Pointer to source of data
......
......@@ -43,16 +43,25 @@ extern "C" runtime::Backend* new_backend(const char* configuration_string)
return new runtime::interpreter::INTBackend();
}
runtime::interpreter::INTBackend::INTBackend()
{
}
runtime::interpreter::INTBackend::INTBackend(const vector<string>& unsupported_op_name_list)
: m_unsupported_op_name_list{unsupported_op_name_list.begin(), unsupported_op_name_list.end()}
{
}
shared_ptr<runtime::Tensor>
runtime::interpreter::INTBackend::create_tensor(const element::Type& type, const Shape& shape)
{
return make_shared<runtime::HostTensor>(type, shape, "external");
return make_shared<runtime::HostTensor>(type, shape, this);
}
shared_ptr<runtime::Tensor> runtime::interpreter::INTBackend::create_tensor(
const element::Type& type, const Shape& shape, void* memory_pointer)
{
return make_shared<runtime::HostTensor>(type, shape, memory_pointer, "external");
return make_shared<runtime::HostTensor>(type, shape, memory_pointer, this);
}
runtime::Handle runtime::interpreter::INTBackend::compile(shared_ptr<Function> function)
......@@ -336,3 +345,8 @@ void runtime::interpreter::INTBackend::perform_nan_check(
arg_number++;
}
}
bool runtime::interpreter::INTBackend::is_supported(const Node& node) const
{
return m_unsupported_op_name_list.find(node.description()) == m_unsupported_op_name_list.end();
}
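A hedged illustration of the new INTBackend deny-list constructor added above: ops named in the list are reported as unsupported by is_supported(), which a hybrid parent backend can use to place them on another device. The helper name and the choice of "Not" (mirroring get_excludes() in the GPUH plugin earlier in this diff) are assumptions for the example.

#include <memory>
#include <string>
#include <vector>
#include "ngraph/runtime/interpreter/int_backend.hpp"

using namespace ngraph;

// Hypothetical helper: an interpreter that declines "Not" nodes, so a hybrid
// backend would have to place them on some other backend in its list.
std::shared_ptr<runtime::interpreter::INTBackend> make_restricted_interpreter()
{
    return std::make_shared<runtime::interpreter::INTBackend>(
        std::vector<std::string>{"Not"});
}
// is_supported(node) then returns false whenever node.description() == "Not".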
......@@ -16,6 +16,7 @@
#pragma once
#include <initializer_list>
#include <memory>
#include <sstream>
#include <string>
......@@ -156,6 +157,12 @@ namespace ngraph
class ngraph::runtime::interpreter::INTBackend : public Backend
{
public:
INTBackend();
INTBackend(const std::vector<std::string>& unsupported_op_name_list);
INTBackend(const INTBackend&) = delete;
INTBackend(INTBackend&&) = delete;
INTBackend& operator=(const INTBackend&) = delete;
std::shared_ptr<Tensor>
create_tensor(const element::Type& type, const Shape& shape, void* memory_pointer) override;
......@@ -173,7 +180,8 @@ public:
std::vector<PerformanceCounter>
get_performance_data(std::shared_ptr<Function> func) const override;
bool is_supported(const Node& node) const override { return true; }
bool is_supported(const Node& node) const override;
private:
int get_alignment() const { return 64; }
class FunctionInstance
......@@ -190,6 +198,7 @@ private:
void* get_temporary_pointer(size_t offset) { return m_temporary_memory->get_ptr(offset); }
};
std::map<std::shared_ptr<Function>, FunctionInstance> m_function_map;
std::set<std::string> m_unsupported_op_name_list;
static void perform_nan_check(const std::vector<std::shared_ptr<HostTensor>>&,
const Node* op = nullptr);
......
......@@ -21,6 +21,7 @@
#include "ngraph/descriptor/layout/tensor_layout.hpp"
#include "ngraph/descriptor/tensor.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/strides.hpp"
#include "ngraph/type/element_type.hpp"
......@@ -37,9 +38,11 @@ namespace ngraph
class Tensor
{
protected:
Tensor(const std::shared_ptr<ngraph::descriptor::Tensor>& descriptor)
Tensor(const std::shared_ptr<ngraph::descriptor::Tensor>& descriptor,
const Backend* parent)
: m_descriptor(descriptor)
, m_stale(true)
, m_parent(parent)
{
}
......@@ -104,9 +107,11 @@ namespace ngraph
/// \param source The source tensor
virtual void copy_from(const ngraph::runtime::Tensor& source);
const Backend* get_parent() const { return m_parent; }
protected:
std::shared_ptr<ngraph::descriptor::Tensor> m_descriptor;
bool m_stale;
const Backend* m_parent;
};
using TensorViewPtrs = std::vector<std::shared_ptr<Tensor>>;
......
......@@ -225,10 +225,6 @@ if (NGRAPH_INTERPRETER_ENABLE)
target_link_libraries(unit-test PRIVATE interpreter_backend)
endif()
if (NGRAPH_HYBRID_ENABLE)
target_link_libraries(unit-test PRIVATE hybrid_backend)
endif()
if (NGRAPH_GPU_ENABLE)
target_link_libraries(unit-test PRIVATE gpu_backend)
endif()
......