Commit ef2e0118 authored by Robert Kimball

copy executable from bob/backend_api2

parent 122754c1
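In short: Backend::compile() now returns a runtime::Executable, and call/validate/performance queries move from the Backend onto that object. A minimal caller-side sketch of the migration (illustrative only; it assumes a registered "INTERPRETER" backend and tensors set up as usual):

```cpp
// Caller-side sketch of the migration this commit introduces (illustrative only).
// Assumes "INTERPRETER" is a registered backend and that f, outputs, and inputs
// have been created as usual.
#include <memory>
#include <vector>
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor.hpp"

void run_once(std::shared_ptr<ngraph::Function> f,
              const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& outputs,
              const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& inputs)
{
    auto backend = ngraph::runtime::Backend::create("INTERPRETER");

    // Before: backend->enable_performance_data(f, true);
    //         auto handle = backend->compile(f);          // Handle == shared_ptr<Function>
    //         backend->call_with_validate(handle, outputs, inputs);
    //         auto perf = backend->get_performance_data(f);

    // After: compile() hands back an Executable that owns the compiled state.
    std::shared_ptr<ngraph::runtime::Executable> exec =
        backend->compile(f, /*enable_performance_data=*/true);
    exec->call_with_validate(outputs, inputs);
    auto perf = exec->get_performance_data();
    (void)perf;
}
```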
......@@ -28,4 +28,5 @@ else:
sys.setdlopenflags(flags)
from _pyngraph.runtime import Backend
from _pyngraph.runtime import Executable
from _pyngraph.runtime import Tensor
......@@ -20,7 +20,7 @@ from typing import List, Union
import numpy as np
from ngraph.impl import Function, Node, Shape, serialize, util
from ngraph.impl.runtime import Backend, Tensor
from ngraph.impl.runtime import Backend, Executable, Tensor
from ngraph.utils.types import get_dtype, NumericData
from ngraph.exceptions import UserInputError
......@@ -93,7 +93,7 @@ class Computation(object):
value = np.array(value)
Computation._write_ndarray_to_tensor_view(value, tensor_view)
self.runtime.backend.call(self.handle, self.result_views, self.tensor_views)
self.handle.call(self.result_views, self.tensor_views)
results = []
for result_view in self.result_views:
......
......@@ -35,23 +35,7 @@ void regclass_pyngraph_runtime_Backend(py::module m)
const ngraph::element::Type&, const ngraph::Shape&)) &
ngraph::runtime::Backend::create_tensor);
backend.def("compile",
(std::shared_ptr<ngraph::Function>(ngraph::runtime::Backend::*)(
(std::unique_ptr<ngraph::runtime::Executable>(ngraph::runtime::Backend::*)(
std::shared_ptr<ngraph::Function>)) &
ngraph::runtime::Backend::compile);
backend.def("call",
(bool (ngraph::runtime::Backend::*)(
std::shared_ptr<ngraph::Function>,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>&,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>&)) &
ngraph::runtime::Backend::call);
backend.def("remove_compiled_function",
(void (ngraph::runtime::Backend::*)(std::shared_ptr<ngraph::Function>)) &
ngraph::runtime::Backend::remove_compiled_function);
backend.def("enable_performance_data",
(void (ngraph::runtime::Backend::*)(std::shared_ptr<ngraph::Function>, bool)) &
ngraph::runtime::Backend::enable_performance_data);
backend.def("get_performance_data",
(std::vector<ngraph::runtime::PerformanceCounter>(ngraph::runtime::Backend::*)(
std::shared_ptr<ngraph::Function>)) &
ngraph::runtime::Backend::get_performance_data);
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "pyngraph/runtime/executable.hpp"
namespace py = pybind11;
void regclass_pyngraph_runtime_Executable(py::module m)
{
py::class_<ngraph::runtime::Executable, std::unique_ptr<ngraph::runtime::Executable>>
executable(m, "Executable");
executable.doc() = "ngraph.impl.runtime.Executable wraps ngraph::runtime::Executable";
executable.def("call",
(bool (ngraph::runtime::Executable::*)(
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>&,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>&)) &
ngraph::runtime::Executable::call);
executable.def(
"get_performance_data",
(std::vector<ngraph::runtime::PerformanceCounter>(ngraph::runtime::Executable::*)()) &
ngraph::runtime::Executable::get_performance_data);
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <pybind11/pybind11.h>
namespace py = pybind11;
void regclass_pyngraph_runtime_Executable(py::module m);
......@@ -25,4 +25,5 @@ void regmodule_pyngraph_runtime(py::module m)
m.def_submodule("runtime", "Package ngraph.impl.runtime wraps ngraph::runtime");
regclass_pyngraph_runtime_Tensor(m_runtime);
regclass_pyngraph_runtime_Backend(m_runtime);
regclass_pyngraph_runtime_Executable(m_runtime);
}
......@@ -228,6 +228,7 @@ sources = [
'pyngraph/ops/softmax.cpp',
'pyngraph/ops/result.cpp',
'pyngraph/runtime/backend.cpp',
'pyngraph/runtime/executable.cpp',
'pyngraph/runtime/regmodule_pyngraph_runtime.cpp',
'pyngraph/runtime/tensor.cpp',
'pyngraph/passes/manager.cpp',
......
This diff is collapsed.
......@@ -139,8 +139,8 @@ set (SRC
pass/memory_visualize.cpp
pass/nop_elimination.cpp
pass/pass.cpp
pass/pass_config.cpp
pass/prefix_reshape_elimination.cpp
pass/pass_config.cpp
pass/prefix_reshape_elimination.cpp
pass/propagate_cacheability.cpp
pass/reshape_elimination.cpp
pass/reshape_sinking.cpp
......
......@@ -39,78 +39,123 @@ vector<string> runtime::Backend::get_registered_devices()
return BackendManager::get_registered_backends();
}
void runtime::Backend::remove_compiled_function(shared_ptr<Function> func)
bool runtime::Backend::is_supported(const Node& node) const
{
// The default behavior is that a backend does not support any ops. If this is not the case
// then override this method and enhance.
return false;
}
vector<ngraph::runtime::PerformanceCounter>
runtime::Backend::get_performance_data(shared_ptr<Function> func) const
runtime::Executable::Executable()
{
return vector<PerformanceCounter>();
}
void runtime::Backend::validate(shared_ptr<const Function> function,
const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
runtime::Executable::~Executable()
{
const ParameterVector& input_parameters = function->get_parameters();
if (input_parameters.size() != inputs.size())
}
bool runtime::Executable::call_with_validate(const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
{
validate(outputs, inputs);
return call(outputs, inputs);
}
void runtime::Executable::validate(const vector<std::shared_ptr<runtime::Tensor>>& outputs,
const vector<std::shared_ptr<runtime::Tensor>>& inputs)
{
const ParameterVector& parameters = get_parameters();
const ResultVector& results = get_results();
if (parameters.size() != inputs.size())
{
stringstream ss;
ss << "Call input count " << inputs.size() << " does not match Function's Parameter count "
<< input_parameters.size();
<< parameters.size();
throw runtime_error(ss.str());
}
if (function->get_output_size() != outputs.size())
if (results.size() != outputs.size())
{
stringstream ss;
ss << "Call output count " << outputs.size() << " does not match Function's Result count "
<< function->get_output_size();
<< results.size();
throw runtime_error(ss.str());
}
for (size_t i = 0; i < input_parameters.size(); i++)
for (size_t i = 0; i < parameters.size(); i++)
{
if (input_parameters[i]->get_element_type() != inputs[i]->get_element_type())
if (parameters[i]->get_element_type() != inputs[i]->get_element_type())
{
stringstream ss;
ss << "Input " << i << " type '" << inputs[i]->get_element_type()
<< "' does not match Parameter type '" << input_parameters[i]->get_element_type()
<< "'";
<< "' does not match Parameter type '" << parameters[i]->get_element_type() << "'";
throw runtime_error(ss.str());
}
if (input_parameters[i]->get_shape() != inputs[i]->get_shape())
if (parameters[i]->get_shape() != inputs[i]->get_shape())
{
stringstream ss;
ss << "Input " << i << " shape {" << join(inputs[i]->get_shape())
<< "} does not match Parameter shape {" << join(input_parameters[i]->get_shape())
<< "}";
<< "} does not match Parameter shape {" << join(parameters[i]->get_shape()) << "}";
throw runtime_error(ss.str());
}
}
for (size_t i = 0; i < function->get_output_size(); i++)
for (size_t i = 0; i < results.size(); i++)
{
if (function->get_output_element_type(i) != outputs[i]->get_element_type())
if (results[i]->get_element_type() != outputs[i]->get_element_type())
{
stringstream ss;
ss << "Output " << i << " type '" << outputs[i]->get_element_type()
<< "' does not match Result type '" << function->get_output_element_type(i) << "'";
<< "' does not match Result type '" << results[i]->get_element_type() << "'";
throw runtime_error(ss.str());
}
if (function->get_output_shape(i) != outputs[i]->get_shape())
if (results[i]->get_shape() != outputs[i]->get_shape())
{
stringstream ss;
ss << "Output " << i << " shape {" << join(outputs[i]->get_shape())
<< "} does not match Result shape {" << join(function->get_output_shape(i)) << "}";
<< "} does not match Result shape {" << join(results[i]->get_shape()) << "}";
throw runtime_error(ss.str());
}
}
}
bool runtime::Backend::is_supported(const Node& node) const
const ngraph::ParameterVector& runtime::Executable::get_parameters() const
{
return m_parameters;
}
const ngraph::ResultVector& runtime::Executable::get_results() const
{
return m_results;
}
void runtime::Executable::set_parameters_and_results(const Function& func)
{
m_parameters = func.get_parameters();
m_results = func.get_results();
}
vector<runtime::PerformanceCounter> runtime::Executable::get_performance_data() const
{
return vector<PerformanceCounter>();
}
bool runtime::Backend::is_supported_property(const Property prop) const
{
// The default behavior is that a backend does not support any properties. If this is not
// the case then override this method and enhance.
return false;
}
bool runtime::Backend::call_with_validate(
std::shared_ptr<Executable> exec,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs)
{
return exec->call_with_validate(outputs, inputs);
}
bool runtime::Backend::call_with_validate(
const std::unique_ptr<Executable>& exec,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs)
{
return exec->call_with_validate(outputs, inputs);
}
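Validation now lives on the Executable and checks argument counts, element types, and shapes against the Parameters and Results recorded by set_parameters_and_results(); a mismatch throws std::runtime_error with one of the messages built above. A small sketch of the failure mode, assuming `exec` was compiled from a Function with exactly one Parameter:

```cpp
// Illustrative only: shows the error surfaced by Executable::validate() when the
// argument count is wrong. Assumes `exec` was compiled from a Function with one Parameter.
#include <iostream>
#include <memory>
#include <stdexcept>
#include <vector>
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor.hpp"

void call_with_bad_arity(const std::shared_ptr<ngraph::runtime::Executable>& exec,
                         const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& outputs)
{
    std::vector<std::shared_ptr<ngraph::runtime::Tensor>> no_inputs; // wrong count on purpose
    try
    {
        exec->call_with_validate(outputs, no_inputs);
    }
    catch (const std::runtime_error& e)
    {
        // e.g. "Call input count 0 does not match Function's Parameter count 1"
        std::cerr << e.what() << std::endl;
    }
}
```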
......@@ -30,7 +30,8 @@ namespace ngraph
class ExternalFunction;
class Tensor;
class Backend;
using Handle = std::shared_ptr<Function>;
class Executable;
using Handle = std::shared_ptr<Executable>;
}
}
......@@ -81,43 +82,8 @@ public:
/// \brief Compiles a Function.
/// \param func The function to compile
/// \returns a compiled Executable or nullptr on failure
virtual Handle compile(std::shared_ptr<Function> func) = 0;
/// \brief Executes a single iteration of a Function. If func is not compiled the call will
/// compile it.
/// \param func The function to execute
/// \returns true if iteration is successful, false otherwise
virtual bool call(std::shared_ptr<Function> func,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) = 0;
/// \brief Executes a single iteration of a Function. If func is not compiled the call will
/// compile it. Optionally validates the inputs and outputs against the function graph.
/// \param func The function to execute
/// \returns true if iteration is successful, false otherwise
bool call_with_validate(std::shared_ptr<Function> func,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs)
{
validate(func, outputs, inputs);
return call(func, outputs, inputs);
}
/// \brief Compiled functions may be cached. This function removes a compiled function
/// from the cache.
/// \param func The function to execute
virtual void remove_compiled_function(std::shared_ptr<Function> func);
/// \brief Enable the collection of per-op performance information on a specified Function.
/// Data collection is via the `get_performance_data` method.
/// \param func The function to collect performance data on.
/// \param enable Set to true to enable or false to disable data collection
virtual void enable_performance_data(std::shared_ptr<Function> func, bool enable) {}
/// \brief Collect performance information gathered on a Function.
/// \param func The function to get collected data.
/// \returns Vector of PerformanceCounter information.
virtual std::vector<PerformanceCounter>
get_performance_data(std::shared_ptr<Function> func) const;
virtual std::shared_ptr<Executable> compile(std::shared_ptr<Function> func,
bool enable_performance_data = false) = 0;
/// \brief Test if a backend is capable of supporting an op
/// \param node is the op to test.
......@@ -133,8 +99,62 @@ public:
/// \brief Test if a particular backend property is supported
/// \param prop is the feature to test.
/// \returns true if the property is supported, false otherwise.
virtual bool is_supported_property(const Property prop) const { return false; }
void validate(std::shared_ptr<const Function> func,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
virtual bool is_supported_property(const Property prop) const;
/// The following methods are temporary hacks to reduce the number of changes in this PR
/// They will be removed in a follow-on PR
bool call_with_validate(std::shared_ptr<Executable> handle,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs);
bool call_with_validate(const std::unique_ptr<Executable>& handle,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs);
};
class ngraph::runtime::Executable
{
public:
Executable();
virtual ~Executable();
/// \brief Executes a single iteration of a Function.
/// \param outputs vector of runtime::Tensor used as outputs
/// \param inputs vector of runtime::Tensor used as inputs
/// \returns true if iteration is successful, false otherwise
virtual bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) = 0;
/// \brief Executes a single iteration of a Function.
/// \param outputs vector of runtime::Tensor used as outputs
/// \param inputs vector of runtime::Tensor used as inputs
/// \returns true if iteration is successful, false otherwise
bool call_with_validate(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs);
/// \brief Collect performance information gathered on a Function.
/// \returns Vector of PerformanceCounter information.
virtual std::vector<PerformanceCounter> get_performance_data() const;
/// \brief Validates a Function.
/// \param outputs vector of runtime::Tensor used as outputs
/// \param inputs vector of runtime::Tensor used as inputs
void validate(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs);
/// \brief Query the input Parameters
/// \returns an ngraph::ParameterVector of all input parameters
const ngraph::ParameterVector& get_parameters() const;
/// \brief Query the output Results
/// \returns an ngraph::ResultVector of all output results
const ngraph::ResultVector& get_results() const;
protected:
/// \brief Called at the end of compile to set the values to be returned by get_parameters
/// and get_results
/// \param func The function with Results fully resolved.
void set_parameters_and_results(const Function& func);
private:
ngraph::ParameterVector m_parameters;
ngraph::ResultVector m_results;
};
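For backend implementers the contract is: compile() constructs and returns an Executable subclass, the subclass constructor does the compilation work and calls set_parameters_and_results(), and call() runs one iteration. A hedged sketch with a hypothetical name (MyExecutable is not part of this commit), modeled on the NOP and INTERPRETER executables later in this diff:

```cpp
// Hypothetical Executable subclass; the name MyExecutable is illustrative only.
#include <memory>
#include <vector>
#include "ngraph/function.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor.hpp"

class MyExecutable : public ngraph::runtime::Executable
{
public:
    MyExecutable(const std::shared_ptr<ngraph::Function>& func)
    {
        // Compilation work (pass manager, layout assignment, ...) would go here.
        set_parameters_and_results(*func); // publishes Parameters/Results for validate()
    }

    bool call(const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& outputs,
              const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& inputs) override
    {
        // Execute one iteration against the compiled state; return true on success.
        return true;
    }
};

// The matching Backend::compile() override then just wraps the Function:
//
//     std::shared_ptr<Executable> compile(std::shared_ptr<Function> func,
//                                         bool enable_performance_data = false) override
//     {
//         return std::make_shared<MyExecutable>(func);
//     }
```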
......@@ -62,14 +62,24 @@ static void node_modifiers(const Node& node, vector<string>& attributes)
}
}
runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> func)
shared_ptr<runtime::Executable>
runtime::hybrid::HybridBackend::compile(shared_ptr<Function> func,
bool enable_performance_collection)
{
if (m_function_map.find(func) == m_function_map.end())
{
// Clone function
FunctionInstance instance;
instance.m_function = clone_function(*func);
return make_shared<HybridExecutable>(
m_backend_list, func, enable_performance_collection, m_debug_enabled);
}
runtime::hybrid::HybridExecutable::HybridExecutable(
const std::vector<std::shared_ptr<runtime::Backend>>& backend_list,
const shared_ptr<Function>& func,
bool enable_performance_collection,
bool debug_enabled)
: m_function{func}
, m_backend_list{backend_list}
, m_debug_enabled{debug_enabled}
{
{
// Run placement pass
ngraph::pass::Manager pass_manager;
pass_manager.register_pass<runtime::hybrid::pass::AssignPlacement>(m_backend_list);
......@@ -81,16 +91,15 @@ runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> fun
{
pass_manager.register_pass<ngraph::pass::VisualizeTree>("graph.png", node_modifiers);
}
pass_manager.run_passes(instance.m_function);
pass_manager.run_passes(m_function);
// Split function to sub_functions
tie(instance.m_sub_functions, instance.m_map_parameter_to_result) =
runtime::hybrid::split_function_by_placement(instance.m_function);
m_function_map.insert({func, instance});
tie(m_sub_functions, m_map_parameter_to_result) =
runtime::hybrid::split_function_by_placement(m_function);
// Compile subfunctions in corresponding backends
size_t subfunction_number = 0;
for (shared_ptr<Function>& sub_function : instance.m_sub_functions)
for (shared_ptr<Function>& sub_function : m_sub_functions)
{
size_t placement = runtime::hybrid::get_colocated_function_placement(sub_function);
if (m_debug_enabled)
......@@ -102,7 +111,8 @@ runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> fun
pm.run_passes(sub_function);
}
auto backend = m_backend_list[placement];
backend->compile(sub_function);
shared_ptr<Executable> exec = backend->compile(sub_function);
m_executable_map[sub_function] = exec;
// Compile will replace nodes so we need to make one more pass through all
// ops to reset placement
......@@ -113,38 +123,29 @@ runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> fun
}
}
return func;
set_parameters_and_results(*func);
}
bool runtime::hybrid::HybridBackend::call(shared_ptr<Function> func,
const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
bool runtime::hybrid::HybridExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
{
// Get FunctionInstance
bool rc = true;
using node_map_t = unordered_map<shared_ptr<Node>, shared_ptr<runtime::Tensor>>;
auto fit = m_function_map.find(func);
if (fit == m_function_map.end())
{
throw runtime_error("compile() must be called before call().");
}
FunctionInstance& instance = fit->second;
// Parameter and result node in sub_function maps to one Tensor
node_map_t map_node_to_tensor;
for (size_t i = 0; i < inputs.size(); ++i)
{
map_node_to_tensor[instance.m_function->get_parameters()[i]] = inputs[i];
map_node_to_tensor[m_function->get_parameters()[i]] = inputs[i];
}
for (size_t i = 0; i < outputs.size(); ++i)
{
map_node_to_tensor[instance.m_function->get_results()[i]] = outputs[i];
map_node_to_tensor[m_function->get_results()[i]] = outputs[i];
}
// Call subfunctions
for (const shared_ptr<Function>& sub_function : instance.m_sub_functions)
for (const shared_ptr<Function>& sub_function : m_sub_functions)
{
// Init backend
size_t placement = runtime::hybrid::get_colocated_function_placement(sub_function);
......@@ -172,7 +173,7 @@ bool runtime::hybrid::HybridBackend::call(shared_ptr<Function> func,
else
{
// Handle temporary tensors that go between subgraphs
auto result_node = instance.m_map_parameter_to_result.at(parameter_node);
auto result_node = m_map_parameter_to_result.at(parameter_node);
auto result = map_node_to_tensor.at(result_node);
auto parameter = backend->create_tensor(parameter_node->get_element_type(),
parameter_node->get_shape());
......@@ -213,7 +214,8 @@ bool runtime::hybrid::HybridBackend::call(shared_ptr<Function> func,
}
// Call
backend->call(sub_function, results, parameters);
auto exec = m_executable_map[sub_function];
exec->call(results, parameters);
// Need to copy any results to the correct device
for (const auto& p : copy_back)
......@@ -229,7 +231,7 @@ bool runtime::hybrid::HybridBackend::is_supported(const Node& node) const
return true;
}
size_t runtime::hybrid::HybridBackend::get_placement(const runtime::Tensor* t)
size_t runtime::hybrid::HybridExecutable::get_placement(const runtime::Tensor* t)
{
size_t index = 0;
for (const shared_ptr<ngraph::runtime::Backend>& be : m_backend_list)
......
......@@ -30,6 +30,7 @@ namespace ngraph
namespace hybrid
{
class HybridBackend;
class HybridExecutable;
}
}
}
......@@ -48,29 +49,37 @@ public:
const ngraph::Shape& shape,
void* memory_pointer) override;
Handle compile(std::shared_ptr<ngraph::Function> func) override;
bool call(std::shared_ptr<ngraph::Function> func,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& inputs) override;
std::shared_ptr<Executable> compile(std::shared_ptr<ngraph::Function> func,
bool enable_performance_data = false) override;
bool is_supported(const ngraph::Node& node) const override;
void set_debug_enabled(bool flag) { m_debug_enabled = flag; }
private:
class FunctionInstance
{
public:
std::shared_ptr<ngraph::Function> m_function;
std::vector<std::shared_ptr<ngraph::Function>> m_sub_functions;
std::unordered_map<std::shared_ptr<ngraph::op::Parameter>,
std::shared_ptr<ngraph::op::Result>>
m_map_parameter_to_result;
};
std::vector<std::shared_ptr<runtime::Backend>> m_backend_list;
bool m_debug_enabled = false;
};
class ngraph::runtime::hybrid::HybridExecutable : public runtime::Executable
{
public:
HybridExecutable(const std::vector<std::shared_ptr<runtime::Backend>>& backend_list,
const std::shared_ptr<Function>& func,
bool enable_performance_collection = false,
bool debug_enabled = false);
bool call(const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& inputs) override;
private:
std::shared_ptr<ngraph::Function> m_function;
std::vector<std::shared_ptr<ngraph::Function>> m_sub_functions;
std::unordered_map<std::shared_ptr<ngraph::op::Parameter>, std::shared_ptr<ngraph::op::Result>>
m_map_parameter_to_result;
std::map<std::shared_ptr<ngraph::Function>, FunctionInstance> m_function_map;
std::vector<std::shared_ptr<runtime::Backend>> m_backend_list;
bool m_debug_enabled = false;
std::unordered_map<std::shared_ptr<Function>, std::shared_ptr<Executable>> m_executable_map;
size_t get_placement(const runtime::Tensor* t);
};
......@@ -64,12 +64,17 @@ shared_ptr<runtime::Tensor> runtime::interpreter::INTBackend::create_tensor(
return make_shared<runtime::HostTensor>(type, shape, memory_pointer, this);
}
runtime::Handle runtime::interpreter::INTBackend::compile(shared_ptr<Function> function)
shared_ptr<runtime::Executable>
runtime::interpreter::INTBackend::compile(shared_ptr<Function> function,
bool enable_performance_collection)
{
return make_shared<INTExecutable>(function, enable_performance_collection);
}
runtime::interpreter::INTExecutable::INTExecutable(const shared_ptr<Function>& function,
bool enable_performance_collection)
{
FunctionInstance& instance = m_function_map[function];
if (!instance.m_is_compiled)
{
instance.m_is_compiled = true;
pass::Manager pass_manager;
pass_manager.register_pass<pass::LikeReplacement>();
pass_manager.register_pass<pass::AssignLayout<DenseTensorLayout>>();
......@@ -78,32 +83,20 @@ runtime::Handle runtime::interpreter::INTBackend::compile(shared_ptr<Function> f
pass_manager.run_passes(function);
size_t memory_pool_size = function->get_temporary_pool_size();
instance.m_temporary_memory.reset(new AlignedBuffer(memory_pool_size, get_alignment()));
m_temporary_memory.reset(new AlignedBuffer(memory_pool_size, get_alignment()));
for (const shared_ptr<Node>& node : function->get_ordered_ops())
{
instance.m_wrapped_nodes.emplace_back(node);
m_wrapped_nodes.emplace_back(node);
}
}
return function;
set_parameters_and_results(*function);
}
bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
{
auto fit = m_function_map.find(function);
if (fit == m_function_map.end())
{
throw runtime_error("compile() must be called before call().");
}
FunctionInstance& instance = fit->second;
if (!instance.m_is_compiled)
{
throw runtime_error("compile() must be called before call().");
}
// convert inputs to HostTensor
vector<void*> func_inputs;
vector<shared_ptr<runtime::HostTensor>> htv_inputs;
......@@ -113,7 +106,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
func_inputs.push_back(static_cast<void*>(host_tensor->get_data_ptr()));
htv_inputs.push_back(host_tensor);
}
if (instance.m_nan_check_enabled)
if (m_nan_check_enabled)
{
perform_nan_check(htv_inputs);
}
......@@ -129,7 +122,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
// map function params -> HostTensor
unordered_map<descriptor::Tensor*, void*> tensor_map;
size_t input_count = 0;
for (auto param : function->get_parameters())
for (auto param : get_parameters())
{
for (size_t i = 0; i < param->get_output_size(); ++i)
{
......@@ -139,9 +132,9 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
}
// map function outputs -> HostTensor
for (size_t output_count = 0; output_count < function->get_output_size(); ++output_count)
for (size_t output_count = 0; output_count < get_results().size(); ++output_count)
{
auto output = function->get_output_op(output_count);
auto output = get_results()[output_count];
if (!dynamic_pointer_cast<op::Result>(output))
{
throw ngraph_error("One of function's outputs isn't op::Result");
......@@ -151,7 +144,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
}
// for each ordered op in the graph
for (const NodeWrapper& wrapped : instance.m_wrapped_nodes)
for (const NodeWrapper& wrapped : m_wrapped_nodes)
{
const Node* op = &wrapped.get_node();
auto type_id = wrapped.get_typeid();
......@@ -185,7 +178,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
if (it == tensor_map.end())
{
auto offset = op->get_output_tensor(i).get_pool_offset();
host_tensor = instance.get_temporary_pointer(offset);
host_tensor = get_temporary_pointer(offset);
tensor_map.insert({tensor, host_tensor});
}
else
......@@ -224,16 +217,16 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
}
#pragma GCC diagnostic pop
if (instance.m_performance_counters_enabled)
if (m_performance_counters_enabled)
{
instance.m_timer_map[op].start();
m_timer_map[op].start();
}
generate_calls(type, wrapped, op_outputs, op_inputs, instance);
if (instance.m_performance_counters_enabled)
generate_calls(type, wrapped, op_outputs, op_inputs);
if (m_performance_counters_enabled)
{
instance.m_timer_map[op].stop();
m_timer_map[op].stop();
}
if (instance.m_nan_check_enabled)
if (m_nan_check_enabled)
{
perform_nan_check(htv_outputs, op);
}
......@@ -242,26 +235,25 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
return true;
}
void runtime::interpreter::INTBackend::generate_calls(const element::Type& type,
const NodeWrapper& op,
const vector<void*>& outputs,
const vector<const void*>& inputs,
FunctionInstance& instance)
void runtime::interpreter::INTExecutable::generate_calls(const element::Type& type,
const NodeWrapper& op,
const vector<void*>& outputs,
const vector<const void*>& inputs)
{
stringstream ss;
switch (type.get_type_enum())
{
case element::Type_t::boolean: op_engine<char>(op, outputs, inputs, instance); break;
case element::Type_t::f32: op_engine<float>(op, outputs, inputs, instance); break;
case element::Type_t::f64: op_engine<double>(op, outputs, inputs, instance); break;
case element::Type_t::i8: op_engine<int8_t>(op, outputs, inputs, instance); break;
case element::Type_t::i16: op_engine<int16_t>(op, outputs, inputs, instance); break;
case element::Type_t::i32: op_engine<int32_t>(op, outputs, inputs, instance); break;
case element::Type_t::i64: op_engine<int64_t>(op, outputs, inputs, instance); break;
case element::Type_t::u8: op_engine<uint8_t>(op, outputs, inputs, instance); break;
case element::Type_t::u16: op_engine<uint16_t>(op, outputs, inputs, instance); break;
case element::Type_t::u32: op_engine<uint32_t>(op, outputs, inputs, instance); break;
case element::Type_t::u64: op_engine<uint64_t>(op, outputs, inputs, instance); break;
case element::Type_t::boolean: op_engine<char>(op, outputs, inputs); break;
case element::Type_t::f32: op_engine<float>(op, outputs, inputs); break;
case element::Type_t::f64: op_engine<double>(op, outputs, inputs); break;
case element::Type_t::i8: op_engine<int8_t>(op, outputs, inputs); break;
case element::Type_t::i16: op_engine<int16_t>(op, outputs, inputs); break;
case element::Type_t::i32: op_engine<int32_t>(op, outputs, inputs); break;
case element::Type_t::i64: op_engine<int64_t>(op, outputs, inputs); break;
case element::Type_t::u8: op_engine<uint8_t>(op, outputs, inputs); break;
case element::Type_t::u16: op_engine<uint16_t>(op, outputs, inputs); break;
case element::Type_t::u32: op_engine<uint32_t>(op, outputs, inputs); break;
case element::Type_t::u64: op_engine<uint64_t>(op, outputs, inputs); break;
case element::Type_t::undefined:
case element::Type_t::dynamic:
case element::Type_t::bf16:
......@@ -270,25 +262,11 @@ void runtime::interpreter::INTBackend::generate_calls(const element::Type& type,
}
}
void runtime::interpreter::INTBackend::set_nan_check(shared_ptr<Function> func, bool enable)
{
FunctionInstance& instance = m_function_map[func];
instance.m_nan_check_enabled = enable;
}
void runtime::interpreter::INTBackend::enable_performance_data(shared_ptr<Function> func,
bool enable)
{
FunctionInstance& instance = m_function_map[func];
instance.m_performance_counters_enabled = enable;
}
vector<runtime::PerformanceCounter>
runtime::interpreter::INTBackend::get_performance_data(shared_ptr<Function> func) const
runtime::interpreter::INTExecutable::get_performance_data() const
{
vector<runtime::PerformanceCounter> rc;
const FunctionInstance& instance = m_function_map.at(func);
for (const pair<const Node*, stopwatch> p : instance.m_timer_map)
for (const pair<const Node*, stopwatch> p : m_timer_map)
{
rc.emplace_back(p.first->get_name().c_str(),
p.second.get_total_microseconds(),
......@@ -297,7 +275,7 @@ vector<runtime::PerformanceCounter>
return rc;
}
void runtime::interpreter::INTBackend::perform_nan_check(
void runtime::interpreter::INTExecutable::perform_nan_check(
const vector<shared_ptr<HostTensor>>& tensors, const Node* op)
{
size_t arg_number = 1;
......
......@@ -143,6 +143,7 @@ namespace ngraph
namespace interpreter
{
class INTBackend;
class INTExecutable;
}
}
}
......@@ -161,52 +162,49 @@ public:
std::shared_ptr<Tensor> create_tensor(const element::Type& type, const Shape& shape) override;
Handle compile(std::shared_ptr<Function> function) override;
std::shared_ptr<Executable> compile(std::shared_ptr<Function> function,
bool enable_performance_data = false) override;
bool call(std::shared_ptr<Function> function,
const std::vector<std::shared_ptr<Tensor>>& outputs,
const std::vector<std::shared_ptr<Tensor>>& inputs) override;
bool is_supported(const Node& node) const override;
void set_nan_check(std::shared_ptr<Function> func, bool);
private:
std::set<std::string> m_unsupported_op_name_list;
};
void enable_performance_data(std::shared_ptr<Function> func, bool enable) override;
std::vector<PerformanceCounter>
get_performance_data(std::shared_ptr<Function> func) const override;
class ngraph::runtime::interpreter::INTExecutable : public Executable
{
public:
INTExecutable(const std::shared_ptr<Function>& function,
bool enable_performance_collection = false);
bool is_supported(const Node& node) const override;
bool call(const std::vector<std::shared_ptr<Tensor>>& outputs,
const std::vector<std::shared_ptr<Tensor>>& inputs) override;
void set_nan_check(bool value) { m_nan_check_enabled = value; }
std::vector<PerformanceCounter> get_performance_data() const override;
private:
int get_alignment() const { return 64; }
class FunctionInstance
{
public:
bool m_is_compiled = false;
bool m_nan_check_enabled = false;
bool m_performance_counters_enabled = false;
std::unordered_map<const Node*, stopwatch> m_timer_map;
std::vector<NodeWrapper> m_wrapped_nodes;
std::unordered_map<const Node*, std::shared_ptr<RNGState>> m_states;
std::shared_ptr<AlignedBuffer> m_temporary_memory;
void* get_temporary_pointer(size_t offset) { return m_temporary_memory->get_ptr(offset); }
};
std::map<std::shared_ptr<Function>, FunctionInstance> m_function_map;
std::set<std::string> m_unsupported_op_name_list;
bool m_nan_check_enabled = false;
bool m_performance_counters_enabled = false;
std::unordered_map<const Node*, stopwatch> m_timer_map;
std::vector<NodeWrapper> m_wrapped_nodes;
std::unordered_map<const Node*, std::shared_ptr<RNGState>> m_states;
std::shared_ptr<AlignedBuffer> m_temporary_memory;
void* get_temporary_pointer(size_t offset) { return m_temporary_memory->get_ptr(offset); }
static void perform_nan_check(const std::vector<std::shared_ptr<HostTensor>>&,
const Node* op = nullptr);
void generate_calls(const element::Type& type,
const NodeWrapper& op,
const std::vector<void*>& outputs,
const std::vector<const void*>& inputs,
FunctionInstance& instance);
const std::vector<const void*>& inputs);
template <typename T>
void op_engine(const NodeWrapper& node_wrapper,
const std::vector<void*>& out,
const std::vector<const void*>& args,
FunctionInstance& instance)
const std::vector<const void*>& args)
{
const Node& node = node_wrapper.get_node();
std::string node_op = node.description();
......@@ -364,15 +362,15 @@ private:
}
case OP_TYPEID::GenerateMask:
{
if (instance.m_states.count(&node) == 0)
if (m_states.count(&node) == 0)
{
const op::GenerateMask* gm = static_cast<const op::GenerateMask*>(&node);
instance.m_states[&node] = std::unique_ptr<ngraph::RNGState>(
m_states[&node] = std::unique_ptr<ngraph::RNGState>(
ngraph::RNGState::create_rng_state(gm->get_seed(), gm->get_probability()));
}
bool training = static_cast<bool>(static_cast<const T*>(args[0])[0]);
auto state = instance.m_states.at(&node).get();
auto state = m_states.at(&node).get();
size_t element_count = shape_size(node.get_output_shape(0));
reference::generate_mask<T>(
reinterpret_cast<T*>(out[0]), element_count, state, training);
......
......@@ -54,14 +54,25 @@ shared_ptr<runtime::Tensor> runtime::nop::NOPBackend::create_tensor(const elemen
return make_shared<runtime::HostTensor>(type, shape, memory_pointer, "external");
}
runtime::Handle runtime::nop::NOPBackend::compile(shared_ptr<Function> function)
shared_ptr<runtime::Executable>
runtime::nop::NOPBackend::compile(shared_ptr<Function> function,
bool enable_performance_collection)
{
return function;
return make_shared<NOPExecutable>(function, enable_performance_collection);
}
bool runtime::nop::NOPBackend::call(shared_ptr<Function> function,
const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
runtime::nop::NOPExecutable::NOPExecutable(shared_ptr<Function> function,
bool enable_performance_collection)
{
pass::Manager pass_manager;
pass_manager.register_pass<pass::AssignLayout<DenseTensorLayout>>();
pass_manager.run_passes(function);
set_parameters_and_results(*function);
}
bool runtime::nop::NOPExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
{
return true;
}
......@@ -32,6 +32,7 @@ namespace ngraph
namespace nop
{
class NOPBackend;
class NOPExecutable;
}
}
}
......@@ -44,9 +45,14 @@ public:
std::shared_ptr<Tensor> create_tensor(const element::Type& type, const Shape& shape) override;
Handle compile(std::shared_ptr<Function> function) override;
std::shared_ptr<Executable> compile(std::shared_ptr<Function> function,
bool enable_performance_data = false) override;
};
bool call(std::shared_ptr<Function> function,
const std::vector<std::shared_ptr<Tensor>>& outputs,
const std::vector<std::shared_ptr<Tensor>>& inputs) override;
class ngraph::runtime::nop::NOPExecutable : public Executable
{
public:
NOPExecutable(std::shared_ptr<Function> function, bool enable_performance_collection = false);
bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) override;
};
......@@ -136,8 +136,7 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
stopwatch timer;
timer.start();
auto backend = runtime::Backend::create(backend_name);
backend->enable_performance_data(f, timing_detail);
auto compiled_func = backend->compile(f);
auto compiled_func = backend->compile(f, timing_detail);
timer.stop();
cout.imbue(locale(""));
cout << "compile time: " << timer.get_milliseconds() << "ms" << endl;
......@@ -183,7 +182,7 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
{
for (int i = 0; i < warmup_iterations; i++)
{
backend->call(compiled_func, results, args);
compiled_func->call(results, args);
}
}
......@@ -205,7 +204,7 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
}
}
}
backend->call(compiled_func, results, args);
compiled_func->call(results, args);
if (copy_data)
{
for (size_t result_index = 0; result_index < results.size(); result_index++)
......@@ -222,6 +221,6 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = backend->get_performance_data(f);
vector<runtime::PerformanceCounter> perf_data = compiled_func->get_performance_data();
return perf_data;
}
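The benchmark-side effect of the change, condensed: performance collection is requested at compile time and the counters are read back from the Executable. A sketch using the same variable names as run_benchmark above (not a new API):

```cpp
// Illustrative condensation of the run_benchmark() flow after this change.
#include <memory>
#include <string>
#include <vector>
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor.hpp"

std::vector<ngraph::runtime::PerformanceCounter>
    benchmark_sketch(std::shared_ptr<ngraph::Function> f,
                     const std::string& backend_name,
                     bool timing_detail,
                     int iterations,
                     const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& results,
                     const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& args)
{
    auto backend = ngraph::runtime::Backend::create(backend_name);
    auto compiled_func = backend->compile(f, timing_detail); // perf collection requested here
    for (int i = 0; i < iterations; i++)
    {
        compiled_func->call(results, args); // execution lives on the Executable now
    }
    return compiled_func->get_performance_data(); // ...and so does the per-op timing
}
```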
......@@ -36,7 +36,6 @@ set(SRC
cse.cpp
element_type.cpp
file_util.cpp
graph_partition.cpp
includes.cpp
input_output_assign.cpp
main.cpp
......
......@@ -37,9 +37,6 @@ TEST(INTERPRETER, nan_check_input)
shared_ptr<runtime::Backend> backend = runtime::Backend::create("INTERPRETER");
shared_ptr<runtime::interpreter::INTBackend> ibackend =
static_pointer_cast<runtime::interpreter::INTBackend>(backend);
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{2, 4, NAN, 16});
......@@ -47,9 +44,12 @@ TEST(INTERPRETER, nan_check_input)
copy_data(b, vector<float>{1, 2, 1, 8});
auto result = backend->create_tensor(element::f32, shape);
auto handle = backend->compile(f);
ibackend->set_nan_check(handle, true);
EXPECT_ANY_THROW(ibackend->call_with_validate(handle, {result}, {a, b}));
shared_ptr<runtime::Executable> handle = backend->compile(f);
shared_ptr<runtime::interpreter::INTExecutable> ihandle =
static_pointer_cast<runtime::interpreter::INTExecutable>(handle);
ihandle->set_nan_check(true);
EXPECT_ANY_THROW(handle->call_with_validate({result}, {a, b}));
}
TEST(INTERPRETER, nan_check_output)
......@@ -61,9 +61,6 @@ TEST(INTERPRETER, nan_check_output)
shared_ptr<runtime::Backend> backend = runtime::Backend::create("INTERPRETER");
shared_ptr<runtime::interpreter::INTBackend> ibackend =
static_pointer_cast<runtime::interpreter::INTBackend>(backend);
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{2, 4, 0, 16});
......@@ -71,7 +68,9 @@ TEST(INTERPRETER, nan_check_output)
copy_data(b, vector<float>{1, 2, 0, 8});
auto result = backend->create_tensor(element::f32, shape);
auto handle = backend->compile(f);
ibackend->set_nan_check(handle, true);
EXPECT_ANY_THROW(ibackend->call_with_validate(handle, {result}, {a, b}));
shared_ptr<runtime::Executable> handle = backend->compile(f);
shared_ptr<runtime::interpreter::INTExecutable> ihandle =
static_pointer_cast<runtime::interpreter::INTExecutable>(handle);
ihandle->set_nan_check(true);
EXPECT_ANY_THROW(handle->call_with_validate({result}, {a, b}));
}