Commit ef2e0118 authored by Robert Kimball

copy executable from bob/backend_api2

parent 122754c1
...@@ -28,4 +28,5 @@ else: ...@@ -28,4 +28,5 @@ else:
sys.setdlopenflags(flags) sys.setdlopenflags(flags)
from _pyngraph.runtime import Backend from _pyngraph.runtime import Backend
from _pyngraph.runtime import Executable
from _pyngraph.runtime import Tensor from _pyngraph.runtime import Tensor
...@@ -20,7 +20,7 @@ from typing import List, Union ...@@ -20,7 +20,7 @@ from typing import List, Union
import numpy as np import numpy as np
from ngraph.impl import Function, Node, Shape, serialize, util from ngraph.impl import Function, Node, Shape, serialize, util
from ngraph.impl.runtime import Backend, Tensor from ngraph.impl.runtime import Backend, Executable, Tensor
from ngraph.utils.types import get_dtype, NumericData from ngraph.utils.types import get_dtype, NumericData
from ngraph.exceptions import UserInputError from ngraph.exceptions import UserInputError
...@@ -93,7 +93,7 @@ class Computation(object): ...@@ -93,7 +93,7 @@ class Computation(object):
value = np.array(value) value = np.array(value)
Computation._write_ndarray_to_tensor_view(value, tensor_view) Computation._write_ndarray_to_tensor_view(value, tensor_view)
self.runtime.backend.call(self.handle, self.result_views, self.tensor_views) self.handle.call(self.result_views, self.tensor_views)
results = [] results = []
for result_view in self.result_views: for result_view in self.result_views:
......
...@@ -35,23 +35,7 @@ void regclass_pyngraph_runtime_Backend(py::module m) ...@@ -35,23 +35,7 @@ void regclass_pyngraph_runtime_Backend(py::module m)
const ngraph::element::Type&, const ngraph::Shape&)) & const ngraph::element::Type&, const ngraph::Shape&)) &
ngraph::runtime::Backend::create_tensor); ngraph::runtime::Backend::create_tensor);
backend.def("compile", backend.def("compile",
(std::shared_ptr<ngraph::Function>(ngraph::runtime::Backend::*)( (std::unique_ptr<ngraph::runtime::Executable>(ngraph::runtime::Backend::*)(
std::shared_ptr<ngraph::Function>)) & std::shared_ptr<ngraph::Function>)) &
ngraph::runtime::Backend::compile); ngraph::runtime::Backend::compile);
backend.def("call",
(bool (ngraph::runtime::Backend::*)(
std::shared_ptr<ngraph::Function>,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>&,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>&)) &
ngraph::runtime::Backend::call);
backend.def("remove_compiled_function",
(void (ngraph::runtime::Backend::*)(std::shared_ptr<ngraph::Function>)) &
ngraph::runtime::Backend::remove_compiled_function);
backend.def("enable_performance_data",
(void (ngraph::runtime::Backend::*)(std::shared_ptr<ngraph::Function>, bool)) &
ngraph::runtime::Backend::enable_performance_data);
backend.def("get_performance_data",
(std::vector<ngraph::runtime::PerformanceCounter>(ngraph::runtime::Backend::*)(
std::shared_ptr<ngraph::Function>)) &
ngraph::runtime::Backend::get_performance_data);
} }
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "pyngraph/runtime/executable.hpp"
namespace py = pybind11;
// Registers the Python class "Executable" on module `m`, wrapping
// ngraph::runtime::Executable. Instances are held by std::unique_ptr.
// Exposed methods:
//   call(outputs, inputs)   -> bool
//   get_performance_data()  -> list of PerformanceCounter
void regclass_pyngraph_runtime_Executable(py::module m)
{
    py::class_<ngraph::runtime::Executable, std::unique_ptr<ngraph::runtime::Executable>>
        executable(m, "Executable");
    executable.doc() = "ngraph.impl.runtime.Executable wraps ngraph::runtime::Executable";
    executable.def("call",
                   (bool (ngraph::runtime::Executable::*)(
                       const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>&,
                       const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>&)) &
                       ngraph::runtime::Executable::call);
    // Executable::get_performance_data() is a const member function, so the
    // pointer-to-member type must carry the const qualifier as well. Without it
    // the C-style cast silently degrades to a reinterpret-style conversion
    // between unrelated member-function-pointer types instead of selecting the
    // exact signature.
    executable.def(
        "get_performance_data",
        (std::vector<ngraph::runtime::PerformanceCounter>(ngraph::runtime::Executable::*)()
             const) &
            ngraph::runtime::Executable::get_performance_data);
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <pybind11/pybind11.h>
namespace py = pybind11;
// Registers the Python binding class for ngraph::runtime::Executable on module `m`.
void regclass_pyngraph_runtime_Executable(py::module m);
...@@ -25,4 +25,5 @@ void regmodule_pyngraph_runtime(py::module m) ...@@ -25,4 +25,5 @@ void regmodule_pyngraph_runtime(py::module m)
m.def_submodule("runtime", "Package ngraph.impl.runtime wraps ngraph::runtime"); m.def_submodule("runtime", "Package ngraph.impl.runtime wraps ngraph::runtime");
regclass_pyngraph_runtime_Tensor(m_runtime); regclass_pyngraph_runtime_Tensor(m_runtime);
regclass_pyngraph_runtime_Backend(m_runtime); regclass_pyngraph_runtime_Backend(m_runtime);
regclass_pyngraph_runtime_Executable(m_runtime);
} }
...@@ -228,6 +228,7 @@ sources = [ ...@@ -228,6 +228,7 @@ sources = [
'pyngraph/ops/softmax.cpp', 'pyngraph/ops/softmax.cpp',
'pyngraph/ops/result.cpp', 'pyngraph/ops/result.cpp',
'pyngraph/runtime/backend.cpp', 'pyngraph/runtime/backend.cpp',
'pyngraph/runtime/executable.cpp',
'pyngraph/runtime/regmodule_pyngraph_runtime.cpp', 'pyngraph/runtime/regmodule_pyngraph_runtime.cpp',
'pyngraph/runtime/tensor.cpp', 'pyngraph/runtime/tensor.cpp',
'pyngraph/passes/manager.cpp', 'pyngraph/passes/manager.cpp',
......
...@@ -22,7 +22,7 @@ import numpy as np ...@@ -22,7 +22,7 @@ import numpy as np
from ngraph.impl import util from ngraph.impl import util
from ngraph.impl import Shape, Strides, CoordinateDiff, AxisSet, AxisVector, Coordinate from ngraph.impl import Shape, Strides, CoordinateDiff, AxisSet, AxisVector, Coordinate
from ngraph.impl import Type, Function, NodeVector from ngraph.impl import Type, Function, NodeVector
from ngraph.impl.runtime import Backend from ngraph.impl.runtime import Backend, Executable
from ngraph.impl.op import Acos, Asin, Atan, Cos, Sin, Tan from ngraph.impl.op import Acos, Asin, Atan, Cos, Sin, Tan
from ngraph.impl.op import Cosh, Sinh, Tanh, Sqrt, Sign from ngraph.impl.op import Cosh, Sinh, Tanh, Sqrt, Sign
from ngraph.impl.op import Power, Negative, Ceiling, Floor from ngraph.impl.op import Power, Negative, Ceiling, Floor
...@@ -127,7 +127,8 @@ def binary_op_exec(op_str): ...@@ -127,7 +127,8 @@ def binary_op_exec(op_str):
result_arr = np.array([[0, 0], [0, 0]], dtype=np.float32) result_arr = np.array([[0, 0], [0, 0]], dtype=np.float32)
result.write(util.numpy_to_c(result_arr), 0, 16) result.write(util.numpy_to_c(result_arr), 0, 16)
backend.call(backend.compile(function), [result], [a, b]) handle = backend.compile(function)
handle.call([result], [a, b])
result.read(util.numpy_to_c(result_arr), 0, 16) result.read(util.numpy_to_c(result_arr), 0, 16)
a_arr = np.array([[1, 6], [7, 4]], dtype=np.float32) a_arr = np.array([[1, 6], [7, 4]], dtype=np.float32)
...@@ -156,7 +157,8 @@ def binary_op_comparison(op_str): ...@@ -156,7 +157,8 @@ def binary_op_comparison(op_str):
result_arr = np.array([[False, False], [False, False]], dtype=np.bool) result_arr = np.array([[False, False], [False, False]], dtype=np.bool)
result.write(util.numpy_to_c(result_arr), 0, 4) result.write(util.numpy_to_c(result_arr), 0, 4)
backend.call(backend.compile(function), [result], [a, b]) handle = backend.compile(function)
handle.call([result], [a, b])
result.read(util.numpy_to_c(result_arr), 0, 4) result.read(util.numpy_to_c(result_arr), 0, 4)
a_arr = np.array([[1, 5], [3, 2]], dtype=np.float32) a_arr = np.array([[1, 5], [3, 2]], dtype=np.float32)
...@@ -256,7 +258,8 @@ def test_add_with_mul(): ...@@ -256,7 +258,8 @@ def test_add_with_mul():
result_arr = np.array([0, 0, 0, 0], dtype=np.float32) result_arr = np.array([0, 0, 0, 0], dtype=np.float32)
result.write(util.numpy_to_c(result_arr), 0, 16) result.write(util.numpy_to_c(result_arr), 0, 16)
backend.call(backend.compile(function), [result], [a, b, c]) handle = backend.compile(function)
handle.call([result], [a, b, c])
result.read(util.numpy_to_c(result_arr), 0, 16) result.read(util.numpy_to_c(result_arr), 0, 16)
a_arr = np.array([1, 2, 3, 4], dtype=np.float32) a_arr = np.array([1, 2, 3, 4], dtype=np.float32)
...@@ -364,7 +367,8 @@ def unary_op_exec(op_str, input_list): ...@@ -364,7 +367,8 @@ def unary_op_exec(op_str, input_list):
result_arr = np.zeros(shape_np, dtype=np.float32) result_arr = np.zeros(shape_np, dtype=np.float32)
result.write(util.numpy_to_c(result_arr), 0, 16) result.write(util.numpy_to_c(result_arr), 0, 16)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, 16) result.read(util.numpy_to_c(result_arr), 0, 16)
a_arr = np.array(input_list, dtype=np.float32) a_arr = np.array(input_list, dtype=np.float32)
...@@ -497,7 +501,8 @@ def test_not(): ...@@ -497,7 +501,8 @@ def test_not():
result_arr = np.array([False, False], dtype=np.bool) result_arr = np.array([False, False], dtype=np.bool)
result.write(util.numpy_to_c(result_arr), 0, 2) result.write(util.numpy_to_c(result_arr), 0, 2)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, 2) result.read(util.numpy_to_c(result_arr), 0, 2)
a_arr = np.array([True, False], dtype=np.bool) a_arr = np.array([True, False], dtype=np.bool)
...@@ -522,7 +527,8 @@ def test_sum(): ...@@ -522,7 +527,8 @@ def test_sum():
result_arr = np.array([0], dtype=np.float32) result_arr = np.array([0], dtype=np.float32)
result.write(util.numpy_to_c(result_arr), 0, 4) result.write(util.numpy_to_c(result_arr), 0, 4)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, 4) result.read(util.numpy_to_c(result_arr), 0, 4)
a_arr = np.array([1, 2, 3, 4], dtype=np.float32) a_arr = np.array([1, 2, 3, 4], dtype=np.float32)
...@@ -547,7 +553,8 @@ def test_reshape(): ...@@ -547,7 +553,8 @@ def test_reshape():
result_arr = np.array([[0, 0], [0, 0], [0, 0]], dtype=np.float32) result_arr = np.array([[0, 0], [0, 0], [0, 0]], dtype=np.float32)
result.write(util.numpy_to_c(result_arr), 0, 24) result.write(util.numpy_to_c(result_arr), 0, 24)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, 24) result.read(util.numpy_to_c(result_arr), 0, 24)
a_arr = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) a_arr = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
...@@ -573,7 +580,8 @@ def test_convert(): ...@@ -573,7 +580,8 @@ def test_convert():
result_arr = np.array([False, False, False], dtype=np.bool) result_arr = np.array([False, False, False], dtype=np.bool)
result.write(util.numpy_to_c(result_arr), 0, 3) result.write(util.numpy_to_c(result_arr), 0, 3)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, 3) result.read(util.numpy_to_c(result_arr), 0, 3)
a_arr = np.array([1, 5, 3], dtype=np.float32) a_arr = np.array([1, 5, 3], dtype=np.float32)
...@@ -590,7 +598,8 @@ def test_convert(): ...@@ -590,7 +598,8 @@ def test_convert():
result_arr = np.array([0, 0, 0], dtype=np.int32) result_arr = np.array([0, 0, 0], dtype=np.int32)
result.write(util.numpy_to_c(result_arr), 0, 12) result.write(util.numpy_to_c(result_arr), 0, 12)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, 12) result.read(util.numpy_to_c(result_arr), 0, 12)
a_arr = np.array([1.4, 5.4, 3.9], dtype=np.float32) a_arr = np.array([1.4, 5.4, 3.9], dtype=np.float32)
...@@ -614,7 +623,8 @@ def test_broadcast(): ...@@ -614,7 +623,8 @@ def test_broadcast():
result_arr = np.zeros((3, 3), dtype=np.float32) result_arr = np.zeros((3, 3), dtype=np.float32)
result.write(util.numpy_to_c(result_arr), 0, 36) result.write(util.numpy_to_c(result_arr), 0, 36)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, 36) result.read(util.numpy_to_c(result_arr), 0, 36)
a_arr = np.array([[0], [0], [0]], dtype=np.float32) a_arr = np.array([[0], [0], [0]], dtype=np.float32)
...@@ -636,7 +646,8 @@ def test_constant(): ...@@ -636,7 +646,8 @@ def test_constant():
result_arr = np.zeros((3, 3), dtype=np.float32) result_arr = np.zeros((3, 3), dtype=np.float32)
result.write(util.numpy_to_c(result_arr), 0, 36) result.write(util.numpy_to_c(result_arr), 0, 36)
backend.call(backend.compile(function), [result], []) handle = backend.compile(function)
handle.call([result], [])
result.read(util.numpy_to_c(result_arr), 0, 36) result.read(util.numpy_to_c(result_arr), 0, 36)
result_arr_ref = np.arange(9).reshape(3, 3) result_arr_ref = np.arange(9).reshape(3, 3)
...@@ -659,7 +670,8 @@ def test_onehot(): ...@@ -659,7 +670,8 @@ def test_onehot():
result_arr = np.zeros((3, 3), dtype=np.float32) result_arr = np.zeros((3, 3), dtype=np.float32)
result.write(util.numpy_to_c(result_arr), 0, 36) result.write(util.numpy_to_c(result_arr), 0, 36)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, 36) result.read(util.numpy_to_c(result_arr), 0, 36)
a_arr = np.array([1, 0, 2]) a_arr = np.array([1, 0, 2])
...@@ -691,7 +703,8 @@ def test_concat(): ...@@ -691,7 +703,8 @@ def test_concat():
result_arr = np.zeros(6, dtype=np.float32).reshape(3, 2) result_arr = np.zeros(6, dtype=np.float32).reshape(3, 2)
result.write(util.numpy_to_c(result_arr), 0, 24) result.write(util.numpy_to_c(result_arr), 0, 24)
backend.call(backend.compile(function), [result], [a, b, c]) handle = backend.compile(function)
handle.call([result], [a, b, c])
result.read(util.numpy_to_c(result_arr), 0, 24) result.read(util.numpy_to_c(result_arr), 0, 24)
a_arr = np.array([[1, 2]], dtype=np.float32) a_arr = np.array([[1, 2]], dtype=np.float32)
...@@ -742,7 +755,8 @@ def test_select(): ...@@ -742,7 +755,8 @@ def test_select():
result_arr = np.array([[0, 0]], dtype=np.float32) result_arr = np.array([[0, 0]], dtype=np.float32)
result.write(util.numpy_to_c(result_arr), 0, 8) result.write(util.numpy_to_c(result_arr), 0, 8)
backend.call(backend.compile(function), [result], [a, b, c]) handle = backend.compile(function)
handle.call([result], [a, b, c])
result.read(util.numpy_to_c(result_arr), 0, 8) result.read(util.numpy_to_c(result_arr), 0, 8)
result_arr_ref = np.array([[5, 8]]) result_arr_ref = np.array([[5, 8]])
...@@ -773,7 +787,8 @@ def test_slice(): ...@@ -773,7 +787,8 @@ def test_slice():
result_arr = np.zeros(16, dtype=np.float32).reshape(4, 4) result_arr = np.zeros(16, dtype=np.float32).reshape(4, 4)
result.write(util.numpy_to_c(result_arr), 0, 16*4) result.write(util.numpy_to_c(result_arr), 0, 16*4)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, 64) result.read(util.numpy_to_c(result_arr), 0, 64)
result_arr_ref = input_arr[lower_bounds[0]:upper_bounds[0], lower_bounds[1]:upper_bounds[1]] result_arr_ref = input_arr[lower_bounds[0]:upper_bounds[0], lower_bounds[1]:upper_bounds[1]]
...@@ -792,7 +807,8 @@ def test_slice(): ...@@ -792,7 +807,8 @@ def test_slice():
result_arr = np.zeros(8, dtype=np.float32).reshape(4, 2) result_arr = np.zeros(8, dtype=np.float32).reshape(4, 2)
result.write(util.numpy_to_c(result_arr), 0, 8*4) result.write(util.numpy_to_c(result_arr), 0, 8*4)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, 32) result.read(util.numpy_to_c(result_arr), 0, 32)
result_arr_ref = result_arr_ref[::strides[0], ::strides[1]] result_arr_ref = result_arr_ref[::strides[0], ::strides[1]]
...@@ -826,7 +842,8 @@ def test_replace_slice(): ...@@ -826,7 +842,8 @@ def test_replace_slice():
result_arr = np.zeros(24, dtype=np.float32).reshape(6, 4) result_arr = np.zeros(24, dtype=np.float32).reshape(6, 4)
result.write(util.numpy_to_c(result_arr), 0, 24*4) result.write(util.numpy_to_c(result_arr), 0, 24*4)
backend.call(backend.compile(function), [result], [a, b]) handle = backend.compile(function)
handle.call([result], [a, b])
result.read(util.numpy_to_c(result_arr), 0, 24*4) result.read(util.numpy_to_c(result_arr), 0, 24*4)
result_arr_ref = np.copy(input_arr_a) result_arr_ref = np.copy(input_arr_a)
...@@ -844,7 +861,8 @@ def test_replace_slice(): ...@@ -844,7 +861,8 @@ def test_replace_slice():
parameter_list, 'test') parameter_list, 'test')
backend = Backend.create(pytest.config.getoption('backend')) backend = Backend.create(pytest.config.getoption('backend'))
backend.call(backend.compile(function), [result], [a, b]) handle = backend.compile(function)
handle.call([result], [a, b])
result.read(util.numpy_to_c(result_arr), 0, 24*4) result.read(util.numpy_to_c(result_arr), 0, 24*4)
result_arr_ref = np.copy(input_arr_a) result_arr_ref = np.copy(input_arr_a)
...@@ -875,7 +893,8 @@ def test_max_pool(): ...@@ -875,7 +893,8 @@ def test_max_pool():
result_arr = np.zeros(8, dtype=np.float32).reshape(1, 1, 8) result_arr = np.zeros(8, dtype=np.float32).reshape(1, 1, 8)
result.write(util.numpy_to_c(result_arr), 0, 8*4) result.write(util.numpy_to_c(result_arr), 0, 8*4)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, 32) result.read(util.numpy_to_c(result_arr), 0, 32)
result_arr_ref = (np.arange(8) + 2).reshape(1, 1, 8) result_arr_ref = (np.arange(8) + 2).reshape(1, 1, 8)
...@@ -892,7 +911,8 @@ def test_max_pool(): ...@@ -892,7 +911,8 @@ def test_max_pool():
result_arr = np.zeros(size, dtype=np.float32).reshape(1, 1, size) result_arr = np.zeros(size, dtype=np.float32).reshape(1, 1, size)
result.write(util.numpy_to_c(result_arr), 0, size*4) result.write(util.numpy_to_c(result_arr), 0, size*4)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, size*4) result.read(util.numpy_to_c(result_arr), 0, size*4)
result_arr_ref = ((np.arange(size) + 1) * 2).reshape(1, 1, size) result_arr_ref = ((np.arange(size) + 1) * 2).reshape(1, 1, size)
...@@ -917,7 +937,8 @@ def test_max_pool(): ...@@ -917,7 +937,8 @@ def test_max_pool():
result_arr = np.zeros(64, dtype=np.float32).reshape(1, 1, 8, 8) result_arr = np.zeros(64, dtype=np.float32).reshape(1, 1, 8, 8)
result.write(util.numpy_to_c(result_arr), 0, 8*8*4) result.write(util.numpy_to_c(result_arr), 0, 8*8*4)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, 8*8*4) result.read(util.numpy_to_c(result_arr), 0, 8*8*4)
result_arr_ref = ((np.arange(100).reshape(10, 10))[2:, 2:]).reshape(1, 1, 8, 8) result_arr_ref = ((np.arange(100).reshape(10, 10))[2:, 2:]).reshape(1, 1, 8, 8)
...@@ -934,7 +955,8 @@ def test_max_pool(): ...@@ -934,7 +955,8 @@ def test_max_pool():
result_arr = np.zeros(size*size, dtype=np.float32).reshape(1, 1, size, size) result_arr = np.zeros(size*size, dtype=np.float32).reshape(1, 1, size, size)
result.write(util.numpy_to_c(result_arr), 0, size*size*4) result.write(util.numpy_to_c(result_arr), 0, size*size*4)
backend.call(backend.compile(function), [result], [a]) handle = backend.compile(function)
handle.call([result], [a])
result.read(util.numpy_to_c(result_arr), 0, size*size*4) result.read(util.numpy_to_c(result_arr), 0, size*size*4)
result_arr_ref = ((np.arange(100).reshape(10, 10))[2::2, 2::2]).reshape(1, 1, size, size) result_arr_ref = ((np.arange(100).reshape(10, 10))[2::2, 2::2]).reshape(1, 1, size, size)
...@@ -1014,7 +1036,8 @@ def test_convolution(): ...@@ -1014,7 +1036,8 @@ def test_convolution():
result = backend.create_tensor(element_type, Shape([1, 1, 14, 14])) result = backend.create_tensor(element_type, Shape([1, 1, 14, 14]))
result.write(util.numpy_to_c(result_arr), 0, 14*14*4) result.write(util.numpy_to_c(result_arr), 0, 14*14*4)
backend.call(backend.compile(function), [result], [a, b]) handle = backend.compile(function)
handle.call([result], [a, b])
result.read(util.numpy_to_c(result_arr), 0, 14*14*4) result.read(util.numpy_to_c(result_arr), 0, 14*14*4)
result_arr_ref = convolution2d(image_arr[0][0], filter_arr[0][0]).reshape(1, 1, 14, 14) result_arr_ref = convolution2d(image_arr[0][0], filter_arr[0][0]).reshape(1, 1, 14, 14)
...@@ -1048,7 +1071,8 @@ def test_convolution_with_strides(): ...@@ -1048,7 +1071,8 @@ def test_convolution_with_strides():
result_arr = np.zeros(16, dtype=np.float32).reshape(1, 1, 4, 4) result_arr = np.zeros(16, dtype=np.float32).reshape(1, 1, 4, 4)
result = backend.create_tensor(element_type, Shape([1, 1, 4, 4])) result = backend.create_tensor(element_type, Shape([1, 1, 4, 4]))
result.write(util.numpy_to_c(result_arr), 0, 4*4*4) result.write(util.numpy_to_c(result_arr), 0, 4*4*4)
backend.call(backend.compile(function), [result], [a, b]) handle = backend.compile(function)
handle.call([result], [a, b])
result.read(util.numpy_to_c(result_arr), 0, 4*4*4) result.read(util.numpy_to_c(result_arr), 0, 4*4*4)
result_arr_ref = convolution2d(image_arr[0][0], filter_arr[0][0], strides).reshape(1, 1, 4, 4) result_arr_ref = convolution2d(image_arr[0][0], filter_arr[0][0], strides).reshape(1, 1, 4, 4)
...@@ -1082,7 +1106,8 @@ def test_convolution_with_filter_dilation(): ...@@ -1082,7 +1106,8 @@ def test_convolution_with_filter_dilation():
result_arr = np.zeros(36, dtype=np.float32).reshape(1, 1, 6, 6) result_arr = np.zeros(36, dtype=np.float32).reshape(1, 1, 6, 6)
result = backend.create_tensor(element_type, Shape([1, 1, 6, 6])) result = backend.create_tensor(element_type, Shape([1, 1, 6, 6]))
result.write(util.numpy_to_c(result_arr), 0, 6*6*4) result.write(util.numpy_to_c(result_arr), 0, 6*6*4)
backend.call(backend.compile(function), [result], [a, b]) handle = backend.compile(function)
handle.call([result], [a, b])
result.read(util.numpy_to_c(result_arr), 0, 6*6*4) result.read(util.numpy_to_c(result_arr), 0, 6*6*4)
result_arr_ref = convolution2d(image_arr[0][0], filter_arr[0][0], strides, result_arr_ref = convolution2d(image_arr[0][0], filter_arr[0][0], strides,
...@@ -1122,7 +1147,8 @@ def test_convolution_with_padding(): ...@@ -1122,7 +1147,8 @@ def test_convolution_with_padding():
result_arr = np.zeros(36, dtype=np.float32).reshape(1, 1, 6, 6) result_arr = np.zeros(36, dtype=np.float32).reshape(1, 1, 6, 6)
result = backend.create_tensor(element_type, Shape([1, 1, 6, 6])) result = backend.create_tensor(element_type, Shape([1, 1, 6, 6]))
result.write(util.numpy_to_c(result_arr), 0, 6*6*4) result.write(util.numpy_to_c(result_arr), 0, 6*6*4)
backend.call(backend.compile(function), [result], [a, b]) handle = backend.compile(function)
handle.call([result], [a, b])
result.read(util.numpy_to_c(result_arr), 0, 6*6*4) result.read(util.numpy_to_c(result_arr), 0, 6*6*4)
result_arr_ref = convolution2d(image_arr[0][0], filter_arr[0][0], strides, result_arr_ref = convolution2d(image_arr[0][0], filter_arr[0][0], strides,
...@@ -1160,7 +1186,8 @@ def test_convolution_with_padding(): ...@@ -1160,7 +1186,8 @@ def test_convolution_with_padding():
result_arr = np.zeros(81, dtype=np.float32).reshape(1, 1, 9, 9) result_arr = np.zeros(81, dtype=np.float32).reshape(1, 1, 9, 9)
result = backend.create_tensor(element_type, Shape([1, 1, 9, 9])) result = backend.create_tensor(element_type, Shape([1, 1, 9, 9]))
result.write(util.numpy_to_c(result_arr), 0, 9*9*4) result.write(util.numpy_to_c(result_arr), 0, 9*9*4)
backend.call(backend.compile(function), [result], [a, b]) handle = backend.compile(function)
handle.call([result], [a, b])
result.read(util.numpy_to_c(result_arr), 0, 9*9*4) result.read(util.numpy_to_c(result_arr), 0, 9*9*4)
result_arr_ref = convolution2d(image_arr[0][0], filter_arr[0][0], strides, result_arr_ref = convolution2d(image_arr[0][0], filter_arr[0][0], strides,
...@@ -1201,7 +1228,8 @@ def test_convolution_with_data_dilation(): ...@@ -1201,7 +1228,8 @@ def test_convolution_with_data_dilation():
result_arr = np.zeros(17*17, dtype=np.float32).reshape(1, 1, 17, 17) result_arr = np.zeros(17*17, dtype=np.float32).reshape(1, 1, 17, 17)
result = backend.create_tensor(element_type, Shape([1, 1, 17, 17])) result = backend.create_tensor(element_type, Shape([1, 1, 17, 17]))
result.write(util.numpy_to_c(result_arr), 0, 17*17*4) result.write(util.numpy_to_c(result_arr), 0, 17*17*4)
backend.call(backend.compile(function), [result], [a, b]) handle = backend.compile(function)
handle.call([result], [a, b])
result.read(util.numpy_to_c(result_arr), 0, 17*17*4) result.read(util.numpy_to_c(result_arr), 0, 17*17*4)
result_arr_ref = convolution2d(image_arr[0][0], filter_arr[0][0], strides, result_arr_ref = convolution2d(image_arr[0][0], filter_arr[0][0], strides,
...@@ -1248,7 +1276,8 @@ def test_convolutionBackpropData(): ...@@ -1248,7 +1276,8 @@ def test_convolutionBackpropData():
result_arr = np.zeros(10*10, dtype=np.float32).reshape(1, 1, 10, 10) result_arr = np.zeros(10*10, dtype=np.float32).reshape(1, 1, 10, 10)
result = backend.create_tensor(element_type, Shape([1, 1, 10, 10])) result = backend.create_tensor(element_type, Shape([1, 1, 10, 10]))
result.write(util.numpy_to_c(result_arr), 0, 10*10*4) result.write(util.numpy_to_c(result_arr), 0, 10*10*4)
backend.call(backend.compile(function), [result], [a, b]) handle = backend.compile(function)
handle.call([result], [a, b])
result.read(util.numpy_to_c(result_arr), 0, 10*10*4) result.read(util.numpy_to_c(result_arr), 0, 10*10*4)
result_arr_ref = np.array( result_arr_ref = np.array(
...@@ -1303,7 +1332,8 @@ def test_convolutionBackpropFilters(): ...@@ -1303,7 +1332,8 @@ def test_convolutionBackpropFilters():
result_arr = np.zeros(3*3, dtype=np.float32).reshape(1, 1, 3, 3) result_arr = np.zeros(3*3, dtype=np.float32).reshape(1, 1, 3, 3)
result = backend.create_tensor(element_type, Shape([1, 1, 3, 3])) result = backend.create_tensor(element_type, Shape([1, 1, 3, 3]))
result.write(util.numpy_to_c(result_arr), 0, 3*3*4) result.write(util.numpy_to_c(result_arr), 0, 3*3*4)
backend.call(backend.compile(function), [result], [a, b]) handle = backend.compile(function)
handle.call([result], [a, b])
result.read(util.numpy_to_c(result_arr), 0, 3*3*4) result.read(util.numpy_to_c(result_arr), 0, 3*3*4)
result_arr_ref = np.array( result_arr_ref = np.array(
......
...@@ -139,8 +139,8 @@ set (SRC ...@@ -139,8 +139,8 @@ set (SRC
pass/memory_visualize.cpp pass/memory_visualize.cpp
pass/nop_elimination.cpp pass/nop_elimination.cpp
pass/pass.cpp pass/pass.cpp
pass/pass_config.cpp pass/pass_config.cpp
pass/prefix_reshape_elimination.cpp pass/prefix_reshape_elimination.cpp
pass/propagate_cacheability.cpp pass/propagate_cacheability.cpp
pass/reshape_elimination.cpp pass/reshape_elimination.cpp
pass/reshape_sinking.cpp pass/reshape_sinking.cpp
......
...@@ -39,78 +39,123 @@ vector<string> runtime::Backend::get_registered_devices() ...@@ -39,78 +39,123 @@ vector<string> runtime::Backend::get_registered_devices()
return BackendManager::get_registered_backends(); return BackendManager::get_registered_backends();
} }
void runtime::Backend::remove_compiled_function(shared_ptr<Function> func) bool runtime::Backend::is_supported(const Node& node) const
{ {
// The default behavior is that a backend does not support any ops. If this is not the case
// then override this method and enhance.
return false;
} }
vector<ngraph::runtime::PerformanceCounter> runtime::Executable::Executable()
runtime::Backend::get_performance_data(shared_ptr<Function> func) const
{ {
return vector<PerformanceCounter>();
} }
void runtime::Backend::validate(shared_ptr<const Function> function, runtime::Executable::~Executable()
const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
{ {
const ParameterVector& input_parameters = function->get_parameters(); }
if (input_parameters.size() != inputs.size())
// Checks the supplied tensors with validate() and, if that returns normally,
// executes the compiled function via call().
// \param outputs Tensors that receive the results
// \param inputs Tensors supplying the parameter values
// \returns whatever call() returns
bool runtime::Executable::call_with_validate(const vector<shared_ptr<runtime::Tensor>>& outputs,
                                             const vector<shared_ptr<runtime::Tensor>>& inputs)
{
    // Validation happens first so call() is never reached with rejected tensors.
    validate(outputs, inputs);
    const bool call_status = call(outputs, inputs);
    return call_status;
}
void runtime::Executable::validate(const vector<std::shared_ptr<runtime::Tensor>>& outputs,
const vector<std::shared_ptr<runtime::Tensor>>& inputs)
{
const ParameterVector& parameters = get_parameters();
const ResultVector& results = get_results();
if (parameters.size() != inputs.size())
{ {
stringstream ss; stringstream ss;
ss << "Call input count " << inputs.size() << " does not match Function's Parameter count " ss << "Call input count " << inputs.size() << " does not match Function's Parameter count "
<< input_parameters.size(); << parameters.size();
throw runtime_error(ss.str()); throw runtime_error(ss.str());
} }
if (function->get_output_size() != outputs.size()) if (results.size() != outputs.size())
{ {
stringstream ss; stringstream ss;
ss << "Call output count " << outputs.size() << " does not match Function's Result count " ss << "Call output count " << outputs.size() << " does not match Function's Result count "
<< function->get_output_size(); << results.size();
throw runtime_error(ss.str()); throw runtime_error(ss.str());
} }
for (size_t i = 0; i < input_parameters.size(); i++) for (size_t i = 0; i < parameters.size(); i++)
{ {
if (input_parameters[i]->get_element_type() != inputs[i]->get_element_type()) if (parameters[i]->get_element_type() != inputs[i]->get_element_type())
{ {
stringstream ss; stringstream ss;
ss << "Input " << i << " type '" << inputs[i]->get_element_type() ss << "Input " << i << " type '" << inputs[i]->get_element_type()
<< "' does not match Parameter type '" << input_parameters[i]->get_element_type() << "' does not match Parameter type '" << parameters[i]->get_element_type() << "'";
<< "'";
throw runtime_error(ss.str()); throw runtime_error(ss.str());
} }
if (input_parameters[i]->get_shape() != inputs[i]->get_shape()) if (parameters[i]->get_shape() != inputs[i]->get_shape())
{ {
stringstream ss; stringstream ss;
ss << "Input " << i << " shape {" << join(inputs[i]->get_shape()) ss << "Input " << i << " shape {" << join(inputs[i]->get_shape())
<< "} does not match Parameter shape {" << join(input_parameters[i]->get_shape()) << "} does not match Parameter shape {" << join(parameters[i]->get_shape()) << "}";
<< "}";
throw runtime_error(ss.str()); throw runtime_error(ss.str());
} }
} }
for (size_t i = 0; i < function->get_output_size(); i++) for (size_t i = 0; i < results.size(); i++)
{ {
if (function->get_output_element_type(i) != outputs[i]->get_element_type()) if (results[i]->get_element_type() != outputs[i]->get_element_type())
{ {
stringstream ss; stringstream ss;
ss << "Output " << i << " type '" << outputs[i]->get_element_type() ss << "Output " << i << " type '" << outputs[i]->get_element_type()
<< "' does not match Result type '" << function->get_output_element_type(i) << "'"; << "' does not match Result type '" << results[i]->get_element_type() << "'";
throw runtime_error(ss.str()); throw runtime_error(ss.str());
} }
if (function->get_output_shape(i) != outputs[i]->get_shape()) if (results[i]->get_shape() != outputs[i]->get_shape())
{ {
stringstream ss; stringstream ss;
ss << "Output " << i << " shape {" << join(outputs[i]->get_shape()) ss << "Output " << i << " shape {" << join(outputs[i]->get_shape())
<< "} does not match Result shape {" << join(function->get_output_shape(i)) << "}"; << "} does not match Result shape {" << join(results[i]->get_shape()) << "}";
throw runtime_error(ss.str()); throw runtime_error(ss.str());
} }
} }
} }
bool runtime::Backend::is_supported(const Node& node) const const ngraph::ParameterVector& runtime::Executable::get_parameters() const
{
return m_parameters;
}
/// \brief Accessor for the Result nodes stored on this Executable.
/// \returns a const reference to m_results, the vector filled in by
///     set_parameters_and_results.
const ngraph::ResultVector& runtime::Executable::get_results() const
{
    const ngraph::ResultVector& stored_results = m_results;
    return stored_results;
}
void runtime::Executable::set_parameters_and_results(const Function& func)
{
m_parameters = func.get_parameters();
m_results = func.get_results();
}
/// \brief Base-class implementation of the performance data query.
/// \returns an empty vector of PerformanceCounter; this implementation collects nothing.
vector<runtime::PerformanceCounter> runtime::Executable::get_performance_data() const
{
    vector<PerformanceCounter> no_counters;
    return no_counters;
}
bool runtime::Backend::is_supported_property(const Property prop) const
{ {
// The default behavior is that a backend does not support any ops. If this is not the case
// then override this method and enhance.
return false; return false;
} }
bool runtime::Backend::call_with_validate(
std::shared_ptr<Executable> exec,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs)
{
return exec->call_with_validate(outputs, inputs);
}
bool runtime::Backend::call_with_validate(
const std::unique_ptr<Executable>& exec,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs)
{
return exec->call_with_validate(outputs, inputs);
}
...@@ -30,7 +30,8 @@ namespace ngraph ...@@ -30,7 +30,8 @@ namespace ngraph
class ExternalFunction; class ExternalFunction;
class Tensor; class Tensor;
class Backend; class Backend;
using Handle = std::shared_ptr<Function>; class Executable;
using Handle = std::shared_ptr<Executable>;
} }
} }
...@@ -81,43 +82,8 @@ public: ...@@ -81,43 +82,8 @@ public:
/// \brief Compiles a Function. /// \brief Compiles a Function.
/// \param func The function to compile /// \param func The function to compile
/// \returns compiled function or nullptr on failure /// \returns compiled function or nullptr on failure
virtual Handle compile(std::shared_ptr<Function> func) = 0; virtual std::shared_ptr<Executable> compile(std::shared_ptr<Function> func,
bool enable_performance_data = false) = 0;
/// \brief Executes a single iteration of a Function. If func is not compiled the call will
/// compile it.
/// \param func The function to execute
/// \returns true if iteration is successful, false otherwise
virtual bool call(std::shared_ptr<Function> func,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) = 0;
/// \brief Executes a single iteration of a Function. If func is not compiled the call will
/// compile it. Optionally validates the inputs and outputs against the function graph.
/// \param func The function to execute
/// \returns true if iteration is successful, false otherwise
bool call_with_validate(std::shared_ptr<Function> func,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs)
{
validate(func, outputs, inputs);
return call(func, outputs, inputs);
}
/// \brief Compiled functions may be cached. This function removes a compiled function
/// from the cache.
/// \param func The function to execute
virtual void remove_compiled_function(std::shared_ptr<Function> func);
/// \brief Enable the collection of per-op performance information on a specified Function.
/// Data collection is via the `get_performance_data` method.
/// \param func The function to collect performance data on.
/// \param enable Set to true to enable or false to disable data collection
virtual void enable_performance_data(std::shared_ptr<Function> func, bool enable) {}
/// \brief Collect performance information gathered on a Function.
/// \param func The function to get collected data.
/// \returns Vector of PerformanceCounter information.
virtual std::vector<PerformanceCounter>
get_performance_data(std::shared_ptr<Function> func) const;
/// \brief Test if a backend is capable of supporting an op /// \brief Test if a backend is capable of supporting an op
/// \param node is the op to test. /// \param node is the op to test.
...@@ -133,8 +99,62 @@ public: ...@@ -133,8 +99,62 @@ public:
/// \brief Test if a backend particular property is supported /// \brief Test if a backend particular property is supported
/// \param prop is the feature to test. /// \param prop is the feature to test.
/// \returns true if the property is supported, false otherwise. /// \returns true if the property is supported, false otherwise.
virtual bool is_supported_property(const Property prop) const { return false; } virtual bool is_supported_property(const Property prop) const;
void validate(std::shared_ptr<const Function> func,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs, /// The following methods are temporary hacks to reduce the number of changes in this PR
/// They will be removed in a follow-on PR
bool call_with_validate(std::shared_ptr<Executable> handle,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs);
bool call_with_validate(const std::unique_ptr<Executable>& handle,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs);
};
class ngraph::runtime::Executable
{
public:
Executable();
virtual ~Executable();
/// \param outputs vector of runtime::Tensor used as outputs
/// \param inputs vector of runtime::Tensor used as inputs
/// \returns true if iteration is successful, false otherwise
virtual bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) = 0;
/// \brief Executes a single iteration of a Function.
/// \param outputs vector of runtime::Tensor used as outputs
/// \param inputs vector of runtime::Tensor used as inputs
/// \returns true if iteration is successful, false otherwise
bool call_with_validate(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs);
/// \brief Collect performance information gathered on a Function.
/// \returns Vector of PerformanceCounter information.
virtual std::vector<PerformanceCounter> get_performance_data() const;
/// \brief Validates a Function.
/// \param outputs vector of runtime::Tensor used as outputs
/// \param inputs vector of runtime::Tensor used as inputs
void validate(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs); const std::vector<std::shared_ptr<runtime::Tensor>>& inputs);
/// \brief Query the input Parameters
/// \returns an ngraph::ParameterVector of all input parameters
const ngraph::ParameterVector& get_parameters() const;
/// \brief Query the output Results
/// \returns an ngraph::ResultVector of all output results
const ngraph::ResultVector& get_results() const;
protected:
/// \brief Called at the end of compile to set the values to be returned by get_parameters
/// and get_results
/// \param func The function with Results fully resolved.
void set_parameters_and_results(const Function& func);
private:
ngraph::ParameterVector m_parameters;
ngraph::ResultVector m_results;
}; };
...@@ -62,14 +62,24 @@ static void node_modifiers(const Node& node, vector<string>& attributes) ...@@ -62,14 +62,24 @@ static void node_modifiers(const Node& node, vector<string>& attributes)
} }
} }
runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> func) shared_ptr<runtime::Executable>
runtime::hybrid::HybridBackend::compile(shared_ptr<Function> func,
bool enable_performance_collection)
{ {
if (m_function_map.find(func) == m_function_map.end()) return make_shared<HybridExecutable>(
{ m_backend_list, func, enable_performance_collection, m_debug_enabled);
// Clone function }
FunctionInstance instance;
instance.m_function = clone_function(*func);
runtime::hybrid::HybridExecutable::HybridExecutable(
const std::vector<std::shared_ptr<runtime::Backend>>& backend_list,
const shared_ptr<Function>& func,
bool enable_performance_collection,
bool debug_enabled)
: m_function{func}
, m_backend_list{backend_list}
, m_debug_enabled{debug_enabled}
{
{
// Run placement pass // Run placement pass
ngraph::pass::Manager pass_manager; ngraph::pass::Manager pass_manager;
pass_manager.register_pass<runtime::hybrid::pass::AssignPlacement>(m_backend_list); pass_manager.register_pass<runtime::hybrid::pass::AssignPlacement>(m_backend_list);
...@@ -81,16 +91,15 @@ runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> fun ...@@ -81,16 +91,15 @@ runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> fun
{ {
pass_manager.register_pass<ngraph::pass::VisualizeTree>("graph.png", node_modifiers); pass_manager.register_pass<ngraph::pass::VisualizeTree>("graph.png", node_modifiers);
} }
pass_manager.run_passes(instance.m_function); pass_manager.run_passes(m_function);
// Split function to sub_functions // Split function to sub_functions
tie(instance.m_sub_functions, instance.m_map_parameter_to_result) = tie(m_sub_functions, m_map_parameter_to_result) =
runtime::hybrid::split_function_by_placement(instance.m_function); runtime::hybrid::split_function_by_placement(m_function);
m_function_map.insert({func, instance});
// Compile subfunctions in corresponding backends // Compile subfunctions in corresponding backends
size_t subfunction_number = 0; size_t subfunction_number = 0;
for (shared_ptr<Function>& sub_function : instance.m_sub_functions) for (shared_ptr<Function>& sub_function : m_sub_functions)
{ {
size_t placement = runtime::hybrid::get_colocated_function_placement(sub_function); size_t placement = runtime::hybrid::get_colocated_function_placement(sub_function);
if (m_debug_enabled) if (m_debug_enabled)
...@@ -102,7 +111,8 @@ runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> fun ...@@ -102,7 +111,8 @@ runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> fun
pm.run_passes(sub_function); pm.run_passes(sub_function);
} }
auto backend = m_backend_list[placement]; auto backend = m_backend_list[placement];
backend->compile(sub_function); shared_ptr<Executable> exec = backend->compile(sub_function);
m_executable_map[sub_function] = exec;
// Compile will replace nodes so we need to make one more pass through all // Compile will replace nodes so we need to make one more pass through all
// ops to reset placement // ops to reset placement
...@@ -113,38 +123,29 @@ runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> fun ...@@ -113,38 +123,29 @@ runtime::Handle runtime::hybrid::HybridBackend::compile(shared_ptr<Function> fun
} }
} }
return func; set_parameters_and_results(*func);
} }
bool runtime::hybrid::HybridBackend::call(shared_ptr<Function> func, bool runtime::hybrid::HybridExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& outputs, const vector<shared_ptr<runtime::Tensor>>& inputs)
const vector<shared_ptr<runtime::Tensor>>& inputs)
{ {
// Get FunctionInstance
bool rc = true; bool rc = true;
using node_map_t = unordered_map<shared_ptr<Node>, shared_ptr<runtime::Tensor>>; using node_map_t = unordered_map<shared_ptr<Node>, shared_ptr<runtime::Tensor>>;
auto fit = m_function_map.find(func);
if (fit == m_function_map.end())
{
throw runtime_error("compile() must be called before call().");
}
FunctionInstance& instance = fit->second;
// Parameter and result node in sub_function maps to one Tensor // Parameter and result node in sub_function maps to one Tensor
node_map_t map_node_to_tensor; node_map_t map_node_to_tensor;
for (size_t i = 0; i < inputs.size(); ++i) for (size_t i = 0; i < inputs.size(); ++i)
{ {
map_node_to_tensor[instance.m_function->get_parameters()[i]] = inputs[i]; map_node_to_tensor[m_function->get_parameters()[i]] = inputs[i];
} }
for (size_t i = 0; i < outputs.size(); ++i) for (size_t i = 0; i < outputs.size(); ++i)
{ {
map_node_to_tensor[instance.m_function->get_results()[i]] = outputs[i]; map_node_to_tensor[m_function->get_results()[i]] = outputs[i];
} }
// Call subfunctions // Call subfunctions
for (const shared_ptr<Function>& sub_function : instance.m_sub_functions) for (const shared_ptr<Function>& sub_function : m_sub_functions)
{ {
// Init backend // Init backend
size_t placement = runtime::hybrid::get_colocated_function_placement(sub_function); size_t placement = runtime::hybrid::get_colocated_function_placement(sub_function);
...@@ -172,7 +173,7 @@ bool runtime::hybrid::HybridBackend::call(shared_ptr<Function> func, ...@@ -172,7 +173,7 @@ bool runtime::hybrid::HybridBackend::call(shared_ptr<Function> func,
else else
{ {
// Handle temporary tensors that go between subgraphs // Handle temporary tensors that go between subgraphs
auto result_node = instance.m_map_parameter_to_result.at(parameter_node); auto result_node = m_map_parameter_to_result.at(parameter_node);
auto result = map_node_to_tensor.at(result_node); auto result = map_node_to_tensor.at(result_node);
auto parameter = backend->create_tensor(parameter_node->get_element_type(), auto parameter = backend->create_tensor(parameter_node->get_element_type(),
parameter_node->get_shape()); parameter_node->get_shape());
...@@ -213,7 +214,8 @@ bool runtime::hybrid::HybridBackend::call(shared_ptr<Function> func, ...@@ -213,7 +214,8 @@ bool runtime::hybrid::HybridBackend::call(shared_ptr<Function> func,
} }
// Call // Call
backend->call(sub_function, results, parameters); auto exec = m_executable_map[sub_function];
exec->call(results, parameters);
// Need to copy any results to the correct device // Need to copy any results to the correct device
for (const auto& p : copy_back) for (const auto& p : copy_back)
...@@ -229,7 +231,7 @@ bool runtime::hybrid::HybridBackend::is_supported(const Node& node) const ...@@ -229,7 +231,7 @@ bool runtime::hybrid::HybridBackend::is_supported(const Node& node) const
return true; return true;
} }
size_t runtime::hybrid::HybridBackend::get_placement(const runtime::Tensor* t) size_t runtime::hybrid::HybridExecutable::get_placement(const runtime::Tensor* t)
{ {
size_t index = 0; size_t index = 0;
for (const shared_ptr<ngraph::runtime::Backend>& be : m_backend_list) for (const shared_ptr<ngraph::runtime::Backend>& be : m_backend_list)
......
...@@ -30,6 +30,7 @@ namespace ngraph ...@@ -30,6 +30,7 @@ namespace ngraph
namespace hybrid namespace hybrid
{ {
class HybridBackend; class HybridBackend;
class HybridExecutable;
} }
} }
} }
...@@ -48,29 +49,37 @@ public: ...@@ -48,29 +49,37 @@ public:
const ngraph::Shape& shape, const ngraph::Shape& shape,
void* memory_pointer) override; void* memory_pointer) override;
Handle compile(std::shared_ptr<ngraph::Function> func) override; std::shared_ptr<Executable> compile(std::shared_ptr<ngraph::Function> func,
bool enable_performance_data = false) override;
bool call(std::shared_ptr<ngraph::Function> func,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& inputs) override;
bool is_supported(const ngraph::Node& node) const override; bool is_supported(const ngraph::Node& node) const override;
void set_debug_enabled(bool flag) { m_debug_enabled = flag; } void set_debug_enabled(bool flag) { m_debug_enabled = flag; }
private: private:
class FunctionInstance std::vector<std::shared_ptr<runtime::Backend>> m_backend_list;
{ bool m_debug_enabled = false;
public: };
std::shared_ptr<ngraph::Function> m_function;
std::vector<std::shared_ptr<ngraph::Function>> m_sub_functions; class ngraph::runtime::hybrid::HybridExecutable : public runtime::Executable
std::unordered_map<std::shared_ptr<ngraph::op::Parameter>, {
std::shared_ptr<ngraph::op::Result>> public:
m_map_parameter_to_result; HybridExecutable(const std::vector<std::shared_ptr<runtime::Backend>>& backend_list,
}; const std::shared_ptr<Function>& func,
bool enable_performance_collection = false,
bool debug_enabled = false);
bool call(const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& inputs) override;
private:
std::shared_ptr<ngraph::Function> m_function;
std::vector<std::shared_ptr<ngraph::Function>> m_sub_functions;
std::unordered_map<std::shared_ptr<ngraph::op::Parameter>, std::shared_ptr<ngraph::op::Result>>
m_map_parameter_to_result;
std::map<std::shared_ptr<ngraph::Function>, FunctionInstance> m_function_map;
std::vector<std::shared_ptr<runtime::Backend>> m_backend_list; std::vector<std::shared_ptr<runtime::Backend>> m_backend_list;
bool m_debug_enabled = false; bool m_debug_enabled = false;
std::unordered_map<std::shared_ptr<Function>, std::shared_ptr<Executable>> m_executable_map;
size_t get_placement(const runtime::Tensor* t); size_t get_placement(const runtime::Tensor* t);
}; };
...@@ -64,12 +64,17 @@ shared_ptr<runtime::Tensor> runtime::interpreter::INTBackend::create_tensor( ...@@ -64,12 +64,17 @@ shared_ptr<runtime::Tensor> runtime::interpreter::INTBackend::create_tensor(
return make_shared<runtime::HostTensor>(type, shape, memory_pointer, this); return make_shared<runtime::HostTensor>(type, shape, memory_pointer, this);
} }
runtime::Handle runtime::interpreter::INTBackend::compile(shared_ptr<Function> function) shared_ptr<runtime::Executable>
runtime::interpreter::INTBackend::compile(shared_ptr<Function> function,
bool enable_performance_collection)
{
return make_shared<INTExecutable>(function, enable_performance_collection);
}
runtime::interpreter::INTExecutable::INTExecutable(const shared_ptr<Function>& function,
bool enable_performance_collection)
{ {
FunctionInstance& instance = m_function_map[function];
if (!instance.m_is_compiled)
{ {
instance.m_is_compiled = true;
pass::Manager pass_manager; pass::Manager pass_manager;
pass_manager.register_pass<pass::LikeReplacement>(); pass_manager.register_pass<pass::LikeReplacement>();
pass_manager.register_pass<pass::AssignLayout<DenseTensorLayout>>(); pass_manager.register_pass<pass::AssignLayout<DenseTensorLayout>>();
...@@ -78,32 +83,20 @@ runtime::Handle runtime::interpreter::INTBackend::compile(shared_ptr<Function> f ...@@ -78,32 +83,20 @@ runtime::Handle runtime::interpreter::INTBackend::compile(shared_ptr<Function> f
pass_manager.run_passes(function); pass_manager.run_passes(function);
size_t memory_pool_size = function->get_temporary_pool_size(); size_t memory_pool_size = function->get_temporary_pool_size();
instance.m_temporary_memory.reset(new AlignedBuffer(memory_pool_size, get_alignment())); m_temporary_memory.reset(new AlignedBuffer(memory_pool_size, get_alignment()));
for (const shared_ptr<Node>& node : function->get_ordered_ops()) for (const shared_ptr<Node>& node : function->get_ordered_ops())
{ {
instance.m_wrapped_nodes.emplace_back(node); m_wrapped_nodes.emplace_back(node);
} }
} }
return function; set_parameters_and_results(*function);
} }
bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function, bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& outputs, const vector<shared_ptr<runtime::Tensor>>& inputs)
const vector<shared_ptr<runtime::Tensor>>& inputs)
{ {
auto fit = m_function_map.find(function);
if (fit == m_function_map.end())
{
throw runtime_error("compile() must be called before call().");
}
FunctionInstance& instance = fit->second;
if (!instance.m_is_compiled)
{
throw runtime_error("compile() must be called before call().");
}
// convert inputs to HostTensor // convert inputs to HostTensor
vector<void*> func_inputs; vector<void*> func_inputs;
vector<shared_ptr<runtime::HostTensor>> htv_inputs; vector<shared_ptr<runtime::HostTensor>> htv_inputs;
...@@ -113,7 +106,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function, ...@@ -113,7 +106,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
func_inputs.push_back(static_cast<void*>(host_tensor->get_data_ptr())); func_inputs.push_back(static_cast<void*>(host_tensor->get_data_ptr()));
htv_inputs.push_back(host_tensor); htv_inputs.push_back(host_tensor);
} }
if (instance.m_nan_check_enabled) if (m_nan_check_enabled)
{ {
perform_nan_check(htv_inputs); perform_nan_check(htv_inputs);
} }
...@@ -129,7 +122,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function, ...@@ -129,7 +122,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
// map function params -> HostTensor // map function params -> HostTensor
unordered_map<descriptor::Tensor*, void*> tensor_map; unordered_map<descriptor::Tensor*, void*> tensor_map;
size_t input_count = 0; size_t input_count = 0;
for (auto param : function->get_parameters()) for (auto param : get_parameters())
{ {
for (size_t i = 0; i < param->get_output_size(); ++i) for (size_t i = 0; i < param->get_output_size(); ++i)
{ {
...@@ -139,9 +132,9 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function, ...@@ -139,9 +132,9 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
} }
// map function outputs -> HostTensor // map function outputs -> HostTensor
for (size_t output_count = 0; output_count < function->get_output_size(); ++output_count) for (size_t output_count = 0; output_count < get_results().size(); ++output_count)
{ {
auto output = function->get_output_op(output_count); auto output = get_results()[output_count];
if (!dynamic_pointer_cast<op::Result>(output)) if (!dynamic_pointer_cast<op::Result>(output))
{ {
throw ngraph_error("One of function's outputs isn't op::Result"); throw ngraph_error("One of function's outputs isn't op::Result");
...@@ -151,7 +144,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function, ...@@ -151,7 +144,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
} }
// for each ordered op in the graph // for each ordered op in the graph
for (const NodeWrapper& wrapped : instance.m_wrapped_nodes) for (const NodeWrapper& wrapped : m_wrapped_nodes)
{ {
const Node* op = &wrapped.get_node(); const Node* op = &wrapped.get_node();
auto type_id = wrapped.get_typeid(); auto type_id = wrapped.get_typeid();
...@@ -185,7 +178,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function, ...@@ -185,7 +178,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
if (it == tensor_map.end()) if (it == tensor_map.end())
{ {
auto offset = op->get_output_tensor(i).get_pool_offset(); auto offset = op->get_output_tensor(i).get_pool_offset();
host_tensor = instance.get_temporary_pointer(offset); host_tensor = get_temporary_pointer(offset);
tensor_map.insert({tensor, host_tensor}); tensor_map.insert({tensor, host_tensor});
} }
else else
...@@ -224,16 +217,16 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function, ...@@ -224,16 +217,16 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
} }
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
if (instance.m_performance_counters_enabled) if (m_performance_counters_enabled)
{ {
instance.m_timer_map[op].start(); m_timer_map[op].start();
} }
generate_calls(type, wrapped, op_outputs, op_inputs, instance); generate_calls(type, wrapped, op_outputs, op_inputs);
if (instance.m_performance_counters_enabled) if (m_performance_counters_enabled)
{ {
instance.m_timer_map[op].stop(); m_timer_map[op].stop();
} }
if (instance.m_nan_check_enabled) if (m_nan_check_enabled)
{ {
perform_nan_check(htv_outputs, op); perform_nan_check(htv_outputs, op);
} }
...@@ -242,26 +235,25 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function, ...@@ -242,26 +235,25 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
return true; return true;
} }
void runtime::interpreter::INTBackend::generate_calls(const element::Type& type, void runtime::interpreter::INTExecutable::generate_calls(const element::Type& type,
const NodeWrapper& op, const NodeWrapper& op,
const vector<void*>& outputs, const vector<void*>& outputs,
const vector<const void*>& inputs, const vector<const void*>& inputs)
FunctionInstance& instance)
{ {
stringstream ss; stringstream ss;
switch (type.get_type_enum()) switch (type.get_type_enum())
{ {
case element::Type_t::boolean: op_engine<char>(op, outputs, inputs, instance); break; case element::Type_t::boolean: op_engine<char>(op, outputs, inputs); break;
case element::Type_t::f32: op_engine<float>(op, outputs, inputs, instance); break; case element::Type_t::f32: op_engine<float>(op, outputs, inputs); break;
case element::Type_t::f64: op_engine<double>(op, outputs, inputs, instance); break; case element::Type_t::f64: op_engine<double>(op, outputs, inputs); break;
case element::Type_t::i8: op_engine<int8_t>(op, outputs, inputs, instance); break; case element::Type_t::i8: op_engine<int8_t>(op, outputs, inputs); break;
case element::Type_t::i16: op_engine<int16_t>(op, outputs, inputs, instance); break; case element::Type_t::i16: op_engine<int16_t>(op, outputs, inputs); break;
case element::Type_t::i32: op_engine<int32_t>(op, outputs, inputs, instance); break; case element::Type_t::i32: op_engine<int32_t>(op, outputs, inputs); break;
case element::Type_t::i64: op_engine<int64_t>(op, outputs, inputs, instance); break; case element::Type_t::i64: op_engine<int64_t>(op, outputs, inputs); break;
case element::Type_t::u8: op_engine<uint8_t>(op, outputs, inputs, instance); break; case element::Type_t::u8: op_engine<uint8_t>(op, outputs, inputs); break;
case element::Type_t::u16: op_engine<uint16_t>(op, outputs, inputs, instance); break; case element::Type_t::u16: op_engine<uint16_t>(op, outputs, inputs); break;
case element::Type_t::u32: op_engine<uint32_t>(op, outputs, inputs, instance); break; case element::Type_t::u32: op_engine<uint32_t>(op, outputs, inputs); break;
case element::Type_t::u64: op_engine<uint64_t>(op, outputs, inputs, instance); break; case element::Type_t::u64: op_engine<uint64_t>(op, outputs, inputs); break;
case element::Type_t::undefined: case element::Type_t::undefined:
case element::Type_t::dynamic: case element::Type_t::dynamic:
case element::Type_t::bf16: case element::Type_t::bf16:
...@@ -270,25 +262,11 @@ void runtime::interpreter::INTBackend::generate_calls(const element::Type& type, ...@@ -270,25 +262,11 @@ void runtime::interpreter::INTBackend::generate_calls(const element::Type& type,
} }
} }
void runtime::interpreter::INTBackend::set_nan_check(shared_ptr<Function> func, bool enable)
{
FunctionInstance& instance = m_function_map[func];
instance.m_nan_check_enabled = enable;
}
void runtime::interpreter::INTBackend::enable_performance_data(shared_ptr<Function> func,
bool enable)
{
FunctionInstance& instance = m_function_map[func];
instance.m_performance_counters_enabled = enable;
}
vector<runtime::PerformanceCounter> vector<runtime::PerformanceCounter>
runtime::interpreter::INTBackend::get_performance_data(shared_ptr<Function> func) const runtime::interpreter::INTExecutable::get_performance_data() const
{ {
vector<runtime::PerformanceCounter> rc; vector<runtime::PerformanceCounter> rc;
const FunctionInstance& instance = m_function_map.at(func); for (const pair<const Node*, stopwatch> p : m_timer_map)
for (const pair<const Node*, stopwatch> p : instance.m_timer_map)
{ {
rc.emplace_back(p.first->get_name().c_str(), rc.emplace_back(p.first->get_name().c_str(),
p.second.get_total_microseconds(), p.second.get_total_microseconds(),
...@@ -297,7 +275,7 @@ vector<runtime::PerformanceCounter> ...@@ -297,7 +275,7 @@ vector<runtime::PerformanceCounter>
return rc; return rc;
} }
void runtime::interpreter::INTBackend::perform_nan_check( void runtime::interpreter::INTExecutable::perform_nan_check(
const vector<shared_ptr<HostTensor>>& tensors, const Node* op) const vector<shared_ptr<HostTensor>>& tensors, const Node* op)
{ {
size_t arg_number = 1; size_t arg_number = 1;
......
...@@ -143,6 +143,7 @@ namespace ngraph ...@@ -143,6 +143,7 @@ namespace ngraph
namespace interpreter namespace interpreter
{ {
class INTBackend; class INTBackend;
class INTExecutable;
} }
} }
} }
...@@ -161,52 +162,49 @@ public: ...@@ -161,52 +162,49 @@ public:
std::shared_ptr<Tensor> create_tensor(const element::Type& type, const Shape& shape) override; std::shared_ptr<Tensor> create_tensor(const element::Type& type, const Shape& shape) override;
Handle compile(std::shared_ptr<Function> function) override; std::shared_ptr<Executable> compile(std::shared_ptr<Function> function,
bool enable_performance_data = false) override;
bool call(std::shared_ptr<Function> function, bool is_supported(const Node& node) const override;
const std::vector<std::shared_ptr<Tensor>>& outputs,
const std::vector<std::shared_ptr<Tensor>>& intputs) override;
void set_nan_check(std::shared_ptr<Function> func, bool); private:
std::set<std::string> m_unsupported_op_name_list;
};
void enable_performance_data(std::shared_ptr<Function> func, bool enable) override; class ngraph::runtime::interpreter::INTExecutable : public Executable
std::vector<PerformanceCounter> {
get_performance_data(std::shared_ptr<Function> func) const override; public:
INTExecutable(const std::shared_ptr<Function>& function,
bool enable_performance_collection = false);
bool is_supported(const Node& node) const override; bool call(const std::vector<std::shared_ptr<Tensor>>& outputs,
const std::vector<std::shared_ptr<Tensor>>& intputs) override;
void set_nan_check(bool value) { m_nan_check_enabled = value; }
std::vector<PerformanceCounter> get_performance_data() const override;
private: private:
int get_alignment() const { return 64; } int get_alignment() const { return 64; }
class FunctionInstance bool m_nan_check_enabled = false;
{ bool m_performance_counters_enabled = false;
public: std::unordered_map<const Node*, stopwatch> m_timer_map;
bool m_is_compiled = false; std::vector<NodeWrapper> m_wrapped_nodes;
bool m_nan_check_enabled = false; std::unordered_map<const Node*, std::shared_ptr<RNGState>> m_states;
bool m_performance_counters_enabled = false; std::shared_ptr<AlignedBuffer> m_temporary_memory;
std::unordered_map<const Node*, stopwatch> m_timer_map;
std::vector<NodeWrapper> m_wrapped_nodes;
std::unordered_map<const Node*, std::shared_ptr<RNGState>> m_states;
std::shared_ptr<AlignedBuffer> m_temporary_memory;
void* get_temporary_pointer(size_t offset) { return m_temporary_memory->get_ptr(offset); }
};
std::map<std::shared_ptr<Function>, FunctionInstance> m_function_map;
std::set<std::string> m_unsupported_op_name_list;
void* get_temporary_pointer(size_t offset) { return m_temporary_memory->get_ptr(offset); }
static void perform_nan_check(const std::vector<std::shared_ptr<HostTensor>>&, static void perform_nan_check(const std::vector<std::shared_ptr<HostTensor>>&,
const Node* op = nullptr); const Node* op = nullptr);
void generate_calls(const element::Type& type, void generate_calls(const element::Type& type,
const NodeWrapper& op, const NodeWrapper& op,
const std::vector<void*>& outputs, const std::vector<void*>& outputs,
const std::vector<const void*>& inputs, const std::vector<const void*>& inputs);
FunctionInstance& instance);
template <typename T> template <typename T>
void op_engine(const NodeWrapper& node_wrapper, void op_engine(const NodeWrapper& node_wrapper,
const std::vector<void*>& out, const std::vector<void*>& out,
const std::vector<const void*>& args, const std::vector<const void*>& args)
FunctionInstance& instance)
{ {
const Node& node = node_wrapper.get_node(); const Node& node = node_wrapper.get_node();
std::string node_op = node.description(); std::string node_op = node.description();
...@@ -364,15 +362,15 @@ private: ...@@ -364,15 +362,15 @@ private:
} }
case OP_TYPEID::GenerateMask: case OP_TYPEID::GenerateMask:
{ {
if (instance.m_states.count(&node) == 0) if (m_states.count(&node) == 0)
{ {
const op::GenerateMask* gm = static_cast<const op::GenerateMask*>(&node); const op::GenerateMask* gm = static_cast<const op::GenerateMask*>(&node);
instance.m_states[&node] = std::unique_ptr<ngraph::RNGState>( m_states[&node] = std::unique_ptr<ngraph::RNGState>(
ngraph::RNGState::create_rng_state(gm->get_seed(), gm->get_probability())); ngraph::RNGState::create_rng_state(gm->get_seed(), gm->get_probability()));
} }
bool training = static_cast<bool>(static_cast<const T*>(args[0])[0]); bool training = static_cast<bool>(static_cast<const T*>(args[0])[0]);
auto state = instance.m_states.at(&node).get(); auto state = m_states.at(&node).get();
size_t element_count = shape_size(node.get_output_shape(0)); size_t element_count = shape_size(node.get_output_shape(0));
reference::generate_mask<T>( reference::generate_mask<T>(
reinterpret_cast<T*>(out[0]), element_count, state, training); reinterpret_cast<T*>(out[0]), element_count, state, training);
......
...@@ -54,14 +54,25 @@ shared_ptr<runtime::Tensor> runtime::nop::NOPBackend::create_tensor(const elemen ...@@ -54,14 +54,25 @@ shared_ptr<runtime::Tensor> runtime::nop::NOPBackend::create_tensor(const elemen
return make_shared<runtime::HostTensor>(type, shape, memory_pointer, "external"); return make_shared<runtime::HostTensor>(type, shape, memory_pointer, "external");
} }
runtime::Handle runtime::nop::NOPBackend::compile(shared_ptr<Function> function) shared_ptr<runtime::Executable>
runtime::nop::NOPBackend::compile(shared_ptr<Function> function,
bool enable_performance_collection)
{ {
return function; return make_shared<NOPExecutable>(function, enable_performance_collection);
} }
bool runtime::nop::NOPBackend::call(shared_ptr<Function> function, runtime::nop::NOPExecutable::NOPExecutable(shared_ptr<Function> function,
const vector<shared_ptr<runtime::Tensor>>& outputs, bool enable_performance_collection)
const vector<shared_ptr<runtime::Tensor>>& inputs) {
pass::Manager pass_manager;
pass_manager.register_pass<pass::AssignLayout<DenseTensorLayout>>();
pass_manager.run_passes(function);
set_parameters_and_results(*function);
}
bool runtime::nop::NOPExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
{ {
return true; return true;
} }
...@@ -32,6 +32,7 @@ namespace ngraph ...@@ -32,6 +32,7 @@ namespace ngraph
namespace nop namespace nop
{ {
class NOPBackend; class NOPBackend;
class NOPExecutable;
} }
} }
} }
...@@ -44,9 +45,14 @@ public: ...@@ -44,9 +45,14 @@ public:
std::shared_ptr<Tensor> create_tensor(const element::Type& type, const Shape& shape) override; std::shared_ptr<Tensor> create_tensor(const element::Type& type, const Shape& shape) override;
Handle compile(std::shared_ptr<Function> function) override; std::shared_ptr<Executable> compile(std::shared_ptr<Function> function,
bool enable_performance_data = false) override;
};
bool call(std::shared_ptr<Function> function, class ngraph::runtime::nop::NOPExecutable : public Executable
const std::vector<std::shared_ptr<Tensor>>& outputs, {
const std::vector<std::shared_ptr<Tensor>>& intputs) override; public:
NOPExecutable(std::shared_ptr<Function> function, bool enable_performance_collection = false);
bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) override;
}; };
...@@ -136,8 +136,7 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f, ...@@ -136,8 +136,7 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
stopwatch timer; stopwatch timer;
timer.start(); timer.start();
auto backend = runtime::Backend::create(backend_name); auto backend = runtime::Backend::create(backend_name);
backend->enable_performance_data(f, timing_detail); auto compiled_func = backend->compile(f, timing_detail);
auto compiled_func = backend->compile(f);
timer.stop(); timer.stop();
cout.imbue(locale("")); cout.imbue(locale(""));
cout << "compile time: " << timer.get_milliseconds() << "ms" << endl; cout << "compile time: " << timer.get_milliseconds() << "ms" << endl;
...@@ -183,7 +182,7 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f, ...@@ -183,7 +182,7 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
{ {
for (int i = 0; i < warmup_iterations; i++) for (int i = 0; i < warmup_iterations; i++)
{ {
backend->call(compiled_func, results, args); compiled_func->call(results, args);
} }
} }
...@@ -205,7 +204,7 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f, ...@@ -205,7 +204,7 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
} }
} }
} }
backend->call(compiled_func, results, args); compiled_func->call(results, args);
if (copy_data) if (copy_data)
{ {
for (size_t result_index = 0; result_index < results.size(); result_index++) for (size_t result_index = 0; result_index < results.size(); result_index++)
...@@ -222,6 +221,6 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f, ...@@ -222,6 +221,6 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
float time = t1.get_milliseconds(); float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl; cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = backend->get_performance_data(f); vector<runtime::PerformanceCounter> perf_data = compiled_func->get_performance_data();
return perf_data; return perf_data;
} }
...@@ -36,7 +36,6 @@ set(SRC ...@@ -36,7 +36,6 @@ set(SRC
cse.cpp cse.cpp
element_type.cpp element_type.cpp
file_util.cpp file_util.cpp
graph_partition.cpp
includes.cpp includes.cpp
input_output_assign.cpp input_output_assign.cpp
main.cpp main.cpp
......
...@@ -37,9 +37,6 @@ TEST(INTERPRETER, nan_check_input) ...@@ -37,9 +37,6 @@ TEST(INTERPRETER, nan_check_input)
shared_ptr<runtime::Backend> backend = runtime::Backend::create("INTERPRETER"); shared_ptr<runtime::Backend> backend = runtime::Backend::create("INTERPRETER");
shared_ptr<runtime::interpreter::INTBackend> ibackend =
static_pointer_cast<runtime::interpreter::INTBackend>(backend);
// Create some tensors for input/output // Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape); auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{2, 4, NAN, 16}); copy_data(a, vector<float>{2, 4, NAN, 16});
...@@ -47,9 +44,12 @@ TEST(INTERPRETER, nan_check_input) ...@@ -47,9 +44,12 @@ TEST(INTERPRETER, nan_check_input)
copy_data(b, vector<float>{1, 2, 1, 8}); copy_data(b, vector<float>{1, 2, 1, 8});
auto result = backend->create_tensor(element::f32, shape); auto result = backend->create_tensor(element::f32, shape);
auto handle = backend->compile(f); shared_ptr<runtime::Executable> handle = backend->compile(f);
ibackend->set_nan_check(handle, true);
EXPECT_ANY_THROW(ibackend->call_with_validate(handle, {result}, {a, b})); shared_ptr<runtime::interpreter::INTExecutable> ihandle =
static_pointer_cast<runtime::interpreter::INTExecutable>(handle);
ihandle->set_nan_check(true);
EXPECT_ANY_THROW(handle->call_with_validate({result}, {a, b}));
} }
TEST(INTERPRETER, nan_check_output) TEST(INTERPRETER, nan_check_output)
...@@ -61,9 +61,6 @@ TEST(INTERPRETER, nan_check_output) ...@@ -61,9 +61,6 @@ TEST(INTERPRETER, nan_check_output)
shared_ptr<runtime::Backend> backend = runtime::Backend::create("INTERPRETER"); shared_ptr<runtime::Backend> backend = runtime::Backend::create("INTERPRETER");
shared_ptr<runtime::interpreter::INTBackend> ibackend =
static_pointer_cast<runtime::interpreter::INTBackend>(backend);
// Create some tensors for input/output // Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape); auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{2, 4, 0, 16}); copy_data(a, vector<float>{2, 4, 0, 16});
...@@ -71,7 +68,9 @@ TEST(INTERPRETER, nan_check_output) ...@@ -71,7 +68,9 @@ TEST(INTERPRETER, nan_check_output)
copy_data(b, vector<float>{1, 2, 0, 8}); copy_data(b, vector<float>{1, 2, 0, 8});
auto result = backend->create_tensor(element::f32, shape); auto result = backend->create_tensor(element::f32, shape);
auto handle = backend->compile(f); shared_ptr<runtime::Executable> handle = backend->compile(f);
ibackend->set_nan_check(handle, true); shared_ptr<runtime::interpreter::INTExecutable> ihandle =
EXPECT_ANY_THROW(ibackend->call_with_validate(handle, {result}, {a, b})); static_pointer_cast<runtime::interpreter::INTExecutable>(handle);
ihandle->set_nan_check(true);
EXPECT_ANY_THROW(handle->call_with_validate({result}, {a, b}));
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment