Commit 89963725, authored by Robert Kimball and committed by Scott Cyphers

Replace interpreter with Adam's simplified implementation (#915)

* wip

* simplified interpreter backend
parent 66198b33
......@@ -127,10 +127,7 @@ set (SRC
runtime/aligned_buffer.cpp
runtime/backend.cpp
runtime/host_tensor_view.cpp
runtime/ie/ie_backend.cpp
runtime/interpreter/int_backend.cpp
runtime/interpreter/int_call_frame.cpp
runtime/interpreter/int_external_function.cpp
runtime/tensor_view.cpp
serializer.cpp
shape.cpp
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/ie/ie_backend.hpp"
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/op/util/binary_elementwise_comparison.hpp"
#include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
using descriptor::layout::DenseTensorViewLayout;
static bool static_init()
{
runtime::Backend::register_backend("IE", make_shared<runtime::ie::IE_Backend>());
return true;
};
bool runtime::ie::IE_Backend::init = static_init();
// Creates a HostTensorView with the given element type and shape, named
// "external", and returns it through the generic TensorView interface.
shared_ptr<runtime::TensorView> runtime::ie::IE_Backend::create_tensor(const element::Type& type,
                                                                       const Shape& shape)
{
    shared_ptr<runtime::HostTensorView> host_tensor =
        make_shared<runtime::HostTensorView>(type, shape, "external");
    return host_tensor;
}
// Creates a HostTensorView named "external" with the given element type and
// shape, forwarding memory_pointer to the HostTensorView constructor.
// NOTE(review): presumably the tensor uses memory_pointer as its storage
// without taking ownership — confirm against HostTensorView.
shared_ptr<runtime::TensorView> runtime::ie::IE_Backend::create_tensor(const element::Type& type,
                                                                       const Shape& shape,
                                                                       void* memory_pointer)
{
    shared_ptr<runtime::HostTensorView> host_tensor =
        make_shared<runtime::HostTensorView>(type, shape, memory_pointer, "external");
    return host_tensor;
}
bool runtime::ie::IE_Backend::compile(shared_ptr<Function> function)
{
pass::Manager pass_manager;
pass_manager.register_pass<pass::AssignLayout<DenseTensorViewLayout>>();
pass_manager.register_pass<pass::Liveness>();
pass_manager.run_passes(function);
return true;
}
// Executes `function` by walking its ops in order and evaluating each one.
//
// function: graph to run; compile() is invoked on it on every call.
// outputs:  destination tensors, one per function output, in output order.
// inputs:   source tensors, consumed sequentially for every output of every
//           function parameter, in parameter order.
//
// All tensors are downcast to HostTensorView with static_pointer_cast —
// assumes callers only pass tensors created by this backend (TODO confirm).
//
// Returns true. Throws ngraph_error when a function output op is not an
// op::Result; anything thrown by validate_call or generate_calls propagates.
bool runtime::ie::IE_Backend::call(shared_ptr<Function> function,
                                   const vector<shared_ptr<runtime::TensorView>>& outputs,
                                   const vector<shared_ptr<runtime::TensorView>>& inputs)
{
    validate_call(function, outputs, inputs);

    // TODO: check if function already compiled?
    compile(function);

    // convert inputs to HostTensorView
    vector<shared_ptr<runtime::HostTensorView>> func_inputs;
    func_inputs.reserve(inputs.size());
    for (const shared_ptr<runtime::TensorView>& tv : inputs)
    {
        func_inputs.push_back(static_pointer_cast<runtime::HostTensorView>(tv));
    }

    // convert outputs to HostTensorView
    vector<shared_ptr<runtime::HostTensorView>> func_outputs;
    func_outputs.reserve(outputs.size());
    for (const shared_ptr<runtime::TensorView>& tv : outputs)
    {
        func_outputs.push_back(static_pointer_cast<runtime::HostTensorView>(tv));
    }

    // map function params -> HostTensorView
    unordered_map<descriptor::TensorView*, shared_ptr<runtime::HostTensorView>> tensor_map;
    size_t input_count = 0;
    for (const auto& param : function->get_parameters())
    {
        for (size_t i = 0; i < param->get_output_size(); ++i)
        {
            descriptor::TensorView* tv = param->get_output_tensor_view(i).get();
            tensor_map.insert({tv, func_inputs[input_count++]});
        }
    }

    // map function outputs -> HostTensorView
    for (size_t output_count = 0; output_count < function->get_output_size(); ++output_count)
    {
        auto output = function->get_output_op(output_count);
        if (!dynamic_pointer_cast<op::Result>(output))
        {
            throw ngraph_error("One of function's outputs isn't op::Result");
        }
        descriptor::TensorView* tv = output->get_output_tensor_view(0).get();
        tensor_map.insert({tv, func_outputs[output_count]});
    }

    // for each ordered op in the graph
    // (the loop variable is called `node` so it does not shadow namespace `op`)
    for (const shared_ptr<Node>& node : function->get_ordered_ops())
    {
        // Parameters carry no computation; their tensors were mapped above.
        if (node->description() == "Parameter")
        {
            continue;
        }

        // get op inputs from map
        vector<shared_ptr<runtime::HostTensorView>> op_inputs;
        for (const descriptor::Input& input : node->get_inputs())
        {
            descriptor::TensorView* tv = input.get_output().get_tensor_view().get();
            op_inputs.push_back(tensor_map.at(tv));
        }

        // get op outputs from map or create
        vector<shared_ptr<runtime::HostTensorView>> op_outputs;
        for (size_t i = 0; i < node->get_output_size(); ++i)
        {
            descriptor::TensorView* tv = node->get_output_tensor_view(i).get();
            shared_ptr<runtime::HostTensorView> htv;
            // Single lookup: the iterator serves both the presence test and the read.
            auto known = tensor_map.find(tv);
            if (known == tensor_map.end())
            {
                // the output tensor is not in the tensor map so create a new tensor
                const Shape& shape = node->get_output_shape(i);
                const element::Type& type = node->get_output_element_type(i);
                const string& name = node->get_output_tensor(i).get_name();
                htv = make_shared<runtime::HostTensorView>(type, shape, name);
                tensor_map.insert({tv, htv});
            }
            else
            {
                htv = known->second;
            }
            op_outputs.push_back(htv);
        }

        // get op type
        element::Type type;
        if (dynamic_pointer_cast<op::util::BinaryElementwiseComparison>(node) ||
            dynamic_pointer_cast<op::Select>(node))
        {
            // Get the type of the second input, not the first
            // All BinaryElementwiseComparison ops have the same type for inputs
            // Select has bool for first input and the type we are interested in for the second
            type = node->get_inputs().at(1).get_tensor().get_element_type();
        }
        else if (dynamic_pointer_cast<op::Convert>(node))
        {
            // Convert is dispatched on the type of its input.
            type = node->get_inputs().at(0).get_tensor().get_element_type();
        }
        else
        {
            type = node->get_element_type();
        }

        generate_calls(type, *node, op_outputs, op_inputs);

        // delete any obsolete tensors
        // Entries are matched by tensor name, and at most one entry is dropped
        // per tensor on the op's liveness free list.
        for (const descriptor::Tensor* t : node->liveness_free_list)
        {
            const string& obsolete_name = t->get_name();
            for (auto it = tensor_map.begin(); it != tensor_map.end(); ++it)
            {
                if (it->second->get_tensor().get_name() == obsolete_name)
                {
                    tensor_map.erase(it);
                    break;
                }
            }
        }
    }

    return true;
}
// Selects the op_engine<T> instantiation matching `type` and runs it for `op`.
//
// type:    element type used to pick T (boolean is handled as char).
// op:      node to evaluate.
// outputs: tensors passed to op_engine as the op's outputs.
// inputs:  tensors passed to op_engine as the op's inputs.
//
// Throws ngraph_error when `type` is none of the element types listed here.
void runtime::ie::IE_Backend::generate_calls(const element::Type& type,
                                             Node& op,
                                             const vector<shared_ptr<HostTensorView>>& outputs,
                                             const vector<shared_ptr<HostTensorView>>& inputs)
{
    if (type == element::boolean)
    {
        op_engine<char>(op, outputs, inputs);
        return;
    }
    if (type == element::f32)
    {
        op_engine<float>(op, outputs, inputs);
        return;
    }
    if (type == element::f64)
    {
        op_engine<double>(op, outputs, inputs);
        return;
    }
    if (type == element::i8)
    {
        op_engine<int8_t>(op, outputs, inputs);
        return;
    }
    if (type == element::i16)
    {
        op_engine<int16_t>(op, outputs, inputs);
        return;
    }
    if (type == element::i32)
    {
        op_engine<int32_t>(op, outputs, inputs);
        return;
    }
    if (type == element::i64)
    {
        op_engine<int64_t>(op, outputs, inputs);
        return;
    }
    if (type == element::u8)
    {
        op_engine<uint8_t>(op, outputs, inputs);
        return;
    }
    if (type == element::u16)
    {
        op_engine<uint16_t>(op, outputs, inputs);
        return;
    }
    if (type == element::u32)
    {
        op_engine<uint32_t>(op, outputs, inputs);
        return;
    }
    if (type == element::u64)
    {
        op_engine<uint64_t>(op, outputs, inputs);
        return;
    }

    // No branch matched: report the offending type together with the op name.
    stringstream ss;
    ss << "unsupported element type " << type << " op " << op.get_name();
    throw ngraph_error(ss.str());
}
This diff is collapsed.
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <algorithm>
#include <cstdlib>
#include <iomanip>
#include "ngraph/op/result.hpp"
#include "ngraph/runtime/host_tensor_view.hpp"
#include "ngraph/runtime/interpreter/int_call_frame.hpp"
using namespace std;
using namespace ngraph;
runtime::interpreter::INT_CallFrame::INT_CallFrame(shared_ptr<Function> func)
: m_function(func)
, m_emit_timing(false)
, m_nan_check(std::getenv("NGRAPH_INTERPRETER_NAN_CHECK") != nullptr)
{
}
// Interprets `function`, evaluating its ops in the order given by
// get_ordered_ops().
//
// function:   graph to execute.
// output_tvs: destination tensors, one per function output, in output order.
// input_tvs:  source tensors, consumed sequentially for every output of every
//             function parameter, in parameter order.
//
// When NaN checking is enabled, the inputs are scanned before execution and
// each op's outputs are scanned right after the op runs. When timing is
// enabled, each op's execution is timed with its entry in m_timer_map.
//
// Throws ngraph_error if a function output op is not an op::Result, and
// std::out_of_range if fewer tensors are supplied than the function needs.
void runtime::interpreter::INT_CallFrame::call(
    std::shared_ptr<Function> function,
    const vector<shared_ptr<runtime::HostTensorView>>& output_tvs,
    const vector<shared_ptr<runtime::HostTensorView>>& input_tvs)
{
    if (m_nan_check)
    {
        perform_nan_check(input_tvs);
    }

    unordered_map<descriptor::TensorView*, shared_ptr<runtime::HostTensorView>> tensor_map;

    // Bind every parameter output to the next caller-supplied input tensor.
    size_t arg_index = 0;
    for (const shared_ptr<op::Parameter>& param : function->get_parameters())
    {
        for (size_t i = 0; i < param->get_output_size(); ++i)
        {
            descriptor::TensorView* tv = param->get_output_tensor_view(i).get();
            // at(): no argument validation is visible in this function, so fail
            // loudly instead of reading past the end of input_tvs.
            tensor_map.insert({tv, input_tvs.at(arg_index++)});
        }
    }

    // Bind every function output to the matching caller-supplied output tensor.
    for (size_t i = 0; i < function->get_output_size(); i++)
    {
        auto output_op = function->get_output_op(i);
        if (!std::dynamic_pointer_cast<op::Result>(output_op))
        {
            throw ngraph_error("One of function's outputs isn't op::Result");
        }
        descriptor::TensorView* tv = output_op->get_output_tensor_view(0).get();
        tensor_map.insert({tv, output_tvs.at(i)});
    }

    // Invoke computation
    // (the loop variable is called `node` so it does not shadow namespace `op`)
    for (const shared_ptr<Node>& node : function->get_ordered_ops())
    {
        // Parameters carry no computation; their tensors were bound above.
        if (node->description() == "Parameter")
        {
            continue;
        }

        vector<shared_ptr<runtime::HostTensorView>> inputs;
        vector<shared_ptr<runtime::HostTensorView>> outputs;

        for (const descriptor::Input& input : node->get_inputs())
        {
            descriptor::TensorView* tv = input.get_output().get_tensor_view().get();
            inputs.push_back(tensor_map.at(tv));
        }

        for (size_t i = 0; i < node->get_output_size(); ++i)
        {
            descriptor::TensorView* tv = node->get_output_tensor_view(i).get();
            shared_ptr<runtime::HostTensorView> itv;
            // Single lookup: the iterator serves both the presence test and the read.
            auto known = tensor_map.find(tv);
            if (known == tensor_map.end())
            {
                // The output tensor is not in the tensor map so create a new tensor
                const Shape& shape = node->get_output_shape(i);
                const element::Type& element_type = node->get_output_element_type(i);
                const string& tensor_name = node->get_output_tensor(i).get_name();
                itv = make_shared<runtime::HostTensorView>(element_type, shape, tensor_name);
                tensor_map.insert({tv, itv});
            }
            else
            {
                itv = known->second;
            }
            outputs.push_back(itv);
        }

        // base_type defaults to the first input's element type (or the op's own
        // type when it has no inputs); secondary_type is the op's element type.
        element::Type base_type;
        element::Type secondary_type;
        if (node->get_inputs().empty())
        {
            base_type = node->get_element_type();
        }
        else
        {
            base_type = node->get_inputs().at(0).get_tensor().get_element_type();
        }
        secondary_type = node->get_element_type();

        // Some ops have unusual input/output types so handle those special cases here
        if (node->description() == "Select")
        {
            base_type = node->get_inputs().at(1).get_tensor().get_element_type();
            secondary_type = node->get_inputs().at(0).get_tensor().get_element_type();
        }

        if (m_emit_timing)
        {
            m_timer_map[node.get()].start();
        }

        generate_calls(base_type, secondary_type, *node, inputs, outputs);

        if (m_emit_timing)
        {
            m_timer_map[node.get()].stop();
        }

        if (m_nan_check)
        {
            perform_nan_check(outputs, node.get());
        }

        // Delete any obsolete tensors
        // Entries are matched by tensor name, and at most one entry is dropped
        // per tensor on the op's liveness free list.
        for (const descriptor::Tensor* t : node->liveness_free_list)
        {
            const string& obsolete_name = t->get_name();
            for (auto it = tensor_map.begin(); it != tensor_map.end(); ++it)
            {
                if (it->second->get_tensor().get_name() == obsolete_name)
                {
                    tensor_map.erase(it);
                    break;
                }
            }
        }
    }
}
// First dispatch stage: picks the generate_calls<T> instantiation matching
// `base_type` (boolean is handled as char) and forwards the remaining
// arguments to it unchanged.
//
// Throws runtime_error when `base_type` is none of the element types listed here.
void runtime::interpreter::INT_CallFrame::generate_calls(
    const element::Type& base_type,
    const element::Type& secondary_type,
    ngraph::Node& op,
    const std::vector<std::shared_ptr<HostTensorView>>& args,
    const std::vector<std::shared_ptr<HostTensorView>>& out)
{
    if (base_type == element::boolean)
    {
        generate_calls<char>(secondary_type, op, args, out);
        return;
    }
    if (base_type == element::f32)
    {
        generate_calls<float>(secondary_type, op, args, out);
        return;
    }
    if (base_type == element::f64)
    {
        generate_calls<double>(secondary_type, op, args, out);
        return;
    }
    if (base_type == element::i8)
    {
        generate_calls<int8_t>(secondary_type, op, args, out);
        return;
    }
    if (base_type == element::i16)
    {
        generate_calls<int16_t>(secondary_type, op, args, out);
        return;
    }
    if (base_type == element::i32)
    {
        generate_calls<int32_t>(secondary_type, op, args, out);
        return;
    }
    if (base_type == element::i64)
    {
        generate_calls<int64_t>(secondary_type, op, args, out);
        return;
    }
    if (base_type == element::u8)
    {
        generate_calls<uint8_t>(secondary_type, op, args, out);
        return;
    }
    if (base_type == element::u16)
    {
        generate_calls<uint16_t>(secondary_type, op, args, out);
        return;
    }
    if (base_type == element::u32)
    {
        generate_calls<uint32_t>(secondary_type, op, args, out);
        return;
    }
    if (base_type == element::u64)
    {
        generate_calls<uint64_t>(secondary_type, op, args, out);
        return;
    }

    // No branch matched: report the offending type together with the op name.
    stringstream ss;
    ss << "unsupported element type " << base_type << " op " << op.get_name();
    throw runtime_error(ss.str());
}
// Public entry point: downcasts the generic tensor views to HostTensorView
// and runs the function this frame was constructed with.
// The casts are unchecked static_pointer_casts — assumes every tensor passed
// in really is a HostTensorView (TODO confirm with callers).
void runtime::interpreter::INT_CallFrame::call(
    const vector<shared_ptr<runtime::TensorView>>& output_tvs,
    const vector<shared_ptr<runtime::TensorView>>& input_tvs)
{
    vector<shared_ptr<runtime::HostTensorView>> host_inputs;
    for (size_t i = 0; i < input_tvs.size(); ++i)
    {
        host_inputs.push_back(static_pointer_cast<runtime::HostTensorView>(input_tvs[i]));
    }

    vector<shared_ptr<runtime::HostTensorView>> host_outputs;
    for (size_t i = 0; i < output_tvs.size(); ++i)
    {
        host_outputs.push_back(static_pointer_cast<runtime::HostTensorView>(output_tvs[i]));
    }

    call(m_function, host_outputs, host_inputs);
}
vector<runtime::PerformanceCounter>
runtime::interpreter::INT_CallFrame::get_performance_data() const
{
vector<runtime::PerformanceCounter> rc;
for (const pair<const Node*, stopwatch> p : m_timer_map)
{
rc.emplace_back(p.first->get_name().c_str(),
p.second.get_total_microseconds(),
p.second.get_call_count());
}
return rc;
}
// Returns true when any of the `count` values starting at `data` is NaN.
template <typename T>
static bool buffer_has_nan(const T* data, size_t count)
{
    for (size_t i = 0; i < count; i++)
    {
        if (std::isnan(data[i]))
        {
            return true;
        }
    }
    return false;
}

// Scans every f32/f64 tensor in `tvs` for NaN values; tensors of any other
// element type are skipped.
//
// tvs: tensors to inspect, in order.
// op:  when non-null, the tensors are reported as outputs of this op;
//      when null, they are reported as function inputs, numbered from 1.
//
// Throws runtime_error for the first tensor found to contain a NaN.
void runtime::interpreter::INT_CallFrame::perform_nan_check(
    const vector<shared_ptr<HostTensorView>>& tvs, const Node* op)
{
    size_t arg_number = 1;
    for (const shared_ptr<HostTensorView>& tv : tvs)
    {
        const element::Type& type = tv->get_tensor().get_element_type();

        bool nan_found = false;
        if (type == element::f32)
        {
            const float* data = reinterpret_cast<float*>(tv->get_data_ptr());
            nan_found = buffer_has_nan(data, tv->get_element_count());
        }
        else if (type == element::f64)
        {
            const double* data = reinterpret_cast<double*>(tv->get_data_ptr());
            nan_found = buffer_has_nan(data, tv->get_element_count());
        }

        if (nan_found)
        {
            if (op)
            {
                throw runtime_error("nan found in op '" + op->get_name() + "' output");
            }
            throw runtime_error("nan found in function's input tensor number " +
                                to_string(arg_number));
        }
        arg_number++;
    }
}
void runtime::interpreter::INT_CallFrame::set_nan_check(bool value)
{
m_nan_check = value;
}
This diff is collapsed.
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <fstream>
#include <memory>
#include <string>
#include <tuple>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
#include "ngraph/descriptor/input.hpp"
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
#include "ngraph/descriptor/output.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/function.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/abs.hpp"
#include "ngraph/op/acos.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/asin.hpp"
#include "ngraph/op/atan.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/cos.hpp"
#include "ngraph/op/cosh.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/exp.hpp"
#include "ngraph/op/function_call.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp"
#include "ngraph/op/less.hpp"
#include "ngraph/op/less_eq.hpp"
#include "ngraph/op/log.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/negative.hpp"
#include "ngraph/op/not_equal.hpp"
#include "ngraph/op/power.hpp"
#include "ngraph/op/reduce.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/op/sign.hpp"
#include "ngraph/op/sin.hpp"
#include "ngraph/op/sinh.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/op/sum.hpp"
#include "ngraph/op/tan.hpp"
#include "ngraph/op/tanh.hpp"
#include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/dump_sorted.hpp"
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/runtime/interpreter/int_backend.hpp"
#include "ngraph/runtime/interpreter/int_call_frame.hpp"
#include "ngraph/runtime/interpreter/int_external_function.hpp"
using namespace std;
using namespace ngraph;
// Directory that is removed when this translation unit is statically
// initialized (see StaticInitializers below).
// NOTE(review): the name "cpu_codegen" suggests CPU-backend codegen output;
// confirm the interpreter really needs to clean it up.
static const string s_output_dir = "cpu_codegen";
// Helper whose constructor removes s_output_dir. It exists only so that the
// removal runs as a side effect of constructing s_static_initializers.
class StaticInitializers
{
public:
StaticInitializers() { file_util::remove_directory(s_output_dir); }
};
// Constructed during static initialization; declared after s_output_dir so
// the string is already initialized when the constructor reads it.
static StaticInitializers s_static_initializers;
using descriptor::layout::DenseTensorViewLayout;
runtime::interpreter::ExternalFunction::ExternalFunction(const shared_ptr<Function>& function,
bool release_function)
: m_function(function)
, m_release_function(release_function)
, m_is_compiled(false)
, m_timing(false)
{
}
void runtime::interpreter::ExternalFunction::compile()
{
if (m_is_compiled)
{
return;
}
pass::Manager pass_manager;
// For now, just make everyone row-major.
pass_manager.register_pass<pass::AssignLayout<DenseTensorViewLayout>>();
pass_manager.register_pass<pass::Liveness>();
pass_manager.run_passes(m_function);
m_is_compiled = true;
if (m_release_function)
{
release_function();
}
}
// Compiles the function if that has not happened yet, then returns a new
// INT_CallFrame constructed from m_function.
// NOTE(review): when the object was created with release_function == true,
// compile() resets m_function to nullptr, so the frame built here would
// receive a null function — confirm whether that combination is ever used.
shared_ptr<runtime::interpreter::INT_CallFrame>
    runtime::interpreter::ExternalFunction::make_call_frame()
{
    if (!m_is_compiled)
    {
        compile();
    }

    auto frame = make_shared<runtime::interpreter::INT_CallFrame>(m_function);
    return frame;
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <memory>
#include "ngraph/function.hpp"
namespace ngraph
{
namespace runtime
{
namespace interpreter
{
class INT_CallFrame;
// Holds a Function for the interpreter backend and produces INT_CallFrame
// objects that execute it. The function is compiled on demand, at most once.
class ExternalFunction
{
public:
// function:         graph to wrap.
// release_function: when true, the reference to `function` is dropped
//                   (set to nullptr) once compile() has run.
ExternalFunction(const std::shared_ptr<ngraph::Function>& function,
bool release_function = false);
// Compiles the function if necessary and returns a new call frame for it.
std::shared_ptr<INT_CallFrame> make_call_frame();
protected:
// Prepares m_function for execution; does nothing once m_is_compiled is set.
void compile();
// Drops this object's reference to the wrapped function.
void release_function() { m_function = nullptr; }
// Wrapped graph; nullptr after release_function() has been called.
std::shared_ptr<ngraph::Function> m_function;
// Whether compile() should call release_function() when it finishes.
bool m_release_function;
// Set by compile() after the passes have run.
bool m_is_compiled;
// Initialized to false by the constructor; no other use is visible here.
bool m_timing;
};
}
}
}
......@@ -69,7 +69,6 @@ add_subdirectory(files)
#================================================================================================
# TODO add interpreter back to unit tests when it works
set(BACKEND_NAMES ${BACKEND_NAMES} "INTERPRETER")
set(BACKEND_NAMES ${BACKEND_NAMES} "IE")
if(MKLDNN_INCLUDE_DIR)
include_directories(SYSTEM ${MKLDNN_INCLUDE_DIR})
......
......@@ -37,8 +37,8 @@ TEST(INTERPRETER, nan_check_input)
auto backend = runtime::Backend::create("INTERPRETER");
shared_ptr<runtime::interpreter::INT_Backend> ibackend =
static_pointer_cast<runtime::interpreter::INT_Backend>(backend);
shared_ptr<runtime::interpreter::INTBackend> ibackend =
static_pointer_cast<runtime::interpreter::INTBackend>(backend);
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape);
......@@ -60,8 +60,8 @@ TEST(INTERPRETER, nan_check_output)
auto backend = runtime::Backend::create("INTERPRETER");
shared_ptr<runtime::interpreter::INT_Backend> ibackend =
static_pointer_cast<runtime::interpreter::INT_Backend>(backend);
shared_ptr<runtime::interpreter::INTBackend> ibackend =
static_pointer_cast<runtime::interpreter::INTBackend>(backend);
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment