Commit 862a34d3 authored by Robert Kimball's avatar Robert Kimball Committed by Sang Ik Lee

Greatly simplify the GCPU backend (#4131)

* New GCPU layout

* Passing tests

* Test passing

* Move GCPU to the gcpu directory since names must match

* Revert "Move GCPU to the gcpu directory since names must match"

This reverts commit a51f50699d0d1ab602a71226687cf053a9ade27d.

* Cleanup

* Disable some onnx tests

* Fix manifest

* style
Co-authored-by: 's avatarScott Cyphers <diyessi@users.noreply.github.com>
parent 868ba1f8
......@@ -37,59 +37,10 @@ using namespace ngraph;
using descriptor::layout::DenseTensorLayout;
// Translate a node's NodeTypeInfo into this backend's OP_TYPEID enumerator.
// Unrecognized ops map to OP_TYPEID::UnknownOp.
runtime::gcpu::OP_TYPEID runtime::gcpu::GCPUExecutable::get_typeid(const NodeTypeInfo& type_info)
{
    // Expanding op_tbl.hpp produces one entry per op, e.g.:
    //     {Abs::type_info, OP_TYPEID::Abs},
    //     {Acos::type_info, OP_TYPEID::Acos},
    //     ...
    static const map<NodeTypeInfo, OP_TYPEID> dispatch{
#define NGRAPH_OP(NAME, NAMESPACE) {NAMESPACE::NAME::type_info, OP_TYPEID::NAME},
#include "ngraph/opsets/opset0_tbl.hpp"
#undef NGRAPH_OP
    };
    const auto entry = dispatch.find(type_info);
    return entry == dispatch.end() ? OP_TYPEID::UnknownOp : entry->second;
}
// Compile-time constructor: clone the caller's function, lower it through the
// backend's pass pipeline, and cache the execution-ordered node list.
runtime::gcpu::GCPUExecutable::GCPUExecutable(const shared_ptr<Function>& function,
bool enable_performance_collection)
: m_is_compiled{true}
, m_performance_counters_enabled{enable_performance_collection}
{
// Clone first so the passes below never mutate the caller's graph.
m_function = clone_function(*function);
// Pass order matters: decomposition/downgrade must precede layout
// assignment, and Liveness runs last over the final graph.
pass::Manager pass_manager;
pass_manager.register_pass<pass::LikeReplacement>();
pass_manager.register_pass<pass::FusedOpDecomposition>();
pass_manager.register_pass<pass::Opset0Downgrade>();
pass_manager.register_pass<pass::ImplicitBroadcastElimination>();
pass_manager.register_pass<pass::AssignLayout<DenseTensorLayout>>();
pass_manager.register_pass<pass::Liveness>();
pass_manager.run_passes(m_function);
// Cache topologically ordered ops; call() iterates this list.
for (auto node : m_function->get_ordered_ops())
{
m_nodes.push_back(node);
}
set_parameters_and_results(*m_function);
}
// NOTE(review): this span appears to be an interleaved diff artifact - the
// initializer list below mixes the string-deserializing constructor with an
// `INTExecutable(...)` delegation line that references `function` and
// `enable_performance_collection`, parameters this overload does not take.
// As written it would not compile; confirm against the original file.
runtime::gcpu::GCPUExecutable::GCPUExecutable(const std::string& model_string)
: m_is_compiled{true}
, m_performance_counters_enabled{false}
: INTExecutable(function, enable_performance_collection)
{
// Rebuild the function graph from its serialized (JSON) representation.
m_function = deserialize(model_string);
for (auto& node : m_function->get_ordered_ops())
{
m_nodes.push_back(node);
}
set_parameters_and_results(*m_function);
}
bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
......@@ -102,10 +53,6 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
func_inputs.push_back(host_tensor);
}
if (m_nan_check_enabled)
{
perform_nan_check(func_inputs);
}
// convert outputs to HostTensor
vector<shared_ptr<HostTensor>> func_outputs;
......@@ -142,8 +89,8 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
// for each ordered op in the graph
for (auto& op : m_nodes)
{
auto type_id = get_typeid(op->get_type_info());
if (type_id == OP_TYPEID::Parameter)
auto type_id = get_typeid(*op);
if (type_id == ngraph::runtime::interpreter::OP_TYPEID::Parameter)
{
continue;
}
......@@ -184,23 +131,27 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
#pragma GCC diagnostic ignored "-Wswitch-enum"
switch (type_id)
{
case OP_TYPEID::Convert:
case OP_TYPEID::Quantize:
case OP_TYPEID::Dequantize:
case OP_TYPEID::ArgMin:
case OP_TYPEID::ArgMax: type = op->get_input_element_type(0); break;
case OP_TYPEID::Equal:
case OP_TYPEID::Greater:
case OP_TYPEID::GreaterEq:
case OP_TYPEID::Less:
case OP_TYPEID::LessEq:
case OP_TYPEID::NotEqual:
case ngraph::runtime::interpreter::OP_TYPEID::Convert:
case ngraph::runtime::interpreter::OP_TYPEID::Quantize:
case ngraph::runtime::interpreter::OP_TYPEID::Dequantize:
case ngraph::runtime::interpreter::OP_TYPEID::ArgMin:
case ngraph::runtime::interpreter::OP_TYPEID::ArgMax:
type = op->get_input_element_type(0);
break;
case ngraph::runtime::interpreter::OP_TYPEID::Equal:
case ngraph::runtime::interpreter::OP_TYPEID::Greater:
case ngraph::runtime::interpreter::OP_TYPEID::GreaterEq:
case ngraph::runtime::interpreter::OP_TYPEID::Less:
case ngraph::runtime::interpreter::OP_TYPEID::LessEq:
case ngraph::runtime::interpreter::OP_TYPEID::NotEqual:
// Get the type of the second input, not the first
// All BinaryElementwiseComparision ops have the same type for inputs
// Select has bool for first input and the type we are interested in for the second
type = op->get_input_element_type(1);
break;
case OP_TYPEID::TopK: type = op->get_output_element_type(1); break;
case ngraph::runtime::interpreter::OP_TYPEID::TopK:
type = op->get_output_element_type(1);
break;
default: type = op->get_output_element_type(0); break;
}
#pragma GCC diagnostic pop
......@@ -214,10 +165,6 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
{
m_timer_map[op].stop();
}
if (m_nan_check_enabled)
{
perform_nan_check(op_outputs, op.get());
}
}
return true;
......@@ -231,17 +178,17 @@ void runtime::gcpu::GCPUExecutable::generate_calls(const element::Type& type,
stringstream ss;
switch (type)
{
case element::Type_t::boolean: op_engine<char>(op, out, in); break;
case element::Type_t::f32: op_engine<float>(op, out, in); break;
case element::Type_t::f64: op_engine<double>(op, out, in); break;
case element::Type_t::i8: op_engine<int8_t>(op, out, in); break;
case element::Type_t::i16: op_engine<int16_t>(op, out, in); break;
case element::Type_t::i32: op_engine<int32_t>(op, out, in); break;
case element::Type_t::i64: op_engine<int64_t>(op, out, in); break;
case element::Type_t::u8: op_engine<uint8_t>(op, out, in); break;
case element::Type_t::u16: op_engine<uint16_t>(op, out, in); break;
case element::Type_t::u32: op_engine<uint32_t>(op, out, in); break;
case element::Type_t::u64: op_engine<uint64_t>(op, out, in); break;
case element::Type_t::boolean: gop_engine<char>(op, out, in); break;
case element::Type_t::f32: gop_engine<float>(op, out, in); break;
case element::Type_t::f64: gop_engine<double>(op, out, in); break;
case element::Type_t::i8: gop_engine<int8_t>(op, out, in); break;
case element::Type_t::i16: gop_engine<int16_t>(op, out, in); break;
case element::Type_t::i32: gop_engine<int32_t>(op, out, in); break;
case element::Type_t::i64: gop_engine<int64_t>(op, out, in); break;
case element::Type_t::u8: gop_engine<uint8_t>(op, out, in); break;
case element::Type_t::u16: gop_engine<uint16_t>(op, out, in); break;
case element::Type_t::u32: gop_engine<uint32_t>(op, out, in); break;
case element::Type_t::u64: gop_engine<uint64_t>(op, out, in); break;
case element::Type_t::undefined:
case element::Type_t::dynamic:
case element::Type_t::u1:
......@@ -251,76 +198,3 @@ void runtime::gcpu::GCPUExecutable::generate_calls(const element::Type& type,
throw ngraph_error(ss.str());
}
}
// Enable or disable NaN scanning of function inputs and per-op outputs
// during call(); checked via m_nan_check_enabled before perform_nan_check.
void runtime::gcpu::GCPUExecutable::set_nan_check(bool enable)
{
m_nan_check_enabled = enable;
}
// Report the per-op timing gathered during call(): one PerformanceCounter
// per node, carrying total microseconds and invocation count.
vector<runtime::PerformanceCounter> runtime::gcpu::GCPUExecutable::get_performance_data() const
{
    vector<runtime::PerformanceCounter> rc;
    rc.reserve(m_timer_map.size());
    // Bind by const reference: the map's value_type is
    // pair<const shared_ptr<const Node>, stopwatch>, so the previous by-value
    // pair<shared_ptr<const Node>, stopwatch> binding copy-converted every
    // entry (shared_ptr refcount bump + stopwatch copy) on each iteration.
    for (const auto& p : m_timer_map)
    {
        rc.emplace_back(p.first, p.second.get_total_microseconds(), p.second.get_call_count());
    }
    return rc;
}
void runtime::gcpu::GCPUExecutable::perform_nan_check(const vector<shared_ptr<HostTensor>>& tensors,
const Node* op)
{
size_t arg_number = 1;
for (const shared_ptr<HostTensor>& tensor : tensors)
{
const element::Type& type = tensor->get_element_type();
if (type == element::f32)
{
const float* data = tensor->get_data_ptr<float>();
for (size_t i = 0; i < tensor->get_element_count(); i++)
{
if (std::isnan(data[i]))
{
if (op)
{
throw runtime_error("nan found in op '" + op->get_name() + "' output");
}
else
{
throw runtime_error("nan found in function's input tensor number " +
to_string(arg_number));
}
}
}
}
else if (type == element::f64)
{
const double* data = tensor->get_data_ptr<double>();
for (size_t i = 0; i < tensor->get_element_count(); i++)
{
if (std::isnan(data[i]))
{
if (op)
{
throw runtime_error("nan found in op '" + op->get_name() + "' output");
}
else
{
throw runtime_error("nan found in function's input tensor number " +
to_string(arg_number));
}
}
}
}
arg_number++;
}
}
// Serialize this executable's function to `out` as a cpio archive holding a
// "save_info" version header followed by the serialized model.
void runtime::gcpu::GCPUExecutable::save(ostream& out)
{
cpio::Writer writer(out);
// NOTE(review): the header says "INTERPRETER" although this is the GCPU
// backend - presumably inherited from the interpreter's save format; confirm
// whether loaders key on this exact string before changing it.
string si = "INTERPRETER Save File 1.0";
writer.write("save_info", si.data(), si.size());
string model = serialize(m_function, 0);
writer.write("model", model.data(), model.size());
}
topk_resnet50
topk_max_sort_none
tile_3d_small_data_rank
tile_3d_few_repeats
fake_quantize_pdpd
convert_float32_bf16
convert_bf16_float32
......@@ -36,9 +36,9 @@ using namespace ngraph;
using descriptor::layout::DenseTensorLayout;
runtime::interpreter::OP_TYPEID
runtime::interpreter::INTExecutable::get_typeid(const NodeTypeInfo& type_info)
runtime::interpreter::OP_TYPEID runtime::interpreter::INTExecutable::get_typeid(const Node& node)
{
const NodeTypeInfo& type_info = node.get_type_info();
// This expands the op list in op_tbl.hpp into a list of enumerations that look like this:
// {Abs::type_info, OP_TYPEID::Abs},
// {Acos::type_info, OP_TYPEID::Acos},
......
......@@ -127,22 +127,18 @@ namespace ngraph
class INTBackend;
class INTExecutable;
namespace
// This expands the op list in op_tbl.hpp into a list of enumerations that look like
// this:
// Abs,
// Acos,
// ...
enum class OP_TYPEID
{
// This expands the op list in op_tbl.hpp into a list of enumerations that look like
// this:
// Abs,
// Acos,
// ...
enum class OP_TYPEID
{
#define NGRAPH_OP(NAME, NAMESPACE) ID_SUFFIX(NAME),
#include "ngraph/runtime/interpreter/opset_int_tbl.hpp"
#undef NGRAPH_OP
UnknownOp
};
}
UnknownOp
};
} // namespace interpreter
} // namespace runtime
} // namespace ngraph
......@@ -174,7 +170,7 @@ public:
std::vector<std::shared_ptr<runtime::Tensor>>
create_output_tensor(size_t output_index, size_t pipeline_depth) override;
private:
protected:
INTExecutable(const std::string& model_string);
std::shared_ptr<ngraph::op::Parameter> get_parameter(size_t index) const;
......@@ -189,15 +185,15 @@ private:
std::unordered_map<const Node*, std::shared_ptr<State>> m_states;
std::set<std::string> m_unsupported_op_name_list;
static OP_TYPEID get_typeid(const NodeTypeInfo& type_info);
static OP_TYPEID get_typeid(const Node& node);
static void perform_nan_check(const std::vector<std::shared_ptr<HostTensor>>&,
const Node* op = nullptr);
void generate_calls(const element::Type& type,
const Node& op,
const std::vector<std::shared_ptr<HostTensor>>& outputs,
const std::vector<std::shared_ptr<HostTensor>>& inputs);
virtual void generate_calls(const element::Type& type,
const Node& op,
const std::vector<std::shared_ptr<HostTensor>>& outputs,
const std::vector<std::shared_ptr<HostTensor>>& inputs);
template <typename T>
void op_engine(const Node& node,
......@@ -210,7 +206,8 @@ private:
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
switch (get_typeid(node.get_type_info()))
// #pragma GCC diagnostic error "-Wcovered-switch-default"
switch (get_typeid(node))
{
case OP_TYPEID::Abs:
{
......
# Quantized convolution is not supported on interpreter
model_quant_conv_linear
model_qlinear_matmul
model_qlinear_matmul_3d
model_conv_integer_no_zero_point
model_matmul_integer_no_zero_point
model_matmul_integer_4d_no_zero_point
fake_quantize
tile_3d_small_data_rank
tile_3d_few_repeats
fake_quantize_pdpd
fake_quantize_with_clip
fake_quantize_with_clip_across_channels
# casting not supported on interpreter
convert_float32_bf16
convert_bf16_float32
# ONNX TopK with dynamic K
top_k_opset_10
top_k_opset_11_const_k_smallest
# Tile op case that the number of elements in "repeats" and shape of "data" are different
tile_3d_small_data_rank
tile_3d_few_repeats
onnx_INTERPRETER.model_quant_conv_linear
onnx_INTERPRETER.top_k_opset_10
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.