Commit 862a34d3 authored by Robert Kimball's avatar Robert Kimball Committed by Sang Ik Lee

Greatly simplify the GCPU backend (#4131)

* New GCPU layout

* Passing tests

* Test passing

* Move GCPU to the gcpu directory since names must match

* Revert "Move GCPU to the gcpu directory since names must match"

This reverts commit a51f50699d0d1ab602a71226687cf053a9ade27d.

* Cleanup

* Disable some onnx tests

* Fix manifest

* style
Co-authored-by: 's avatarScott Cyphers <diyessi@users.noreply.github.com>
parent 868ba1f8
......@@ -37,59 +37,10 @@ using namespace ngraph;
using descriptor::layout::DenseTensorLayout;
// Translate a node's NodeTypeInfo into this backend's OP_TYPEID enumerator.
// Unrecognized ops map to OP_TYPEID::UnknownOp.
runtime::gcpu::OP_TYPEID runtime::gcpu::GCPUExecutable::get_typeid(const NodeTypeInfo& type_info)
{
    // Expanding op_tbl.hpp produces one entry per op, e.g.:
    //     {Abs::type_info, OP_TYPEID::Abs},
    //     {Acos::type_info, OP_TYPEID::Acos},
    //     ...
    static const map<NodeTypeInfo, OP_TYPEID> dispatch{
#define NGRAPH_OP(NAME, NAMESPACE) {NAMESPACE::NAME::type_info, OP_TYPEID::NAME},
#include "ngraph/opsets/opset0_tbl.hpp"
#undef NGRAPH_OP
    };
    const auto entry = dispatch.find(type_info);
    return entry == dispatch.end() ? OP_TYPEID::UnknownOp : entry->second;
}
// Compile-time constructor: clone the caller's function, lower it through the
// backend's pass pipeline, and cache the execution-ordered node list.
runtime::gcpu::GCPUExecutable::GCPUExecutable(const shared_ptr<Function>& function,
bool enable_performance_collection)
: m_is_compiled{true}
, m_performance_counters_enabled{enable_performance_collection}
{
// Clone first so the passes below never mutate the caller's graph.
m_function = clone_function(*function);
// Pass order matters: decomposition/downgrade must precede layout
// assignment, and Liveness runs last over the final graph.
pass::Manager pass_manager;
pass_manager.register_pass<pass::LikeReplacement>();
pass_manager.register_pass<pass::FusedOpDecomposition>();
pass_manager.register_pass<pass::Opset0Downgrade>();
pass_manager.register_pass<pass::ImplicitBroadcastElimination>();
pass_manager.register_pass<pass::AssignLayout<DenseTensorLayout>>();
pass_manager.register_pass<pass::Liveness>();
pass_manager.run_passes(m_function);
// Cache topologically ordered ops; call() iterates this list.
for (auto node : m_function->get_ordered_ops())
{
m_nodes.push_back(node);
}
set_parameters_and_results(*m_function);
}
// NOTE(review): this span appears to be an interleaved diff artifact - the
// initializer list below mixes the string-deserializing constructor with an
// `INTExecutable(...)` delegation line that references `function` and
// `enable_performance_collection`, parameters this overload does not take.
// As written it would not compile; confirm against the original file.
runtime::gcpu::GCPUExecutable::GCPUExecutable(const std::string& model_string)
: m_is_compiled{true}
, m_performance_counters_enabled{false}
: INTExecutable(function, enable_performance_collection)
{
// Rebuild the function graph from its serialized (JSON) representation.
m_function = deserialize(model_string);
for (auto& node : m_function->get_ordered_ops())
{
m_nodes.push_back(node);
}
set_parameters_and_results(*m_function);
}
bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
......@@ -102,10 +53,6 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
func_inputs.push_back(host_tensor);
}
if (m_nan_check_enabled)
{
perform_nan_check(func_inputs);
}
// convert outputs to HostTensor
vector<shared_ptr<HostTensor>> func_outputs;
......@@ -142,8 +89,8 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
// for each ordered op in the graph
for (auto& op : m_nodes)
{
auto type_id = get_typeid(op->get_type_info());
if (type_id == OP_TYPEID::Parameter)
auto type_id = get_typeid(*op);
if (type_id == ngraph::runtime::interpreter::OP_TYPEID::Parameter)
{
continue;
}
......@@ -184,23 +131,27 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
#pragma GCC diagnostic ignored "-Wswitch-enum"
switch (type_id)
{
case OP_TYPEID::Convert:
case OP_TYPEID::Quantize:
case OP_TYPEID::Dequantize:
case OP_TYPEID::ArgMin:
case OP_TYPEID::ArgMax: type = op->get_input_element_type(0); break;
case OP_TYPEID::Equal:
case OP_TYPEID::Greater:
case OP_TYPEID::GreaterEq:
case OP_TYPEID::Less:
case OP_TYPEID::LessEq:
case OP_TYPEID::NotEqual:
case ngraph::runtime::interpreter::OP_TYPEID::Convert:
case ngraph::runtime::interpreter::OP_TYPEID::Quantize:
case ngraph::runtime::interpreter::OP_TYPEID::Dequantize:
case ngraph::runtime::interpreter::OP_TYPEID::ArgMin:
case ngraph::runtime::interpreter::OP_TYPEID::ArgMax:
type = op->get_input_element_type(0);
break;
case ngraph::runtime::interpreter::OP_TYPEID::Equal:
case ngraph::runtime::interpreter::OP_TYPEID::Greater:
case ngraph::runtime::interpreter::OP_TYPEID::GreaterEq:
case ngraph::runtime::interpreter::OP_TYPEID::Less:
case ngraph::runtime::interpreter::OP_TYPEID::LessEq:
case ngraph::runtime::interpreter::OP_TYPEID::NotEqual:
// Get the type of the second input, not the first
// All BinaryElementwiseComparision ops have the same type for inputs
// Select has bool for first input and the type we are interested in for the second
type = op->get_input_element_type(1);
break;
case OP_TYPEID::TopK: type = op->get_output_element_type(1); break;
case ngraph::runtime::interpreter::OP_TYPEID::TopK:
type = op->get_output_element_type(1);
break;
default: type = op->get_output_element_type(0); break;
}
#pragma GCC diagnostic pop
......@@ -214,10 +165,6 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
{
m_timer_map[op].stop();
}
if (m_nan_check_enabled)
{
perform_nan_check(op_outputs, op.get());
}
}
return true;
......@@ -231,17 +178,17 @@ void runtime::gcpu::GCPUExecutable::generate_calls(const element::Type& type,
stringstream ss;
switch (type)
{
case element::Type_t::boolean: op_engine<char>(op, out, in); break;
case element::Type_t::f32: op_engine<float>(op, out, in); break;
case element::Type_t::f64: op_engine<double>(op, out, in); break;
case element::Type_t::i8: op_engine<int8_t>(op, out, in); break;
case element::Type_t::i16: op_engine<int16_t>(op, out, in); break;
case element::Type_t::i32: op_engine<int32_t>(op, out, in); break;
case element::Type_t::i64: op_engine<int64_t>(op, out, in); break;
case element::Type_t::u8: op_engine<uint8_t>(op, out, in); break;
case element::Type_t::u16: op_engine<uint16_t>(op, out, in); break;
case element::Type_t::u32: op_engine<uint32_t>(op, out, in); break;
case element::Type_t::u64: op_engine<uint64_t>(op, out, in); break;
case element::Type_t::boolean: gop_engine<char>(op, out, in); break;
case element::Type_t::f32: gop_engine<float>(op, out, in); break;
case element::Type_t::f64: gop_engine<double>(op, out, in); break;
case element::Type_t::i8: gop_engine<int8_t>(op, out, in); break;
case element::Type_t::i16: gop_engine<int16_t>(op, out, in); break;
case element::Type_t::i32: gop_engine<int32_t>(op, out, in); break;
case element::Type_t::i64: gop_engine<int64_t>(op, out, in); break;
case element::Type_t::u8: gop_engine<uint8_t>(op, out, in); break;
case element::Type_t::u16: gop_engine<uint16_t>(op, out, in); break;
case element::Type_t::u32: gop_engine<uint32_t>(op, out, in); break;
case element::Type_t::u64: gop_engine<uint64_t>(op, out, in); break;
case element::Type_t::undefined:
case element::Type_t::dynamic:
case element::Type_t::u1:
......@@ -251,76 +198,3 @@ void runtime::gcpu::GCPUExecutable::generate_calls(const element::Type& type,
throw ngraph_error(ss.str());
}
}
// Enable or disable NaN scanning of function inputs and per-op outputs
// during call(); checked via m_nan_check_enabled before perform_nan_check.
void runtime::gcpu::GCPUExecutable::set_nan_check(bool enable)
{
m_nan_check_enabled = enable;
}
// Report the per-op timing gathered during call(): one PerformanceCounter
// per node, carrying total microseconds and invocation count.
vector<runtime::PerformanceCounter> runtime::gcpu::GCPUExecutable::get_performance_data() const
{
    vector<runtime::PerformanceCounter> rc;
    rc.reserve(m_timer_map.size());
    // Bind by const reference: the map's value_type is
    // pair<const shared_ptr<const Node>, stopwatch>, so the previous by-value
    // pair<shared_ptr<const Node>, stopwatch> binding copy-converted every
    // entry (shared_ptr refcount bump + stopwatch copy) on each iteration.
    for (const auto& p : m_timer_map)
    {
        rc.emplace_back(p.first, p.second.get_total_microseconds(), p.second.get_call_count());
    }
    return rc;
}
void runtime::gcpu::GCPUExecutable::perform_nan_check(const vector<shared_ptr<HostTensor>>& tensors,
const Node* op)
{
size_t arg_number = 1;
for (const shared_ptr<HostTensor>& tensor : tensors)
{
const element::Type& type = tensor->get_element_type();
if (type == element::f32)
{
const float* data = tensor->get_data_ptr<float>();
for (size_t i = 0; i < tensor->get_element_count(); i++)
{
if (std::isnan(data[i]))
{
if (op)
{
throw runtime_error("nan found in op '" + op->get_name() + "' output");
}
else
{
throw runtime_error("nan found in function's input tensor number " +
to_string(arg_number));
}
}
}
}
else if (type == element::f64)
{
const double* data = tensor->get_data_ptr<double>();
for (size_t i = 0; i < tensor->get_element_count(); i++)
{
if (std::isnan(data[i]))
{
if (op)
{
throw runtime_error("nan found in op '" + op->get_name() + "' output");
}
else
{
throw runtime_error("nan found in function's input tensor number " +
to_string(arg_number));
}
}
}
}
arg_number++;
}
}
// Serialize this executable's function to `out` as a cpio archive holding a
// "save_info" version header followed by the serialized model.
void runtime::gcpu::GCPUExecutable::save(ostream& out)
{
cpio::Writer writer(out);
// NOTE(review): the header says "INTERPRETER" although this is the GCPU
// backend - presumably inherited from the interpreter's save format; confirm
// whether loaders key on this exact string before changing it.
string si = "INTERPRETER Save File 1.0";
writer.write("save_info", si.data(), si.size());
string model = serialize(m_function, 0);
writer.write("model", model.data(), model.size());
}
topk_resnet50
topk_max_sort_none
tile_3d_small_data_rank
tile_3d_few_repeats
fake_quantize_pdpd
convert_float32_bf16
convert_bf16_float32
......@@ -36,9 +36,9 @@ using namespace ngraph;
using descriptor::layout::DenseTensorLayout;
runtime::interpreter::OP_TYPEID
runtime::interpreter::INTExecutable::get_typeid(const NodeTypeInfo& type_info)
runtime::interpreter::OP_TYPEID runtime::interpreter::INTExecutable::get_typeid(const Node& node)
{
const NodeTypeInfo& type_info = node.get_type_info();
// This expands the op list in op_tbl.hpp into a list of enumerations that look like this:
// {Abs::type_info, OP_TYPEID::Abs},
// {Acos::type_info, OP_TYPEID::Acos},
......
......@@ -127,22 +127,18 @@ namespace ngraph
class INTBackend;
class INTExecutable;
namespace
// This expands the op list in op_tbl.hpp into a list of enumerations that look like
// this:
// Abs,
// Acos,
// ...
enum class OP_TYPEID
{
// This expands the op list in op_tbl.hpp into a list of enumerations that look like
// this:
// Abs,
// Acos,
// ...
enum class OP_TYPEID
{
#define NGRAPH_OP(NAME, NAMESPACE) ID_SUFFIX(NAME),
#include "ngraph/runtime/interpreter/opset_int_tbl.hpp"
#undef NGRAPH_OP
UnknownOp
};
}
UnknownOp
};
} // namespace interpreter
} // namespace runtime
} // namespace ngraph
......@@ -174,7 +170,7 @@ public:
std::vector<std::shared_ptr<runtime::Tensor>>
create_output_tensor(size_t output_index, size_t pipeline_depth) override;
private:
protected:
INTExecutable(const std::string& model_string);
std::shared_ptr<ngraph::op::Parameter> get_parameter(size_t index) const;
......@@ -189,15 +185,15 @@ private:
std::unordered_map<const Node*, std::shared_ptr<State>> m_states;
std::set<std::string> m_unsupported_op_name_list;
static OP_TYPEID get_typeid(const NodeTypeInfo& type_info);
static OP_TYPEID get_typeid(const Node& node);
static void perform_nan_check(const std::vector<std::shared_ptr<HostTensor>>&,
const Node* op = nullptr);
void generate_calls(const element::Type& type,
const Node& op,
const std::vector<std::shared_ptr<HostTensor>>& outputs,
const std::vector<std::shared_ptr<HostTensor>>& inputs);
virtual void generate_calls(const element::Type& type,
const Node& op,
const std::vector<std::shared_ptr<HostTensor>>& outputs,
const std::vector<std::shared_ptr<HostTensor>>& inputs);
template <typename T>
void op_engine(const Node& node,
......@@ -210,7 +206,8 @@ private:
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
switch (get_typeid(node.get_type_info()))
// #pragma GCC diagnostic error "-Wcovered-switch-default"
switch (get_typeid(node))
{
case OP_TYPEID::Abs:
{
......
# Quantized convolution is not supported on interpreter
model_quant_conv_linear
model_qlinear_matmul
model_qlinear_matmul_3d
model_conv_integer_no_zero_point
model_matmul_integer_no_zero_point
model_matmul_integer_4d_no_zero_point
fake_quantize
tile_3d_small_data_rank
tile_3d_few_repeats
fake_quantize_pdpd
fake_quantize_with_clip
fake_quantize_with_clip_across_channels
# casting not supported on interpreter
convert_float32_bf16
convert_bf16_float32
# ONNX TopK with dynamic K
top_k_opset_10
top_k_opset_11_const_k_smallest
# Tile op case that the number of elements in "repeats" and shape of "data" are different
tile_3d_small_data_rank
tile_3d_few_repeats
onnx_INTERPRETER.model_quant_conv_linear
onnx_INTERPRETER.top_k_opset_10
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.