Change GPU backend to use op_tbl (#1618)

* sort op list
* use op_tbl
* throw unsupported_op exception when appropriate
* remove dead code
* Add more use of NGRAPH_OP macro to remove boilerplate definitions/implementations
* revert moving class out of namespace
* change from switch to dispatcher map

Change GPU backend to use op_tbl (#1618)
* sort op list
* use op_tbl
* throw unsupported_op exception when appropriate
* remove dead code
* Add more use of NGRAPH_OP macro to remove boilerplate definitions/implementations
* revert moving class out of namespace
* change from switch to dispatcher map
d52473c8 · Robert Kimball · GitHub · 3d69bf7a · d52473c8 · d52473c8
Unverified commit d52473c8, authored Sep 19, 2018 by Robert Kimball; committed by GitHub on Sep 19, 2018
4 changed files
--- a/src/ngraph/runtime/gpu/gpu_emitter.cpp
+++ b/src/ngraph/runtime/gpu/gpu_emitter.cpp
--- a/src/ngraph/runtime/gpu/gpu_emitter.hpp
+++ b/src/ngraph/runtime/gpu/gpu_emitter.hpp
@@ -24,12 +24,6 @@
 #include "ngraph/runtime/gpu/gpu_external_function.hpp"
 #include "ngraph/runtime/gpu/gpu_tensor_view_wrapper.hpp"
-#define EMITTER_DECL(op_name)                                                                      \
-    emit<op_name>(GPU_ExternalFunction * external_function,                                        \
-                  codegen::CodeWriter & writer,                                                    \
-                  const ngraph::Node* node,                                                        \
-                  const std::vector<GPU_TensorViewWrapper>& args,                                  \
-                  const std::vector<GPU_TensorViewWrapper>& out)
 namespace ngraph
 {
    namespace runtime
@@ -39,31 +33,17 @@ namespace ngraph
            class GPU_Emitter
            {
            public:
-                template <typename OP>
+                static std::function<void(EMIT_ARGS)> get_emit_function(const Node& node);
-                static void emit(GPU_ExternalFunction* external_function,
-                                 codegen::CodeWriter& writer,
-                                 const ngraph::Node* node,
-                                 const std::vector<GPU_TensorViewWrapper>& args,
-                                 const std::vector<GPU_TensorViewWrapper>& out)
-                {
-                    throw std::runtime_error("Unimplemented op '" + node->description() +
-                                             "' in GPU emitter");
-                }
-                static void nop(GPU_ExternalFunction* external_function,
+// This defines a collection of function declarations like this
-                                codegen::CodeWriter& writer,
+// static void emit_Abs(EMIT_ARGS);
-                                const ngraph::Node* node,
+// static void emit_Acos(EMIT_ARGS);
-                                const std::vector<GPU_TensorViewWrapper>& args,
+#define NGRAPH_OP(a) static void emit_##a(EMIT_ARGS);
-                                const std::vector<GPU_TensorViewWrapper>& out)
+#include "ngraph/op/op_tbl.hpp"
-                {
+#undef NGRAPH_OP
-                }
                template <typename T>
-                static void emit_elementwise(GPU_ExternalFunction* external_function,
+                static void emit_elementwise(EMIT_ARGS)
-                                             codegen::CodeWriter& writer,
-                                             const ngraph::Node* node,
-                                             const std::vector<GPU_TensorViewWrapper>& args,
-                                             const std::vector<GPU_TensorViewWrapper>& out)
                {
                    if (out[0].get_size() == 0)
                    {
@@ -104,6 +84,7 @@ namespace ngraph
                static std::string node_names(const std::vector<GPU_TensorViewWrapper>& args,
                                              std::initializer_list<int> arg_indexes = {});
            };
            Shape get_padded_shape(const Shape& input_shape,
                                   const Shape& padding_below,
                                   const Shape& padding_above,

--- a/src/ngraph/runtime/gpu/gpu_external_function.cpp
+++ b/src/ngraph/runtime/gpu/gpu_external_function.cpp
@@ -24,6 +24,7 @@
 #include <string>
 #include <tuple>
+#include "ngraph/codegen/code_writer.hpp"
 #include "ngraph/descriptor/input.hpp"
 #include "ngraph/descriptor/layout/dense_tensor_layout.hpp"
 #include "ngraph/descriptor/output.hpp"
@@ -36,6 +37,8 @@
 #include "ngraph/op/add.hpp"
 #include "ngraph/op/allreduce.hpp"
 #include "ngraph/op/and.hpp"
+#include "ngraph/op/argmax.hpp"
+#include "ngraph/op/argmin.hpp"
 #include "ngraph/op/asin.hpp"
 #include "ngraph/op/atan.hpp"
 #include "ngraph/op/avg_pool.hpp"
@@ -60,6 +63,7 @@
 #include "ngraph/op/less.hpp"
 #include "ngraph/op/less_eq.hpp"
 #include "ngraph/op/log.hpp"
+#include "ngraph/op/lrn.hpp"
 #include "ngraph/op/max.hpp"
 #include "ngraph/op/max_pool.hpp"
 #include "ngraph/op/maximum.hpp"
@@ -93,10 +97,12 @@
 #include "ngraph/op/slice.hpp"
 #include "ngraph/op/softmax.hpp"
 #include "ngraph/op/sqrt.hpp"
+#include "ngraph/op/stop_gradient.hpp"
 #include "ngraph/op/subtract.hpp"
 #include "ngraph/op/sum.hpp"
 #include "ngraph/op/tan.hpp"
 #include "ngraph/op/tanh.hpp"
+#include "ngraph/op/topk.hpp"
 #include "ngraph/pass/common_function_collection.hpp"
 #include "ngraph/pass/like_replacement.hpp"
 #include "ngraph/runtime/gpu/gpu_backend.hpp"
@@ -104,6 +110,7 @@
 #include "ngraph/runtime/gpu/gpu_external_function.hpp"
 #include "ngraph/runtime/gpu/gpu_kernel_emitters.hpp"
 #include "ngraph/runtime/gpu/gpu_runtime_context.hpp"
+#include "ngraph/runtime/gpu/gpu_tensor_view_wrapper.hpp"
 #include "ngraph/runtime/gpu/pass/gpu_layout.hpp"
 #include "ngraph/runtime/gpu/pass/tensor_memory_reservation.hpp"
@@ -157,91 +164,15 @@ static string emit_string_array(const vector<string>& s, size_t max_line_length)
 static GPUStaticInitializers s_static_initializers;
-#define TI(x) type_index(typeid(x))
+void runtime::gpu::GPU_ExternalFunction::emit_op(GPU_ExternalFunction* external_function,
+                                                 codegen::CodeWriter& writer,
-static const runtime::gpu::OpMap dispatcher{
+                                                 const ngraph::Node* node,
-    {TI(ngraph::op::Add), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Add>},
+                                                 const std::vector<GPU_TensorViewWrapper>& args,
-    {TI(ngraph::op::Dot), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Dot>},
+                                                 const std::vector<GPU_TensorViewWrapper>& out)
-    {TI(ngraph::op::Multiply), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Multiply>},
+{
-    {TI(ngraph::op::Parameter), &runtime::gpu::GPU_Emitter::nop},
+    auto emit_function = GPU_Emitter::get_emit_function(*node);
-    {TI(ngraph::op::Abs), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Abs>},
+    emit_function(external_function, writer, node, args, out);
-    {TI(ngraph::op::Concat), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Concat>},
+};
-    {TI(ngraph::op::Divide), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Divide>},
-    {TI(ngraph::op::Equal), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Equal>},
-    {TI(ngraph::op::GetOutputElement),
-     &runtime::gpu::GPU_Emitter::emit<ngraph::op::GetOutputElement>},
-    {TI(ngraph::op::Greater), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Greater>},
-    {TI(ngraph::op::GreaterEq),
-     &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::GreaterEq>},
-    {TI(ngraph::op::Less), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Less>},
-    {TI(ngraph::op::LessEq), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::LessEq>},
-    {TI(ngraph::op::Log), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Log>},
-    {TI(ngraph::op::Maximum), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Maximum>},
-    {TI(ngraph::op::Minimum), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Minimum>},
-    {TI(ngraph::op::Negative), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Negative>},
-    {TI(ngraph::op::NotEqual), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::NotEqual>},
-    {TI(ngraph::op::Power), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Power>},
-    {TI(ngraph::op::Select), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Select>},
-    {TI(ngraph::op::Subtract), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Subtract>},
-    {TI(ngraph::op::Broadcast), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Broadcast>},
-    {TI(ngraph::op::Convert), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Convert>},
-    {TI(ngraph::op::Constant), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Constant>},
-    {TI(ngraph::op::Reshape), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Reshape>},
-    {TI(ngraph::op::FunctionCall), &runtime::gpu::GPU_Emitter::emit<ngraph::op::FunctionCall>},
-    {TI(ngraph::op::Reduce), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Reduce>},
-    {TI(ngraph::op::Sign), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Sign>},
-    {TI(ngraph::op::Slice), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Slice>},
-    {TI(ngraph::op::Sum), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Sum>},
-    {TI(ngraph::op::Exp), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Exp>},
-    {TI(ngraph::op::Sin), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Sin>},
-    {TI(ngraph::op::Sinh), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Sinh>},
-    {TI(ngraph::op::Cos), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Cos>},
-    {TI(ngraph::op::Cosh), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Cosh>},
-    {TI(ngraph::op::Tan), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Tan>},
-    {TI(ngraph::op::Tanh), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Tanh>},
-    {TI(ngraph::op::Asin), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Asin>},
-    {TI(ngraph::op::Acos), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Acos>},
-    {TI(ngraph::op::Atan), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Atan>},
-    {TI(ngraph::op::ReplaceSlice), &runtime::gpu::GPU_Emitter::emit<ngraph::op::ReplaceSlice>},
-    {TI(ngraph::op::OneHot), &runtime::gpu::GPU_Emitter::emit<ngraph::op::OneHot>},
-    {TI(ngraph::op::Floor), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Floor>},
-    {TI(ngraph::op::Ceiling), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Ceiling>},
-    {TI(ngraph::op::Sqrt), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Sqrt>},
-    {TI(ngraph::op::Convolution), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Convolution>},
-    {TI(ngraph::op::ConvolutionBackpropFilters),
-     &runtime::gpu::GPU_Emitter::emit<ngraph::op::ConvolutionBackpropFilters>},
-    {TI(ngraph::op::ConvolutionBackpropData),
-     &runtime::gpu::GPU_Emitter::emit<ngraph::op::ConvolutionBackpropData>},
-    {TI(ngraph::op::Not), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Not>},
-    {TI(ngraph::op::MaxPool), &runtime::gpu::GPU_Emitter::emit<ngraph::op::MaxPool>},
-    {TI(ngraph::op::Reverse), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Reverse>},
-    {TI(ngraph::op::ReverseSequence),
-     &runtime::gpu::GPU_Emitter::emit<ngraph::op::ReverseSequence>},
-    {TI(ngraph::op::Result), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Result>},
-    {TI(ngraph::op::ReduceWindow), &runtime::gpu::GPU_Emitter::emit<ngraph::op::ReduceWindow>},
-    {TI(ngraph::op::SelectAndScatter),
-     &runtime::gpu::GPU_Emitter::emit<ngraph::op::SelectAndScatter>},
-    {TI(ngraph::op::AvgPool), &runtime::gpu::GPU_Emitter::emit<ngraph::op::AvgPool>},
-    {TI(ngraph::op::AvgPoolBackprop),
-     &runtime::gpu::GPU_Emitter::emit<ngraph::op::AvgPoolBackprop>},
-    {TI(ngraph::op::Pad), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Pad>},
-    {TI(ngraph::op::BatchNorm), &runtime::gpu::GPU_Emitter::emit<ngraph::op::BatchNorm>},
-    {TI(ngraph::op::BatchNormBackprop),
-     &runtime::gpu::GPU_Emitter::emit<ngraph::op::BatchNormBackprop>},
-    {TI(ngraph::op::MaxPoolBackprop),
-     &runtime::gpu::GPU_Emitter::emit<ngraph::op::MaxPoolBackprop>},
-    {TI(ngraph::op::Product), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Product>},
-    {TI(ngraph::op::Max), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Max>},
-    {TI(ngraph::op::Min), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Min>},
-    {TI(ngraph::op::Relu), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Relu>},
-    {TI(ngraph::op::ReluBackprop),
-     &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::ReluBackprop>},
-    {TI(ngraph::op::Softmax), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Softmax>},
-    {TI(ngraph::op::Sigmoid), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Sigmoid>},
-    {TI(ngraph::op::SigmoidBackprop),
-     &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::SigmoidBackprop>},
-    {TI(ngraph::op::And), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::And>},
-    {TI(ngraph::op::Or), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Or>}};
 const size_t runtime::gpu::GPU_ExternalFunction::GPU_ExternalFunction::s_memory_pool_alignment = 64;
@@ -548,14 +479,6 @@ void runtime::gpu::GPU_ExternalFunction::emit_functions()
            for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function))
            {
-                auto& n = *node;
-                // Work around a compiler warning (*node inside typeid may have effects
-                // with shared pointers, which is fine here but clang doesn't like it.)
-                auto handler = dispatcher.find(type_index(typeid(n)));
-                if (handler == dispatcher.end())
-                {
-                    throw ngraph::unsupported_op(node->description());
-                }
                vector<GPU_TensorViewWrapper> in;
                vector<string> node_input_names;
                vector<string> node_output_names;
@@ -590,7 +513,7 @@ void runtime::gpu::GPU_ExternalFunction::emit_functions()
                auto it = m_node_function_map.find(node.get());
                if (it == m_node_function_map.end())
                {
-                    handler->second(this, m_writer, node.get(), in, out);
+                    emit_op(this, m_writer, node.get(), in, out);
                }
                else
                {
@@ -747,13 +670,6 @@ string runtime::gpu::GPU_ExternalFunction::emit_op_as_function(const Node& node,
    codegen::CodeWriter writer;
    writer << "static void " << function_name << "(";
    writer.indent++;
-    // Work around a compiler warning (*node inside typeid may have effects
-    // with shared pointers, which is fine here but clang doesn't like it.)
-    auto handler = dispatcher.find(type_index(typeid(node)));
-    if (handler == dispatcher.end())
-    {
-        throw ngraph::unsupported_op(node.description());
-    }
    vector<GPU_TensorViewWrapper> in;
    size_t arg_index = 0;
    set<string> arg_names;
@@ -791,7 +707,7 @@ string runtime::gpu::GPU_ExternalFunction::emit_op_as_function(const Node& node,
    writer.indent--;
    writer << "\n)\n";
    codegen::CodeWriter tmp_writer;
-    handler->second(this, tmp_writer, &node, in, out);
+    emit_op(this, tmp_writer, &node, in, out);
    string body = tmp_writer.get_code();
    if (body.size() > 0 && body[0] == '{')
    {

--- a/src/ngraph/runtime/gpu/gpu_external_function.hpp
+++ b/src/ngraph/runtime/gpu/gpu_external_function.hpp
@@ -36,6 +36,11 @@
 #include "ngraph/runtime/gpu/gpu_primitive_emitter.hpp"
 #include "ngraph/runtime/gpu/gpu_tensor_view_wrapper.hpp"
+#define EMIT_ARGS                                                                                  \
+    runtime::gpu::GPU_ExternalFunction *external_function, codegen::CodeWriter &writer,            \
+        const Node *node, const std::vector<runtime::gpu::GPU_TensorViewWrapper> &args,            \
+        const std::vector<runtime::gpu::GPU_TensorViewWrapper> &out
 namespace ngraph
 {
    namespace runtime
@@ -46,15 +51,6 @@ namespace ngraph
            class GPU_CallFrame;
            struct GPURuntimeContext;
-            using OpFunction =
-                std::function<void(GPU_ExternalFunction* external_function,
-                                   codegen::CodeWriter&,
-                                   const ngraph::Node*,
-                                   const std::vector<GPU_TensorViewWrapper>& inputs,
-                                   const std::vector<GPU_TensorViewWrapper>& outputs)>;
-            using OpMap = std::unordered_map<std::type_index, OpFunction>;
            class GPU_ExternalFunction : public std::enable_shared_from_this<GPU_ExternalFunction>
            {
                friend class GPU_CallFrame;
@@ -97,6 +93,7 @@ namespace ngraph
                void emit_debug_function_entry(Node* node);
                void emit_debug_function_exit(Node* node);
                void emit_temp_mem_pool_allocation(std::shared_ptr<Function> current_function);
+                void emit_op(EMIT_ARGS);
                void release_function() { m_function = nullptr; }
                void store_emitted_functions(const std::string& code);
                std::string emit_op_as_function(const Node& node, const std::string& function_name);