Unverified commit 3d9004c0, authored by Chris Sullivan, committed by GitHub

Bring NVIDIA GPU backend up to date with nGraph master. (#4306)

Co-authored-by: Robert Kimball <robert.kimball@intel.com>
parent 670c74af
......@@ -20,12 +20,17 @@
#include <vector>
#include "ngraph/log.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/runtime/gpu/cudnn_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_invoke.hpp"
#include "ngraph/runtime/gpu/gpu_primitive_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_runtime_context.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
#include "ngraph/runtime/gpu/op/rnn.hpp"
#include "ngraph/runtime/gpu/type_info.hpp"
#include "ngraph/util.hpp"
......
......@@ -31,14 +31,21 @@
#include "ngraph/runtime/gpu/gpu_runtime_context.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/runtime/gpu/op/rnn.hpp"
namespace ngraph
{
namespace op
{
class Convolution;
class ConvolutionBackpropData;
class ConvolutionBackpropFilters;
class MaxPool;
class Max;
class Min;
namespace gpu
{
class Rnn;
}
}
namespace runtime
{
namespace gpu
......
......@@ -33,20 +33,11 @@
using namespace ngraph;
using namespace std;
extern "C" runtime::BackendConstructor* get_backend_constructor_pointer()
extern "C" GPU_BACKEND_API void ngraph_register_gpu_backend()
{
class LocalBackendConstructor : public runtime::BackendConstructor
{
public:
std::shared_ptr<runtime::Backend> create(const std::string& config) override
{
return std::make_shared<runtime::gpu::GPU_Backend>();
}
};
static unique_ptr<runtime::BackendConstructor> s_backend_constructor(
new LocalBackendConstructor());
return s_backend_constructor.get();
runtime::BackendManager::register_backend("GPU", [](const std::string& /* config */) {
return make_shared<runtime::gpu::GPU_Backend>();
});
}
runtime::gpu::GPU_Backend::GPU_Backend()
......
......@@ -19,7 +19,9 @@
#include <map>
#include <memory>
#include "gpu_backend_visibility.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/backend_manager.hpp"
namespace ngraph
{
......@@ -37,6 +39,7 @@ namespace ngraph
using EntryPoint_t = void(void** inputs, void** outputs, GPURuntimeContext* ctx);
using EntryPoint = std::function<EntryPoint_t>;
BackendConstructor GPU_BACKEND_API get_backend_constructor_pointer();
class GPU_Backend : public Backend
{
public:
......
......@@ -100,7 +100,7 @@ namespace ngraph
const std::string& output_name) = 0;
std::shared_ptr<ngraph::Function> m_function;
std::unordered_map<std::shared_ptr<Function>, std::list<std::shared_ptr<Node>>>
std::unordered_map<std::shared_ptr<Function>, std::vector<std::shared_ptr<Node>>>
m_function_ordered_ops;
bool m_emit_timing;
......
This diff is collapsed.
......@@ -37,7 +37,7 @@ namespace ngraph
// This defines a collection of function declarations like this
// static std::string emit_Abs(EMIT_ARGS);
// static std::string emit_Acos(EMIT_ARGS);
#define NGRAPH_OP(a, b) static std::string emit_##a(EMIT_ARGS);
#define NGRAPH_OP(a, b, VERS) static std::string emit_v##VERS##_##a(EMIT_ARGS);
#include "ngraph/runtime/gpu/op/op_tbl.hpp"
#undef NGRAPH_OP
......
......@@ -39,8 +39,8 @@ namespace ngraph
public:
using op_runtime_t =
std::function<void(GPUCallFrame& call_frame, GPURuntimeContext* ctx)>;
using op_order_t =
std::unordered_map<std::shared_ptr<Function>, std::list<std::shared_ptr<Node>>>;
using op_order_t = std::unordered_map<std::shared_ptr<Function>,
std::vector<std::shared_ptr<Node>>>;
GPURuntimeConstructor(const op_order_t& ordered_ops);
void add(const std::string& name, const op_runtime_t& step);
......
......@@ -15,8 +15,11 @@
//*****************************************************************************
#include "ngraph/runtime/gpu/op/batch_norm.hpp"
#include "ngraph/node.hpp"
#include "ngraph/validation_util.hpp"
constexpr ngraph::NodeTypeInfo ngraph::op::gpu::BatchNormTrainingWithStats::type_info;
ngraph::op::gpu::BatchNormTrainingWithStats::BatchNormTrainingWithStats(
double eps,
std::shared_ptr<ngraph::Node> gamma,
......
......@@ -40,6 +40,8 @@ namespace ngraph
void validate_and_infer_types() override;
static constexpr NodeTypeInfo type_info{"BatchNormTrainingWithStats", 0};
const NodeTypeInfo& get_type_info() const override { return type_info; }
protected:
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -14,8 +14,8 @@
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/op_tbl.hpp"
#include "ngraph/op/op_version_tbl.hpp"
#if CUDNN_VERSION >= 7200
NGRAPH_OP(Rnn, ngraph::op::gpu)
NGRAPH_OP(Rnn, ngraph::op::gpu, 0)
#endif
NGRAPH_OP(BatchNormTrainingWithStats, ngraph::op::gpu)
NGRAPH_OP(BatchNormTrainingWithStats, ngraph::op::gpu, 0)
......@@ -16,11 +16,14 @@
#include "ngraph/runtime/gpu/op/rnn.hpp"
#include "ngraph/log.hpp"
#include "ngraph/node.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
constexpr NodeTypeInfo op::gpu::Rnn::type_info;
shared_ptr<Node> op::gpu::Rnn::copy_with_new_args(const NodeVector& new_args) const
{
NGRAPH_CHECK(new_args.size() == 4, "Incorrect number of new arguments");
......
......@@ -65,6 +65,8 @@ namespace ngraph
const int src_iter_feature_size,
const int direction,
const int num_fused_layers);
static constexpr NodeTypeInfo type_info{"Rnn", 0};
const NodeTypeInfo& get_type_info() const override { return type_info; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
int get_num_timesteps() const { return m_num_timesteps; }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment