Unverified Commit f8a084ac authored by Robert Kimball's avatar Robert Kimball Committed by GitHub

Move GPU Call Frame into GPU Backend class (#1676)

* Move CallFrame functionality into GPUBackend
parent 3db4f121
......@@ -22,7 +22,6 @@ set(SRC
cuda_emitter.cpp
cudnn_emitter.cpp
gpu_backend.cpp
gpu_call_frame.cpp
gpu_cuda_context_manager.cpp
gpu_cuda_function_builder.cpp
gpu_cuda_function_pool.cpp
......
......@@ -96,12 +96,6 @@ runtime::gpu::GPU_Backend::BackendContext::~BackendContext()
delete m_runtime_context->compiled_kernel_pool;
}
shared_ptr<runtime::gpu::GPU_CallFrame> runtime::gpu::GPU_Backend::make_call_frame(
const shared_ptr<GPU_ExternalFunction>& external_function)
{
return external_function->make_call_frame();
}
shared_ptr<runtime::TensorView>
runtime::gpu::GPU_Backend::create_tensor(const element::Type& element_type, const Shape& shape)
{
......@@ -121,19 +115,19 @@ bool runtime::gpu::GPU_Backend::compile(shared_ptr<Function> func)
{
instance.m_external_function = make_shared<GPU_ExternalFunction>(func, m_context);
instance.m_external_function->m_emit_timing = instance.m_performance_counters_enabled;
auto cf = instance.m_external_function->make_call_frame();
instance.m_call_frame = dynamic_pointer_cast<GPU_CallFrame>(cf);
instance.m_external_function->compile();
instance.m_compiled_function = instance.m_external_function->m_compiled_function;
}
return true;
}
bool runtime::gpu::GPU_Backend::call(shared_ptr<Function> func,
const vector<shared_ptr<runtime::TensorView>>& outputs,
const vector<shared_ptr<runtime::TensorView>>& inputs)
const vector<shared_ptr<runtime::TensorView>>& output_tvs,
const vector<shared_ptr<runtime::TensorView>>& input_tvs)
{
bool rc = true;
validate_call(func, outputs, inputs);
validate_call(func, output_tvs, input_tvs);
FunctionInstance& instance = m_function_map[func];
if (instance.m_external_function == nullptr)
......@@ -143,7 +137,26 @@ bool runtime::gpu::GPU_Backend::call(shared_ptr<Function> func,
// ensure the GPURuntimeContext primitive pointers are valid
m_context->prepare_runtime_context();
instance.m_call_frame->call(outputs, inputs, m_context->m_runtime_context.get());
// Device tensors
vector<void*> inputs;
vector<void*> outputs;
for (size_t i = 0; i < input_tvs.size(); i++)
{
shared_ptr<runtime::gpu::GPU_TensorView> tv =
static_pointer_cast<runtime::gpu::GPU_TensorView>(input_tvs[i]);
inputs.push_back(tv->m_allocated_buffer_pool);
}
for (size_t i = 0; i < output_tvs.size(); i++)
{
shared_ptr<runtime::gpu::GPU_TensorView> tv =
static_pointer_cast<runtime::gpu::GPU_TensorView>(output_tvs[i]);
outputs.push_back(tv->m_allocated_buffer_pool);
}
auto ctx = m_context->m_runtime_context.get();
instance.m_compiled_function(inputs.data(), outputs.data(), ctx);
return rc;
}
......
......@@ -30,18 +30,17 @@ namespace ngraph
static size_t alignment = 64;
class GPU_ExternalFunction;
class GPU_CallFrame;
class GPUPrimitiveEmitter;
struct GPURuntimeContext;
class CudaContextManager;
using EntryPoint_t = void(void** inputs, void** outputs, GPURuntimeContext* ctx);
using EntryPoint = std::function<EntryPoint_t>;
class GPU_Backend : public Backend
{
public:
GPU_Backend();
std::shared_ptr<ngraph::runtime::gpu::GPU_CallFrame> make_call_frame(
const std::shared_ptr<ngraph::runtime::gpu::GPU_ExternalFunction>&
external_function);
std::shared_ptr<ngraph::runtime::TensorView>
create_tensor(const ngraph::element::Type& element_type,
......@@ -82,8 +81,8 @@ namespace ngraph
{
public:
std::shared_ptr<GPU_ExternalFunction> m_external_function;
std::shared_ptr<GPU_CallFrame> m_call_frame;
bool m_performance_counters_enabled = false;
EntryPoint m_compiled_function;
};
std::map<std::shared_ptr<Function>, FunctionInstance> m_function_map;
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <cstdlib>
#include <fstream>
#include <stdio.h>
#include "ngraph/runtime/gpu/gpu_call_frame.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/runtime/gpu/gpu_tensor_view.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
using namespace std;
using namespace ngraph;
/// \brief Construct a call frame bound to a compiled GPU function.
///
/// \param external_function The external function that produced the compiled
///        entry point; kept alive for the lifetime of this call frame.
/// \param compiled_function The compiled entry point invoked by call().
runtime::gpu::GPU_CallFrame::GPU_CallFrame(std::shared_ptr<GPU_ExternalFunction> external_function,
                                           EntryPoint compiled_function)
    // Both parameters are taken by value; move them into the members instead of
    // copying (avoids a shared_ptr refcount bump and a std::function deep copy).
    : m_external_function(std::move(external_function))
    , m_compiled_function(std::move(compiled_function))
{
}
// Out-of-line destructor; nothing to release beyond the defaulted member teardown.
runtime::gpu::GPU_CallFrame::~GPU_CallFrame() = default;
void runtime::gpu::GPU_CallFrame::call(
    const std::vector<std::shared_ptr<runtime::TensorView>>& output_tvs,
    const std::vector<std::shared_ptr<runtime::TensorView>>& input_tvs,
    GPURuntimeContext* ctx)
{
    // Collect the raw device buffer pointer from each GPU tensor view.
    auto gather_device_buffers = [](const vector<shared_ptr<runtime::TensorView>>& tvs) {
        vector<void*> buffers;
        buffers.reserve(tvs.size());
        for (const auto& tv : tvs)
        {
            auto gpu_tv = static_pointer_cast<runtime::gpu::GPU_TensorView>(tv);
            buffers.push_back(gpu_tv->m_allocated_buffer_pool);
        }
        return buffers;
    };

    vector<void*> device_inputs = gather_device_buffers(input_tvs);
    vector<void*> device_outputs = gather_device_buffers(output_tvs);

    // Hand the raw pointer arrays to the compiled entry point.
    m_compiled_function(device_inputs.data(), device_outputs.data(), ctx);
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <cudnn.h>
#include <functional>
#include <memory>
#include <vector>
#include "ngraph/function.hpp"
#include "ngraph/runtime/gpu/gpu_runtime_context.hpp"
#include "ngraph/runtime/tensor_view.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
// Forward declarations — full definitions live in their own headers.
class GPU_CallFrame;
class GPU_ExternalFunction;
// Signature of a compiled GPU function: raw device buffer pointer arrays
// for inputs/outputs plus the runtime context carrying CUDA/cuDNN handles.
using EntryPoint_t = void(void** inputs, void** outputs, GPURuntimeContext* ctx);
using EntryPoint = std::function<EntryPoint_t>;
// Compile and execute graphs
//
// Binds a compiled entry point to its owning external function and
// marshals TensorView arguments into raw device pointers for invocation.
class GPU_CallFrame
{
public:
// Takes shared ownership of the external function so the compiled code
// it produced stays valid for the lifetime of this call frame.
GPU_CallFrame(std::shared_ptr<GPU_ExternalFunction> external_function,
EntryPoint compiled_function);
~GPU_CallFrame();
/// \brief Invoke the function with values matching the signature of the function.
///
/// Tuples will be expanded into their tensor views to build the call frame.
void call(const std::vector<std::shared_ptr<runtime::TensorView>>& outputs,
const std::vector<std::shared_ptr<runtime::TensorView>>& inputs,
GPURuntimeContext* ctx);
protected:
std::shared_ptr<GPU_ExternalFunction> m_external_function;
EntryPoint m_compiled_function;
// NOTE(review): no use of `init` is visible in this file — presumably a
// one-time-initialization flag defined in a .cpp; confirm before removing.
static bool init;
};
}
}
}
......@@ -635,17 +635,6 @@ void runtime::gpu::GPU_ExternalFunction::compile()
}
}
shared_ptr<ngraph::runtime::gpu::GPU_CallFrame>
runtime::gpu::GPU_ExternalFunction::make_call_frame()
{
if (!m_is_compiled)
{
compile();
}
return make_shared<GPU_CallFrame>(shared_from_this(), m_compiled_function);
}
void runtime::gpu::GPU_ExternalFunction::emit_debug_function_entry(Node* node)
{
if (m_emit_timing)
......
......@@ -32,7 +32,6 @@
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/gpu/gpu_call_frame.hpp"
#include "ngraph/runtime/gpu/gpu_primitive_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_tensor_view_wrapper.hpp"
......@@ -48,12 +47,10 @@ namespace ngraph
namespace gpu
{
class GPU_Emitter;
class GPU_CallFrame;
struct GPURuntimeContext;
class GPU_ExternalFunction : public std::enable_shared_from_this<GPU_ExternalFunction>
class GPU_ExternalFunction
{
friend class GPU_CallFrame;
friend class GPU_Backend;
public:
......@@ -62,7 +59,6 @@ namespace ngraph
bool release_function = true);
~GPU_ExternalFunction();
std::shared_ptr<ngraph::runtime::gpu::GPU_CallFrame> make_call_frame();
std::unique_ptr<runtime::gpu::GPURuntimeContext>& ctx();
const std::unique_ptr<GPUPrimitiveEmitter>& get_primitive_emitter() const
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment