Unverified Commit f8a084ac authored by Robert Kimball's avatar Robert Kimball Committed by GitHub

Move GPU Call Frame into GPU Backend class (#1676)

* Move CallFrame functionality into GPUBackend
parent 3db4f121
......@@ -22,7 +22,6 @@ set(SRC
cuda_emitter.cpp
cudnn_emitter.cpp
gpu_backend.cpp
gpu_call_frame.cpp
gpu_cuda_context_manager.cpp
gpu_cuda_function_builder.cpp
gpu_cuda_function_pool.cpp
......
......@@ -96,12 +96,6 @@ runtime::gpu::GPU_Backend::BackendContext::~BackendContext()
delete m_runtime_context->compiled_kernel_pool;
}
shared_ptr<runtime::gpu::GPU_CallFrame> runtime::gpu::GPU_Backend::make_call_frame(
const shared_ptr<GPU_ExternalFunction>& external_function)
{
return external_function->make_call_frame();
}
shared_ptr<runtime::TensorView>
runtime::gpu::GPU_Backend::create_tensor(const element::Type& element_type, const Shape& shape)
{
......@@ -121,19 +115,19 @@ bool runtime::gpu::GPU_Backend::compile(shared_ptr<Function> func)
{
instance.m_external_function = make_shared<GPU_ExternalFunction>(func, m_context);
instance.m_external_function->m_emit_timing = instance.m_performance_counters_enabled;
auto cf = instance.m_external_function->make_call_frame();
instance.m_call_frame = dynamic_pointer_cast<GPU_CallFrame>(cf);
instance.m_external_function->compile();
instance.m_compiled_function = instance.m_external_function->m_compiled_function;
}
return true;
}
bool runtime::gpu::GPU_Backend::call(shared_ptr<Function> func,
const vector<shared_ptr<runtime::TensorView>>& outputs,
const vector<shared_ptr<runtime::TensorView>>& inputs)
const vector<shared_ptr<runtime::TensorView>>& output_tvs,
const vector<shared_ptr<runtime::TensorView>>& input_tvs)
{
bool rc = true;
validate_call(func, outputs, inputs);
validate_call(func, output_tvs, input_tvs);
FunctionInstance& instance = m_function_map[func];
if (instance.m_external_function == nullptr)
......@@ -143,7 +137,26 @@ bool runtime::gpu::GPU_Backend::call(shared_ptr<Function> func,
// ensure the GPURuntimeContext primitive pointers are valid
m_context->prepare_runtime_context();
instance.m_call_frame->call(outputs, inputs, m_context->m_runtime_context.get());
// Device tensors
vector<void*> inputs;
vector<void*> outputs;
for (size_t i = 0; i < input_tvs.size(); i++)
{
shared_ptr<runtime::gpu::GPU_TensorView> tv =
static_pointer_cast<runtime::gpu::GPU_TensorView>(input_tvs[i]);
inputs.push_back(tv->m_allocated_buffer_pool);
}
for (size_t i = 0; i < output_tvs.size(); i++)
{
shared_ptr<runtime::gpu::GPU_TensorView> tv =
static_pointer_cast<runtime::gpu::GPU_TensorView>(output_tvs[i]);
outputs.push_back(tv->m_allocated_buffer_pool);
}
auto ctx = m_context->m_runtime_context.get();
instance.m_compiled_function(inputs.data(), outputs.data(), ctx);
return rc;
}
......
......@@ -30,18 +30,17 @@ namespace ngraph
static size_t alignment = 64;
class GPU_ExternalFunction;
class GPU_CallFrame;
class GPUPrimitiveEmitter;
struct GPURuntimeContext;
class CudaContextManager;
using EntryPoint_t = void(void** inputs, void** outputs, GPURuntimeContext* ctx);
using EntryPoint = std::function<EntryPoint_t>;
class GPU_Backend : public Backend
{
public:
GPU_Backend();
std::shared_ptr<ngraph::runtime::gpu::GPU_CallFrame> make_call_frame(
const std::shared_ptr<ngraph::runtime::gpu::GPU_ExternalFunction>&
external_function);
std::shared_ptr<ngraph::runtime::TensorView>
create_tensor(const ngraph::element::Type& element_type,
......@@ -82,8 +81,8 @@ namespace ngraph
{
public:
std::shared_ptr<GPU_ExternalFunction> m_external_function;
std::shared_ptr<GPU_CallFrame> m_call_frame;
bool m_performance_counters_enabled = false;
EntryPoint m_compiled_function;
};
std::map<std::shared_ptr<Function>, FunctionInstance> m_function_map;
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <cstdlib>
#include <fstream>
#include <stdio.h>
#include "ngraph/runtime/gpu/gpu_call_frame.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/runtime/gpu/gpu_tensor_view.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
using namespace std;
using namespace ngraph;
/// \brief Construct a call frame bound to a compiled GPU function.
///
/// \param external_function The external function that produced the compiled
///        entry point; kept alive for the lifetime of this call frame.
/// \param compiled_function The compiled entry point invoked by call().
runtime::gpu::GPU_CallFrame::GPU_CallFrame(std::shared_ptr<GPU_ExternalFunction> external_function,
                                           EntryPoint compiled_function)
    // Both parameters are taken by value; move them into the members instead of
    // copying (avoids a shared_ptr refcount bump and a std::function deep copy).
    : m_external_function(std::move(external_function))
    , m_compiled_function(std::move(compiled_function))
{
}
// Out-of-line destructor; nothing to release beyond the defaulted member teardown.
runtime::gpu::GPU_CallFrame::~GPU_CallFrame() = default;
void runtime::gpu::GPU_CallFrame::call(
    const std::vector<std::shared_ptr<runtime::TensorView>>& output_tvs,
    const std::vector<std::shared_ptr<runtime::TensorView>>& input_tvs,
    GPURuntimeContext* ctx)
{
    // Collect the raw device buffer pointer from each GPU tensor view.
    auto gather_device_buffers = [](const vector<shared_ptr<runtime::TensorView>>& tvs) {
        vector<void*> buffers;
        buffers.reserve(tvs.size());
        for (const auto& tv : tvs)
        {
            auto gpu_tv = static_pointer_cast<runtime::gpu::GPU_TensorView>(tv);
            buffers.push_back(gpu_tv->m_allocated_buffer_pool);
        }
        return buffers;
    };

    vector<void*> device_inputs = gather_device_buffers(input_tvs);
    vector<void*> device_outputs = gather_device_buffers(output_tvs);

    // Hand the raw pointer arrays to the compiled entry point.
    m_compiled_function(device_inputs.data(), device_outputs.data(), ctx);
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <cudnn.h>
#include <functional>
#include <memory>
#include <vector>
#include "ngraph/function.hpp"
#include "ngraph/runtime/gpu/gpu_runtime_context.hpp"
#include "ngraph/runtime/tensor_view.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
// Forward declarations — full definitions live in their own headers.
class GPU_CallFrame;
class GPU_ExternalFunction;
// Signature of a compiled GPU function: raw device buffer pointer arrays
// for inputs/outputs plus the runtime context carrying CUDA/cuDNN handles.
using EntryPoint_t = void(void** inputs, void** outputs, GPURuntimeContext* ctx);
using EntryPoint = std::function<EntryPoint_t>;
// Compile and execute graphs
//
// Binds a compiled entry point to its owning external function and
// marshals TensorView arguments into raw device pointers for invocation.
class GPU_CallFrame
{
public:
// Takes shared ownership of the external function so the compiled code
// it produced stays valid for the lifetime of this call frame.
GPU_CallFrame(std::shared_ptr<GPU_ExternalFunction> external_function,
EntryPoint compiled_function);
~GPU_CallFrame();
/// \brief Invoke the function with values matching the signature of the function.
///
/// Tuples will be expanded into their tensor views to build the call frame.
void call(const std::vector<std::shared_ptr<runtime::TensorView>>& outputs,
const std::vector<std::shared_ptr<runtime::TensorView>>& inputs,
GPURuntimeContext* ctx);
protected:
std::shared_ptr<GPU_ExternalFunction> m_external_function;
EntryPoint m_compiled_function;
// NOTE(review): no use of `init` is visible in this file — presumably a
// one-time-initialization flag defined in a .cpp; confirm before removing.
static bool init;
};
}
}
}
......@@ -635,17 +635,6 @@ void runtime::gpu::GPU_ExternalFunction::compile()
}
}
shared_ptr<ngraph::runtime::gpu::GPU_CallFrame>
runtime::gpu::GPU_ExternalFunction::make_call_frame()
{
if (!m_is_compiled)
{
compile();
}
return make_shared<GPU_CallFrame>(shared_from_this(), m_compiled_function);
}
void runtime::gpu::GPU_ExternalFunction::emit_debug_function_entry(Node* node)
{
if (m_emit_timing)
......
......@@ -32,7 +32,6 @@
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/gpu/gpu_call_frame.hpp"
#include "ngraph/runtime/gpu/gpu_primitive_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_tensor_view_wrapper.hpp"
......@@ -48,12 +47,10 @@ namespace ngraph
namespace gpu
{
class GPU_Emitter;
class GPU_CallFrame;
struct GPURuntimeContext;
class GPU_ExternalFunction : public std::enable_shared_from_this<GPU_ExternalFunction>
class GPU_ExternalFunction
{
friend class GPU_CallFrame;
friend class GPU_Backend;
public:
......@@ -62,7 +59,6 @@ namespace ngraph
bool release_function = true);
~GPU_ExternalFunction();
std::shared_ptr<ngraph::runtime::gpu::GPU_CallFrame> make_call_frame();
std::unique_ptr<runtime::gpu::GPURuntimeContext>& ctx();
const std::unique_ptr<GPUPrimitiveEmitter>& get_primitive_emitter() const
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment