Unverified commit e6b26ac6 authored by Robert Kimball, committed by GitHub

GPU transformer cleanup (#4361)

* Move Executable to its own files

* Normalize class names

* More cleanup
Co-authored-by: Chris Sullivan <chris.sullivan@intel.com>
parent 134b285f
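
For orientation before the diff: a minimal caller-side sketch of the runtime API this cleanup touches, assuming the standard ngraph entry points (`runtime::Backend::create`, `compile`, `create_tensor`, `Executable::call`). The Function construction is elided; shapes and element types below are hypothetical.

```cpp
#include <memory>

#include "ngraph/ngraph.hpp"

using namespace ngraph;

// Hedged sketch: drive the renamed GPUBackend/GPUExecutable through the
// public runtime API. "func" is any ngraph::Function the caller has built.
void run_on_gpu(const std::shared_ptr<Function>& func)
{
    // "GPU" is the name registered by ngraph_register_gpu_backend() below
    auto backend = runtime::Backend::create("GPU");

    // compile() returns a runtime::Executable (internally a GPUExecutable)
    auto exec = backend->compile(func);

    // Tensors must come from the GPU backend; create_tensor() allocates
    // device memory via GPUTensor
    auto input = backend->create_tensor(element::f32, Shape{2, 2});
    auto output = backend->create_tensor(element::f32, Shape{2, 2});

    exec->call({output}, {input});
}
```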
......@@ -30,6 +30,7 @@ set(SRC
gpu_cuda_function_pool.cpp
gpu_cuda_kernel_builder.cpp
gpu_emitter.cpp
gpu_executable.cpp
gpu_compiled_function.cpp
gpu_internal_function.cpp
gpu_invoke.cpp
......
......@@ -23,6 +23,7 @@
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/runtime/backend_manager.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/gpu/gpu_executable.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/runtime/gpu/gpu_internal_function.hpp"
#include "ngraph/runtime/gpu/gpu_primitive_emitter.hpp"
......@@ -36,16 +37,16 @@ using namespace std;
extern "C" GPU_BACKEND_API void ngraph_register_gpu_backend()
{
runtime::BackendManager::register_backend("GPU", [](const std::string& /* config */) {
return make_shared<runtime::gpu::GPU_Backend>();
return make_shared<runtime::gpu::GPUBackend>();
});
}
runtime::gpu::GPU_Backend::GPU_Backend()
runtime::gpu::GPUBackend::GPUBackend()
: runtime::Backend()
{
}
runtime::gpu::GPU_Backend::BackendContext::BackendContext()
runtime::gpu::GPUBackend::BackendContext::BackendContext()
: m_runtime_context(new GPURuntimeContext)
, m_primitive_emitter(new GPUPrimitiveEmitter(m_runtime_context))
, m_cuda_manager(new CudaContextManager)
......@@ -75,7 +76,7 @@ runtime::gpu::GPU_Backend::BackendContext::BackendContext()
m_runtime_context->compiled_kernel_pool = new CudaFunctionPool;
}
void runtime::gpu::GPU_Backend::BackendContext::prepare_runtime_context()
void runtime::gpu::GPUBackend::BackendContext::prepare_runtime_context()
{
// set context current each time in case thread changed
bind_cuda_context_to_thread();
......@@ -84,12 +85,12 @@ void runtime::gpu::GPU_Backend::BackendContext::prepare_runtime_context()
m_runtime_context->gpu_memory_primitives = m_primitive_emitter->get_memory_primitives().data();
}
void runtime::gpu::GPU_Backend::BackendContext::bind_cuda_context_to_thread()
void runtime::gpu::GPUBackend::BackendContext::bind_cuda_context_to_thread()
{
m_cuda_manager->SetContextCurrent();
}
runtime::gpu::GPU_Backend::BackendContext::~BackendContext()
runtime::gpu::GPUBackend::BackendContext::~BackendContext()
{
cublasDestroy(*m_runtime_context->cublas_handle);
delete m_runtime_context->cublas_handle;
......@@ -99,12 +100,12 @@ runtime::gpu::GPU_Backend::BackendContext::~BackendContext()
}
shared_ptr<runtime::Tensor>
runtime::gpu::GPU_Backend::create_tensor(const element::Type& element_type, const Shape& shape)
runtime::gpu::GPUBackend::create_tensor(const element::Type& element_type, const Shape& shape)
{
return make_shared<runtime::gpu::GPUTensor>(element_type, shape);
}
shared_ptr<runtime::Tensor> runtime::gpu::GPU_Backend::create_tensor(
shared_ptr<runtime::Tensor> runtime::gpu::GPUBackend::create_tensor(
const element::Type& element_type, const Shape& shape, void* memory_pointer)
{
if (memory_pointer != nullptr && !is_device_pointer(memory_pointer))
......@@ -114,8 +115,8 @@ shared_ptr<runtime::Tensor> runtime::gpu::GPU_Backend::create_tensor(
return make_shared<runtime::gpu::GPUTensor>(element_type, shape, memory_pointer);
}
shared_ptr<runtime::Executable> runtime::gpu::GPU_Backend::compile(shared_ptr<Function> func,
bool timing_enable)
shared_ptr<runtime::Executable> runtime::gpu::GPUBackend::compile(shared_ptr<Function> func,
bool timing_enable)
{
shared_ptr<runtime::Executable> rc;
auto it = m_exec_map.find(func);
......@@ -125,87 +126,13 @@ shared_ptr<runtime::Executable> runtime::gpu::GPU_Backend::compile(shared_ptr<Fu
}
else
{
rc = make_shared<GPU_Executable>(func, timing_enable);
rc = make_shared<GPUExecutable>(func, timing_enable);
m_exec_map.insert({func, rc});
}
return rc;
}
runtime::gpu::GPU_Executable::GPU_Executable(shared_ptr<Function> func, bool enable_timing)
: m_context(new GPU_Backend::BackendContext())
{
FunctionInstance& instance = m_function_instance;
if (instance.m_compiled_function == nullptr)
{
m_context->bind_cuda_context_to_thread();
instance.m_compiled_function = runtime::gpu::GPUCompiledFunction::make(func, m_context);
instance.m_compiled_function->m_emit_timing = enable_timing;
instance.m_compiled_function->compile();
instance.m_runtime = instance.m_compiled_function->m_runtime;
instance.m_inputs.resize(func->get_parameters().size());
instance.m_outputs.resize(func->get_output_size());
}
set_parameters_and_results(*func);
}
void runtime::gpu::GPU_Executable::initialize_io(void** target,
const vector<shared_ptr<runtime::Tensor>>& source)
{
for (size_t i = 0; i < source.size(); i++)
{
shared_ptr<runtime::gpu::GPUTensor> tv =
dynamic_pointer_cast<runtime::gpu::GPUTensor>(source[i]);
if (tv)
{
target[i] = tv->m_allocated_buffer_pool;
}
else
{
throw invalid_argument("Tensors passed to GPU backend must be GPU Tensors");
}
}
}
bool runtime::gpu::GPU_Executable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
{
FunctionInstance& instance = m_function_instance;
if (instance.m_compiled_function == nullptr)
{
throw runtime_error("compile() must be called before call().");
}
// ensure the GPURuntimeContext primitive pointers are valid
m_context->prepare_runtime_context();
// Device tensors
initialize_io(instance.m_inputs.data(), inputs);
initialize_io(instance.m_outputs.data(), outputs);
auto ctx = m_context->m_runtime_context.get();
instance.m_runtime(instance.m_inputs.data(), instance.m_outputs.data(), ctx);
return true;
}
// void runtime::gpu::GPU_Backend::remove_compiled_function(shared_ptr<Function> func)
// {
// m_function_map.erase(func);
// }
vector<runtime::PerformanceCounter> runtime::gpu::GPU_Executable::get_performance_data() const
{
std::vector<runtime::PerformanceCounter> rc;
const FunctionInstance& instance = m_function_instance;
if (instance.m_compiled_function != nullptr)
{
instance.m_compiled_function->get_performance_data(rc);
}
return rc;
}
bool runtime::gpu::GPU_Backend::is_supported(const Node& op) const
bool runtime::gpu::GPUBackend::is_supported(const Node& op) const
{
set<string> unsupported_ops = {"Quantize",
"Dequantize",
......
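
One behavioral detail worth calling out from the compile() hunk above: GPUBackend::compile() memoizes per Function through m_exec_map, so compiling the same graph twice on the same backend instance returns the cached Executable. A hedged sketch of the observable effect:

```cpp
#include <cassert>
#include <memory>

#include "ngraph/ngraph.hpp"

// Hedged sketch of the m_exec_map caching shown above: the second compile()
// is a cache hit and returns the same shared_ptr, so codegen runs only once.
void compile_twice(const std::shared_ptr<ngraph::Function>& func)
{
    auto backend = ngraph::runtime::Backend::create("GPU");
    auto exec1 = backend->compile(func);
    auto exec2 = backend->compile(func);
    assert(exec1 == exec2); // same Executable out of m_exec_map
}
```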
......@@ -35,15 +35,16 @@ namespace ngraph
class GPUPrimitiveEmitter;
struct GPURuntimeContext;
class CudaContextManager;
class GPUExecutable;
using EntryPoint_t = void(void** inputs, void** outputs, GPURuntimeContext* ctx);
using EntryPoint = std::function<EntryPoint_t>;
BackendConstructor GPU_BACKEND_API get_backend_constructor_pointer();
class GPU_Backend : public Backend
class GPUBackend : public Backend
{
public:
GPU_Backend();
GPUBackend();
std::shared_ptr<ngraph::runtime::Tensor>
create_tensor(const ngraph::element::Type& element_type,
......@@ -77,40 +78,6 @@ namespace ngraph
private:
std::map<std::shared_ptr<Function>, std::shared_ptr<Executable>> m_exec_map;
};
class GPU_Executable : public Executable
{
public:
GPU_Executable(std::shared_ptr<Function> func, bool enable_timing);
bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) override;
// void remove_compiled_function(std::shared_ptr<Function> func) override;
std::vector<PerformanceCounter> get_performance_data() const override;
private:
class FunctionInstance
{
public:
std::shared_ptr<GPUCompiledFunction> m_compiled_function;
bool m_performance_counters_enabled = false;
EntryPoint m_runtime;
std::vector<void*> m_inputs;
std::vector<void*> m_outputs;
} m_function_instance;
/// \brief Convert a vector of Tensor into a vector of void* where each void*
/// points to a Tensor's data buffer.
/// \param target Pointer to a pre-allocated array of void* with
/// size >= source.size()
/// \param source Source vector of Tensors
static void
initialize_io(void** target,
const std::vector<std::shared_ptr<runtime::Tensor>>& source);
std::shared_ptr<GPU_Backend::BackendContext> m_context;
};
}
}
}
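
The EntryPoint typedef above fixes the calling convention between GPUExecutable and compiled code: two flat arrays of raw device pointers plus a GPURuntimeContext. A hedged illustration follows; the entry-point body here is hypothetical, since real ones are produced by GPUCompiledFunction rather than written by hand.

```cpp
#include "ngraph/runtime/gpu/gpu_backend.hpp"

// Hypothetical function matching EntryPoint_t. Real entry points come out of
// GPUCompiledFunction::compile(), not hand-written code.
static void example_entry(void** inputs, void** outputs,
                          ngraph::runtime::gpu::GPURuntimeContext* ctx)
{
    // inputs[i] / outputs[i] are device pointers filled in by
    // GPUExecutable::initialize_io(); ctx carries the cublas/cudnn handles
    // and primitive tables set up by prepare_runtime_context().
    (void)inputs;
    (void)outputs;
    (void)ctx;
}

// What GPUExecutable::m_runtime holds:
ngraph::runtime::gpu::EntryPoint runtime_fn = example_entry;
```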
......@@ -81,7 +81,7 @@ static GPUStaticInitializers s_static_initializers;
runtime::gpu::GPUCompiledFunction::GPUCompiledFunction(
const shared_ptr<ngraph::Function>& function,
const std::shared_ptr<GPU_Backend::BackendContext>& shared_context)
const std::shared_ptr<GPUBackend::BackendContext>& shared_context)
: m_runtime(nullptr)
, m_function(function)
, m_emit_timing(false)
......@@ -119,7 +119,7 @@ std::vector<std::string> get_case_variants(std::vector<std::string> cases)
std::shared_ptr<runtime::gpu::GPUCompiledFunction> runtime::gpu::GPUCompiledFunction::make(
const std::shared_ptr<ngraph::Function>& function,
const std::shared_ptr<GPU_Backend::BackendContext>& shared_context)
const std::shared_ptr<GPUBackend::BackendContext>& shared_context)
{
return std::make_shared<runtime::gpu::GPUInternalFunction>(function, shared_context);
}
......
......@@ -48,18 +48,18 @@ namespace ngraph
class GPUCompiledFunction
{
friend class GPU_Backend;
friend class GPU_Executable;
friend class GPUBackend;
friend class GPUExecutable;
public:
GPUCompiledFunction(
const std::shared_ptr<ngraph::Function>& function,
const std::shared_ptr<GPU_Backend::BackendContext>& shared_context);
const std::shared_ptr<GPUBackend::BackendContext>& shared_context);
virtual ~GPUCompiledFunction();
static std::shared_ptr<GPUCompiledFunction>
make(const std::shared_ptr<ngraph::Function>& function,
const std::shared_ptr<GPU_Backend::BackendContext>& shared_context);
const std::shared_ptr<GPUBackend::BackendContext>& shared_context);
std::unique_ptr<runtime::gpu::GPURuntimeContext>& ctx();
const std::unique_ptr<GPUPrimitiveEmitter>& get_primitive_emitter() const
{
......@@ -110,7 +110,7 @@ namespace ngraph
std::string m_function_name;
std::unordered_map<std::string, size_t> m_tensor_memory_buffers;
std::shared_ptr<GPU_Backend::BackendContext> m_shared_context;
std::shared_ptr<GPUBackend::BackendContext> m_shared_context;
};
}
}
......
//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <cublas_v2.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cudnn.h>
#include "ngraph/graph_util.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/runtime/backend_manager.hpp"
#include "ngraph/runtime/gpu/gpu_executable.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/runtime/gpu/gpu_internal_function.hpp"
#include "ngraph/runtime/gpu/gpu_primitive_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_tensor.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
#include "ngraph/util.hpp"
using namespace ngraph;
using namespace std;
runtime::gpu::GPUExecutable::GPUExecutable(shared_ptr<Function> func, bool enable_timing)
: m_context(new GPUBackend::BackendContext())
{
if (m_compiled_function == nullptr)
{
m_context->bind_cuda_context_to_thread();
m_compiled_function = runtime::gpu::GPUCompiledFunction::make(func, m_context);
m_compiled_function->m_emit_timing = enable_timing;
m_compiled_function->compile();
m_runtime = m_compiled_function->m_runtime;
m_inputs.resize(func->get_parameters().size());
m_outputs.resize(func->get_output_size());
}
set_parameters_and_results(*func);
}
void runtime::gpu::GPUExecutable::initialize_io(void** target,
const vector<shared_ptr<runtime::Tensor>>& source)
{
for (size_t i = 0; i < source.size(); i++)
{
shared_ptr<runtime::gpu::GPUTensor> tv =
dynamic_pointer_cast<runtime::gpu::GPUTensor>(source[i]);
if (tv)
{
target[i] = tv->m_allocated_buffer_pool;
}
else
{
throw invalid_argument("Tensors passed to GPU backend must be GPU Tensors");
}
}
}
bool runtime::gpu::GPUExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
{
if (m_compiled_function == nullptr)
{
throw runtime_error("compile() must be called before call().");
}
// ensure the GPURuntimeContext primitive pointers are valid
m_context->prepare_runtime_context();
// Device tensors
initialize_io(m_inputs.data(), inputs);
initialize_io(m_outputs.data(), outputs);
auto ctx = m_context->m_runtime_context.get();
m_runtime(m_inputs.data(), m_outputs.data(), ctx);
return true;
}
vector<runtime::PerformanceCounter> runtime::gpu::GPUExecutable::get_performance_data() const
{
std::vector<runtime::PerformanceCounter> rc;
if (m_compiled_function != nullptr)
{
m_compiled_function->get_performance_data(rc);
}
return rc;
}
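
A hedged usage sketch for the timing path above, assuming the two-argument compile() overload shown in gpu_backend.cpp. PerformanceCounter accessor names vary across ngraph versions, so the ones used below are assumptions.

```cpp
#include <iostream>
#include <memory>
#include <vector>

#include "ngraph/ngraph.hpp"

// Hedged sketch: enable per-op timers at compile time (m_emit_timing above),
// run once, then read the counters back from the executable.
void profile(const std::shared_ptr<ngraph::Function>& func,
             const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& outputs,
             const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& inputs)
{
    auto backend = ngraph::runtime::Backend::create("GPU");
    auto exec = backend->compile(func, /*timing_enable=*/true);
    exec->call(outputs, inputs);
    for (const auto& pc : exec->get_performance_data())
    {
        // get_node() / total_microseconds() assumed; check your ngraph version
        std::cout << pc.get_node()->get_name() << ": "
                  << pc.total_microseconds() << " us\n";
    }
}
```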
//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <map>
#include <memory>
#include "gpu_backend_visibility.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/backend_manager.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
class GPUPrimitiveEmitter;
struct GPURuntimeContext;
class CudaContextManager;
using EntryPoint_t = void(void** inputs, void** outputs, GPURuntimeContext* ctx);
using EntryPoint = std::function<EntryPoint_t>;
class GPUExecutable : public Executable
{
public:
GPUExecutable(std::shared_ptr<Function> func, bool enable_timing);
bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) override;
// void remove_compiled_function(std::shared_ptr<Function> func) override;
std::vector<PerformanceCounter> get_performance_data() const override;
private:
std::shared_ptr<GPUCompiledFunction> m_compiled_function;
bool m_performance_counters_enabled = false;
EntryPoint m_runtime;
std::vector<void*> m_inputs;
std::vector<void*> m_outputs;
/// \brief Convert a vector of Tensor into a vector of void* where each void*
/// points to a Tensor's data buffer.
/// \param target Pointer to a pre-allocated array of void* with
/// size >= source.size()
/// \param source Source vector of Tensors
static void
initialize_io(void** target,
const std::vector<std::shared_ptr<runtime::Tensor>>& source);
std::shared_ptr<GPUBackend::BackendContext> m_context;
};
}
}
}
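
Finally, the GPUTensor requirement that initialize_io() documents above is visible to callers: call() rejects tensors that did not come from the GPU backend. A hedged sketch, where the CPU backend is just an example source of foreign tensors and func is assumed to take and produce one Shape{4} tensor:

```cpp
#include <memory>

#include "ngraph/ngraph.hpp"

// Hedged sketch: only tensors created by the GPU backend are accepted;
// anything else makes initialize_io() throw std::invalid_argument.
void tensor_origin_matters(const std::shared_ptr<ngraph::Function>& func)
{
    using namespace ngraph;
    auto gpu = runtime::Backend::create("GPU");
    auto cpu = runtime::Backend::create("CPU"); // assumes CPU backend is built

    auto t_gpu = gpu->create_tensor(element::f32, Shape{4});
    auto t_cpu = cpu->create_tensor(element::f32, Shape{4});

    auto exec = gpu->compile(func);
    exec->call({t_gpu}, {t_gpu}); // ok: both are GPUTensors
    // exec->call({t_cpu}, {t_cpu}); // throws: "Tensors passed to GPU
    //                               //  backend must be GPU Tensors"
}
```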
......@@ -153,7 +153,7 @@ std::string runtime::gpu::GPUExternalFunction::emit_op(GPUCompiledFunction* exte
runtime::gpu::GPUExternalFunction::GPUExternalFunction(
const shared_ptr<ngraph::Function>& function,
const std::shared_ptr<GPU_Backend::BackendContext>& shared_context)
const std::shared_ptr<GPUBackend::BackendContext>& shared_context)
: GPUCompiledFunction(function, shared_context)
{
}
......
......@@ -52,7 +52,7 @@ namespace ngraph
public:
GPUExternalFunction(
const std::shared_ptr<ngraph::Function>& function,
const std::shared_ptr<GPU_Backend::BackendContext>& shared_context);
const std::shared_ptr<GPUBackend::BackendContext>& shared_context);
virtual ~GPUExternalFunction();
virtual std::string
......
......@@ -128,7 +128,7 @@ std::string runtime::gpu::GPUInternalFunction::emit_op(GPUCompiledFunction* comp
runtime::gpu::GPUInternalFunction::GPUInternalFunction(
const shared_ptr<ngraph::Function>& function,
const std::shared_ptr<GPU_Backend::BackendContext>& shared_context)
const std::shared_ptr<GPUBackend::BackendContext>& shared_context)
: GPUCompiledFunction(function, shared_context)
{
}
......
......@@ -49,7 +49,7 @@ namespace ngraph
public:
GPUInternalFunction(
const std::shared_ptr<ngraph::Function>& function,
const std::shared_ptr<GPU_Backend::BackendContext>& shared_context);
const std::shared_ptr<GPUBackend::BackendContext>& shared_context);
virtual ~GPUInternalFunction();
virtual std::string
......