Unverified Commit 943b167f authored by Robert Kimball, committed by GitHub

GPU External Function cleanup (#1698)

* cleanup

* cleanup header includes

* cleanup

* cleanup TensorMemoryReservation pass

* include cleanup

* more cleanup

* more header cleanup

* style

* Remove obsolete comments
parent d38aba91
...@@ -16,17 +16,16 @@ ...@@ -16,17 +16,16 @@
#pragma once #pragma once
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <stdint.h>
#include <string>
#include <cublas_v2.h> #include <cublas_v2.h>
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <cudnn.h> #include <cudnn.h>
#include <iostream>
#include <nvrtc.h> #include <nvrtc.h>
#include <sstream>
#include <stdexcept>
#include <stdint.h>
#include <string>
//why use "do...while.." //why use "do...while.."
//https://stackoverflow.com/questions/154136/why-use-apparently-meaningless-do-while-and-if-else-statements-in-macros //https://stackoverflow.com/questions/154136/why-use-apparently-meaningless-do-while-and-if-else-statements-in-macros
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <cudnn.h> #include <cudnn.h>
#include "ngraph/runtime/gpu/cuda_error_check.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp" #include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph namespace ngraph
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include "ngraph/runtime/gpu/cuda_error_check.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp" #include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
using namespace ngraph; using namespace ngraph;
......
...@@ -16,11 +16,10 @@ ...@@ -16,11 +16,10 @@
#pragma once #pragma once
#include <cuda.h>
#include <memory> #include <memory>
#include <string> #include <string>
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph namespace ngraph
{ {
namespace runtime namespace runtime
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <iostream> #include <iostream>
#include <string> #include <string>
#include "ngraph/runtime/gpu/cuda_error_check.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp" #include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp" #include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp" #include "ngraph/runtime/gpu/gpu_util.hpp"
......
...@@ -16,11 +16,10 @@ ...@@ -16,11 +16,10 @@
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph namespace ngraph
{ {
namespace runtime namespace runtime
......
...@@ -178,15 +178,11 @@ const size_t runtime::gpu::GPU_ExternalFunction::GPU_ExternalFunction::s_memory_ ...@@ -178,15 +178,11 @@ const size_t runtime::gpu::GPU_ExternalFunction::GPU_ExternalFunction::s_memory_
runtime::gpu::GPU_ExternalFunction::GPU_ExternalFunction( runtime::gpu::GPU_ExternalFunction::GPU_ExternalFunction(
const shared_ptr<ngraph::Function>& function, const shared_ptr<ngraph::Function>& function,
std::shared_ptr<GPU_Backend::BackendContext>& shared_context, std::shared_ptr<GPU_Backend::BackendContext>& shared_context)
bool release_function)
: m_compiled_function(nullptr) : m_compiled_function(nullptr)
, m_function(function) , m_function(function)
, m_emit_timing(false) , m_emit_timing(false)
, m_is_compiled(false) , m_is_compiled(false)
, m_release_function(release_function)
, m_temporaries_used(false)
, m_tensor_memory_buffers(new std::unordered_map<std::string, size_t>)
, m_shared_context(shared_context) , m_shared_context(shared_context)
{ {
} }
...@@ -195,51 +191,44 @@ runtime::gpu::GPU_ExternalFunction::~GPU_ExternalFunction() ...@@ -195,51 +191,44 @@ runtime::gpu::GPU_ExternalFunction::~GPU_ExternalFunction()
{ {
} }
void runtime::gpu::GPU_ExternalFunction::emit_header() const string& runtime::gpu::GPU_ExternalFunction::get_pch_header_source()
{ {
m_writer += R"( static string s_pch_header_source = R"(
// Generated by the nGraph GPU backend // Generated by the nGraph GPU backend
#include <cublas_v2.h> #include <cublas_v2.h>
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <cudnn.h> #include <cudnn.h>
#include "ngraph/descriptor/input.hpp" #include "ngraph/runtime/gpu/cuda_error_check.hpp"
#include "ngraph/descriptor/layout/dense_tensor_layout.hpp"
#include "ngraph/descriptor/output.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/function.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/node.hpp"
#include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/dump_sorted.hpp"
#include "ngraph/pass/like_replacement.hpp"
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/gpu/cudnn_descriptors.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_ops.hpp"
#include "ngraph/runtime/gpu/gpu_invoke.hpp" #include "ngraph/runtime/gpu/gpu_invoke.hpp"
#include "ngraph/runtime/gpu/gpu_runtime_context.hpp" #include "ngraph/runtime/gpu/gpu_runtime_context.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp" #include "ngraph/runtime/gpu/gpu_util.hpp"
#include "ngraph/util.hpp"
)"; )";
return s_pch_header_source;
}
m_pch_header_source = m_writer.get_code(); const string& runtime::gpu::GPU_ExternalFunction::get_header_source()
{
m_writer += R"( static string s_header_source =
get_pch_header_source() + R"(
using namespace ngraph; using namespace ngraph;
using namespace ngraph::runtime; using namespace ngraph::runtime;
using namespace std; using namespace std;
)"; )"
// The "dso_handle" symbol is required by __cxa_atexit()
// which is enabled because the JIT uses it as the default mechanism
// to register cleanup handlers. We use it, and not atexit(), because
// atexit() happens too late, when the JIT is no longer alive
+ "void *__dso_handle = 0;\n\n" +
"static gpu::GPURuntimeContext* m_runtime_context = nullptr;\n";
return s_header_source;
}
// The "dso_handle" symbol is required by __cxa_atexit() void runtime::gpu::GPU_ExternalFunction::emit_header()
// which is enabled because the JIT uses it as the default mechanism {
// to register cleanup handlers. We use it, and not atexit(), because m_writer << get_header_source();
// atexit() happens too late, when the JIT is no longer alive
m_writer << "void *__dso_handle = 0;\n\n";
m_writer << "static gpu::GPURuntimeContext* m_runtime_context = nullptr;\n";
} }
void runtime::gpu::GPU_ExternalFunction::emit_timer_functions() void runtime::gpu::GPU_ExternalFunction::emit_timer_functions()
...@@ -368,26 +357,26 @@ void runtime::gpu::GPU_ExternalFunction::emit_function_declarations() ...@@ -368,26 +357,26 @@ void runtime::gpu::GPU_ExternalFunction::emit_function_declarations()
void runtime::gpu::GPU_ExternalFunction::emit_temp_mem_pool_allocation( void runtime::gpu::GPU_ExternalFunction::emit_temp_mem_pool_allocation(
shared_ptr<Function> current_function) shared_ptr<Function> current_function)
{ {
m_temporaries_used = false; bool temporaries_used = false;
size_t worst_case_tmp_size = 0; size_t worst_case_tmp_size = 0;
for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function)) for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function))
{ {
if (node->liveness_new_list.size() > 0) if (node->liveness_new_list.size() > 0)
{ {
m_temporaries_used = true; temporaries_used = true;
for (descriptor::Tensor* tensor : node->liveness_new_list) for (descriptor::Tensor* tensor : node->liveness_new_list)
{ {
worst_case_tmp_size += tensor->size(); worst_case_tmp_size += tensor->size();
} }
} }
} }
if (m_temporaries_used) if (temporaries_used)
{ {
m_writer << "// Allocate the memory pool\n"; m_writer << "// Allocate the memory pool\n";
// TODO memory pool malloc. // TODO memory pool malloc.
m_writer m_writer
<< "char* pool_base_ptr = (char*)ngraph::runtime::gpu::invoke_memory_primitive(ctx, " << "char* pool_base_ptr = (char*)ngraph::runtime::gpu::invoke_memory_primitive(ctx, "
<< m_tensor_memory_buffers->at(current_function->get_name()) << ");\n"; << m_tensor_memory_buffers.at(current_function->get_name()) << ");\n";
// Add temporaries to the variable name map // Add temporaries to the variable name map
for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function)) for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function))
...@@ -562,9 +551,6 @@ void runtime::gpu::GPU_ExternalFunction::compile() ...@@ -562,9 +551,6 @@ void runtime::gpu::GPU_ExternalFunction::compile()
m_function_name = m_function->get_name(); m_function_name = m_function->get_name();
auto allocator = std::make_shared<runtime::gpu::GPUAllocator>(
m_shared_context->m_primitive_emitter->get_memory_allocator());
m_pass_manager.register_pass<ngraph::pass::LikeReplacement>(); m_pass_manager.register_pass<ngraph::pass::LikeReplacement>();
m_pass_manager m_pass_manager
.register_pass<ngraph::pass::AssignLayout<descriptor::layout::DenseTensorLayout>>(); .register_pass<ngraph::pass::AssignLayout<descriptor::layout::DenseTensorLayout>>();
...@@ -574,6 +560,7 @@ void runtime::gpu::GPU_ExternalFunction::compile() ...@@ -574,6 +560,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
m_pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment); m_pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment);
GPUAllocator allocator = m_shared_context->m_primitive_emitter->get_memory_allocator();
m_pass_manager.register_pass<runtime::gpu::pass::TensorMemoryReservation>( m_pass_manager.register_pass<runtime::gpu::pass::TensorMemoryReservation>(
allocator, m_tensor_memory_buffers); allocator, m_tensor_memory_buffers);
...@@ -603,7 +590,7 @@ void runtime::gpu::GPU_ExternalFunction::compile() ...@@ -603,7 +590,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
emit_functions(); emit_functions();
// allocate device buffers for primitive arguments and workspace // allocate device buffers for primitive arguments and workspace
allocator->close(); allocator.close();
m_shared_context->m_primitive_emitter->allocate_primitive_memory(); m_shared_context->m_primitive_emitter->allocate_primitive_memory();
string code = m_writer.get_code(); string code = m_writer.get_code();
...@@ -611,7 +598,7 @@ void runtime::gpu::GPU_ExternalFunction::compile() ...@@ -611,7 +598,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
m_compiler.reset(new codegen::Compiler()); m_compiler.reset(new codegen::Compiler());
m_execution_engine.reset(new codegen::ExecutionEngine()); m_execution_engine.reset(new codegen::ExecutionEngine());
m_compiler->set_precompiled_header_source(m_pch_header_source); m_compiler->set_precompiled_header_source(get_pch_header_source());
auto codegen_module = m_compiler->compile(code); auto codegen_module = m_compiler->compile(code);
if (codegen_module == nullptr) if (codegen_module == nullptr)
...@@ -629,10 +616,6 @@ void runtime::gpu::GPU_ExternalFunction::compile() ...@@ -629,10 +616,6 @@ void runtime::gpu::GPU_ExternalFunction::compile()
} }
m_is_compiled = true; m_is_compiled = true;
if (m_release_function)
{
release_function();
}
} }
void runtime::gpu::GPU_ExternalFunction::emit_debug_function_entry(Node* node) void runtime::gpu::GPU_ExternalFunction::emit_debug_function_entry(Node* node)
......
...@@ -55,8 +55,7 @@ namespace ngraph ...@@ -55,8 +55,7 @@ namespace ngraph
public: public:
GPU_ExternalFunction(const std::shared_ptr<ngraph::Function>& function, GPU_ExternalFunction(const std::shared_ptr<ngraph::Function>& function,
std::shared_ptr<GPU_Backend::BackendContext>& shared_context, std::shared_ptr<GPU_Backend::BackendContext>& shared_context);
bool release_function = true);
~GPU_ExternalFunction(); ~GPU_ExternalFunction();
std::unique_ptr<runtime::gpu::GPURuntimeContext>& ctx(); std::unique_ptr<runtime::gpu::GPURuntimeContext>& ctx();
...@@ -90,11 +89,13 @@ namespace ngraph ...@@ -90,11 +89,13 @@ namespace ngraph
void emit_debug_function_exit(Node* node); void emit_debug_function_exit(Node* node);
void emit_temp_mem_pool_allocation(std::shared_ptr<Function> current_function); void emit_temp_mem_pool_allocation(std::shared_ptr<Function> current_function);
void emit_op(EMIT_ARGS); void emit_op(EMIT_ARGS);
void release_function() { m_function = nullptr; }
void store_emitted_functions(const std::string& code); void store_emitted_functions(const std::string& code);
std::string emit_op_as_function(const Node& node, const std::string& function_name); std::string emit_op_as_function(const Node& node, const std::string& function_name);
std::string strip_comments(const std::string& s) const; std::string strip_comments(const std::string& s) const;
static const std::string& get_pch_header_source();
static const std::string& get_header_source();
codegen::CodeWriter m_writer; codegen::CodeWriter m_writer;
ngraph::pass::Manager m_pass_manager; ngraph::pass::Manager m_pass_manager;
...@@ -110,14 +111,11 @@ namespace ngraph ...@@ -110,14 +111,11 @@ namespace ngraph
bool m_emit_timing; bool m_emit_timing;
bool m_is_compiled; bool m_is_compiled;
bool m_release_function;
bool m_temporaries_used;
size_t m_offset; size_t m_offset;
std::string m_function_name; std::string m_function_name;
std::string m_pch_header_source;
std::shared_ptr<std::unordered_map<std::string, size_t>> m_tensor_memory_buffers; std::unordered_map<std::string, size_t> m_tensor_memory_buffers;
std::shared_ptr<GPU_Backend::BackendContext> m_shared_context; std::shared_ptr<GPU_Backend::BackendContext> m_shared_context;
}; };
} }
......
...@@ -15,24 +15,24 @@ ...@@ -15,24 +15,24 @@
//***************************************************************************** //*****************************************************************************
#include "ngraph/runtime/gpu/gpu_runtime_context.hpp" #include "ngraph/runtime/gpu/gpu_runtime_context.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
using namespace ngraph; using namespace ngraph;
using namespace ngraph::runtime::gpu;
extern "C" void ngraph::runtime::gpu::start_stopwatch(GPURuntimeContext* ctx, size_t idx) extern "C" void runtime::gpu::start_stopwatch(GPURuntimeContext* ctx, size_t idx)
{ {
ctx->stopwatch_pool->get(idx).start(); ctx->stopwatch_pool->get(idx).start();
} }
extern "C" void ngraph::runtime::gpu::stop_stopwatch(GPURuntimeContext* ctx, size_t idx) extern "C" void runtime::gpu::stop_stopwatch(GPURuntimeContext* ctx, size_t idx)
{ {
ctx->stopwatch_pool->get(idx).stop(); ctx->stopwatch_pool->get(idx).stop();
} }
extern "C" size_t ngraph::runtime::gpu::count_stopwatch(GPURuntimeContext* ctx, size_t idx) extern "C" size_t runtime::gpu::count_stopwatch(GPURuntimeContext* ctx, size_t idx)
{ {
return ctx->stopwatch_pool->get(idx).get_call_count(); return ctx->stopwatch_pool->get(idx).get_call_count();
} }
extern "C" size_t ngraph::runtime::gpu::us_stopwatch(GPURuntimeContext* ctx, size_t idx) extern "C" size_t runtime::gpu::us_stopwatch(GPURuntimeContext* ctx, size_t idx)
{ {
return ctx->stopwatch_pool->get(idx).get_total_microseconds(); return ctx->stopwatch_pool->get(idx).get_total_microseconds();
} }
...@@ -16,12 +16,13 @@ ...@@ -16,12 +16,13 @@
#pragma once #pragma once
#include <cublas_v2.h>
#include <cudnn.h>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp" #include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_pool.hpp" #include "ngraph/runtime/gpu/gpu_cuda_function_pool.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph namespace ngraph
{ {
...@@ -29,8 +30,10 @@ namespace ngraph ...@@ -29,8 +30,10 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
typedef std::function<void(void**, void**)> primitive; class StopWatchPool;
typedef std::function<void*(void)> memory_primitive;
using primitive = std::function<void(void**, void**)>;
using memory_primitive = std::function<void*(void)>;
extern "C" { extern "C" {
struct GPURuntimeContext struct GPURuntimeContext
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include "ngraph/descriptor/layout/dense_tensor_layout.hpp" #include "ngraph/descriptor/layout/dense_tensor_layout.hpp"
#include "ngraph/runtime/gpu/cuda_error_check.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp" #include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/gpu/gpu_tensor_view.hpp" #include "ngraph/runtime/gpu/gpu_tensor_view.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp" #include "ngraph/runtime/gpu/gpu_util.hpp"
......
...@@ -16,14 +16,14 @@ ...@@ -16,14 +16,14 @@
#include <cassert> #include <cassert>
#include <cstdlib> #include <cstdlib>
#include <cuda.h>
#include <cuda_runtime.h>
#include <iostream> #include <iostream>
#include <stddef.h> #include <stddef.h>
#include <stdio.h> #include <stdio.h>
#include <string> #include <string>
#include <cuda.h> #include "ngraph/runtime/gpu/cuda_error_check.hpp"
#include <cuda_runtime.h>
#include "ngraph/runtime/gpu/gpu_util.hpp" #include "ngraph/runtime/gpu/gpu_util.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
......
...@@ -16,15 +16,9 @@ ...@@ -16,15 +16,9 @@
#pragma once #pragma once
#include <iostream> #include <cudnn.h>
#include <memory>
#include <string>
#include <tuple>
#include <vector> #include <vector>
#include "ngraph/runtime/gpu/cuda_error_check.hpp"
#include "ngraph/util.hpp"
namespace ngraph namespace ngraph
{ {
namespace runtime namespace runtime
...@@ -43,14 +37,16 @@ namespace ngraph ...@@ -43,14 +37,16 @@ namespace ngraph
std::pair<uint64_t, uint64_t> idiv_magic_u64(uint64_t divisor); std::pair<uint64_t, uint64_t> idiv_magic_u64(uint64_t divisor);
uint32_t idiv_ceil(int n, int d); uint32_t idiv_ceil(int n, int d);
template <typename T> // This is commented out because it increases the compile time.
void print_gpu_tensor(const void* p, size_t element_count) // It should be moved to a debug header.
{ // template <typename T>
std::vector<T> local(element_count); // void print_gpu_tensor(const void* p, size_t element_count)
size_t size_in_bytes = sizeof(T) * element_count; // {
cuda_memcpyDtH(local.data(), p, size_in_bytes); // std::vector<T> local(element_count);
std::cout << "{" << ngraph::join(local) << "}" << std::endl; // size_t size_in_bytes = sizeof(T) * element_count;
} // cuda_memcpyDtH(local.data(), p, size_in_bytes);
// std::cout << "{" << ngraph::join(local) << "}" << std::endl;
// }
class StopWatch class StopWatch
{ {
......
...@@ -17,30 +17,24 @@ ...@@ -17,30 +17,24 @@
#include <memory> #include <memory>
#include "ngraph/function.hpp" #include "ngraph/function.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/pass/manager_state.hpp" #include "ngraph/pass/manager_state.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/runtime/gpu/gpu_memory_manager.hpp" #include "ngraph/runtime/gpu/gpu_memory_manager.hpp"
#include "ngraph/runtime/gpu/pass/tensor_memory_reservation.hpp" #include "ngraph/runtime/gpu/pass/tensor_memory_reservation.hpp"
using namespace ngraph; using namespace ngraph;
using namespace std;
bool ngraph::runtime::gpu::pass::TensorMemoryReservation::run_on_function( bool runtime::gpu::pass::TensorMemoryReservation::run_on_function(shared_ptr<Function> f)
std::shared_ptr<Function> f)
{ {
auto allocator = m_allocator.lock(); size_t mem_pool_size = f->get_temporary_pool_size();
auto buffers = m_memory_buffers.lock(); if (mem_pool_size)
if (allocator && buffers)
{ {
size_t mem_pool_size = f->get_temporary_pool_size(); size_t pool_idx = m_allocator.reserve_workspace(mem_pool_size, false);
if (mem_pool_size) m_memory_buffers.insert({f->get_name(), pool_idx});
{
size_t pool_idx = allocator->reserve_workspace(mem_pool_size, false);
buffers->insert({f->get_name(), pool_idx});
return true; return true;
}
} }
return false; return false;
} }
...@@ -37,8 +37,8 @@ namespace ngraph ...@@ -37,8 +37,8 @@ namespace ngraph
class ngraph::runtime::gpu::pass::TensorMemoryReservation : public ngraph::pass::FunctionPass class ngraph::runtime::gpu::pass::TensorMemoryReservation : public ngraph::pass::FunctionPass
{ {
public: public:
TensorMemoryReservation(std::weak_ptr<ngraph::runtime::gpu::GPUAllocator> allocator, TensorMemoryReservation(GPUAllocator& allocator,
std::weak_ptr<std::unordered_map<std::string, size_t>> buffers) std::unordered_map<std::string, size_t>& buffers)
: ngraph::pass::FunctionPass() : ngraph::pass::FunctionPass()
, m_allocator(allocator) , m_allocator(allocator)
, m_memory_buffers(buffers) , m_memory_buffers(buffers)
...@@ -48,6 +48,6 @@ public: ...@@ -48,6 +48,6 @@ public:
virtual bool run_on_function(std::shared_ptr<ngraph::Function> f); virtual bool run_on_function(std::shared_ptr<ngraph::Function> f);
private: private:
std::weak_ptr<ngraph::runtime::gpu::GPUAllocator> m_allocator; GPUAllocator& m_allocator;
std::weak_ptr<std::unordered_map<std::string, size_t>> m_memory_buffers; std::unordered_map<std::string, size_t>& m_memory_buffers;
}; };
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment