Commit 05aa1be8 authored by Fenglei's avatar Fenglei Committed by Robert Kimball

reset buffer size, use original input size for memcpy (#1786)

* reset buffer size, use original input size for memcpy

* resolve comment and add test

* update comment
parent b0fd261b
...@@ -26,7 +26,7 @@ constexpr const uint32_t initial_buffer_size = 10 * 1024 * 1024; ...@@ -26,7 +26,7 @@ constexpr const uint32_t initial_buffer_size = 10 * 1024 * 1024;
runtime::gpu::GPUMemoryManager::GPUMemoryManager(GPUPrimitiveEmitter* emitter) runtime::gpu::GPUMemoryManager::GPUMemoryManager(GPUPrimitiveEmitter* emitter)
: m_buffer_offset(0) : m_buffer_offset(0)
, m_buffered_mem(initial_buffer_size) , m_buffered_mem(initial_buffer_size, 0)
, m_workspace_manager(new pass::MemoryManager(runtime::gpu::GPUMemoryManager::alignment)) , m_workspace_manager(new pass::MemoryManager(runtime::gpu::GPUMemoryManager::alignment))
, m_argspace_mem(1, {nullptr, 0}) , m_argspace_mem(1, {nullptr, 0})
, m_workspace_mem(1, {nullptr, 0}) , m_workspace_mem(1, {nullptr, 0})
...@@ -80,6 +80,8 @@ void runtime::gpu::GPUMemoryManager::allocate() ...@@ -80,6 +80,8 @@ void runtime::gpu::GPUMemoryManager::allocate()
m_argspace_mem.back().ptr, m_buffered_mem.data(), m_buffer_offset); m_argspace_mem.back().ptr, m_buffered_mem.data(), m_buffer_offset);
// add an empty node to the end of the list and zero offset // add an empty node to the end of the list and zero offset
m_argspace_mem.push_back({nullptr, 0}); m_argspace_mem.push_back({nullptr, 0});
m_buffered_mem.clear();
m_buffered_mem.resize(initial_buffer_size, 0);
m_buffer_offset = 0; m_buffer_offset = 0;
} }
...@@ -97,7 +99,9 @@ void runtime::gpu::GPUMemoryManager::allocate() ...@@ -97,7 +99,9 @@ void runtime::gpu::GPUMemoryManager::allocate()
size_t runtime::gpu::GPUMemoryManager::queue_for_transfer(const void* data, size_t size) size_t runtime::gpu::GPUMemoryManager::queue_for_transfer(const void* data, size_t size)
{ {
// if the current allocation will overflow the host buffer // if the current allocation will overflow the host buffer
size_t new_size = m_buffer_offset + size; size_t aligned_size =
ngraph::pass::MemoryManager::align(size, runtime::gpu::GPUMemoryManager::alignment);
size_t new_size = m_buffer_offset + aligned_size;
size_t buffer_size = m_buffered_mem.size(); size_t buffer_size = m_buffered_mem.size();
bool need_resize = false; bool need_resize = false;
while (buffer_size < new_size) while (buffer_size < new_size)
...@@ -109,12 +113,12 @@ size_t runtime::gpu::GPUMemoryManager::queue_for_transfer(const void* data, size ...@@ -109,12 +113,12 @@ size_t runtime::gpu::GPUMemoryManager::queue_for_transfer(const void* data, size
if (need_resize) if (need_resize)
{ {
m_buffered_mem.resize(buffer_size); m_buffered_mem.resize(buffer_size, 0);
} }
size_t offset = m_buffer_offset; size_t offset = m_buffer_offset;
std::memcpy(m_buffered_mem.data() + offset, data, size); std::memcpy(m_buffered_mem.data() + offset, data, size);
m_buffer_offset += size; m_buffer_offset += aligned_size;
return offset; return offset;
} }
...@@ -133,7 +137,6 @@ runtime::gpu::GPUAllocator::GPUAllocator(const GPUAllocator& g) ...@@ -133,7 +137,6 @@ runtime::gpu::GPUAllocator::GPUAllocator(const GPUAllocator& g)
size_t runtime::gpu::GPUAllocator::reserve_argspace(const void* data, size_t size) size_t runtime::gpu::GPUAllocator::reserve_argspace(const void* data, size_t size)
{ {
// add parameter data to host buffer that will be transfered to device // add parameter data to host buffer that will be transfered to device
size = ngraph::pass::MemoryManager::align(size, runtime::gpu::GPUMemoryManager::alignment);
size_t offset = m_manager->queue_for_transfer(data, size); size_t offset = m_manager->queue_for_transfer(data, size);
auto local = std::prev(m_manager->m_argspace_mem.end()); auto local = std::prev(m_manager->m_argspace_mem.end());
// return a lambda that will yield the gpu memory address. this // return a lambda that will yield the gpu memory address. this
......
...@@ -83,6 +83,30 @@ TEST(gpu_test, memory_manager_extract_arguments) ...@@ -83,6 +83,30 @@ TEST(gpu_test, memory_manager_extract_arguments)
EXPECT_EQ(host, fp32_args); EXPECT_EQ(host, fp32_args);
} }
// Regression test for an allocator bug: when a reservation smaller than the
// alignment was queued, the allocator memcpy'd the full *aligned* size from
// the caller's buffer. For example, with an 8-byte alignment and a 4-byte
// reservation it copied 8 bytes, which caused two problems:
// 1. the padding bytes received caller data instead of being zero-initialized, and
// 2. the read ran past the end of the input buffer (undefined behavior).
// Here we reserve 4 bytes under an 8-byte alignment and verify that only the
// first 4 input bytes reach the device, with the padding left zeroed.
TEST(gpu_test, memory_manager_argspace_alignment)
{
    const size_t alignment = 8;
    std::vector<char> host_input = {0, 1, 2, 3, 4, 5, 6, 7};
    std::vector<char> expected = {0, 1, 2, 3, 0, 0, 0, 0};
    std::vector<char> device_copy(alignment, 0);
    size_t arg_index;
    runtime::gpu::GPUPrimitiveEmitter emitter;
    {
        // Allocator scope ends before allocation so the queued transfer is flushed.
        auto allocator = emitter.get_memory_allocator();
        arg_index = allocator.reserve_argspace(host_input.data(), 4 * sizeof(char));
    }
    emitter.allocate_primitive_memory();
    runtime::gpu::memory_primitive& mem_primitive = emitter.get_memory_primitives()[arg_index];
    // Read back the full aligned region from the device and compare byte-for-byte.
    runtime::gpu::cuda_memcpyDtH(device_copy.data(), mem_primitive(), alignment * sizeof(char));
    EXPECT_EQ(device_copy, expected);
}
TEST(gpu_test, memory_manager_argspace_size) TEST(gpu_test, memory_manager_argspace_size)
{ {
std::vector<float> fp32_args = {2112.0f, 2112.0f}; std::vector<float> fp32_args = {2112.0f, 2112.0f};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment