Commit e189f9c6 authored by Ashok Emani, committed by Nick Korovaiko

enable TensorView to use pre-allocated memory (#795)

* enable TensorView to use pre-allocated memory

* proper check for nullptr

* add unit test for custom memory with TensorView; address feedback

* minor fix from feedback

* support custom memory for GPU TensorView

* feedback fixes and code formatting
parent 913855ec
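The overload added below lets a caller hand an existing buffer to make_primary_tensor_view so the resulting TensorView aliases that memory instead of allocating its own. A minimal usage sketch, modelled on the unit test at the end of this diff; the umbrella include and the "INTERPRETER" backend name are assumptions about the surrounding project setup, not part of this commit:

#include <memory>
#include <vector>
#include "ngraph/ngraph.hpp" // assumed umbrella header

using namespace ngraph;

void divide_with_preallocated_buffers()
{
    Shape shape{2, 2};
    auto A = std::make_shared<op::Parameter>(element::f32, shape);
    auto B = std::make_shared<op::Parameter>(element::f32, shape);
    auto f = std::make_shared<Function>(std::make_shared<op::Divide>(A, B),
                                        op::ParameterVector{A, B});

    auto manager = runtime::Manager::get("INTERPRETER"); // backend name assumed
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(manager->compile(f));

    // Caller-owned buffers; the tensor views alias them directly, so nothing
    // needs to be copied in before the call or read back out afterwards.
    std::vector<float> av{2, 4, 8, 16};
    std::vector<float> bv{1, 2, 4, 8};
    std::vector<float> rv(4, 0);

    auto a = backend->make_primary_tensor_view(element::f32, shape, av.data());
    auto b = backend->make_primary_tensor_view(element::f32, shape, bv.data());
    auto r = backend->make_primary_tensor_view(element::f32, shape, rv.data());

    cf->call({r}, {a, b}); // rv now holds {2, 2, 2, 2}
}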
@@ -52,6 +52,12 @@ namespace ngraph
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape) = 0;
/// @brief Return a handle for a tensor view backed by the given pre-allocated memory on the backend device
virtual std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer) = 0;
template <typename T>
std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const Shape& shape)
......
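Since the new overload is pure virtual, every backend has to provide it. One property the header does not state, but which the destructor changes further down imply (the CPU and host views only free buffers they allocated themselves, and the GPU view guards cudaFree with m_custom_memory): the view does not take ownership of caller-supplied memory, so the buffer must outlive the view and be released by the caller. A small sketch of that contract, with the same assumed include as above:

#include <memory>
#include <vector>
#include "ngraph/ngraph.hpp" // assumed umbrella header

void ownership_sketch(const std::shared_ptr<ngraph::runtime::Backend>& backend)
{
    ngraph::Shape shape{2, 2};
    std::vector<float> data(ngraph::shape_size(shape), 0.0f); // caller-owned storage
    {
        auto tv = backend->make_primary_tensor_view(ngraph::element::f32, shape, data.data());
        // ... use tv while `data` is alive ...
    } // destroying tv leaves `data` untouched
} // `data` is released here by the vector, never by the TensorView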
@@ -35,3 +35,10 @@ std::shared_ptr<ngraph::runtime::TensorView>
auto rc = make_shared<runtime::cpu::CPUTensorView>(element_type, shape);
return dynamic_pointer_cast<runtime::TensorView>(rc);
}
std::shared_ptr<ngraph::runtime::TensorView> runtime::cpu::CPU_Backend::make_primary_tensor_view(
const ngraph::element::Type& element_type, const Shape& shape, void* memory_pointer)
{
auto rc = make_shared<runtime::cpu::CPUTensorView>(element_type, shape, memory_pointer);
return dynamic_pointer_cast<runtime::TensorView>(rc);
}
@@ -34,6 +34,11 @@ namespace ngraph
std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape) override;
std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer) override;
};
}
}
......
@@ -36,6 +36,7 @@ const size_t runtime::cpu::CPUTensorView::BufferAlignment = 64;
runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const string& name)
: runtime::TensorView(std::make_shared<ngraph::descriptor::PrimaryTensorView>(
std::make_shared<ngraph::TensorViewType>(element_type, shape), name, true, true, false))
@@ -49,7 +50,12 @@ runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_
*m_descriptor, runtime::cpu::LayoutDescriptor::create_native_axis_order(shape.size())));
buffer_size = shape_size(shape) * element_type.size();
if (buffer_size)
if (memory_pointer != nullptr)
{
aligned_buffer = static_cast<char*>(memory_pointer);
}
else if (buffer_size > 0)
{
size_t allocation_size = buffer_size + BufferAlignment;
auto ptr = malloc(allocation_size);
@@ -72,6 +78,13 @@ runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_
}
}
runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
const string& name)
: CPUTensorView(element_type, shape, nullptr, name)
{
}
runtime::cpu::CPUTensorView::~CPUTensorView()
{
free(buffer);
......
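The default CPU path over-allocates and aligns its buffer to BufferAlignment (64 bytes), while a caller-supplied pointer is used exactly as passed in. The new unit test passes plain std::vector storage, so no particular alignment appears to be required; the sketch below is only for callers who want to match the 64-byte alignment of the default path, and posix_memalign is an assumption about the caller's (POSIX) environment, not something this commit uses:

#include <stdlib.h>

// Allocate `count` floats aligned to the same 64-byte boundary that
// CPUTensorView uses for its own allocations. The caller frees with free().
float* allocate_aligned_floats(size_t count)
{
    void* p = nullptr;
    if (posix_memalign(&p, 64, count * sizeof(float)) != 0)
    {
        return nullptr;
    }
    return static_cast<float*>(p);
}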
@@ -33,6 +33,10 @@ namespace ngraph
CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
const std::string& name = "external");
CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const std::string& name = "external");
virtual ~CPUTensorView() override;
char* get_data_ptr();
......
@@ -34,3 +34,10 @@ std::shared_ptr<ngraph::runtime::TensorView>
auto rc = make_shared<runtime::gpu::GPU_TensorView>(element_type, shape);
return dynamic_pointer_cast<runtime::TensorView>(rc);
}
std::shared_ptr<ngraph::runtime::TensorView> runtime::gpu::GPU_Backend::make_primary_tensor_view(
const ngraph::element::Type& element_type, const Shape& shape, void* memory_pointer)
{
auto rc = make_shared<runtime::gpu::GPU_TensorView>(element_type, shape, memory_pointer);
return dynamic_pointer_cast<runtime::TensorView>(rc);
}
@@ -36,6 +36,11 @@ namespace ngraph
std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape) override;
std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer) override;
};
}
}
......
@@ -27,27 +27,43 @@ using namespace ngraph;
using namespace std;
runtime::gpu::GPU_TensorView::GPU_TensorView(const ngraph::element::Type& element_type,
const Shape& shape)
const Shape& shape,
void* memory_pointer)
: runtime::TensorView(std::make_shared<ngraph::descriptor::PrimaryTensorView>(
std::make_shared<ngraph::TensorViewType>(element_type, shape),
"external",
true,
true,
false))
, m_custom_memory(false)
{
m_descriptor->set_tensor_view_layout(
std::make_shared<ngraph::descriptor::layout::DenseTensorViewLayout>(*m_descriptor));
m_buffer_size = shape_size(shape) * element_type.size();
if (m_buffer_size > 0)
if (memory_pointer != nullptr)
{
m_allocated_buffer_pool = memory_pointer;
m_custom_memory = true;
}
else if (m_buffer_size > 0)
{
cudaMalloc(static_cast<void**>(&m_allocated_buffer_pool), m_buffer_size);
}
}
runtime::gpu::GPU_TensorView::GPU_TensorView(const ngraph::element::Type& element_type,
const Shape& shape)
: GPU_TensorView(element_type, shape, nullptr)
{
}
runtime::gpu::GPU_TensorView::~GPU_TensorView()
{
cudaFree(m_allocated_buffer_pool);
if (!m_custom_memory)
{
cudaFree(m_allocated_buffer_pool);
}
}
void runtime::gpu::GPU_TensorView::write(const void* source, size_t tensor_offset, size_t n)
......
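On the GPU the caller's pointer is stored straight into m_allocated_buffer_pool, the same member the default path fills via cudaMalloc, so a custom pointer is presumably expected to be device memory; the m_custom_memory flag then keeps the destructor from cudaFree-ing a buffer the backend does not own. The new test skips the GPU backend, so the following is a hedged sketch rather than something this commit exercises:

#include <memory>
#include <cuda_runtime.h>
#include "ngraph/ngraph.hpp" // assumed umbrella header

void gpu_custom_memory_sketch(const std::shared_ptr<ngraph::runtime::Backend>& backend)
{
    ngraph::Shape shape{2, 2};
    void* device_buf = nullptr;
    cudaMalloc(&device_buf, 4 * sizeof(float)); // caller-owned device memory

    {
        auto tv = backend->make_primary_tensor_view(ngraph::element::f32, shape, device_buf);
        // ... run a call frame that reads or writes tv ...
    } // ~GPU_TensorView skips cudaFree because m_custom_memory is true

    cudaFree(device_buf); // the caller releases the device buffer
}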
@@ -37,6 +37,9 @@ class ngraph::runtime::gpu::GPU_TensorView : public ngraph::runtime::TensorView
{
public:
GPU_TensorView(const ngraph::element::Type& element_type, const Shape& shape);
GPU_TensorView(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer);
virtual ~GPU_TensorView();
/// @brief Write bytes directly into the tensor
@@ -53,4 +56,5 @@ public:
void* m_allocated_buffer_pool;
size_t m_buffer_size;
bool m_custom_memory;
};
@@ -26,6 +26,7 @@ using namespace std;
runtime::HostTensorView::HostTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const string& name)
: runtime::TensorView(std::make_shared<ngraph::descriptor::PrimaryTensorView>(
std::make_shared<ngraph::TensorViewType>(element_type, shape), name, true, true, false))
@@ -37,7 +38,12 @@ runtime::HostTensorView::HostTensorView(const ngraph::element::Type& element_typ
std::make_shared<ngraph::descriptor::layout::DenseTensorViewLayout>(*m_descriptor));
m_buffer_size = m_descriptor->get_tensor_view_layout()->get_size() * element_type.size();
if (m_buffer_size > 0)
if (memory_pointer != nullptr)
{
m_aligned_buffer_pool = static_cast<char*>(memory_pointer);
}
else if (m_buffer_size > 0)
{
size_t allocation_size = m_buffer_size + runtime::alignment;
m_allocated_buffer_pool = static_cast<char*>(malloc(allocation_size));
@@ -50,6 +56,13 @@ runtime::HostTensorView::HostTensorView(const ngraph::element::Type& element_typ
}
}
runtime::HostTensorView::HostTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
const string& name)
: HostTensorView(element_type, shape, nullptr, name)
{
}
runtime::HostTensorView::~HostTensorView()
{
if (m_allocated_buffer_pool != nullptr)
......
@@ -37,6 +37,10 @@ public:
HostTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
const std::string& name = "external");
HostTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const std::string& name = "external");
virtual ~HostTensorView() override;
char* get_data_ptr();
......
@@ -35,3 +35,10 @@ shared_ptr<runtime::TensorView>
auto rc = make_shared<runtime::HostTensorView>(element_type, shape, "external");
return static_pointer_cast<runtime::TensorView>(rc);
}
shared_ptr<runtime::TensorView> runtime::interpreter::INT_Backend::make_primary_tensor_view(
const element::Type& element_type, const Shape& shape, void* memory_pointer)
{
auto rc = make_shared<runtime::HostTensorView>(element_type, shape, memory_pointer, "external");
return static_pointer_cast<runtime::TensorView>(rc);
}
@@ -34,6 +34,11 @@ namespace ngraph
std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape) override;
std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer) override;
};
}
}
......
@@ -8582,3 +8582,37 @@ TEST(${BACKEND_NAME}, softmax_underflow)
expf(low) / d0, expf(1) / d1, expf(2) / d2, expf(3) / d0, expf(4) / d1, expf(5) / d2};
EXPECT_TRUE(test::all_close(expected, read_vector<float>(result)));
}
TEST(${BACKEND_NAME}, tensorview_custom_mem)
{
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
Shape shape{2, 2};
auto make_external = [&]() {
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Divide>(A, B), op::ParameterVector{A, B});
auto external = manager->compile(f);
return external;
};
auto cf = backend->make_call_frame(make_external());
vector<float> av{2, 4, 8, 16};
vector<float> bv{1, 2, 4, 8};
// use custom mem with tensorview, no need to copy data
auto a = backend->make_primary_tensor_view(element::f32, shape, av.data());
auto b = backend->make_primary_tensor_view(element::f32, shape, bv.data());
// use custom mem with result tensorview
vector<float> rv{0, 0, 0, 0};
auto result = backend->make_primary_tensor_view(element::f32, shape, rv.data());
// result should be in memory without needing explicit read
cf->call({result}, {a, b});
EXPECT_EQ((vector<float>{2, 2, 2, 2}), rv);
}
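For contrast, the pre-existing flow copies data into and out of backend-owned tensors with write() and read() (write's signature appears in the GPU hunk above; read is assumed to mirror it). The custom-memory overload removes both copies. A hedged sketch of the older pattern, using the same setup as the sketch near the top of this commit:

#include <memory>
#include <vector>
#include "ngraph/ngraph.hpp" // assumed umbrella header

using namespace ngraph;

void divide_with_backend_owned_tensors()
{
    Shape shape{2, 2};
    auto A = std::make_shared<op::Parameter>(element::f32, shape);
    auto B = std::make_shared<op::Parameter>(element::f32, shape);
    auto f = std::make_shared<Function>(std::make_shared<op::Divide>(A, B),
                                        op::ParameterVector{A, B});

    auto manager = runtime::Manager::get("INTERPRETER"); // backend name assumed
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(manager->compile(f));

    std::vector<float> av{2, 4, 8, 16};
    std::vector<float> bv{1, 2, 4, 8};
    std::vector<float> rv(4, 0);

    // Backend-owned tensors: data has to be copied in before the call...
    auto a = backend->make_primary_tensor_view(element::f32, shape);
    auto b = backend->make_primary_tensor_view(element::f32, shape);
    auto r = backend->make_primary_tensor_view(element::f32, shape);
    a->write(av.data(), 0, av.size() * sizeof(float));
    b->write(bv.data(), 0, bv.size() * sizeof(float));

    cf->call({r}, {a, b});

    // ...and copied back out afterwards (read assumed to mirror write).
    r->read(rv.data(), 0, rv.size() * sizeof(float));
}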