Commit e189f9c6 authored by Ashok Emani's avatar Ashok Emani Committed by Nick Korovaiko

enable TensorView to use pre-allocated mem (#795)

* enable TensorView to use pre-allocated mem

* proper check for nullptr

* add unittest for custom mem with tensorview and feedback

* minor fix from feedback

* support GPU TensorView custom mem

* feedback fix and code format
parent 913855ec
...@@ -52,6 +52,12 @@ namespace ngraph ...@@ -52,6 +52,12 @@ namespace ngraph
make_primary_tensor_view(const ngraph::element::Type& element_type, make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape) = 0; const Shape& shape) = 0;
/// @brief Return a handle for a tensor for given mem on backend device
virtual std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer) = 0;
template <typename T> template <typename T>
std::shared_ptr<ngraph::runtime::TensorView> std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const Shape& shape) make_primary_tensor_view(const Shape& shape)
......
...@@ -35,3 +35,10 @@ std::shared_ptr<ngraph::runtime::TensorView> ...@@ -35,3 +35,10 @@ std::shared_ptr<ngraph::runtime::TensorView>
auto rc = make_shared<runtime::cpu::CPUTensorView>(element_type, shape); auto rc = make_shared<runtime::cpu::CPUTensorView>(element_type, shape);
return dynamic_pointer_cast<runtime::TensorView>(rc); return dynamic_pointer_cast<runtime::TensorView>(rc);
} }
std::shared_ptr<ngraph::runtime::TensorView> runtime::cpu::CPU_Backend::make_primary_tensor_view(
const ngraph::element::Type& element_type, const Shape& shape, void* memory_pointer)
{
auto rc = make_shared<runtime::cpu::CPUTensorView>(element_type, shape, memory_pointer);
return dynamic_pointer_cast<runtime::TensorView>(rc);
}
...@@ -34,6 +34,11 @@ namespace ngraph ...@@ -34,6 +34,11 @@ namespace ngraph
std::shared_ptr<ngraph::runtime::TensorView> std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type, make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape) override; const Shape& shape) override;
std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer) override;
}; };
} }
} }
......
...@@ -36,6 +36,7 @@ const size_t runtime::cpu::CPUTensorView::BufferAlignment = 64; ...@@ -36,6 +36,7 @@ const size_t runtime::cpu::CPUTensorView::BufferAlignment = 64;
runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_type, runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape, const Shape& shape,
void* memory_pointer,
const string& name) const string& name)
: runtime::TensorView(std::make_shared<ngraph::descriptor::PrimaryTensorView>( : runtime::TensorView(std::make_shared<ngraph::descriptor::PrimaryTensorView>(
std::make_shared<ngraph::TensorViewType>(element_type, shape), name, true, true, false)) std::make_shared<ngraph::TensorViewType>(element_type, shape), name, true, true, false))
...@@ -49,7 +50,12 @@ runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_ ...@@ -49,7 +50,12 @@ runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_
*m_descriptor, runtime::cpu::LayoutDescriptor::create_native_axis_order(shape.size()))); *m_descriptor, runtime::cpu::LayoutDescriptor::create_native_axis_order(shape.size())));
buffer_size = shape_size(shape) * element_type.size(); buffer_size = shape_size(shape) * element_type.size();
if (buffer_size)
if (memory_pointer != nullptr)
{
aligned_buffer = static_cast<char*>(memory_pointer);
}
else if (buffer_size > 0)
{ {
size_t allocation_size = buffer_size + BufferAlignment; size_t allocation_size = buffer_size + BufferAlignment;
auto ptr = malloc(allocation_size); auto ptr = malloc(allocation_size);
...@@ -72,6 +78,13 @@ runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_ ...@@ -72,6 +78,13 @@ runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_
} }
} }
runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
const string& name)
: CPUTensorView(element_type, shape, nullptr, name)
{
}
runtime::cpu::CPUTensorView::~CPUTensorView() runtime::cpu::CPUTensorView::~CPUTensorView()
{ {
free(buffer); free(buffer);
......
...@@ -33,6 +33,10 @@ namespace ngraph ...@@ -33,6 +33,10 @@ namespace ngraph
CPUTensorView(const ngraph::element::Type& element_type, CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape, const Shape& shape,
const std::string& name = "external"); const std::string& name = "external");
CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const std::string& name = "external");
virtual ~CPUTensorView() override; virtual ~CPUTensorView() override;
char* get_data_ptr(); char* get_data_ptr();
......
...@@ -34,3 +34,10 @@ std::shared_ptr<ngraph::runtime::TensorView> ...@@ -34,3 +34,10 @@ std::shared_ptr<ngraph::runtime::TensorView>
auto rc = make_shared<runtime::gpu::GPU_TensorView>(element_type, shape); auto rc = make_shared<runtime::gpu::GPU_TensorView>(element_type, shape);
return dynamic_pointer_cast<runtime::TensorView>(rc); return dynamic_pointer_cast<runtime::TensorView>(rc);
} }
std::shared_ptr<ngraph::runtime::TensorView> runtime::gpu::GPU_Backend::make_primary_tensor_view(
const ngraph::element::Type& element_type, const Shape& shape, void* memory_pointer)
{
auto rc = make_shared<runtime::gpu::GPU_TensorView>(element_type, shape, memory_pointer);
return dynamic_pointer_cast<runtime::TensorView>(rc);
}
...@@ -36,6 +36,11 @@ namespace ngraph ...@@ -36,6 +36,11 @@ namespace ngraph
std::shared_ptr<ngraph::runtime::TensorView> std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type, make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape) override; const Shape& shape) override;
std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer) override;
}; };
} }
} }
......
...@@ -27,27 +27,43 @@ using namespace ngraph; ...@@ -27,27 +27,43 @@ using namespace ngraph;
using namespace std; using namespace std;
runtime::gpu::GPU_TensorView::GPU_TensorView(const ngraph::element::Type& element_type, runtime::gpu::GPU_TensorView::GPU_TensorView(const ngraph::element::Type& element_type,
const Shape& shape) const Shape& shape,
void* memory_pointer)
: runtime::TensorView(std::make_shared<ngraph::descriptor::PrimaryTensorView>( : runtime::TensorView(std::make_shared<ngraph::descriptor::PrimaryTensorView>(
std::make_shared<ngraph::TensorViewType>(element_type, shape), std::make_shared<ngraph::TensorViewType>(element_type, shape),
"external", "external",
true, true,
true, true,
false)) false))
, m_custom_memory(false)
{ {
m_descriptor->set_tensor_view_layout( m_descriptor->set_tensor_view_layout(
std::make_shared<ngraph::descriptor::layout::DenseTensorViewLayout>(*m_descriptor)); std::make_shared<ngraph::descriptor::layout::DenseTensorViewLayout>(*m_descriptor));
m_buffer_size = shape_size(shape) * element_type.size(); m_buffer_size = shape_size(shape) * element_type.size();
if (m_buffer_size > 0) if (memory_pointer != nullptr)
{
m_allocated_buffer_pool = memory_pointer;
m_custom_memory = true;
}
else if (m_buffer_size > 0)
{ {
cudaMalloc(static_cast<void**>(&m_allocated_buffer_pool), m_buffer_size); cudaMalloc(static_cast<void**>(&m_allocated_buffer_pool), m_buffer_size);
} }
} }
runtime::gpu::GPU_TensorView::GPU_TensorView(const ngraph::element::Type& element_type,
const Shape& shape)
: GPU_TensorView(element_type, shape, nullptr)
{
}
runtime::gpu::GPU_TensorView::~GPU_TensorView() runtime::gpu::GPU_TensorView::~GPU_TensorView()
{ {
if (!m_custom_memory)
{
cudaFree(m_allocated_buffer_pool); cudaFree(m_allocated_buffer_pool);
}
} }
void runtime::gpu::GPU_TensorView::write(const void* source, size_t tensor_offset, size_t n) void runtime::gpu::GPU_TensorView::write(const void* source, size_t tensor_offset, size_t n)
......
...@@ -37,6 +37,9 @@ class ngraph::runtime::gpu::GPU_TensorView : public ngraph::runtime::TensorView ...@@ -37,6 +37,9 @@ class ngraph::runtime::gpu::GPU_TensorView : public ngraph::runtime::TensorView
{ {
public: public:
GPU_TensorView(const ngraph::element::Type& element_type, const Shape& shape); GPU_TensorView(const ngraph::element::Type& element_type, const Shape& shape);
GPU_TensorView(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer);
virtual ~GPU_TensorView(); virtual ~GPU_TensorView();
/// @brief Write bytes directly into the tensor /// @brief Write bytes directly into the tensor
...@@ -53,4 +56,5 @@ public: ...@@ -53,4 +56,5 @@ public:
void* m_allocated_buffer_pool; void* m_allocated_buffer_pool;
size_t m_buffer_size; size_t m_buffer_size;
bool m_custom_memory;
}; };
...@@ -26,6 +26,7 @@ using namespace std; ...@@ -26,6 +26,7 @@ using namespace std;
runtime::HostTensorView::HostTensorView(const ngraph::element::Type& element_type, runtime::HostTensorView::HostTensorView(const ngraph::element::Type& element_type,
const Shape& shape, const Shape& shape,
void* memory_pointer,
const string& name) const string& name)
: runtime::TensorView(std::make_shared<ngraph::descriptor::PrimaryTensorView>( : runtime::TensorView(std::make_shared<ngraph::descriptor::PrimaryTensorView>(
std::make_shared<ngraph::TensorViewType>(element_type, shape), name, true, true, false)) std::make_shared<ngraph::TensorViewType>(element_type, shape), name, true, true, false))
...@@ -37,7 +38,12 @@ runtime::HostTensorView::HostTensorView(const ngraph::element::Type& element_typ ...@@ -37,7 +38,12 @@ runtime::HostTensorView::HostTensorView(const ngraph::element::Type& element_typ
std::make_shared<ngraph::descriptor::layout::DenseTensorViewLayout>(*m_descriptor)); std::make_shared<ngraph::descriptor::layout::DenseTensorViewLayout>(*m_descriptor));
m_buffer_size = m_descriptor->get_tensor_view_layout()->get_size() * element_type.size(); m_buffer_size = m_descriptor->get_tensor_view_layout()->get_size() * element_type.size();
if (m_buffer_size > 0)
if (memory_pointer != nullptr)
{
m_aligned_buffer_pool = static_cast<char*>(memory_pointer);
}
else if (m_buffer_size > 0)
{ {
size_t allocation_size = m_buffer_size + runtime::alignment; size_t allocation_size = m_buffer_size + runtime::alignment;
m_allocated_buffer_pool = static_cast<char*>(malloc(allocation_size)); m_allocated_buffer_pool = static_cast<char*>(malloc(allocation_size));
...@@ -50,6 +56,13 @@ runtime::HostTensorView::HostTensorView(const ngraph::element::Type& element_typ ...@@ -50,6 +56,13 @@ runtime::HostTensorView::HostTensorView(const ngraph::element::Type& element_typ
} }
} }
runtime::HostTensorView::HostTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
const string& name)
: HostTensorView(element_type, shape, nullptr, name)
{
}
runtime::HostTensorView::~HostTensorView() runtime::HostTensorView::~HostTensorView()
{ {
if (m_allocated_buffer_pool != nullptr) if (m_allocated_buffer_pool != nullptr)
......
...@@ -37,6 +37,10 @@ public: ...@@ -37,6 +37,10 @@ public:
HostTensorView(const ngraph::element::Type& element_type, HostTensorView(const ngraph::element::Type& element_type,
const Shape& shape, const Shape& shape,
const std::string& name = "external"); const std::string& name = "external");
HostTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer,
const std::string& name = "external");
virtual ~HostTensorView() override; virtual ~HostTensorView() override;
char* get_data_ptr(); char* get_data_ptr();
......
...@@ -35,3 +35,10 @@ shared_ptr<runtime::TensorView> ...@@ -35,3 +35,10 @@ shared_ptr<runtime::TensorView>
auto rc = make_shared<runtime::HostTensorView>(element_type, shape, "external"); auto rc = make_shared<runtime::HostTensorView>(element_type, shape, "external");
return static_pointer_cast<runtime::TensorView>(rc); return static_pointer_cast<runtime::TensorView>(rc);
} }
shared_ptr<runtime::TensorView> runtime::interpreter::INT_Backend::make_primary_tensor_view(
const element::Type& element_type, const Shape& shape, void* memory_pointer)
{
auto rc = make_shared<runtime::HostTensorView>(element_type, shape, memory_pointer, "external");
return static_pointer_cast<runtime::TensorView>(rc);
}
...@@ -34,6 +34,11 @@ namespace ngraph ...@@ -34,6 +34,11 @@ namespace ngraph
std::shared_ptr<ngraph::runtime::TensorView> std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type, make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape) override; const Shape& shape) override;
std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer) override;
}; };
} }
} }
......
...@@ -8582,3 +8582,37 @@ TEST(${BACKEND_NAME}, softmax_underflow) ...@@ -8582,3 +8582,37 @@ TEST(${BACKEND_NAME}, softmax_underflow)
expf(low) / d0, expf(1) / d1, expf(2) / d2, expf(3) / d0, expf(4) / d1, expf(5) / d2}; expf(low) / d0, expf(1) / d1, expf(2) / d2, expf(3) / d0, expf(4) / d1, expf(5) / d2};
EXPECT_TRUE(test::all_close(expected, read_vector<float>(result))); EXPECT_TRUE(test::all_close(expected, read_vector<float>(result)));
} }
TEST(${BACKEND_NAME}, tensorview_custom_mem)
{
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
Shape shape{2, 2};
auto make_external = [&]() {
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Divide>(A, B), op::ParameterVector{A, B});
auto external = manager->compile(f);
return external;
};
auto cf = backend->make_call_frame(make_external());
vector<float> av{2, 4, 8, 16};
vector<float> bv{1, 2, 4, 8};
// use custom mem with tensorview, no need to copy data
auto a = backend->make_primary_tensor_view(element::f32, shape, av.data());
auto b = backend->make_primary_tensor_view(element::f32, shape, bv.data());
// use custom mem with result tensorview
vector<float> rv{0, 0, 0, 0};
auto result = backend->make_primary_tensor_view(element::f32, shape, rv.data());
// result should be in memory without needing explict read
cf->call({result}, {a, b});
EXPECT_EQ((vector<float>{2, 2, 2, 2}), rv);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment