Commit 1e6afbac authored by Fenglei, committed by Scott Cyphers

nvgpu tensor copy_from (#2624)

* add copy_from on GPUTensor

* add test

* remove unused code in test

* fix warning

* fix type

* fix comment text

* using all_close for test
parent a1957ca7
@@ -72,12 +72,37 @@ runtime::gpu::GPUTensor::~GPUTensor()
     }
 }
 
-void runtime::gpu::GPUTensor::write(const void* source, size_t tensor_offset, size_t n)
+void runtime::gpu::GPUTensor::write(const void* source, size_t tensor_offset, size_t n_bytes)
 {
-    CUDA_RT_SAFE_CALL(cudaMemcpy(m_allocated_buffer_pool, source, n, cudaMemcpyHostToDevice));
+    runtime::gpu::cuda_memcpyHtD(m_allocated_buffer_pool, source, n_bytes);
 }
 
-void runtime::gpu::GPUTensor::read(void* target, size_t tensor_offset, size_t n) const
+void runtime::gpu::GPUTensor::read(void* target, size_t tensor_offset, size_t n_bytes) const
 {
-    CUDA_RT_SAFE_CALL(cudaMemcpy(target, m_allocated_buffer_pool, n, cudaMemcpyDeviceToHost));
+    runtime::gpu::cuda_memcpyDtH(target, m_allocated_buffer_pool, n_bytes);
 }
+
+void runtime::gpu::GPUTensor::copy_from(const runtime::Tensor& source)
+{
+    try
+    {
+        const GPUTensor& src = dynamic_cast<const GPUTensor&>(source);
+        if (get_element_count() != src.get_element_count())
+        {
+            throw invalid_argument("runtime::gpu::GPUTensor::copy_from element count must match.");
+        }
+        if (get_element_type() != src.get_element_type())
+        {
+            throw invalid_argument("runtime::gpu::GPUTensor::copy_from element types must match.");
+        }
+        runtime::gpu::cuda_memcpyDtD(
+            m_allocated_buffer_pool, src.m_allocated_buffer_pool, source.get_size_in_bytes());
+    }
+    catch (const std::bad_cast& e)
+    {
+        throw invalid_argument(
+            "runtime::gpu::GPUTensor::copy_from source must be a GPUTensor. ErrMsg:\n" +
+            std::string(e.what()));
+    }
+}
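The rewritten write/read paths and the new copy_from delegate to the runtime::gpu::cuda_memcpyHtD / cuda_memcpyDtH / cuda_memcpyDtD helpers rather than calling cudaMemcpy inline. Those helpers live elsewhere in the GPU runtime and are not part of this diff; a minimal sketch, assuming they are thin, error-checked cudaMemcpy wrappers with the (dst, src, n_bytes) signature implied by the call sites above, might look like this:

// Sketch only: assumed shape of the memcpy helpers called above, inferred from
// their (destination, source, byte-count) call sites. CUDA_RT_SAFE_CALL is the
// error-checking macro already used in this file.
#include <cstddef>
#include <cuda_runtime.h>

namespace runtime
{
    namespace gpu
    {
        inline void cuda_memcpyHtD(void* dst, const void* src, size_t n_bytes)
        {
            CUDA_RT_SAFE_CALL(cudaMemcpy(dst, src, n_bytes, cudaMemcpyHostToDevice));
        }

        inline void cuda_memcpyDtH(void* dst, const void* src, size_t n_bytes)
        {
            CUDA_RT_SAFE_CALL(cudaMemcpy(dst, src, n_bytes, cudaMemcpyDeviceToHost));
        }

        inline void cuda_memcpyDtD(void* dst, const void* src, size_t n_bytes)
        {
            CUDA_RT_SAFE_CALL(cudaMemcpy(dst, src, n_bytes, cudaMemcpyDeviceToDevice));
        }
    }
}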
@@ -46,14 +46,18 @@ public:
     /// \brief Write bytes directly into the tensor
     /// \param p Pointer to source of data
     /// \param tensor_offset Offset into tensor storage to begin writing. Must be element-aligned.
-    /// \param n Number of bytes to write, must be integral number of elements.
-    void write(const void* p, size_t tensor_offset, size_t n) override;
+    /// \param n_bytes Number of bytes to write, must be integral number of elements.
+    void write(const void* p, size_t tensor_offset, size_t n_bytes) override;
 
     /// \brief Read bytes directly from the tensor
     /// \param p Pointer to destination for data
     /// \param tensor_offset Offset into tensor storage to begin reading. Must be element-aligned.
-    /// \param n Number of bytes to read, must be integral number of elements.
-    void read(void* p, size_t tensor_offset, size_t n) const override;
+    /// \param n_bytes Number of bytes to read, must be integral number of elements.
+    void read(void* p, size_t tensor_offset, size_t n_bytes) const override;
+
+    /// \brief Copy directly from another GPU tensor
+    /// \param source Another GPU tensor
+    void copy_from(const runtime::Tensor& source) override;
 
     void* m_allocated_buffer_pool = nullptr;
     size_t m_buffer_size;
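For illustration, the byte-oriented write/read entry points documented above are driven from host buffers. A minimal host-side sketch follows; the "GPU" backend name, the umbrella include, and the f32 2x2 shape are assumptions for the example, not part of this diff:

// Illustrative only: round-trips a small buffer through a GPU tensor using the
// byte-count overloads documented above (n_bytes must cover whole elements).
#include <vector>
#include "ngraph/ngraph.hpp"

void roundtrip_example()
{
    auto backend = ngraph::runtime::Backend::create("GPU");
    auto tensor = backend->create_tensor(ngraph::element::f32, ngraph::Shape{2, 2});

    std::vector<float> host_in = {1, 2, 3, 4};
    std::vector<float> host_out(4);

    tensor->write(host_in.data(), 0, host_in.size() * sizeof(float));  // host -> device
    tensor->read(host_out.data(), 0, host_out.size() * sizeof(float)); // device -> host
}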
@@ -76,3 +76,22 @@ NGRAPH_TEST(${BACKEND_NAME}, create_tensor_2)
     vector<float> expected = {6, 8, 10, 12};
     EXPECT_EQ(read_vector<float>(result), expected);
 }
+
+// This tests a backend's implementation of copy_from for tensors
+NGRAPH_TEST(${BACKEND_NAME}, tensor_copy_from)
+{
+    Shape shape{2, 2};
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Create some tensors for input/output
+    vector<float> av = {1, 2, 3, 4};
+    vector<float> bv = {5, 6, 7, 8};
+    shared_ptr<runtime::Tensor> a = backend->create_tensor(element::f32, shape);
+    shared_ptr<runtime::Tensor> b = backend->create_tensor(element::f32, shape);
+    copy_data(a, av);
+    copy_data(b, bv);
+
+    a->copy_from(*b);
+    EXPECT_TRUE(test::all_close(bv, read_vector<float>(a), 0.0f, 0.0f));
+}
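The test above exercises only the matching-shape path. Since the GPU copy_from added in this commit throws invalid_argument on element-count or element-type mismatches, a hypothetical negative test (not part of this commit, and specific to backends that perform those checks) could look like:

// Hypothetical follow-up test, not part of this commit: expects copy_from to
// reject tensors whose element counts differ.
NGRAPH_TEST(${BACKEND_NAME}, tensor_copy_from_mismatched_count)
{
    auto backend = runtime::Backend::create("${BACKEND_NAME}");
    auto a = backend->create_tensor(element::f32, Shape{2, 2});
    auto b = backend->create_tensor(element::f32, Shape{2, 3});

    EXPECT_THROW(a->copy_from(*b), std::invalid_argument);
}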