Commit 1e6afbac authored by Fenglei, committed by Scott Cyphers

nvgpu tensor copy_from (#2624)

* add copy_from on GPUTensor

* add test

* remove unused code in test

* fix warning

* fix type

* fix comment text

* using all_close for test
parent a1957ca7
@@ -72,12 +72,37 @@ runtime::gpu::GPUTensor::~GPUTensor()
     }
 }
 
-void runtime::gpu::GPUTensor::write(const void* source, size_t tensor_offset, size_t n)
+void runtime::gpu::GPUTensor::write(const void* source, size_t tensor_offset, size_t n_bytes)
 {
-    CUDA_RT_SAFE_CALL(cudaMemcpy(m_allocated_buffer_pool, source, n, cudaMemcpyHostToDevice));
+    runtime::gpu::cuda_memcpyHtD(m_allocated_buffer_pool, source, n_bytes);
 }
 
-void runtime::gpu::GPUTensor::read(void* target, size_t tensor_offset, size_t n) const
+void runtime::gpu::GPUTensor::read(void* target, size_t tensor_offset, size_t n_bytes) const
 {
-    CUDA_RT_SAFE_CALL(cudaMemcpy(target, m_allocated_buffer_pool, n, cudaMemcpyDeviceToHost));
+    runtime::gpu::cuda_memcpyDtH(target, m_allocated_buffer_pool, n_bytes);
 }
+
+void runtime::gpu::GPUTensor::copy_from(const runtime::Tensor& source)
+{
+    try
+    {
+        const GPUTensor& src = dynamic_cast<const GPUTensor&>(source);
+        if (get_element_count() != src.get_element_count())
+        {
+            throw invalid_argument("runtime::gpu::GPUTensor::copy_from element count must match.");
+        }
+        if (get_element_type() != src.get_element_type())
+        {
+            throw invalid_argument("runtime::gpu::GPUTensor::copy_from element types must match.");
+        }
+        runtime::gpu::cuda_memcpyDtD(
+            m_allocated_buffer_pool, src.m_allocated_buffer_pool, source.get_size_in_bytes());
+    }
+    catch (const std::bad_cast& e)
+    {
+        throw invalid_argument(
+            "runtime::gpu::GPUTensor::copy_from source must be a GPUTensor. ErrMsg:\n" +
+            std::string(e.what()));
+    }
+}
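The rewritten write/read paths and the new copy_from delegate to the runtime::gpu::cuda_memcpyHtD / cuda_memcpyDtH / cuda_memcpyDtD helpers rather than calling cudaMemcpy inline. Those helpers live elsewhere in the GPU runtime and are not part of this diff; a minimal sketch, assuming they are thin, error-checked cudaMemcpy wrappers with the (dst, src, n_bytes) signature implied by the call sites above, might look like this:

// Sketch only: assumed shape of the memcpy helpers called above, inferred from
// their (destination, source, byte-count) call sites. CUDA_RT_SAFE_CALL is the
// error-checking macro already used in this file.
#include <cstddef>
#include <cuda_runtime.h>

namespace runtime
{
    namespace gpu
    {
        inline void cuda_memcpyHtD(void* dst, const void* src, size_t n_bytes)
        {
            CUDA_RT_SAFE_CALL(cudaMemcpy(dst, src, n_bytes, cudaMemcpyHostToDevice));
        }

        inline void cuda_memcpyDtH(void* dst, const void* src, size_t n_bytes)
        {
            CUDA_RT_SAFE_CALL(cudaMemcpy(dst, src, n_bytes, cudaMemcpyDeviceToHost));
        }

        inline void cuda_memcpyDtD(void* dst, const void* src, size_t n_bytes)
        {
            CUDA_RT_SAFE_CALL(cudaMemcpy(dst, src, n_bytes, cudaMemcpyDeviceToDevice));
        }
    }
}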
@@ -46,14 +46,18 @@ public:
     /// \brief Write bytes directly into the tensor
     /// \param p Pointer to source of data
     /// \param tensor_offset Offset into tensor storage to begin writing. Must be element-aligned.
-    /// \param n Number of bytes to write, must be integral number of elements.
-    void write(const void* p, size_t tensor_offset, size_t n) override;
+    /// \param n_bytes Number of bytes to write, must be integral number of elements.
+    void write(const void* p, size_t tensor_offset, size_t n_bytes) override;
 
     /// \brief Read bytes directly from the tensor
     /// \param p Pointer to destination for data
     /// \param tensor_offset Offset into tensor storage to begin reading. Must be element-aligned.
-    /// \param n Number of bytes to read, must be integral number of elements.
-    void read(void* p, size_t tensor_offset, size_t n) const override;
+    /// \param n_bytes Number of bytes to read, must be integral number of elements.
+    void read(void* p, size_t tensor_offset, size_t n_bytes) const override;
+
+    /// \brief Copy directly from another GPU tensor
+    /// \param source Another GPU tensor
+    void copy_from(const runtime::Tensor& source) override;
 
     void* m_allocated_buffer_pool = nullptr;
     size_t m_buffer_size;
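For illustration, the byte-oriented write/read entry points documented above are driven from host buffers. A minimal host-side sketch follows; the "GPU" backend name, the umbrella include, and the f32 2x2 shape are assumptions for the example, not part of this diff:

// Illustrative only: round-trips a small buffer through a GPU tensor using the
// byte-count overloads documented above (n_bytes must cover whole elements).
#include <vector>
#include "ngraph/ngraph.hpp"

void roundtrip_example()
{
    auto backend = ngraph::runtime::Backend::create("GPU");
    auto tensor = backend->create_tensor(ngraph::element::f32, ngraph::Shape{2, 2});

    std::vector<float> host_in = {1, 2, 3, 4};
    std::vector<float> host_out(4);

    tensor->write(host_in.data(), 0, host_in.size() * sizeof(float));  // host -> device
    tensor->read(host_out.data(), 0, host_out.size() * sizeof(float)); // device -> host
}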
@@ -76,3 +76,22 @@ NGRAPH_TEST(${BACKEND_NAME}, create_tensor_2)
     vector<float> expected = {6, 8, 10, 12};
     EXPECT_EQ(read_vector<float>(result), expected);
 }
+
+// This tests a backend's implementation of copy_from for tensors
+NGRAPH_TEST(${BACKEND_NAME}, tensor_copy_from)
+{
+    Shape shape{2, 2};
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Create some tensors for input/output
+    vector<float> av = {1, 2, 3, 4};
+    vector<float> bv = {5, 6, 7, 8};
+    shared_ptr<runtime::Tensor> a = backend->create_tensor(element::f32, shape);
+    shared_ptr<runtime::Tensor> b = backend->create_tensor(element::f32, shape);
+    copy_data(a, av);
+    copy_data(b, bv);
+
+    a->copy_from(*b);
+    EXPECT_TRUE(test::all_close(bv, read_vector<float>(a), 0.0f, 0.0f));
+}
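The test above exercises only the matching-shape path. Since the GPU copy_from added in this commit throws invalid_argument on element-count or element-type mismatches, a hypothetical negative test (not part of this commit, and specific to backends that perform those checks) could look like:

// Hypothetical follow-up test, not part of this commit: expects copy_from to
// reject tensors whose element counts differ.
NGRAPH_TEST(${BACKEND_NAME}, tensor_copy_from_mismatched_count)
{
    auto backend = runtime::Backend::create("${BACKEND_NAME}");
    auto a = backend->create_tensor(element::f32, Shape{2, 2});
    auto b = backend->create_tensor(element::f32, Shape{2, 3});

    EXPECT_THROW(a->copy_from(*b), std::invalid_argument);
}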