Commit 3f888583 authored by Adam Procter

Merge branch 'aprocter/dyn-slice' of github.com:NervanaSystems/ngraph into aprocter/dyn-slice

parents d1674497 6dc728e3
@@ -20,6 +20,7 @@
#include "cpu_tensor_view.hpp"
#include "ngraph/descriptor/layout/tensor_layout.hpp"
#include "ngraph/except.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_executor.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
@@ -153,3 +154,47 @@ void runtime::cpu::CPUTensorView::read(void* target, size_t n) const
memcpy(target, source, n);
}
}
void runtime::cpu::CPUTensorView::copy_from(const ngraph::runtime::Tensor& source)
{
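// copy_from handles three cases:
//   (1) source is a CPU tensor with an identical layout: direct memcpy;
//   (2) source is a CPU tensor with a different layout: read() pulls the
//       data out in native row-major order and the layout is reset to default;
//   (3) source is not a CPU tensor: the bytes are staged through an aligned
//       temporary buffer and routed through write().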
if (get_element_count() != source.get_element_count())
{
throw invalid_argument("runtime::cpu::CPUTensorView::copy_from element count must match");
}
if (get_element_type() != source.get_element_type())
{
throw invalid_argument("runtime::cpu::CPUTensorView::copy_from element types must match");
}
if (auto cpu_source = dynamic_cast<const runtime::cpu::CPUTensorView*>(&source))
{
auto this_tl =
dynamic_cast<ngraph::runtime::cpu::LayoutDescriptor*>(this->get_tensor_layout().get());
auto other_tl =
dynamic_cast<ngraph::runtime::cpu::LayoutDescriptor*>(source.get_tensor_layout().get());
if (this_tl != nullptr && other_tl != nullptr && *this_tl == *other_tl)
{
// Direct copy
memcpy(get_data_ptr(), cpu_source->get_data_ptr(), get_size_in_bytes());
}
else
{
// This will copy the data in native/row-major layout
source.read(get_data_ptr(), get_size_in_bytes());
// Set default layout
m_descriptor->set_tensor_layout(
std::make_shared<runtime::cpu::LayoutDescriptor>(*m_descriptor));
}
}
else
{
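// Source is not a CPU tensor: stage its bytes in a temporary buffer
// aligned for the CPU backend, then hand them to write()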
auto size = get_size_in_bytes();
AlignedBuffer tmp_buffer{size, static_cast<size_t>(BufferAlignment)};
source.read(tmp_buffer.get_ptr(), size);
write(tmp_buffer.get_ptr(), size);
// Set default layout
m_descriptor->set_tensor_layout(
std::make_shared<runtime::cpu::LayoutDescriptor>(*m_descriptor));
}
}
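A minimal usage sketch of the new copy_from path (it mirrors the tests added below; the backend names, two-argument write(), and shapes follow this commit's own API, but the snippet itself is illustrative and not part of the change):

auto cpu = runtime::Backend::create("CPU");
auto interp = runtime::Backend::create("INTERPRETER");
auto src = interp->create_tensor(element::f32, Shape{2, 3});
auto dst = cpu->create_tensor(element::f32, Shape{2, 3});
vector<float> values{1, 2, 3, 4, 5, 6};
src->write(values.data(), values.size() * sizeof(float)); // fill the source tensor
dst->copy_from(*src); // non-CPU source: staged through an AlignedBuffer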
@@ -18,6 +18,7 @@
#include <string>
#include "ngraph/runtime/cpu/cpu_backend_visibility.h"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/type/element_type.hpp"
@@ -33,14 +34,15 @@ namespace ngraph
class CPUTensorView : public ngraph::runtime::Tensor
{
public:
-CPUTensorView(const ngraph::element::Type& element_type, const Shape& shape);
-CPUTensorView(const ngraph::element::Type& element_type,
-              const Shape& shape,
-              void* memory_pointer);
+CPU_BACKEND_API CPUTensorView(const ngraph::element::Type& element_type,
+                              const Shape& shape);
+CPU_BACKEND_API CPUTensorView(const ngraph::element::Type& element_type,
+                              const Shape& shape,
+                              void* memory_pointer);
-virtual ~CPUTensorView() override;
+CPU_BACKEND_API virtual ~CPUTensorView() override;
-char* get_data_ptr();
-const char* get_data_ptr() const;
+CPU_BACKEND_API char* get_data_ptr();
+CPU_BACKEND_API const char* get_data_ptr() const;
/// \brief Write bytes directly into the tensor
/// \param p Pointer to source of data
@@ -52,6 +54,10 @@
/// \param n Number of bytes to read, must be integral number of elements.
void read(void* p, size_t n) const override;
/// \brief copy bytes directly from source to this tensor
/// \param source The source tensor
void copy_from(const ngraph::runtime::Tensor& source) override;
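/// \brief Alignment, in bytes, of the temporary buffer used by copy_from
/// when the source is not a CPU tensor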
static constexpr int BufferAlignment = NGRAPH_CPU_ALIGNMENT;
private:
@@ -38,6 +38,7 @@
#include "ngraph/pass/visualize_tree.hpp"
#include "ngraph/runtime/cpu/cpu_backend.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
@@ -1799,3 +1800,96 @@ TEST(cpu_test, scatter_add_1d_indices_no_in_place)
read_vector<float>(result),
MIN_FLOAT_TOLERANCE_BITS));
}
TEST(cpu_test, tensor_copy_from_interpreter_to_cpu)
{
// This tests copying data between a tensor backed by a CPUTensorView
// (CPU backend) and one that is not (INTERPRETER backend)
auto backend = runtime::Backend::create("CPU");
auto backend_ref = runtime::Backend::create("INTERPRETER");
auto a = backend_ref->create_tensor(element::f32, Shape{2, 3});
auto b = backend->create_tensor(element::f32, Shape{2, 3});
copy_data(a, vector<float>{1, 2, 3, 4, 5, 6});
b->copy_from(*a);
ASSERT_EQ(read_vector<float>(a), read_vector<float>(b));
}
TEST(cpu_test, tensor_copy_from_different_shape)
{
auto backend = runtime::Backend::create("CPU");
auto a = backend->create_tensor(element::f32, Shape{2, 3});
auto b = backend->create_tensor(element::f32, Shape{1, 3, 2});
copy_data(a, vector<float>{1, 2, 3, 4, 5, 6});
b->copy_from(*a);
ASSERT_EQ(read_vector<float>(a), read_vector<float>(b));
}
TEST(cpu_test, tensor_copy_from_same_native_layouts)
{
// This tests copying data between two tensors that share the same
// native (row-major) layout
auto backend = runtime::Backend::create("CPU");
auto a = backend->create_tensor(element::f32, Shape{2, 3});
auto b = backend->create_tensor(element::f32, Shape{2, 3});
copy_data(a, vector<float>{1, 2, 3, 4, 5, 6});
b->copy_from(*a);
ASSERT_EQ(read_vector<float>(a), read_vector<float>(b));
}
TEST(cpu_test, tensor_copy_from_same_rotated_layouts)
{
auto A = make_shared<op::Parameter>(element::u8, Shape{2, 3});
auto f1 = make_shared<Function>(make_shared<op::Reshape>(A, AxisVector{1, 0}, Shape{3, 2}),
ParameterVector{A});
auto B = make_shared<op::Parameter>(element::u8, Shape{2, 3});
auto f2 = make_shared<Function>(make_shared<op::Reshape>(B, AxisVector{1, 0}, Shape{3, 2}),
ParameterVector{B});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::u8, Shape{2, 3});
copy_data(a, vector<uint8_t>{1, 2, 3, 4, 5, 6});
auto result1 = backend->create_tensor(element::u8, Shape{3, 2});
backend->compile(f1)->call_with_validate({result1}, {a});
auto b = backend->create_tensor(element::u8, Shape{2, 3});
copy_data(b, vector<uint8_t>{1, 1, 1, 1, 1, 1});
auto result2 = backend->create_tensor(element::u8, Shape{3, 2});
backend->compile(f2)->call_with_validate({result2}, {b});
// Both result1 and result2 will be in rotated layouts at this point.
result2->copy_from(*result1);
// Check internal values in rotated layout
auto result2_internal_buffer = reinterpret_cast<uint8_t*>(
static_pointer_cast<runtime::cpu::CPUTensorView>(result2)->get_data_ptr());
vector<uint8_t> vec(result2_internal_buffer, result2_internal_buffer + 6);
// This check can be removed if the CPU backend stops optimizing reshapes using layout transformations
EXPECT_EQ((vector<uint8_t>{1, 2, 3, 4, 5, 6}), vec);
// Check native layout
EXPECT_EQ((vector<uint8_t>{1, 4, 2, 5, 3, 6}), read_vector<uint8_t>(result2));
}
TEST(cpu_test, tensor_copy_from_different_layout)
{
auto A = make_shared<op::Parameter>(element::u8, Shape{2, 3});
auto f = make_shared<Function>(make_shared<op::Reshape>(A, AxisVector{1, 0}, Shape{3, 2}),
ParameterVector{A});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::u8, Shape{2, 3});
copy_data(a, vector<uint8_t>{1, 2, 3, 4, 5, 6});
auto result = backend->create_tensor(element::u8, Shape{3, 2});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a});
auto b = backend->create_tensor(element::u8, Shape{3, 2});
b->copy_from(*result);
EXPECT_EQ((vector<uint8_t>{1, 4, 2, 5, 3, 6}), read_vector<uint8_t>(b));
}