Commit 397740fe authored by Rob Earhart, committed by Scott Cyphers

Update PlaidML backend for current nGraph (#3030)

* Rename PlaidML_Executable::save -> save_as_format

* Repair regression in PlaidML tensor impl

This was caused by the recent removal of the offset parameter for tensor read/write operations -- we missed a
spot where read/write were being called for synchronization purposes.
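As a rough sketch of the repaired call sites (this mirrors the plaidml_tensor.cpp hunk further down; only the two-argument read/write calls changed):

    // Sketch of the repaired synchronization paths; member and method names
    // are taken from the plaidml_tensor.cpp hunk below.
    void ngraph::runtime::plaidml::PlaidML_Tensor::sync_input()
    {
        // Tensor::write no longer takes an offset, so the whole buffer is
        // written starting at the beginning of the tensor.
        write(m_memory, m_memory_size);
    }

    void ngraph::runtime::plaidml::PlaidML_Tensor::sync_output()
    {
        // Likewise for reading results back from the device.
        read(m_memory, m_memory_size);
    }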

* Disable a few more PlaidML tests pending triage

* Skip elision of reshape->reshape

It turns out this doesn't work, because the downstream reshape's input_order axis vector is incorrect if the
upstream reshape is removed.
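A hypothetical example of the problem (shapes, axis orders, and node names are invented for illustration; they are not from this change):

    // Why reshape->reshape elision is unsafe: the downstream reshape's
    // input_order is expressed in terms of the upstream reshape's output axes.
    #include "ngraph/ngraph.hpp"
    using namespace ngraph;

    auto a  = std::make_shared<op::Parameter>(element::f32, Shape{2, 3});
    // Upstream reshape: prefix the shape with a 1 -> {1, 2, 3}.
    auto r1 = std::make_shared<op::Reshape>(a, AxisVector{0, 1}, Shape{1, 2, 3});
    // Downstream reshape: its input_order {0, 2, 1} names r1's three axes.
    auto r2 = std::make_shared<op::Reshape>(r1, AxisVector{0, 2, 1}, Shape{1, 3, 2});
    // If r1 were elided, r2 would read directly from the two-axis tensor `a`,
    // and its three-entry input_order would no longer describe its input.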

* Add element type to PlaidML tensor debug output

* Use nGraph booleans for PlaidML boolean data

We'd previously been using i8; that's been deprecated for boolean data now that we have an explicit boolean
element type.
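In the explicit-logicals pass (see the explicit_logicals hunk below), the output type declared for the boolean-producing Tile call changes accordingly; roughly (the `producer` node comes from that hunk):

    // Sketch: boolean-valued intermediates now declare element::boolean
    // rather than element::i8 as their output element type.
    std::vector<std::tuple<ngraph::element::Type, ngraph::PartialShape>> output_types{
        std::make_tuple(ngraph::element::boolean, // was element::i8
                        ngraph::PartialShape{producer->get_output_shape(0)})};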

* Set PlaidML convolution output shapes correctly

We weren't transposing the output shape; we were computing the right data, but the incorrect shape metadata
caused validation to fail.
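The fix (in the plaidml_ops_convolution.cpp hunks below) permutes the source shape through the op's output-axis order. A worked example, with the axis order and dimensions assumed for illustration:

    // Suppose m_output_axes = {0, 3, 1, 2} (data computed as NHWC but meant to
    // be reported as NCHW) and the source output shape is {1, 224, 224, 64}.
    //
    //   out_shape[idx] = src_shape[m_output_axes[idx]]
    //   => out_shape   = {1, 64, 224, 224}
    //
    // Previously the untransposed {1, 224, 224, 64} was reported, so nGraph's
    // validation rejected downstream ops even though the computed data was right.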

* Add a PlaidML implicit broadcast op

Better nGraph shape validation was tripping up PlaidML's use of a reshape to replace explicit broadcasts with
implicit NumPy-style broadcasts (since the reshape's output shape would be incorrect for the downstream
elementwise operation).  Adding this implicit broadcast operation lets PlaidML tell nGraph something useful
about the shapes, making validation pass (when it's otherwise correct).
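A rough sketch of the graph produced by the reworked ImplicitBroadcast pass (shapes invented for illustration):

    // Broadcasting a {3} vector up to {2, 3} for an elementwise Add becomes:
    //
    //   src {3}
    //     -> Reshape(input_order={0}, output_shape={1, 3})  // 1s on broadcast axes
    //     -> ImplicitBroadcast(shape={2, 3})                // passthrough; reports {2, 3}
    //     -> Add
    //
    // ImplicitBroadcast forwards its input unchanged at execution time but
    // reports the broadcast shape from validate_and_infer_types(), so the
    // downstream elementwise op validates while PlaidML still emits a
    // NumPy-style implicit broadcast.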
parent f0552cc8
......@@ -71,8 +71,7 @@ pass::PrefixReshapeElimination::PrefixReshapeElimination()
element::i8,
Shape{},
[](shared_ptr<Node> node) {
return pattern::has_class<op::Reshape>()(node) ||
pattern::has_class<op::util::UnaryElementwiseArithmetic>()(node) ||
return pattern::has_class<op::util::UnaryElementwiseArithmetic>()(node) ||
pattern::has_class<op::util::BinaryElementwiseArithmetic>()(node);
},
NodeVector{reshape_op});
......
......@@ -33,6 +33,7 @@ set(SRC
plaidml_ops_convolution.cpp
plaidml_ops_dot.cpp
plaidml_ops_general.cpp
plaidml_ops_implicit_broadcast.cpp
plaidml_ops_index_reduction.cpp
plaidml_ops_io.cpp
plaidml_ops_local_response_norm.cpp
......
......@@ -49,13 +49,14 @@ namespace
{
ngraph::descriptor::Tensor* tensor = op_input.get_output().get_tensor_ptr().get();
PLAIDML_DEBUG << "Input: descriptor::Tensor " << tensor << " "
<< op.get_input_shape(op_input.get_index());
<< op.get_input_shape(op_input.get_index())
<< op.get_input_element_type(op_input.get_index());
}
for (std::size_t out_idx = 0; out_idx < op.get_output_size(); ++out_idx)
{
ngraph::descriptor::Tensor* tensor = op.get_output_tensor_ptr(out_idx).get();
PLAIDML_DEBUG << "Output: descriptor::Tensor " << tensor << " "
<< op.get_output_shape(out_idx);
<< op.get_output_shape(out_idx) << op.get_output_element_type(out_idx);
}
for (auto* t : op.liveness_new_list)
{
......
......@@ -129,8 +129,8 @@ std::vector<ngraph::runtime::PerformanceCounter>
return std::vector<ngraph::runtime::PerformanceCounter>{};
}
void ngraph::runtime::plaidml::PlaidML_Executable::save(const std::string& filename,
plaidml_file_format format) const
void ngraph::runtime::plaidml::PlaidML_Executable::save_as_format(const std::string& filename,
plaidml_file_format format) const
{
std::lock_guard<std::mutex> lock{m_mu};
......
......@@ -51,7 +51,7 @@ public:
std::vector<PerformanceCounter> get_performance_data() const final;
void save(const std::string& filename, plaidml_file_format format) const;
void save_as_format(const std::string& filename, plaidml_file_format format) const;
const std::shared_ptr<Function>& src_func() const { return m_src_func; }
private:
......
......@@ -52,7 +52,13 @@ ngraph::runtime::plaidml::op::Convolution::Convolution(std::shared_ptr<ngraph::o
void ngraph::runtime::plaidml::op::Convolution::validate_and_infer_types()
{
set_output_type(0, m_src->get_element_type(), m_src->get_output_partial_shape(0));
auto src_shape = m_src->get_output_shape(0);
Shape out_shape(src_shape.size());
for (std::size_t idx = 0; idx < src_shape.size(); ++idx)
{
out_shape[idx] = src_shape.at(m_output_axes.at(idx));
}
set_output_type(0, m_src->get_element_type(), out_shape);
}
std::shared_ptr<ngraph::Node>
......@@ -83,7 +89,13 @@ ngraph::runtime::plaidml::op::ConvolutionBackpropData::ConvolutionBackpropData(
void ngraph::runtime::plaidml::op::ConvolutionBackpropData::validate_and_infer_types()
{
set_output_type(0, m_src->get_element_type(), m_src->get_output_partial_shape(0));
auto src_shape = m_src->get_output_shape(0);
Shape out_shape(src_shape.size());
for (std::size_t idx = 0; idx < src_shape.size(); ++idx)
{
out_shape[idx] = src_shape.at(m_output_axes.at(idx));
}
set_output_type(0, m_src->get_element_type(), out_shape);
}
std::shared_ptr<ngraph::Node>
......@@ -115,7 +127,13 @@ ngraph::runtime::plaidml::op::ConvolutionBackpropFilters::ConvolutionBackpropFil
void ngraph::runtime::plaidml::op::ConvolutionBackpropFilters::validate_and_infer_types()
{
set_output_type(0, m_src->get_element_type(), m_src->get_output_partial_shape(0));
auto src_shape = m_src->get_output_shape(0);
Shape out_shape(src_shape.size());
for (std::size_t idx = 0; idx < src_shape.size(); ++idx)
{
out_shape[idx] = src_shape.at(m_output_axes.at(idx));
}
set_output_type(0, m_src->get_element_type(), out_shape);
}
std::shared_ptr<ngraph::Node>
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/plaidml/plaidml_ops_implicit_broadcast.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
namespace vp = vertexai::plaidml;
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
NGRAPH_PLAIDML_OP_CLASS(ImplImplicitBroadcast, OpImpl<plaidml::op::ImplicitBroadcast>);
}
}
}
ngraph::runtime::plaidml::op::ImplicitBroadcast::ImplicitBroadcast(std::shared_ptr<Node> input,
const Shape& shape)
: Op{"ImplicitBroadcast", {input}}
, m_shape{shape}
{
constructor_validate_and_infer_types();
}
void ngraph::runtime::plaidml::op::ImplicitBroadcast::validate_and_infer_types()
{
set_output_type(0, input(0).get_element_type(), m_shape);
}
std::shared_ptr<ngraph::Node> ngraph::runtime::plaidml::op::ImplicitBroadcast::copy_with_new_args(
const NodeVector& new_args) const
{
if (new_args.size() != 1)
{
throw ngraph_error{"Implicit broadcast requires a single input"};
}
return std::make_shared<ImplicitBroadcast>(new_args.at(0), m_shape);
}
void ngraph::runtime::plaidml::ImplImplicitBroadcast::Apply()
{
check_inputs(1);
check_outputs(1);
set_output(op_input(0));
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <memory>
#include "ngraph/op/op.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
namespace op
{
// Implements NumPy-style broadcast semantics by passing its single argument through to its
// output and pretending that this changes the shape. The creator of this node is responsible
// for ensuring that the downstream operation will perform a NumPy-style broadcast.
class ImplicitBroadcast;
}
}
}
}
class ngraph::runtime::plaidml::op::ImplicitBroadcast final : public ngraph::op::Op
{
public:
ImplicitBroadcast(std::shared_ptr<Node> input, const Shape& shape);
void validate_and_infer_types() final;
std::shared_ptr<Node> copy_with_new_args(const NodeVector& new_args) const final;
private:
Shape m_shape;
};
......@@ -84,8 +84,8 @@ void ngraph::runtime::plaidml::pass::ExplicitLogicals::construct_logical_to_data
"Tile",
"function (I) -> (O) { O = as_int(I ? 1 : 0, 8);}",
NodeVector{producer},
std::vector<std::tuple<element::Type, PartialShape>>{
{std::make_tuple(element::i8, PartialShape{producer->get_output_shape(0)})}}));
std::vector<std::tuple<element::Type, PartialShape>>{{std::make_tuple(
element::boolean, PartialShape{producer->get_output_shape(0)})}}));
return true;
};
......
......@@ -23,50 +23,60 @@
#include "ngraph/pattern/matcher.hpp"
#include "ngraph/pattern/op/any_of.hpp"
#include "ngraph/pattern/op/label.hpp"
#include "ngraph/runtime/plaidml/plaidml_ops_implicit_broadcast.hpp"
ngraph::runtime::plaidml::pass::ImplicitBroadcast::ImplicitBroadcast()
{
auto src_op = std::make_shared<pattern::op::Label>(
element::i8, Shape{}, [](std::shared_ptr<Node>) { return true; });
auto broadcast_op = std::make_shared<op::Broadcast>(src_op, Shape{}, AxisSet{});
auto broadcast_op = std::make_shared<ngraph::op::Broadcast>(src_op, Shape{}, AxisSet{});
auto target_op = std::make_shared<pattern::op::AnyOf>(
element::i8,
Shape{},
[](std::shared_ptr<Node> node) {
return pattern::has_class<op::util::UnaryElementwiseArithmetic>()(node) ||
pattern::has_class<op::util::BinaryElementwiseArithmetic>()(node);
return pattern::has_class<ngraph::op::util::UnaryElementwiseArithmetic>()(node) ||
pattern::has_class<ngraph::op::util::BinaryElementwiseArithmetic>()(node);
},
NodeVector{broadcast_op});
auto callback = [](pattern::Matcher& m) {
// Since the broadcast is going to an elementwise operation, we
// can always replace it with an equivalent reshape that uses ones
// for the broadcast axes.
// for the broadcast axes, followed by a fake op that fixes up the
// shape.
auto src = m.get_matched_nodes().at(2);
Shape src_shape = src->get_shape();
auto broadcast = std::static_pointer_cast<op::Broadcast>(m.get_matched_nodes().at(1));
auto broadcast =
std::static_pointer_cast<ngraph::op::Broadcast>(m.get_matched_nodes().at(1));
AxisVector reshape_order;
Shape reshape_shape;
std::size_t input_dim = 0;
std::size_t didx_limit = broadcast->get_broadcast_shape().size();
for (std::size_t didx = 0; didx < didx_limit; ++didx)
if (src_shape.size())
{
if (broadcast->get_broadcast_axes().count(didx))
// Create a reshape operation to get the right target broadcast shape. (Note that a zero-D tensor
// or constant can be passed directly into the ImplicitBroadcast op).
AxisVector reshape_order;
Shape reshape_shape;
std::size_t input_dim = 0;
std::size_t didx_limit = broadcast->get_broadcast_shape().size();
for (std::size_t didx = 0; didx < didx_limit; ++didx)
{
reshape_shape.emplace_back(1);
}
else
{
reshape_order.emplace_back(input_dim);
reshape_shape.emplace_back(src_shape.at(input_dim++));
if (broadcast->get_broadcast_axes().count(didx))
{
reshape_shape.emplace_back(1);
}
else
{
reshape_order.emplace_back(input_dim);
reshape_shape.emplace_back(src_shape.at(input_dim++));
}
}
src = std::make_shared<ngraph::op::Reshape>(src, reshape_order, reshape_shape);
}
auto reshape = std::make_shared<op::Reshape>(src, reshape_order, reshape_shape);
auto implicit_broadcast =
std::make_shared<plaidml::op::ImplicitBroadcast>(src, broadcast->get_shape());
replace_node(broadcast, reshape);
replace_node(broadcast, implicit_broadcast);
return true;
};
......
......@@ -118,7 +118,7 @@ void ngraph::runtime::plaidml::PlaidML_Tensor::sync_input()
return;
}
NGRAPH_DEBUG << "Syncing input for tensor " << this;
write(m_memory, 0, m_memory_size);
write(m_memory, m_memory_size);
}
void ngraph::runtime::plaidml::PlaidML_Tensor::sync_output()
......@@ -132,5 +132,5 @@ void ngraph::runtime::plaidml::PlaidML_Tensor::sync_output()
return;
}
NGRAPH_DEBUG << "Syncing output for tensor " << this;
read(m_memory, 0, m_memory_size);
read(m_memory, m_memory_size);
}
......@@ -255,3 +255,6 @@ backwards_softmax_all
backwards_softmax_axis
backwards_softmax_underflow
backwards_softmax_3d
batch_mat_mul_forward
dot_matrix_2x0_0x2
backwards_batchmatmul_tensor2_tensor2
......@@ -143,7 +143,8 @@ OPTIONS
}
auto exec = backend->compile(f);
static_cast<ngraph::runtime::plaidml::PlaidML_Executable*>(exec.get())->save(output, format);
static_cast<ngraph::runtime::plaidml::PlaidML_Executable*>(exec.get())
->save_as_format(output, format);
std::cerr << "Wrote output to " << output << "\n";
return EXIT_SUCCESS;
......