Commit 3308f7b2 authored by Adam Rogowiec's avatar Adam Rogowiec

RNNCell fused operator.

parent eca45d1c
......@@ -310,6 +310,8 @@ set (SRC
op/fused/normalize.hpp
op/fused/prelu.cpp
op/fused/prelu.hpp
op/fused/rnn_cell.cpp
op/fused/rnn_cell.hpp
op/fused/rnn_cell_base.cpp
op/fused/rnn_cell_base.hpp
op/fused/scale_shift.cpp
......
......@@ -107,6 +107,7 @@
#include "ngraph/op/fused/mvn.hpp"
#include "ngraph/op/fused/normalize.hpp"
#include "ngraph/op/fused/prelu.hpp"
#include "ngraph/op/fused/rnn_cell.hpp"
#include "ngraph/op/fused/scale_shift.hpp"
#include "ngraph/op/fused/space_to_depth.hpp"
#include "ngraph/op/fused/split.hpp"
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <algorithm>
#include <cmath>
#include <functional>
#include "ngraph/builder/split.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/fused/rnn_cell.hpp"
#include "ngraph/op/util/reshape.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
op::RNNCell::RNNCell(const shared_ptr<Node>& X,
                     const shared_ptr<Node>& W,
                     const shared_ptr<Node>& R,
                     const shared_ptr<Node>& H_t,
                     size_t hidden_size)
    // Delegate to the fully parameterized constructor using the defaults from the
    // ONNX RNN specification: a single "tanh" activation, no alpha/beta activation
    // parameters and clipping disabled (clip == 0).
    : RNNCell(
          X, W, R, H_t, hidden_size, vector<string>{"tanh"}, vector<float>{}, vector<float>{}, 0.f)
{
}
op::RNNCell::RNNCell(const shared_ptr<Node>& X,
                     const shared_ptr<Node>& W,
                     const shared_ptr<Node>& R,
                     const shared_ptr<Node>& H_t,
                     size_t hidden_size,
                     const vector<string>& activations,
                     const vector<float>& activation_alpha,
                     const vector<float>& activation_beta,
                     float clip)
    : FusedOp("RNNCell", {X, W, R, H_t})
    , RNNCellBase(hidden_size, clip, activations, activation_alpha, activation_beta)
    , m_X{X}
    , m_W{W}
    , m_R{R}
    , m_H_t{H_t}
    , m_activation_f{get_activation_function(0)}
{
    // Normally we would split B onto Wb and Rb and add them, however here they are all zeros,
    // thus just initialize bias with appropriate shape and zeros.
    // Use the element type of X (instead of hard-coding f32) so the zero bias matches
    // the element type of the rest of the decomposed sub-graph.
    m_bias = ngraph::op::Constant::create(X->get_element_type(),
                                          Shape{m_gates_count * get_hidden_size()},
                                          vector<float>(m_gates_count * get_hidden_size(), 0.f));
    constructor_validate_and_infer_types();
}
op::RNNCell::RNNCell(const shared_ptr<Node>& X,
                     const shared_ptr<Node>& W,
                     const shared_ptr<Node>& R,
                     const shared_ptr<Node>& H_t,
                     size_t hidden_size,
                     const shared_ptr<Node>& B,
                     const vector<string>& activations,
                     const vector<float>& activation_alpha,
                     const vector<float>& activation_beta,
                     float clip)
    : FusedOp("RNNCell", {X, W, R, H_t, B})
    , RNNCellBase(hidden_size, clip, activations, activation_alpha, activation_beta)
    , m_X{X}
    , m_W{W}
    , m_R{R}
    , m_H_t{H_t}
    , m_activation_f{get_activation_function(0)}
{
    // B concatenates the input bias Wb and the recurrence bias Rb, hence the 2x factor.
    // NOTE: the error message previously printed 8 * hidden_size (copy-pasted from a
    // 4-gate cell); RNN has a single gate, so the required size is
    // 2 * m_gates_count * hidden_size.
    NODE_VALIDATION_CHECK(this,
                          (B->get_shape() == Shape{2 * m_gates_count * get_hidden_size()}),
                          "Input tensor B must have shape (",
                          2 * m_gates_count * get_hidden_size(),
                          "). Actual shape is:",
                          B->get_shape(),
                          ".");

    // Split B onto Wb and Rb and pre-sum them, since the decomposition only ever
    // adds them together.
    NodeVector b_W_R = builder::split(B, 2);
    m_bias = b_W_R.at(0) + b_W_R.at(1);

    constructor_validate_and_infer_types();
}
void op::RNNCell::pre_validate_and_infer_types()
{
    // Expected shapes:
    //   X:   [batch_size, input_size]
    //   W:   [hidden_size, input_size]
    //   R:   [hidden_size, hidden_size]
    //   H_t: [batch_size, hidden_size]
    const auto& x_shape = input(0).get_shape();

    const size_t batch_size = x_shape.at(0);
    const size_t input_size = x_shape.at(1);

    const auto& w_shape = input(1).get_shape();
    const auto& r_shape = input(2).get_shape();
    const auto& ht_shape = input(3).get_shape();

    NODE_VALIDATION_CHECK(this,
                          (w_shape == Shape{get_hidden_size(), input_size}),
                          "Input tensor W must have shape (",
                          get_hidden_size(),
                          ", ",
                          input_size,
                          "). Actual shape is:",
                          w_shape,
                          ".");
    // Fixed: the R and H_t messages below previously printed w_shape, which made
    // validation failures report the wrong tensor's actual shape.
    NODE_VALIDATION_CHECK(this,
                          (r_shape == Shape{get_hidden_size(), get_hidden_size()}),
                          "Input tensor R must have shape (",
                          get_hidden_size(),
                          ", ",
                          get_hidden_size(),
                          "). Actual shape is:",
                          r_shape,
                          ".");
    NODE_VALIDATION_CHECK(this,
                          (ht_shape == Shape{batch_size, get_hidden_size()}),
                          "Input tensor H_t must have shape (",
                          batch_size,
                          ", ",
                          get_hidden_size(),
                          "). Actual shape is:",
                          ht_shape,
                          ".");
}
NodeVector op::RNNCell::decompose_op() const
{
    // ------ NOTATION (follows the ONNX RNN operator documentation) ------
    // t      - time step (t-1 denotes the previous time step)
    // W      - parameter weight matrix for the input gate
    // R      - recurrence weight matrix for the input gate
    // Wb, Rb - W and R bias vectors for the input gate
    // *      - dot product, (.) - element-wise multiplication
    // f      - the activation function
    //
    // Computed equation:
    //   Ht = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Wbi + Rbi)

    // Input projection: Xt*(W^T)
    auto input_proj = std::make_shared<ngraph::op::Dot>(m_X, ngraph::op::util::transpose(m_W));
    // Hidden-state projection: Ht-1*(R^T)
    auto hidden_proj = std::make_shared<ngraph::op::Dot>(m_H_t, ngraph::op::util::transpose(m_R));
    // Sum projections with the pre-summed bias: Xt*(W^T) + Ht-1*(R^T) + Wb + Rb
    auto pre_activation = add(input_proj, add(hidden_proj, m_bias));
    // Clip (no-op when clip == 0) and apply the activation function f.
    auto H_t = m_activation_f(clip(pre_activation, get_clip()));
    return {H_t};
}
shared_ptr<Node> op::RNNCell::copy_with_new_args(const NodeVector& new_args) const
{
    check_new_args_count(this, new_args);
    // The cell was constructed either without a bias (4 inputs: X, W, R, H_t)
    // or with one (5 inputs: X, W, R, H_t, B); clone accordingly.
    switch (new_args.size())
    {
    case 4:
        return make_shared<RNNCell>(new_args.at(0),
                                    new_args.at(1),
                                    new_args.at(2),
                                    new_args.at(3),
                                    get_hidden_size(),
                                    get_activations(),
                                    get_activation_alpha(),
                                    get_activation_beta(),
                                    get_clip());
    case 5:
        return make_shared<RNNCell>(new_args.at(0),
                                    new_args.at(1),
                                    new_args.at(2),
                                    new_args.at(3),
                                    get_hidden_size(),
                                    new_args.at(4),
                                    get_activations(),
                                    get_activation_alpha(),
                                    get_activation_beta(),
                                    get_clip());
    default: throw ngraph_error("Incorrect number of new arguments");
    }
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cstddef>
#include <memory>
#include <string>
#include <vector>
#include "ngraph/node.hpp"
#include "ngraph/op/fused/rnn_cell_base.hpp"
#include "ngraph/op/util/activation_functions.hpp"
#include "ngraph/op/util/fused_op.hpp"
namespace ngraph
{
    namespace op
    {
        ///
        /// \brief      Class for RNN cell node.
        ///
        /// \note       It follows notation and equations defined as in ONNX standard:
        ///             https://github.com/onnx/onnx/blob/master/docs/Operators.md#RNN
        ///
        ///             Note this class represents only single *cell* and not whole RNN *layer*.
        ///             It is decomposed (FusedOp) into: Ht = f(Xt*(W^T) + Ht-1*(R^T) + Wb + Rb).
        ///
        class RNNCell : public util::FusedOp, public RNNCellBase
        {
        public:
            ///
            /// \brief      Constructs RNNCell node with default activation ("tanh"),
            ///             no activation parameters, no clipping and a zero bias.
            ///
            /// \param[in]  X            The input tensor with shape: [batch_size, input_size].
            /// \param[in]  W            The weight tensor with shape: [hidden_size, input_size].
            /// \param[in]  R            The recurrence weight tensor with shape:
            ///                          [hidden_size, hidden_size].
            /// \param[in]  H_t          The hidden state tensor at current time step with shape:
            ///                          [batch_size, hidden_size].
            /// \param[in]  hidden_size  The number of hidden units for recurrent cell.
            ///
            RNNCell(const std::shared_ptr<Node>& X,
                    const std::shared_ptr<Node>& W,
                    const std::shared_ptr<Node>& R,
                    const std::shared_ptr<Node>& H_t,
                    std::size_t hidden_size);

            ///
            /// \brief      Constructs RNNCell node with a zero bias.
            ///
            /// \param[in]  X                 The input tensor with shape: [batch_size, input_size].
            /// \param[in]  W                 The weight tensor with shape:
            ///                               [hidden_size, input_size].
            /// \param[in]  R                 The recurrence weight tensor with shape:
            ///                               [hidden_size, hidden_size].
            /// \param[in]  H_t               The hidden state tensor at current time step with
            ///                               shape: [batch_size, hidden_size].
            /// \param[in]  hidden_size       The number of hidden units for recurrent cell.
            /// \param[in]  activations       The vector of activation functions used inside
            ///                               recurrent cell.
            /// \param[in]  activation_alpha  The vector of alpha parameters for activation
            ///                               functions in order respective to activation list.
            /// \param[in]  activation_beta   The vector of beta parameters for activation
            ///                               functions in order respective to activation list.
            /// \param[in]  clip              The value defining clipping range [-clip, clip] on
            ///                               input of activation functions.
            ///
            RNNCell(const std::shared_ptr<Node>& X,
                    const std::shared_ptr<Node>& W,
                    const std::shared_ptr<Node>& R,
                    const std::shared_ptr<Node>& H_t,
                    std::size_t hidden_size,
                    const std::vector<std::string>& activations,
                    const std::vector<float>& activation_alpha,
                    const std::vector<float>& activation_beta,
                    float clip);

            ///
            /// \brief      Constructs RNNCell node with an explicit bias tensor.
            ///
            /// \param[in]  X                 The input tensor with shape: [batch_size, input_size].
            /// \param[in]  W                 The weight tensor with shape:
            ///                               [hidden_size, input_size].
            /// \param[in]  R                 The recurrence weight tensor with shape:
            ///                               [hidden_size, hidden_size].
            /// \param[in]  H_t               The hidden state tensor at current time step with
            ///                               shape: [batch_size, hidden_size].
            /// \param[in]  hidden_size       The number of hidden units for recurrent cell.
            /// \param[in]  B                 The bias tensor for input gate with shape:
            ///                               [2*hidden_size] (concatenation of Wb and Rb).
            /// \param[in]  activations       The vector of activation functions used inside
            ///                               recurrent cell.
            /// \param[in]  activation_alpha  The vector of alpha parameters for activation
            ///                               functions in order respective to activation list.
            /// \param[in]  activation_beta   The vector of beta parameters for activation
            ///                               functions in order respective to activation list.
            /// \param[in]  clip              The value defining clipping range [-clip, clip] on
            ///                               input of activation functions.
            ///
            RNNCell(const std::shared_ptr<Node>& X,
                    const std::shared_ptr<Node>& W,
                    const std::shared_ptr<Node>& R,
                    const std::shared_ptr<Node>& H_t,
                    std::size_t hidden_size,
                    const std::shared_ptr<Node>& B,
                    const std::vector<std::string>& activations =
                        std::vector<std::string>{"tanh"},
                    const std::vector<float>& activation_alpha = {},
                    const std::vector<float>& activation_beta = {},
                    float clip = 0.f);

            // Validates static shapes of W, R and H_t against X and hidden_size.
            virtual void pre_validate_and_infer_types() override;
            // Decomposes the cell into elementary nGraph ops (Dot, Add, activation).
            virtual NodeVector decompose_op() const override;
            virtual std::shared_ptr<Node>
                copy_with_new_args(const NodeVector& new_args) const override;

        private:
            ///
            /// \brief The input data tensor. Shape: [batch_size, input_size].
            ///
            std::shared_ptr<Node> m_X;
            ///
            /// \brief The weight tensor. Shape: [hidden_size, input_size].
            ///
            std::shared_ptr<Node> m_W;
            ///
            /// \brief The recurrence weight tensor. Shape: [hidden_size, hidden_size].
            ///
            std::shared_ptr<Node> m_R;
            ///
            /// \brief The hidden state tensor at current time step. Shape: [batch_size, hidden_size].
            ///
            std::shared_ptr<Node> m_H_t;
            ///
            /// \brief The Activation function f.
            ///
            ActivationFunction m_activation_f;

            // A plain RNN cell has a single (input) gate.
            static constexpr std::size_t m_gates_count{1};
            ///
            /// \brief Sum of biases (weight and recurrence) for input gate.
            ///
            /// Sum of `[Wb, Rb]`.
            ///
            std::shared_ptr<Node> m_bias;
        };
    }
}
......@@ -31,6 +31,7 @@ NGRAPH_OP(LSTMCell, ngraph::op)
NGRAPH_OP(MVN, ngraph::op)
NGRAPH_OP(Normalize, ngraph::op)
NGRAPH_OP(PRelu, ngraph::op)
NGRAPH_OP(RNNCell, ngraph::op)
NGRAPH_OP(ScaleShift, ngraph::op)
NGRAPH_OP(SpaceToDepth, ngraph::op)
NGRAPH_OP(SquaredDifference, ngraph::op)
......
......@@ -88,6 +88,7 @@
#include "ngraph/op/fused/lstm_cell.hpp"
#include "ngraph/op/fused/mvn.hpp"
#include "ngraph/op/fused/normalize.hpp"
#include "ngraph/op/fused/rnn_cell.hpp"
#include "ngraph/op/fused/scale_shift.hpp"
#include "ngraph/op/fused/space_to_depth.hpp"
#include "ngraph/op/fused/squeeze.hpp"
......@@ -2066,6 +2067,7 @@ shared_ptr<runtime::Executable>
case OP_TYPEID::Normalize:
case OP_TYPEID::PRelu:
case OP_TYPEID::Passthrough:
case OP_TYPEID::RNNCell:
case OP_TYPEID::QuantizedAvgPool:
case OP_TYPEID::QuantizedConvolution:
case OP_TYPEID::QuantizedConvolutionBias:
......@@ -2182,6 +2184,7 @@ bool runtime::intelgpu::IntelGPUBackend::is_supported_impl(const Node& node)
case OP_TYPEID::MVN:
case OP_TYPEID::Normalize:
case OP_TYPEID::PRelu:
case OP_TYPEID::RNNCell:
case OP_TYPEID::ScaleShift:
case OP_TYPEID::SpaceToDepth:
case OP_TYPEID::Split:
......
......@@ -78,6 +78,7 @@
#include "ngraph/op/fused/mvn.hpp"
#include "ngraph/op/fused/normalize.hpp"
#include "ngraph/op/fused/prelu.hpp"
#include "ngraph/op/fused/rnn_cell.hpp"
#include "ngraph/op/fused/scale_shift.hpp"
#include "ngraph/op/fused/space_to_depth.hpp"
#include "ngraph/op/fused/split.hpp"
......@@ -1282,6 +1283,25 @@ static shared_ptr<ngraph::Function>
node = make_shared<op::Product>(args[0], reduction_axes);
break;
}
case OP_TYPEID::RNNCell:
{
auto hidden_size = node_js.at("hidden_size").get<size_t>();
auto clip = node_js.at("clip").get<float>();
auto activations = node_js.at("activations").get<vector<string>>();
auto activation_alpha = node_js.at("activation_alpha").get<vector<float>>();
auto activation_beta = node_js.at("activation_beta").get<vector<float>>();
node = make_shared<op::RNNCell>(args[0],
args[1],
args[2],
args[3],
hidden_size,
args[4],
activations,
activation_alpha,
activation_beta,
clip);
break;
}
case OP_TYPEID::Quantize:
{
auto type = read_element_type(node_js.at("type"));
......@@ -2125,6 +2145,16 @@ static json write(const Node& n, bool binary_constant_data)
}
case OP_TYPEID::Power: { break;
}
case OP_TYPEID::RNNCell:
{
auto tmp = dynamic_cast<const op::RNNCell*>(&n);
node["hidden_size"] = tmp->get_hidden_size();
node["clip"] = tmp->get_clip();
node["activations"] = tmp->get_activations();
node["activation_alpha"] = tmp->get_activation_alpha();
node["activation_beta"] = tmp->get_activation_beta();
break;
}
case OP_TYPEID::Quantize:
{
auto tmp = dynamic_cast<const op::Quantize*>(&n);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment