Commit e51582a9 authored by Sang Ik Lee, committed by Scott Cyphers

Add Gelu OP (#3181)

*  Add Gelu OP.

* CPU backend: Enable Erf UTs

* Add UT.

* Address review comment.
parent 93d91734
@@ -316,6 +316,8 @@ set (SRC
op/fused/elu.hpp
op/fused/fake_quantize.cpp
op/fused/fake_quantize.hpp
op/fused/gelu.cpp
op/fused/gelu.hpp
op/fused/gemm.cpp
op/fused/gemm.hpp
op/fused/grn.cpp
@@ -102,6 +102,7 @@
#include "ngraph/op/fused/depth_to_space.hpp"
#include "ngraph/op/fused/elu.hpp"
#include "ngraph/op/fused/fake_quantize.hpp"
#include "ngraph/op/fused/gelu.hpp"
#include "ngraph/op/fused/gemm.hpp"
#include "ngraph/op/fused/grn.hpp"
#include "ngraph/op/fused/group_conv.hpp"
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/fused/gelu.hpp"
#include <cmath>
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/erf.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/subtract.hpp"
using namespace std;
using namespace ngraph;
op::Gelu::Gelu(const shared_ptr<Node>& data)
: FusedOp("Gelu", {data})
{
constructor_validate_and_infer_types();
}
// f(x) = 0.5 * x * (1.0 + erf(x / sqrt(2.0)))
NodeVector op::Gelu::decompose_op() const
{
auto data = get_argument(0);
shared_ptr<ngraph::Node> half =
builder::make_constant(data->get_element_type(), data->get_shape(), 0.5);
shared_ptr<ngraph::Node> one =
builder::make_constant(data->get_element_type(), data->get_shape(), 1.0);
shared_ptr<ngraph::Node> sqrt_two =
builder::make_constant(data->get_element_type(), data->get_shape(), std::sqrt(2.0));
return {half * data * (one + make_shared<ngraph::op::Erf>(data / sqrt_two))};
}
shared_ptr<Node> op::Gelu::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 1)
{
throw ngraph_error("Incorrect number of new arguments");
}
return make_shared<Gelu>(new_args.at(0));
}
void op::Gelu::pre_validate_and_infer_types()
{
element::Type input_element_type = get_input_element_type(0);
NODE_VALIDATION_CHECK(this,
input_element_type.is_dynamic() || input_element_type == element::f32 ||
input_element_type == element::f64 ||
input_element_type == element::f16 ||
input_element_type == element::bf16,
"Argument element type must be f16, bf16, f32, f64 or dynamic (got ",
input_element_type,
").");
}
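For reference, the decomposed graph above evaluates the same scalar formula given in the comment, f(x) = 0.5 * x * (1 + erf(x / sqrt(2))). A minimal standalone sketch (not part of this commit; purely illustrative) that computes the reference values directly with std::erf:

#include <cmath>
#include <cstdio>

// Reference GELU, matching the decomposition above:
// f(x) = 0.5 * x * (1.0 + erf(x / sqrt(2.0)))
static double gelu_ref(double x)
{
    return 0.5 * x * (1.0 + std::erf(x / std::sqrt(2.0)));
}

int main()
{
    // gelu(1.0) is approximately 0.841345, i.e. x times the standard normal CDF at x.
    const double xs[] = {-2.0, -1.0, 0.0, 1.0, 2.0};
    for (double x : xs)
    {
        std::printf("gelu(% .1f) = % .6f\n", x, gelu_ref(x));
    }
    return 0;
}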
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/op/op.hpp"
#include "ngraph/op/util/fused_op.hpp"
namespace ngraph
{
namespace op
{
/// \brief Gaussian Error Linear Unit
/// f(x) = 0.5 * x * (1 + erf(x / sqrt(2)))
/// erf'(x) = 2 / sqrt(pi) * exp (-x^2)
/// f'(x) = 0.5 * (1 + erf( x / sqrt(2)) + x * sqrt(2 / pi) * exp (-(x / sqrt(2))^2))
///
class Gelu : public ngraph::op::util::FusedOp
{
public:
/// \brief Constructs a Gelu operation.
///
/// \param data Input tensor
Gelu(const std::shared_ptr<ngraph::Node>& data);
virtual NodeVector decompose_op() const override;
void pre_validate_and_infer_types() override;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
};
}
}
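For context, a minimal usage sketch (not part of this commit; it mirrors the unit tests added further below) showing how the new op is typically wrapped in a Function:

#include <memory>
#include "ngraph/function.hpp"
#include "ngraph/op/fused/gelu.hpp"
#include "ngraph/op/parameter.hpp"

// Build a one-node Function that applies Gelu to an f32 parameter of shape {8}.
std::shared_ptr<ngraph::Function> make_gelu_function()
{
    auto A = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{8});
    // Gelu takes the data node as its only argument.
    return std::make_shared<ngraph::Function>(std::make_shared<ngraph::op::Gelu>(A),
                                              ngraph::ParameterVector{A});
}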
@@ -24,6 +24,7 @@ NGRAPH_OP(ConvolutionBiasBackpropFiltersBias, ngraph::op)
NGRAPH_OP(DepthToSpace, ngraph::op)
NGRAPH_OP(Elu, ngraph::op)
NGRAPH_OP(FakeQuantize, ngraph::op)
NGRAPH_OP(Gelu, ngraph::op)
NGRAPH_OP(Gemm, ngraph::op)
NGRAPH_OP(GRN, ngraph::op)
NGRAPH_OP(GroupConvolution, ngraph::op)
@@ -8,7 +8,5 @@ sum_stable_acc_double
max_3d_to_scalar_int32
# Not implemented
erf
zero_sized_erf
send_recv
send_recv_ring
@@ -212,3 +212,5 @@ fake_quantize_with_clip
fake_quantize_with_clip_across_channels
send_recv
send_recv_ring
gelu_f32
gelu_f64
@@ -2066,6 +2066,7 @@ shared_ptr<runtime::Executable>
case OP_TYPEID::FakeQuantize:
case OP_TYPEID::Gather:
case OP_TYPEID::GatherND:
case OP_TYPEID::Gelu:
case OP_TYPEID::GenerateMask:
case OP_TYPEID::GRN:
case OP_TYPEID::GroupConvolutionTranspose:
@@ -2193,6 +2194,7 @@ bool runtime::intelgpu::IntelGPUBackend::is_supported_impl(const Node& node)
case OP_TYPEID::DepthToSpace:
case OP_TYPEID::Elu:
case OP_TYPEID::FakeQuantize:
case OP_TYPEID::Gelu:
case OP_TYPEID::Gemm:
case OP_TYPEID::GRN:
case OP_TYPEID::GroupConvolutionTranspose:
@@ -109,6 +109,8 @@ fake_quantize_with_clip
fake_quantize_with_clip_across_channels
send_recv
send_recv_ring
gelu_f32
gelu_f64
# Not supported quant ops
model_dequantize_linear_1d_zero_scale_int8
@@ -15,7 +15,7 @@ min_3d_eliminate_zero_dim # Out-of-range for PlaidML
reverse_sequence_n2c3h4w2 # No plans to implement ReverseSequence
reverse_sequence_n4c3h2w2 # No plans to implement ReverseSequence
reverse_sequence_n4d2c3h2w2 # No plans to implement ReverseSequence
topk_1d_max_all # No plans to implemenxfplait TopK
topk_1d_max_all # No plans to implement TopK
topk_1d_max_partial # No plans to implement TopK
topk_1d_max_one # No plans to implement TopK
topk_1d_min_all # No plans to implement TopK
@@ -230,6 +230,8 @@ backwards_softmax_underflow
backwards_softmax_3d
batch_mat_mul_forward
dot_matrix_2x0_0x2
gelu_f32
gelu_f64
# From onnx tests
model_quant_conv_linear_2d
model_quant_conv_linear_3d
@@ -75,6 +75,7 @@
#include "ngraph/op/fused/depth_to_space.hpp"
#include "ngraph/op/fused/elu.hpp"
#include "ngraph/op/fused/fake_quantize.hpp"
#include "ngraph/op/fused/gelu.hpp"
#include "ngraph/op/fused/gemm.hpp"
#include "ngraph/op/fused/grn.hpp"
#include "ngraph/op/fused/group_conv.hpp"
@@ -1221,6 +1222,11 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
node = make_shared<op::GatherND>(args[0], args[1]);
break;
}
case OP_TYPEID::Gelu:
{
node = make_shared<op::Gelu>(args[0]);
break;
}
case OP_TYPEID::Gemm:
{
auto alpha = node_js.at("alpha").get<double>();
@@ -2407,6 +2413,8 @@ json JSONSerializer::serialize_node(const Node& n)
node["n"] = tmp->get_n();
break;
}
case OP_TYPEID::Gelu: { break;
}
case OP_TYPEID::Gemm:
{
auto tmp = dynamic_cast<const op::Gemm*>(&n);
@@ -306,6 +306,52 @@ NGRAPH_TEST(${BACKEND_NAME}, floor_int32)
read_vector<int32_t>(result));
}
NGRAPH_TEST(${BACKEND_NAME}, gelu_f32)
{
Shape shape{8};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Gelu>(A), ParameterVector{A});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape);
vector<float> input{-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f};
copy_data(a, input);
auto result = backend->create_tensor(element::f32, shape);
std::transform(input.begin(), input.end(), input.begin(), [](float x) -> float {
return 0.5f * x * (1.0f + erf(x / sqrt(2.0f)));
});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a});
EXPECT_TRUE(test::all_close_f(input, read_vector<float>(result)));
}
NGRAPH_TEST(${BACKEND_NAME}, gelu_f64)
{
Shape shape{8};
auto A = make_shared<op::Parameter>(element::f64, shape);
auto f = make_shared<Function>(make_shared<op::Gelu>(A), ParameterVector{A});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f64, shape);
vector<double> input{-4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0};
copy_data(a, input);
auto result = backend->create_tensor(element::f64, shape);
std::transform(input.begin(), input.end(), input.begin(), [](double x) -> double {
return 0.5 * x * (1.0 + erf(x / sqrt(2.0)));
});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a});
EXPECT_TRUE(test::all_close_f(input, read_vector<double>(result)));
}
NGRAPH_TEST(${BACKEND_NAME}, log)
{
Shape shape{2, 2, 2};