Commit 385770d8 authored by Sang Ik Lee, committed by Scott Cyphers

LayerNorm (#3678)

* LayerNorm (#3630)

* Constructors.

Type prop.

Decompose LayerNorm.

Add serialize.

* Add dummy test case.

* Add dummy type prop test.

* Fix some build errors.

* Remove build errors.

* Update decompose for bprop.

* Change begin_norm_axis default value to 1.

* Style.

* Reorder class members.

* Add actual type prop tests.

* Add fprop test.

* Working on bprop test.

* Bprop tests.

* Allow flattened scale and bias.

* Add support for flattened scale and bias.

* Fix incorrect type_name.

* PlaidML: Decompose fused_op LayerNorm

* Update Backprop constructors.

* PlaidML: Add missing header file.

* Remove doc about removed param.

* Fix type prop tests.

* PlaidML: Disable unit test.

* Fix stats flattening axes bug.

* Upgrade description to type_info.
parent 40eb9587
@@ -336,6 +336,8 @@ set (SRC
op/fused/group_conv_transpose.cpp
op/fused/gru_cell.cpp
op/fused/gru_cell.hpp
op/fused/layer_norm.cpp
op/fused/layer_norm.hpp
op/fused/lstm_cell.cpp
op/fused/lstm_cell.hpp
op/fused/matmul.cpp
......
@@ -136,6 +136,7 @@ namespace ngraph
#include "ngraph/op/fused/group_conv_transpose.hpp"
#include "ngraph/op/fused/gru_cell.hpp"
#include "ngraph/op/fused/hard_sigmoid.hpp"
#include "ngraph/op/fused/layer_norm.hpp"
#include "ngraph/op/fused/lstm_cell.hpp"
#include "ngraph/op/fused/matmul.hpp"
#include "ngraph/op/fused/mvn.hpp"
......
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/op/op.hpp"
#include "ngraph/op/util/fused_op.hpp"
namespace ngraph
{
namespace op
{
/// \brief Layer Normalization
///
class LayerNorm : public ngraph::op::util::FusedOp
{
public:
NGRAPH_API
static constexpr NodeTypeInfo type_info{"LayerNorm", 0};
const NodeTypeInfo& get_type_info() const override { return type_info; }
LayerNorm() = default;
/// \brief Constructs a LayerNorm operation.
///
/// \param data Input tensor
/// \param scale Scale tensor
/// \param bias Bias tensor
/// \param keep_stats Generate the additional mean and variance outputs, default true
/// \param begin_norm_axis Axis where normalization starts, default 1
/// \param epsilon Small number to add for stability of rsqrt, default 1e-5
LayerNorm(const Output<Node>& data,
const Output<Node>& scale,
const Output<Node>& bias,
bool keep_stats = true,
int64_t begin_norm_axis = 1,
double epsilon = 1e-5);
LayerNorm(const Output<Node>& data,
bool keep_stats = true,
int64_t begin_norm_axis = 1,
double epsilon = 1e-5);
virtual NodeVector decompose_op() const override;
void pre_validate_and_infer_types() override;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
bool get_keep_stats() const { return m_keep_stats; }
bool get_use_affine() const { return m_use_affine; }
double get_epsilon() const { return m_epsilon; }
int64_t get_begin_norm_axis() const { return m_begin_norm_axis; }
protected:
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const NodeVector& deltas) override;
private:
bool m_keep_stats{true};
bool m_use_affine{true};
int64_t m_begin_norm_axis{1};
double m_epsilon{1e-5};
};
/// \brief Layer Normalization Backprop
///
class LayerNormBackprop : public ngraph::op::util::FusedOp
{
public:
NGRAPH_API
static constexpr NodeTypeInfo type_info{"LayerNormBackprop", 0};
const NodeTypeInfo& get_type_info() const override { return type_info; }
LayerNormBackprop() = default;
/// \brief Constructs a LayerNormBackprop operation.
///
/// \param data Input tensor
/// \param mean Mean tensor from fprop
/// \param variance Variance tensor from fprop
/// \param delta Delta tensor
/// \param scale Scale tensor
/// \param begin_norm_axis Axis where normalization starts, default 1
/// \param epsilon Small number to add for stability of rsqrt, default 1e-5
LayerNormBackprop(const Output<Node>& data,
const Output<Node>& delta,
const Output<Node>& mean,
const Output<Node>& variance,
const Output<Node>& scale,
int64_t begin_norm_axis = 1,
double epsilon = 1e-5);
LayerNormBackprop(const Output<Node>& data,
const Output<Node>& delta,
const Output<Node>& mean,
const Output<Node>& variance,
int64_t begin_norm_axis = 1,
double epsilon = 1e-5);
LayerNormBackprop(const Output<Node>& data,
const Output<Node>& delta,
const Output<Node>& scale,
int64_t begin_norm_axis = 1,
double epsilon = 1e-5);
LayerNormBackprop(const Output<Node>& data,
const Output<Node>& delta,
int64_t begin_norm_axis = 1,
double epsilon = 1e-5);
virtual NodeVector decompose_op() const override;
void pre_validate_and_infer_types() override;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
bool get_use_stats() const { return m_use_stats; }
bool get_use_affine() const { return m_use_affine; }
double get_epsilon() const { return m_epsilon; }
int64_t get_begin_norm_axis() const { return m_begin_norm_axis; }
private:
bool m_use_stats{true};
bool m_use_affine{true};
int64_t m_begin_norm_axis{1};
double m_epsilon{1e-5};
};
}
}
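The begin_norm_axis parameter above determines which trailing axes the statistics are computed over: dimensions before the axis act as independent rows, dimensions from the axis onward are normalized together, and negative values count from the back. A small hypothetical helper (illustration only, not part of this commit) that performs the split:

// Hypothetical helper (not in the commit) showing how begin_norm_axis
// partitions a shape: dims before the axis multiply into an "outer" count,
// dims at/after the axis multiply into the "inner" extent that mean and
// variance are taken over.
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

std::pair<size_t, size_t> split_by_norm_axis(const std::vector<size_t>& shape,
                                             int64_t begin_norm_axis)
{
    const int64_t rank = static_cast<int64_t>(shape.size());
    if (begin_norm_axis < 0)
    {
        begin_norm_axis += rank; // e.g. -1 normalizes over the last axis only
    }
    size_t outer = 1;
    size_t inner = 1;
    for (int64_t i = 0; i < rank; ++i)
    {
        (i < begin_norm_axis ? outer : inner) *= shape[i];
    }
    return {outer, inner}; // Shape{2, 4} with begin_norm_axis = 1 -> {2, 4}
}

With the Shape{2, 4} inputs used in the tests below and the default begin_norm_axis of 1, this yields two rows of four normalized elements, which is why the mean and variance outputs have Shape{2}.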
@@ -37,6 +37,8 @@ NGRAPH_OP(GroupConvolution, ngraph::op)
NGRAPH_OP(GroupConvolutionTranspose, ngraph::op)
NGRAPH_OP(GRUCell, ngraph::op)
NGRAPH_OP(HardSigmoid, ngraph::op)
NGRAPH_OP(LayerNorm, ngraph::op)
NGRAPH_OP(LayerNormBackprop, ngraph::op)
NGRAPH_OP(LSTMCell, ngraph::op)
NGRAPH_OP(MatMul, ngraph::op)
NGRAPH_OP(MVN, ngraph::op)
......
@@ -18,6 +18,7 @@
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/op/fused/group_conv.hpp"
#include "ngraph/op/fused/layer_norm.hpp"
#include "ngraph/pass/algebraic_simplification.hpp"
#include "ngraph/pass/core_fusion.hpp"
#include "ngraph/pass/cse.hpp"
@@ -90,7 +91,13 @@ std::shared_ptr<ngraph::runtime::plaidml::PlaidML_Executable>
// We apply the same general-purposes passes as the CPU backend.
pass_manager.register_pass<ngraph::pass::FusedOpDecomposition>([](const Node& node) -> bool {
if (node.description() == ngraph::op::GroupConvolution().description())
{
return true;
}
else if (node.description() == ngraph::op::LayerNorm().description())
{
return true;
}
return false;
});
pass_manager.register_pass<ngraph::pass::LikeReplacement>();
......
@@ -282,5 +282,10 @@ random_uniform_seed_use_dynamic
random_uniform_all_static_range_dynamic
random_uniform_dynamic_shapes
# Fused op tests fail on macOS
layer_norm_affine_stats
layer_norm_bprop_affine_stats
layer_norm_bprop_affine
# shapes with zeros dimensions like (5, 0, 5) not supported in PlaidML backend
dyn_replace_slice
\ No newline at end of file
dyn_replace_slice
@@ -80,6 +80,7 @@
#include "ngraph/op/fused/group_conv_transpose.hpp"
#include "ngraph/op/fused/gru_cell.hpp"
#include "ngraph/op/fused/hard_sigmoid.hpp"
#include "ngraph/op/fused/layer_norm.hpp"
#include "ngraph/op/fused/lstm_cell.hpp"
#include "ngraph/op/fused/matmul.hpp"
#include "ngraph/op/fused/mvn.hpp"
@@ -1404,7 +1405,51 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
node = make_shared<op::HardSigmoid>(args[0], alpha, beta);
break;
}
case OP_TYPEID::LayerNorm:
{
auto keep_stats = node_js.at("keep_stats").get<bool>();
auto use_affine = node_js.at("use_affine").get<bool>();
auto epsilon = node_js.at("epsilon").get<double>();
auto begin_norm_axis = node_js.at("begin_norm_axis").get<int64_t>();
if (use_affine)
{
node = make_shared<op::LayerNorm>(
args[0], args[1], args[2], keep_stats, begin_norm_axis, epsilon);
}
else
{
node = make_shared<op::LayerNorm>(args[0], keep_stats, begin_norm_axis, epsilon);
}
break;
}
case OP_TYPEID::LayerNormBackprop:
{
auto use_stats = node_js.at("use_stats").get<bool>();
auto use_affine = node_js.at("use_affine").get<bool>();
auto epsilon = node_js.at("epsilon").get<double>();
auto begin_norm_axis = node_js.at("begin_norm_axis").get<int64_t>();
if (use_stats && use_affine)
{
node = make_shared<op::LayerNormBackprop>(
args[0], args[1], args[2], args[3], args[4], begin_norm_axis, epsilon);
}
else if (use_stats)
{
node = make_shared<op::LayerNormBackprop>(
args[0], args[1], args[2], args[3], begin_norm_axis, epsilon);
}
else if (use_affine)
{
node = make_shared<op::LayerNormBackprop>(
args[0], args[1], args[2], begin_norm_axis, epsilon);
}
else
{
node =
make_shared<op::LayerNormBackprop>(args[0], args[1], begin_norm_axis, epsilon);
}
break;
}
case OP_TYPEID::Less:
{
node = make_shared<op::Less>(
@@ -2687,6 +2732,24 @@ json JSONSerializer::serialize_node(const Node& n)
node["beta"] = tmp->get_beta();
break;
}
case OP_TYPEID::LayerNorm:
{
auto tmp = dynamic_cast<const op::LayerNorm*>(&n);
node["keep_stats"] = tmp->get_keep_stats();
node["use_affine"] = tmp->get_use_affine();
node["epsilon"] = tmp->get_epsilon();
node["begin_norm_axis"] = tmp->get_begin_norm_axis();
break;
}
case OP_TYPEID::LayerNormBackprop:
{
auto tmp = dynamic_cast<const op::LayerNormBackprop*>(&n);
node["use_stats"] = tmp->get_use_stats();
node["use_affine"] = tmp->get_use_affine();
node["epsilon"] = tmp->get_epsilon();
node["begin_norm_axis"] = tmp->get_begin_norm_axis();
break;
}
case OP_TYPEID::Less:
{
auto tmp = dynamic_cast<const op::Less*>(&n);
......
@@ -124,6 +124,7 @@ set(SRC
type_prop/gru_cell.cpp
type_prop/hard_sigmoid.cpp
type_prop/index_reduction.cpp
type_prop/layer_norm.cpp
type_prop/lrn.cpp
type_prop/lstm_cell.cpp
type_prop/matmul.cpp
@@ -272,6 +273,7 @@ set(MULTI_TEST_SRC
backend/gather.in.cpp
backend/gelu.in.cpp
backend/generate_mask.in.cpp
backend/layer_norm.in.cpp
backend/log.in.cpp
backend/logical_and.in.cpp
backend/logical_or.in.cpp
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <cstdlib>
#include <random>
#include <string>
// clang-format off
#ifdef ${BACKEND_NAME}_FLOAT_TOLERANCE_BITS
#define DEFAULT_FLOAT_TOLERANCE_BITS ${BACKEND_NAME}_FLOAT_TOLERANCE_BITS
#endif
#ifdef ${BACKEND_NAME}_DOUBLE_TOLERANCE_BITS
#define DEFAULT_DOUBLE_TOLERANCE_BITS ${BACKEND_NAME}_DOUBLE_TOLERANCE_BITS
#endif
// clang-format on
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "util/all_close.hpp"
#include "util/all_close_f.hpp"
#include "util/autodiff/numeric_compare.hpp"
#include "util/ndarray.hpp"
#include "util/test_control.hpp"
#include "util/test_tools.hpp"
using namespace std;
using namespace ngraph;
static string s_manifest = "${MANIFEST}";
NGRAPH_TEST(${BACKEND_NAME}, layer_norm_affine_stats)
{
auto p_data = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto p_scale = make_shared<op::Parameter>(element::f32, Shape{4});
auto p_bias = make_shared<op::Parameter>(element::f32, Shape{4});
auto ln = make_shared<op::LayerNorm>(p_data, p_scale, p_bias);
auto f = make_shared<Function>(ln->outputs(), ParameterVector{p_data, p_scale, p_bias});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create tensors for input
auto data = backend->create_tensor(element::f32, Shape{2, 4});
auto scale = backend->create_tensor(element::f32, Shape{4});
auto bias = backend->create_tensor(element::f32, Shape{4});
// Fill in input tensors
vector<float> d_input{-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f};
copy_data(data, d_input);
vector<float> s_input{-1.0f, 1.0f, 2.0f, 3.0f};
copy_data(scale, s_input);
vector<float> b_input{-4.0f, -3.0f, -2.0f, -1.0f};
copy_data(bias, b_input);
// Create tensors for output
auto norm = backend->create_tensor(element::f32, Shape{2, 4});
auto mean = backend->create_tensor(element::f32, Shape{2});
auto var = backend->create_tensor(element::f32, Shape{2});
// Expected results (Manually computed)
vector<float> exp_norm{-2.658364534378051758f,
-3.447211742401123047f,
-1.105576276779174805f,
3.024906158447265625f,
-2.658364534378051758f,
-3.447211742401123047f,
-1.105576276779174805f,
3.024906158447265625f};
vector<float> exp_mean{-2.5f, 1.5f};
vector<float> exp_var{1.25f, 1.25f};
auto handle = backend->compile(f);
handle->call_with_validate({norm, mean, var}, {data, scale, bias});
EXPECT_TRUE(test::all_close_f(exp_norm, read_vector<float>(norm)));
EXPECT_TRUE(test::all_close_f(exp_mean, read_vector<float>(mean)));
EXPECT_TRUE(test::all_close_f(exp_var, read_vector<float>(var)));
}
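The expected values above follow directly from the layer-norm formula norm = scale * (data - mean) / sqrt(variance + epsilon) + bias, with the mean and biased variance taken per row. A minimal standalone sketch (assumed semantics for illustration, not the op's actual decomposition) that reproduces them:

// Reference layer-norm math for a 2D [rows, cols] input, matching the
// manually computed expectations in the test above (biased variance).
#include <cmath>
#include <cstddef>
#include <vector>

void layer_norm_ref(const std::vector<float>& x, const std::vector<float>& g,
                    const std::vector<float>& b, size_t rows, size_t cols,
                    std::vector<float>& norm, std::vector<float>& mean,
                    std::vector<float>& var, double eps = 1e-5)
{
    norm.resize(rows * cols);
    mean.resize(rows);
    var.resize(rows);
    for (size_t r = 0; r < rows; ++r)
    {
        double m = 0.0;
        double v = 0.0;
        for (size_t c = 0; c < cols; ++c) m += x[r * cols + c];
        m /= cols;
        for (size_t c = 0; c < cols; ++c) v += (x[r * cols + c] - m) * (x[r * cols + c] - m);
        v /= cols; // biased variance, as in exp_var above
        const double inv_std = 1.0 / std::sqrt(v + eps);
        for (size_t c = 0; c < cols; ++c)
        {
            norm[r * cols + c] =
                static_cast<float>((x[r * cols + c] - m) * inv_std * g[c] + b[c]);
        }
        mean[r] = static_cast<float>(m);
        var[r] = static_cast<float>(v);
    }
}

For the first row {-4, -3, -2, -1} this gives mean -2.5 and variance 1.25, and e.g. (-4 + 2.5) / sqrt(1.25 + 1e-5) * -1 + -4 ≈ -2.65836, matching exp_norm[0].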
NGRAPH_TEST(${BACKEND_NAME}, layer_norm_bprop_affine_stats)
{
auto p_data = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto p_delta = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto p_mean = make_shared<op::Parameter>(element::f32, Shape{2});
auto p_var = make_shared<op::Parameter>(element::f32, Shape{2});
auto p_scale = make_shared<op::Parameter>(element::f32, Shape{4});
auto lnb = make_shared<op::LayerNormBackprop>(p_data, p_delta, p_mean, p_var, p_scale);
auto f = make_shared<Function>(lnb->outputs(),
ParameterVector{p_data, p_delta, p_mean, p_var, p_scale});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create tensors for input
auto data = backend->create_tensor(element::f32, Shape{2, 4});
auto delta = backend->create_tensor(element::f32, Shape{2, 4});
auto mean = backend->create_tensor(element::f32, Shape{2});
auto var = backend->create_tensor(element::f32, Shape{2});
auto scale = backend->create_tensor(element::f32, Shape{4});
// Fill in input tensors
vector<float> d_input{-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f};
copy_data(data, d_input);
vector<float> dt_input{0.1f, -0.1f, 0.2f, -0.2f, 0.1f, -0.1f, 0.2f, -0.2f};
copy_data(delta, dt_input);
vector<float> s_input{-1.0f, 1.0f, 2.0f, 3.0f};
copy_data(scale, s_input);
vector<float> m_input{-2.5f, 1.5f};
copy_data(mean, m_input);
vector<float> v_input{1.25f, 1.25f};
copy_data(var, v_input);
// Create tensors for output
auto d_data = backend->create_tensor(element::f32, Shape{2, 4});
auto d_scale = backend->create_tensor(element::f32, Shape{4});
auto d_bias = backend->create_tensor(element::f32, Shape{4});
// Expected results (Manually computed)
vector<float> exp_d_data{-0.1341624855995178223f,
-0.04472083225846290588f,
0.4919326305389404297f,
-0.31304931640625f,
-0.1341624855995178223f,
-0.04472083225846290588f,
0.4919326305389404297f,
-0.31304931640625f};
vector<float> exp_d_scale{-0.2683270871639251709f,
0.08944236487150192261f,
0.1788847297430038452f,
-0.5366541743278503418f};
vector<float> exp_d_bias{0.2f, -0.2f, 0.4f, -0.4f};
auto handle = backend->compile(f);
handle->call_with_validate({d_data, d_scale, d_bias}, {data, delta, mean, var, scale});
EXPECT_TRUE(test::all_close_f(exp_d_data, read_vector<float>(d_data)));
EXPECT_TRUE(test::all_close_f(exp_d_scale, read_vector<float>(d_scale)));
EXPECT_TRUE(test::all_close_f(exp_d_bias, read_vector<float>(d_bias)));
}
NGRAPH_TEST(${BACKEND_NAME}, layer_norm_bprop_affine)
{
auto p_data = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto p_delta = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto p_scale = make_shared<op::Parameter>(element::f32, Shape{4});
auto lnb = make_shared<op::LayerNormBackprop>(p_data, p_delta, p_scale);
auto f = make_shared<Function>(lnb->outputs(), ParameterVector{p_data, p_delta, p_scale});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create tensors for input
auto data = backend->create_tensor(element::f32, Shape{2, 4});
auto delta = backend->create_tensor(element::f32, Shape{2, 4});
auto scale = backend->create_tensor(element::f32, Shape{4});
// Fill in input tensors
vector<float> d_input{-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f};
copy_data(data, d_input);
vector<float> dt_input{0.1f, -0.1f, 0.2f, -0.2f, 0.1f, -0.1f, 0.2f, -0.2f};
copy_data(delta, dt_input);
vector<float> s_input{-1.0f, 1.0f, 2.0f, 3.0f};
copy_data(scale, s_input);
// Create tensors for output
auto d_data = backend->create_tensor(element::f32, Shape{2, 4});
auto d_scale = backend->create_tensor(element::f32, Shape{4});
auto d_bias = backend->create_tensor(element::f32, Shape{4});
// Expected results (Manually computed)
vector<float> exp_d_data{-0.1341624855995178223f,
-0.04472083225846290588f,
0.4919326305389404297f,
-0.31304931640625f,
-0.1341624855995178223f,
-0.04472083225846290588f,
0.4919326305389404297f,
-0.31304931640625f};
vector<float> exp_d_scale{-0.2683270871639251709f,
0.08944236487150192261f,
0.1788847297430038452f,
-0.5366541743278503418f};
vector<float> exp_d_bias{0.2f, -0.2f, 0.4f, -0.4f};
auto handle = backend->compile(f);
handle->call_with_validate({d_data, d_scale, d_bias}, {data, delta, scale});
EXPECT_TRUE(test::all_close_f(exp_d_data, read_vector<float>(d_data)));
EXPECT_TRUE(test::all_close_f(exp_d_scale, read_vector<float>(d_scale)));
EXPECT_TRUE(test::all_close_f(exp_d_bias, read_vector<float>(d_bias)));
}
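The backprop expectations in the two tests above can likewise be reproduced from the standard layer-norm gradient: d_bias sums delta over rows, d_scale sums delta * x_hat over rows, and d_data subtracts the per-row means of scale*delta and of scale*delta*x_hat before rescaling by 1/std. A sketch under those assumptions (illustration only, not the op's decompose_op):

// Reference layer-norm gradients for a 2D [rows, cols] input with affine
// scale/bias. Statistics are recomputed from x, which is why the
// stats-based and affine-only bprop tests share the same expected values.
#include <cmath>
#include <cstddef>
#include <vector>

void layer_norm_bprop_ref(const std::vector<float>& x, const std::vector<float>& delta,
                          const std::vector<float>& g, size_t rows, size_t cols,
                          std::vector<float>& d_x, std::vector<float>& d_g,
                          std::vector<float>& d_b, double eps = 1e-5)
{
    d_x.assign(rows * cols, 0.0f);
    d_g.assign(cols, 0.0f);
    d_b.assign(cols, 0.0f);
    std::vector<double> x_hat(cols);
    for (size_t r = 0; r < rows; ++r)
    {
        double m = 0.0;
        double v = 0.0;
        for (size_t c = 0; c < cols; ++c) m += x[r * cols + c];
        m /= cols;
        for (size_t c = 0; c < cols; ++c) v += (x[r * cols + c] - m) * (x[r * cols + c] - m);
        v /= cols;
        const double inv_std = 1.0 / std::sqrt(v + eps);
        double mean_gd = 0.0;  // mean of g * delta over the row
        double mean_gdx = 0.0; // mean of g * delta * x_hat over the row
        for (size_t c = 0; c < cols; ++c)
        {
            x_hat[c] = (x[r * cols + c] - m) * inv_std;
            const double gd = g[c] * delta[r * cols + c];
            mean_gd += gd / cols;
            mean_gdx += gd * x_hat[c] / cols;
        }
        for (size_t c = 0; c < cols; ++c)
        {
            const double gd = g[c] * delta[r * cols + c];
            d_x[r * cols + c] =
                static_cast<float>((gd - mean_gd - x_hat[c] * mean_gdx) * inv_std);
            d_g[c] += delta[r * cols + c] * static_cast<float>(x_hat[c]);
            d_b[c] += delta[r * cols + c];
        }
    }
}

With the test inputs this yields d_bias {0.2, -0.2, 0.4, -0.4} and the d_data/d_scale vectors expected above.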
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "util/type_prop.hpp"
using namespace std;
using namespace ngraph;
TEST(type_prop, layer_norm_element_type)
{
auto data = make_shared<op::Parameter>(element::i32, Shape{2, 4});
auto scale = make_shared<op::Parameter>(element::f32, Shape{4});
auto bias = make_shared<op::Parameter>(element::f32, Shape{4});
try
{
auto ln = make_shared<op::LayerNorm>(data, scale, bias);
// Should have thrown, so fail if it didn't
FAIL() << "Incorrect element type";
}
catch (const NodeValidationFailure& error)
{
EXPECT_HAS_SUBSTRING(
error.what(),
std::string("Argument element type must be f16, bf16, f32, f64 or dynamic"));
}
catch (...)
{
FAIL() << "Deduced type check failed for unexpected reason";
}
}
TEST(type_prop, layer_norm_begin_norm_axis)
{
auto data = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto scale = make_shared<op::Parameter>(element::f32, Shape{4});
auto bias = make_shared<op::Parameter>(element::f32, Shape{4});
try
{
auto ln = make_shared<op::LayerNorm>(data, scale, bias, false, 2);
// Should have thrown, so fail if it didn't
FAIL() << "Incorrect begin norm axis";
}
catch (const NodeValidationFailure& error)
{
EXPECT_HAS_SUBSTRING(error.what(), std::string("begin_norm_axis is out of range"));
}
catch (...)
{
FAIL() << "Deduced type check failed for unexpected reason";
}
}
TEST(type_prop, layer_norm_affine_rank)
{
auto data = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto scale = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto bias = make_shared<op::Parameter>(element::f32, Shape{4});
try
{
auto ln = make_shared<op::LayerNorm>(data, scale, bias);
// Should have thrown, so fail if it didn't
FAIL() << "Incorrect affine ranks";
}
catch (const NodeValidationFailure& error)
{
EXPECT_HAS_SUBSTRING(error.what(), std::string("Scale and/or bias rank is incorrect"));
}
catch (...)
{
FAIL() << "Deduced type check failed for unexpected reason";
}
}
TEST(type_prop, layer_norm_bprop_element_type)
{
auto data = make_shared<op::Parameter>(element::i32, Shape{2, 4});
auto delta = make_shared<op::Parameter>(element::f32, Shape{2, 4});
try
{
auto lnb = make_shared<op::LayerNormBackprop>(data, delta);
// Should have thrown, so fail if it didn't
FAIL() << "Incorrect element type";
}
catch (const NodeValidationFailure& error)
{
EXPECT_HAS_SUBSTRING(
error.what(),
std::string("Argument element type must be f16, bf16, f32, f64 or dynamic"));
}
catch (...)
{
FAIL() << "Deduced type check failed for unexpected reason";
}
}
TEST(type_prop, layer_norm_bprop_begin_norm_axis)
{
auto data = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto delta = make_shared<op::Parameter>(element::f32, Shape{2, 4});
try
{
auto lnb = make_shared<op::LayerNormBackprop>(data, delta, 2);
// Should have thrown, so fail if it didn't
FAIL() << "Incorrect begin norm axis";
}
catch (const NodeValidationFailure& error)
{
EXPECT_HAS_SUBSTRING(error.what(), std::string("begin_norm_axis is out of range"));
}
catch (...)
{
FAIL() << "Deduced type check failed for unexpected reason";
}
}
TEST(type_prop, layer_norm_bprop_delta)
{
auto data = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto delta = make_shared<op::Parameter>(element::f32, Shape{4});
try
{
auto lnb = make_shared<op::LayerNormBackprop>(data, delta);
// Should have thrown, so fail if it didn't
FAIL() << "Incorrect delta rank";
}
catch (const NodeValidationFailure& error)
{
EXPECT_HAS_SUBSTRING(error.what(), std::string("Delta rank is incorrect"));
}
catch (...)
{
FAIL() << "Deduced type check failed for unexpected reason";
}
}
TEST(type_prop, layer_norm_bprop_stats)
{
auto data = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto delta = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto mean = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto variance = make_shared<op::Parameter>(element::f32, Shape{2});
try
{
auto lnb = make_shared<op::LayerNormBackprop>(data, delta, mean, variance);
// Should have thrown, so fail if it didn't
FAIL() << "Incorrect stats rank";
}
catch (const NodeValidationFailure& error)
{
EXPECT_HAS_SUBSTRING(error.what(), std::string("Mean and/or variance rank is incorrect"));
}
catch (...)
{
FAIL() << "Deduced type check failed for unexpected reason";
}
}
TEST(type_prop, layer_norm_bprop_affine)
{
auto data = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto delta = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto scale = make_shared<op::Parameter>(element::f32, Shape{2, 4});
try
{
auto lnb = make_shared<op::LayerNormBackprop>(data, delta, scale);
// Should have thrown, so fail if it didn't
FAIL() << "Incorrect affine rank";
}
catch (const NodeValidationFailure& error)
{
EXPECT_HAS_SUBSTRING(error.what(), std::string("Scale rank is incorrect"));
}
catch (...)
{
FAIL() << "Deduced type check failed for unexpected reason";
}
}