Commit 385770d8 authored by Sang Ik Lee's avatar Sang Ik Lee Committed by Scott Cyphers

LayerNorm (#3678)

* LayerNorm (#3630)

* Constructors.

Type prop.

Decompose LayerNorm.

Add serialize.

* Add dummy test case.

* Add dummy type prop test.

* Fix some build errors.

* Remove build errors.

* Update decompose for bprop.

* Change begin_norm_axis default value to 1.

* Style.

* Reorder class members.

* Add actual type prop tests.

* Add fprop test.

* Working on bprop test.

* Bprop tests.

* Allow flattened scale and bias.

* Add support for flattened scale and bias.

* Fix incorrect type_name.

* PlaidML: Decompose fused_op LayerNorm

* Update Backprop constructors.

* PlaidML: Add missing header file.

* Remove doc about removed param.

* Fix type prop tests.

* PlaidML: Disable unit test.

* Fix stats flattening axes bug.

* Upgrade description to type_info.
parent 40eb9587
...@@ -336,6 +336,8 @@ set (SRC ...@@ -336,6 +336,8 @@ set (SRC
op/fused/group_conv_transpose.cpp op/fused/group_conv_transpose.cpp
op/fused/gru_cell.cpp op/fused/gru_cell.cpp
op/fused/gru_cell.hpp op/fused/gru_cell.hpp
op/fused/layer_norm.cpp
op/fused/layer_norm.hpp
op/fused/lstm_cell.cpp op/fused/lstm_cell.cpp
op/fused/lstm_cell.hpp op/fused/lstm_cell.hpp
op/fused/matmul.cpp op/fused/matmul.cpp
......
...@@ -136,6 +136,7 @@ namespace ngraph ...@@ -136,6 +136,7 @@ namespace ngraph
#include "ngraph/op/fused/group_conv_transpose.hpp" #include "ngraph/op/fused/group_conv_transpose.hpp"
#include "ngraph/op/fused/gru_cell.hpp" #include "ngraph/op/fused/gru_cell.hpp"
#include "ngraph/op/fused/hard_sigmoid.hpp" #include "ngraph/op/fused/hard_sigmoid.hpp"
#include "ngraph/op/fused/layer_norm.hpp"
#include "ngraph/op/fused/lstm_cell.hpp" #include "ngraph/op/fused/lstm_cell.hpp"
#include "ngraph/op/fused/matmul.hpp" #include "ngraph/op/fused/matmul.hpp"
#include "ngraph/op/fused/mvn.hpp" #include "ngraph/op/fused/mvn.hpp"
......
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/op/op.hpp"
#include "ngraph/op/util/fused_op.hpp"
namespace ngraph
{
namespace op
{
/// \brief Layer Normalization
///
/// Fused operation: it derives from FusedOp and is lowered through decompose_op().
/// The input may be normalized alone or additionally combined with scale and bias
/// tensors, depending on which constructor is used.
class LayerNorm : public ngraph::op::util::FusedOp
{
public:
NGRAPH_API
static constexpr NodeTypeInfo type_info{"LayerNorm", 0};
const NodeTypeInfo& get_type_info() const override { return type_info; }
LayerNorm() = default;
/// \brief Constructs a LayerNorm operation with scale and bias.
///
/// \param data Input tensor
/// \param scale Scale tensor
/// \param bias Bias tensor
/// \param keep_stats Generate additional outputs mean and variance, default true
/// \param begin_norm_axis Axis where normalization starts, default 1
/// \param epsilon Small number to add for stability of rsqrt, default 1e-5
LayerNorm(const Output<Node>& data,
const Output<Node>& scale,
const Output<Node>& bias,
bool keep_stats = true,
int64_t begin_norm_axis = 1,
double epsilon = 1e-5);
/// \brief Constructs a LayerNorm operation without scale and bias inputs.
///
/// \param data Input tensor
/// \param keep_stats Generate additional outputs mean and variance, default true
/// \param begin_norm_axis Axis where normalization starts, default 1
/// \param epsilon Small number to add for stability of rsqrt, default 1e-5
LayerNorm(const Output<Node>& data,
bool keep_stats = true,
int64_t begin_norm_axis = 1,
double epsilon = 1e-5);
/// \brief Returns the nodes this fused op is decomposed into.
virtual NodeVector decompose_op() const override;
/// \brief Validation hook; the definition is not part of this header.
void pre_validate_and_infer_types() override;
/// \brief Creates a copy of this node using \p new_args as inputs.
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
/// \return true if mean and variance are produced as additional outputs
bool get_keep_stats() const { return m_keep_stats; }
/// \return the stored use_affine flag
/// NOTE(review): presumably true only for the scale/bias constructor -- confirm in the
/// implementation file.
bool get_use_affine() const { return m_use_affine; }
/// \return the epsilon added for numerical stability
double get_epsilon() const { return m_epsilon; }
/// \return the axis where normalization starts
int64_t get_begin_norm_axis() const { return m_begin_norm_axis; }
protected:
/// \brief Autodiff hook receiving the adjoints container and the output deltas.
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const NodeVector& deltas) override;
private:
// In-class defaults match the defaults of the constructor arguments.
bool m_keep_stats{true};
bool m_use_affine{true};
int64_t m_begin_norm_axis{1};
double m_epsilon{1e-5};
};
/// \brief Layer Normalization Backprop
///
/// Fused operation: it derives from FusedOp and is lowered through decompose_op().
/// Four constructors are provided, covering every combination of "mean/variance
/// supplied" and "scale supplied". In all of them data and delta come first.
class LayerNormBackprop : public ngraph::op::util::FusedOp
{
public:
NGRAPH_API
static constexpr NodeTypeInfo type_info{"LayerNormBackprop", 0};
const NodeTypeInfo& get_type_info() const override { return type_info; }
LayerNormBackprop() = default;
/// \brief Constructs a LayerNormBackprop operation with fprop statistics and scale.
///
/// \param data Input tensor
/// \param delta Delta tensor
/// \param mean Mean tensor from fprop
/// \param variance Variance tensor from fprop
/// \param scale Scale tensor
/// \param begin_norm_axis Axis where normalization starts, default 1
/// \param epsilon Small number to add for stability of rsqrt, default 1e-5
LayerNormBackprop(const Output<Node>& data,
const Output<Node>& delta,
const Output<Node>& mean,
const Output<Node>& variance,
const Output<Node>& scale,
int64_t begin_norm_axis = 1,
double epsilon = 1e-5);
/// \brief Constructs a LayerNormBackprop operation with fprop statistics, without scale.
///
/// \param data Input tensor
/// \param delta Delta tensor
/// \param mean Mean tensor from fprop
/// \param variance Variance tensor from fprop
/// \param begin_norm_axis Axis where normalization starts, default 1
/// \param epsilon Small number to add for stability of rsqrt, default 1e-5
LayerNormBackprop(const Output<Node>& data,
const Output<Node>& delta,
const Output<Node>& mean,
const Output<Node>& variance,
int64_t begin_norm_axis = 1,
double epsilon = 1e-5);
/// \brief Constructs a LayerNormBackprop operation with scale, without fprop statistics.
///
/// \param data Input tensor
/// \param delta Delta tensor
/// \param scale Scale tensor
/// \param begin_norm_axis Axis where normalization starts, default 1
/// \param epsilon Small number to add for stability of rsqrt, default 1e-5
LayerNormBackprop(const Output<Node>& data,
const Output<Node>& delta,
const Output<Node>& scale,
int64_t begin_norm_axis = 1,
double epsilon = 1e-5);
/// \brief Constructs a LayerNormBackprop operation from data and delta only.
///
/// \param data Input tensor
/// \param delta Delta tensor
/// \param begin_norm_axis Axis where normalization starts, default 1
/// \param epsilon Small number to add for stability of rsqrt, default 1e-5
LayerNormBackprop(const Output<Node>& data,
const Output<Node>& delta,
int64_t begin_norm_axis = 1,
double epsilon = 1e-5);
/// \brief Returns the nodes this fused op is decomposed into.
virtual NodeVector decompose_op() const override;
/// \brief Validation hook; the definition is not part of this header.
void pre_validate_and_infer_types() override;
/// \brief Creates a copy of this node using \p new_args as inputs.
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
/// \return the stored use_stats flag
/// NOTE(review): presumably true only when mean and variance inputs were supplied --
/// confirm in the implementation file.
bool get_use_stats() const { return m_use_stats; }
/// \return the stored use_affine flag
/// NOTE(review): presumably true only when a scale input was supplied -- confirm in the
/// implementation file.
bool get_use_affine() const { return m_use_affine; }
/// \return the epsilon added for numerical stability
double get_epsilon() const { return m_epsilon; }
/// \return the axis where normalization starts
int64_t get_begin_norm_axis() const { return m_begin_norm_axis; }
private:
// In-class defaults match the defaults of the constructor arguments.
bool m_use_stats{true};
bool m_use_affine{true};
int64_t m_begin_norm_axis{1};
double m_epsilon{1e-5};
};
}
}
...@@ -37,6 +37,8 @@ NGRAPH_OP(GroupConvolution, ngraph::op) ...@@ -37,6 +37,8 @@ NGRAPH_OP(GroupConvolution, ngraph::op)
NGRAPH_OP(GroupConvolutionTranspose, ngraph::op) NGRAPH_OP(GroupConvolutionTranspose, ngraph::op)
NGRAPH_OP(GRUCell, ngraph::op) NGRAPH_OP(GRUCell, ngraph::op)
NGRAPH_OP(HardSigmoid, ngraph::op) NGRAPH_OP(HardSigmoid, ngraph::op)
NGRAPH_OP(LayerNorm, ngraph::op)
NGRAPH_OP(LayerNormBackprop, ngraph::op)
NGRAPH_OP(LSTMCell, ngraph::op) NGRAPH_OP(LSTMCell, ngraph::op)
NGRAPH_OP(MatMul, ngraph::op) NGRAPH_OP(MatMul, ngraph::op)
NGRAPH_OP(MVN, ngraph::op) NGRAPH_OP(MVN, ngraph::op)
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "ngraph/graph_util.hpp" #include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp" #include "ngraph/log.hpp"
#include "ngraph/op/fused/group_conv.hpp" #include "ngraph/op/fused/group_conv.hpp"
#include "ngraph/op/fused/layer_norm.hpp"
#include "ngraph/pass/algebraic_simplification.hpp" #include "ngraph/pass/algebraic_simplification.hpp"
#include "ngraph/pass/core_fusion.hpp" #include "ngraph/pass/core_fusion.hpp"
#include "ngraph/pass/cse.hpp" #include "ngraph/pass/cse.hpp"
...@@ -90,7 +91,13 @@ std::shared_ptr<ngraph::runtime::plaidml::PlaidML_Executable> ...@@ -90,7 +91,13 @@ std::shared_ptr<ngraph::runtime::plaidml::PlaidML_Executable>
// We apply the same general-purposes passes as the CPU backend. // We apply the same general-purposes passes as the CPU backend.
pass_manager.register_pass<ngraph::pass::FusedOpDecomposition>([](const Node& node) -> bool { pass_manager.register_pass<ngraph::pass::FusedOpDecomposition>([](const Node& node) -> bool {
if (node.description() == ngraph::op::GroupConvolution().description()) if (node.description() == ngraph::op::GroupConvolution().description())
{
return true; return true;
}
else if (node.description() == ngraph::op::LayerNorm().description())
{
return true;
}
return false; return false;
}); });
pass_manager.register_pass<ngraph::pass::LikeReplacement>(); pass_manager.register_pass<ngraph::pass::LikeReplacement>();
......
...@@ -282,5 +282,10 @@ random_uniform_seed_use_dynamic ...@@ -282,5 +282,10 @@ random_uniform_seed_use_dynamic
random_uniform_all_static_range_dynamic random_uniform_all_static_range_dynamic
random_uniform_dynamic_shapes random_uniform_dynamic_shapes
# Fused op test fails on mac
layer_norm_affine_stats
layer_norm_bprop_affine_stats
layer_norm_bprop_affine
# shapes with zeros dimensions like (5, 0, 5) not supported in PlaidML backend # shapes with zeros dimensions like (5, 0, 5) not supported in PlaidML backend
dyn_replace_slice dyn_replace_slice
...@@ -80,6 +80,7 @@ ...@@ -80,6 +80,7 @@
#include "ngraph/op/fused/group_conv_transpose.hpp" #include "ngraph/op/fused/group_conv_transpose.hpp"
#include "ngraph/op/fused/gru_cell.hpp" #include "ngraph/op/fused/gru_cell.hpp"
#include "ngraph/op/fused/hard_sigmoid.hpp" #include "ngraph/op/fused/hard_sigmoid.hpp"
#include "ngraph/op/fused/layer_norm.hpp"
#include "ngraph/op/fused/lstm_cell.hpp" #include "ngraph/op/fused/lstm_cell.hpp"
#include "ngraph/op/fused/matmul.hpp" #include "ngraph/op/fused/matmul.hpp"
#include "ngraph/op/fused/mvn.hpp" #include "ngraph/op/fused/mvn.hpp"
...@@ -1404,7 +1405,51 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js) ...@@ -1404,7 +1405,51 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
node = make_shared<op::HardSigmoid>(args[0], alpha, beta); node = make_shared<op::HardSigmoid>(args[0], alpha, beta);
break; break;
} }
case OP_TYPEID::LayerNorm:
{
auto keep_stats = node_js.at("keep_stats").get<bool>();
auto use_affine = node_js.at("use_affine").get<bool>();
auto epsilon = node_js.at("epsilon").get<double>();
auto begin_norm_axis = node_js.at("begin_norm_axis").get<int64_t>();
if (use_affine)
{
node = make_shared<op::LayerNorm>(
args[0], args[1], args[2], keep_stats, begin_norm_axis, epsilon);
}
else
{
node = make_shared<op::LayerNorm>(args[0], keep_stats, begin_norm_axis, epsilon);
}
break;
}
case OP_TYPEID::LayerNormBackprop:
{
auto use_stats = node_js.at("use_stats").get<bool>();
auto use_affine = node_js.at("use_affine").get<bool>();
auto epsilon = node_js.at("epsilon").get<double>();
auto begin_norm_axis = node_js.at("begin_norm_axis").get<int64_t>();
if (use_stats && use_affine)
{
node = make_shared<op::LayerNormBackprop>(
args[0], args[1], args[2], args[3], args[4], begin_norm_axis, epsilon);
}
else if (use_stats)
{
node = make_shared<op::LayerNormBackprop>(
args[0], args[1], args[2], args[3], begin_norm_axis, epsilon);
}
else if (use_affine)
{
node = make_shared<op::LayerNormBackprop>(
args[0], args[1], args[2], begin_norm_axis, epsilon);
}
else
{
node =
make_shared<op::LayerNormBackprop>(args[0], args[1], begin_norm_axis, epsilon);
}
break;
}
case OP_TYPEID::Less: case OP_TYPEID::Less:
{ {
node = make_shared<op::Less>( node = make_shared<op::Less>(
...@@ -2687,6 +2732,24 @@ json JSONSerializer::serialize_node(const Node& n) ...@@ -2687,6 +2732,24 @@ json JSONSerializer::serialize_node(const Node& n)
node["beta"] = tmp->get_beta(); node["beta"] = tmp->get_beta();
break; break;
} }
case OP_TYPEID::LayerNorm:
{
auto tmp = dynamic_cast<const op::LayerNorm*>(&n);
node["keep_stats"] = tmp->get_keep_stats();
node["use_affine"] = tmp->get_use_affine();
node["epsilon"] = tmp->get_epsilon();
node["begin_norm_axis"] = tmp->get_begin_norm_axis();
break;
}
case OP_TYPEID::LayerNormBackprop:
{
auto tmp = dynamic_cast<const op::LayerNormBackprop*>(&n);
node["use_stats"] = tmp->get_use_stats();
node["use_affine"] = tmp->get_use_affine();
node["epsilon"] = tmp->get_epsilon();
node["begin_norm_axis"] = tmp->get_begin_norm_axis();
break;
}
case OP_TYPEID::Less: case OP_TYPEID::Less:
{ {
auto tmp = dynamic_cast<const op::Less*>(&n); auto tmp = dynamic_cast<const op::Less*>(&n);
......
...@@ -124,6 +124,7 @@ set(SRC ...@@ -124,6 +124,7 @@ set(SRC
type_prop/gru_cell.cpp type_prop/gru_cell.cpp
type_prop/hard_sigmoid.cpp type_prop/hard_sigmoid.cpp
type_prop/index_reduction.cpp type_prop/index_reduction.cpp
type_prop/layer_norm.cpp
type_prop/lrn.cpp type_prop/lrn.cpp
type_prop/lstm_cell.cpp type_prop/lstm_cell.cpp
type_prop/matmul.cpp type_prop/matmul.cpp
...@@ -272,6 +273,7 @@ set(MULTI_TEST_SRC ...@@ -272,6 +273,7 @@ set(MULTI_TEST_SRC
backend/gather.in.cpp backend/gather.in.cpp
backend/gelu.in.cpp backend/gelu.in.cpp
backend/generate_mask.in.cpp backend/generate_mask.in.cpp
backend/layer_norm.in.cpp
backend/log.in.cpp backend/log.in.cpp
backend/logical_and.in.cpp backend/logical_and.in.cpp
backend/logical_or.in.cpp backend/logical_or.in.cpp
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <cstdlib>
#include <random>
#include <string>
// clang-format off
#ifdef ${BACKEND_NAME}_FLOAT_TOLERANCE_BITS
#define DEFAULT_FLOAT_TOLERANCE_BITS ${BACKEND_NAME}_FLOAT_TOLERANCE_BITS
#endif
#ifdef ${BACKEND_NAME}_DOUBLE_TOLERANCE_BITS
#define DEFAULT_DOUBLE_TOLERANCE_BITS ${BACKEND_NAME}_DOUBLE_TOLERANCE_BITS
#endif
// clang-format on
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "util/all_close.hpp"
#include "util/all_close_f.hpp"
#include "util/autodiff/numeric_compare.hpp"
#include "util/ndarray.hpp"
#include "util/test_control.hpp"
#include "util/test_tools.hpp"
using namespace std;
using namespace ngraph;
static string s_manifest = "${MANIFEST}";
NGRAPH_TEST(${BACKEND_NAME}, layer_norm_affine_stats)
{
    // Forward LayerNorm with scale, bias and the default keep_stats: the op is
    // expected to yield the normalized data plus one mean/variance pair per row.
    const Shape data_shape{2, 4};
    const Shape affine_shape{4};
    const Shape stats_shape{2};

    // Reference results (manually computed).
    const vector<float> expected_norm{-2.658364534378051758f,
                                      -3.447211742401123047f,
                                      -1.105576276779174805f,
                                      3.024906158447265625f,
                                      -2.658364534378051758f,
                                      -3.447211742401123047f,
                                      -1.105576276779174805f,
                                      3.024906158447265625f};
    const vector<float> expected_mean{-2.5f, 1.5f};
    const vector<float> expected_var{1.25f, 1.25f};

    // Graph under test.
    auto data_param = make_shared<op::Parameter>(element::f32, data_shape);
    auto scale_param = make_shared<op::Parameter>(element::f32, affine_shape);
    auto bias_param = make_shared<op::Parameter>(element::f32, affine_shape);
    auto layer_norm = make_shared<op::LayerNorm>(data_param, scale_param, bias_param);
    auto func = make_shared<Function>(layer_norm->outputs(),
                                      ParameterVector{data_param, scale_param, bias_param});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Input tensors and their contents.
    auto data_tensor = backend->create_tensor(element::f32, data_shape);
    auto scale_tensor = backend->create_tensor(element::f32, affine_shape);
    auto bias_tensor = backend->create_tensor(element::f32, affine_shape);
    copy_data(data_tensor, vector<float>{-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f});
    copy_data(scale_tensor, vector<float>{-1.0f, 1.0f, 2.0f, 3.0f});
    copy_data(bias_tensor, vector<float>{-4.0f, -3.0f, -2.0f, -1.0f});

    // Output tensors.
    auto norm_tensor = backend->create_tensor(element::f32, data_shape);
    auto mean_tensor = backend->create_tensor(element::f32, stats_shape);
    auto var_tensor = backend->create_tensor(element::f32, stats_shape);

    // Run and compare against the reference.
    auto executable = backend->compile(func);
    executable->call_with_validate({norm_tensor, mean_tensor, var_tensor},
                                   {data_tensor, scale_tensor, bias_tensor});

    EXPECT_TRUE(test::all_close_f(expected_norm, read_vector<float>(norm_tensor)));
    EXPECT_TRUE(test::all_close_f(expected_mean, read_vector<float>(mean_tensor)));
    EXPECT_TRUE(test::all_close_f(expected_var, read_vector<float>(var_tensor)));
}
NGRAPH_TEST(${BACKEND_NAME}, layer_norm_bprop_affine_stats)
{
    // Backprop with scale and with the forward-pass mean/variance supplied as inputs.
    const Shape data_shape{2, 4};
    const Shape stats_shape{2};
    const Shape affine_shape{4};

    // Reference results (manually computed).
    const vector<float> expected_d_data{-0.1341624855995178223f,
                                        -0.04472083225846290588f,
                                        0.4919326305389404297f,
                                        -0.31304931640625f,
                                        -0.1341624855995178223f,
                                        -0.04472083225846290588f,
                                        0.4919326305389404297f,
                                        -0.31304931640625f};
    const vector<float> expected_d_scale{-0.2683270871639251709f,
                                         0.08944236487150192261f,
                                         0.1788847297430038452f,
                                         -0.5366541743278503418f};
    const vector<float> expected_d_bias{0.2f, -0.2f, 0.4f, -0.4f};

    // Graph under test.
    auto data_param = make_shared<op::Parameter>(element::f32, data_shape);
    auto delta_param = make_shared<op::Parameter>(element::f32, data_shape);
    auto mean_param = make_shared<op::Parameter>(element::f32, stats_shape);
    auto var_param = make_shared<op::Parameter>(element::f32, stats_shape);
    auto scale_param = make_shared<op::Parameter>(element::f32, affine_shape);
    auto backprop = make_shared<op::LayerNormBackprop>(
        data_param, delta_param, mean_param, var_param, scale_param);
    auto func = make_shared<Function>(
        backprop->outputs(),
        ParameterVector{data_param, delta_param, mean_param, var_param, scale_param});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Input tensors and their contents.
    auto data_tensor = backend->create_tensor(element::f32, data_shape);
    auto delta_tensor = backend->create_tensor(element::f32, data_shape);
    auto mean_tensor = backend->create_tensor(element::f32, stats_shape);
    auto var_tensor = backend->create_tensor(element::f32, stats_shape);
    auto scale_tensor = backend->create_tensor(element::f32, affine_shape);
    copy_data(data_tensor, vector<float>{-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f});
    copy_data(delta_tensor, vector<float>{0.1f, -0.1f, 0.2f, -0.2f, 0.1f, -0.1f, 0.2f, -0.2f});
    copy_data(scale_tensor, vector<float>{-1.0f, 1.0f, 2.0f, 3.0f});
    copy_data(mean_tensor, vector<float>{-2.5f, 1.5f});
    copy_data(var_tensor, vector<float>{1.25f, 1.25f});

    // Output tensors: gradients w.r.t. data, scale and bias.
    auto d_data_tensor = backend->create_tensor(element::f32, data_shape);
    auto d_scale_tensor = backend->create_tensor(element::f32, affine_shape);
    auto d_bias_tensor = backend->create_tensor(element::f32, affine_shape);

    // Run and compare against the reference.
    auto executable = backend->compile(func);
    executable->call_with_validate(
        {d_data_tensor, d_scale_tensor, d_bias_tensor},
        {data_tensor, delta_tensor, mean_tensor, var_tensor, scale_tensor});

    EXPECT_TRUE(test::all_close_f(expected_d_data, read_vector<float>(d_data_tensor)));
    EXPECT_TRUE(test::all_close_f(expected_d_scale, read_vector<float>(d_scale_tensor)));
    EXPECT_TRUE(test::all_close_f(expected_d_bias, read_vector<float>(d_bias_tensor)));
}
NGRAPH_TEST(${BACKEND_NAME}, layer_norm_bprop_affine)
{
    // Backprop with scale but without mean/variance inputs. The reference values
    // are the same as in the variant that is given the statistics explicitly.
    const Shape data_shape{2, 4};
    const Shape affine_shape{4};

    // Reference results (manually computed).
    const vector<float> expected_d_data{-0.1341624855995178223f,
                                        -0.04472083225846290588f,
                                        0.4919326305389404297f,
                                        -0.31304931640625f,
                                        -0.1341624855995178223f,
                                        -0.04472083225846290588f,
                                        0.4919326305389404297f,
                                        -0.31304931640625f};
    const vector<float> expected_d_scale{-0.2683270871639251709f,
                                         0.08944236487150192261f,
                                         0.1788847297430038452f,
                                         -0.5366541743278503418f};
    const vector<float> expected_d_bias{0.2f, -0.2f, 0.4f, -0.4f};

    // Graph under test.
    auto data_param = make_shared<op::Parameter>(element::f32, data_shape);
    auto delta_param = make_shared<op::Parameter>(element::f32, data_shape);
    auto scale_param = make_shared<op::Parameter>(element::f32, affine_shape);
    auto backprop = make_shared<op::LayerNormBackprop>(data_param, delta_param, scale_param);
    auto func = make_shared<Function>(backprop->outputs(),
                                      ParameterVector{data_param, delta_param, scale_param});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Input tensors and their contents.
    auto data_tensor = backend->create_tensor(element::f32, data_shape);
    auto delta_tensor = backend->create_tensor(element::f32, data_shape);
    auto scale_tensor = backend->create_tensor(element::f32, affine_shape);
    copy_data(data_tensor, vector<float>{-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f});
    copy_data(delta_tensor, vector<float>{0.1f, -0.1f, 0.2f, -0.2f, 0.1f, -0.1f, 0.2f, -0.2f});
    copy_data(scale_tensor, vector<float>{-1.0f, 1.0f, 2.0f, 3.0f});

    // Output tensors: gradients w.r.t. data, scale and bias.
    auto d_data_tensor = backend->create_tensor(element::f32, data_shape);
    auto d_scale_tensor = backend->create_tensor(element::f32, affine_shape);
    auto d_bias_tensor = backend->create_tensor(element::f32, affine_shape);

    // Run and compare against the reference.
    auto executable = backend->compile(func);
    executable->call_with_validate({d_data_tensor, d_scale_tensor, d_bias_tensor},
                                   {data_tensor, delta_tensor, scale_tensor});

    EXPECT_TRUE(test::all_close_f(expected_d_data, read_vector<float>(d_data_tensor)));
    EXPECT_TRUE(test::all_close_f(expected_d_scale, read_vector<float>(d_scale_tensor)));
    EXPECT_TRUE(test::all_close_f(expected_d_bias, read_vector<float>(d_bias_tensor)));
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "util/type_prop.hpp"
using namespace std;
using namespace ngraph;
TEST(type_prop, layer_norm_element_type)
{
    // Integer data must be rejected; scale and bias are well-formed f32 tensors.
    const std::string expected_message{
        "Argument element type must be f16, bf16, f32, f64 or dynamic"};
    const Shape data_shape{2, 4};
    const Shape affine_shape{4};

    auto int_data = make_shared<op::Parameter>(element::i32, data_shape);
    auto scale = make_shared<op::Parameter>(element::f32, affine_shape);
    auto bias = make_shared<op::Parameter>(element::f32, affine_shape);

    try
    {
        auto node = make_shared<op::LayerNorm>(int_data, scale, bias);
        // Construction must throw; reaching this line is a failure.
        FAIL() << "Incorrect element type";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(), expected_message);
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
TEST(type_prop, layer_norm_begin_norm_axis)
{
    // begin_norm_axis == 2 must be rejected for a rank-2 data tensor.
    const std::string expected_message{"begin_norm_axis is out of range"};
    const Shape data_shape{2, 4};
    const Shape affine_shape{4};

    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    auto scale = make_shared<op::Parameter>(element::f32, affine_shape);
    auto bias = make_shared<op::Parameter>(element::f32, affine_shape);

    try
    {
        auto node = make_shared<op::LayerNorm>(data, scale, bias, false, 2);
        // Construction must throw; reaching this line is a failure.
        FAIL() << "Incorrect begin norm axis";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(), expected_message);
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
TEST(type_prop, layer_norm_affine_rank)
{
    // A scale tensor shaped like the full data tensor (with the default
    // begin_norm_axis) must be rejected; bias keeps the valid shape.
    const std::string expected_message{"Scale and/or bias rank is incorrect"};
    const Shape data_shape{2, 4};

    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    auto bad_scale = make_shared<op::Parameter>(element::f32, data_shape);
    auto bias = make_shared<op::Parameter>(element::f32, Shape{4});

    try
    {
        auto node = make_shared<op::LayerNorm>(data, bad_scale, bias);
        // Construction must throw; reaching this line is a failure.
        FAIL() << "Incorrect affine ranks";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(), expected_message);
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
TEST(type_prop, layer_norm_bprop_element_type)
{
    // Integer data must be rejected by the backprop op as well.
    const std::string expected_message{
        "Argument element type must be f16, bf16, f32, f64 or dynamic"};
    const Shape data_shape{2, 4};

    auto int_data = make_shared<op::Parameter>(element::i32, data_shape);
    auto delta = make_shared<op::Parameter>(element::f32, data_shape);

    try
    {
        auto node = make_shared<op::LayerNormBackprop>(int_data, delta);
        // Construction must throw; reaching this line is a failure.
        FAIL() << "Incorrect element type";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(), expected_message);
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
TEST(type_prop, layer_norm_bprop_begin_norm_axis)
{
    // begin_norm_axis == 2 must be rejected for a rank-2 data tensor.
    const std::string expected_message{"begin_norm_axis is out of range"};
    const Shape data_shape{2, 4};

    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    auto delta = make_shared<op::Parameter>(element::f32, data_shape);

    try
    {
        auto node = make_shared<op::LayerNormBackprop>(data, delta, 2);
        // Construction must throw; reaching this line is a failure.
        FAIL() << "Incorrect begin norm axis";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(), expected_message);
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
TEST(type_prop, layer_norm_bprop_delta)
{
    // A rank-1 delta paired with rank-2 data must be rejected.
    const std::string expected_message{"Delta rank is incorrect"};

    auto data = make_shared<op::Parameter>(element::f32, Shape{2, 4});
    auto bad_delta = make_shared<op::Parameter>(element::f32, Shape{4});

    try
    {
        auto node = make_shared<op::LayerNormBackprop>(data, bad_delta);
        // Construction must throw; reaching this line is a failure.
        FAIL() << "Incorrect delta rank";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(), expected_message);
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
TEST(type_prop, layer_norm_bprop_stats)
{
    // A mean tensor shaped like the full data tensor must be rejected; the
    // variance tensor keeps the valid shape.
    const std::string expected_message{"Mean and/or variance rank is incorrect"};
    const Shape data_shape{2, 4};

    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    auto delta = make_shared<op::Parameter>(element::f32, data_shape);
    auto bad_mean = make_shared<op::Parameter>(element::f32, data_shape);
    auto variance = make_shared<op::Parameter>(element::f32, Shape{2});

    try
    {
        auto node = make_shared<op::LayerNormBackprop>(data, delta, bad_mean, variance);
        // Construction must throw; reaching this line is a failure.
        FAIL() << "Incorrect stats rank";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(), expected_message);
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
TEST(type_prop, layer_norm_bprop_affine)
{
    // A scale tensor shaped like the full data tensor must be rejected.
    const std::string expected_message{"Scale rank is incorrect"};
    const Shape data_shape{2, 4};

    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    auto delta = make_shared<op::Parameter>(element::f32, data_shape);
    auto bad_scale = make_shared<op::Parameter>(element::f32, data_shape);

    try
    {
        auto node = make_shared<op::LayerNormBackprop>(data, delta, bad_scale);
        // Construction must throw; reaching this line is a failure.
        FAIL() << "Incorrect affine rank";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(), expected_message);
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment