Commit a1a8a7e3 authored by Pruthvi's avatar Pruthvi Committed by Scott Cyphers

Implementation of CrossEntropy and CrossEntropyBackprop as fused ops (#3818)

* - Implementation of CrossEntropy and CrossEntropyBackprop as fused ops

* - unit test case for CE fprop
- fix bug in decompose_op

* WIP debug PDPD unit test failure

* fixed broadcasting issue

* - fix broadcast issue for multi-dim tensor

* utilities to restore the original tensor shape

* i) style-fix ii) rename variables

* - unit test for multiple dimensions ii) refactor create_mask into a separate function

* - fixed unit tests

* fix style

* set output element type to dynamic in pre_validate and infer shape

* disable ce with one hot unit test on PlaidML

* add CE op to fused_op_tbl

* - add serializer support for CE and CE Backprop
parent 19e2434a
......@@ -337,6 +337,8 @@ set (SRC
op/fused/clamp.hpp
op/fused/conv_fused.cpp
op/fused/conv_fused.hpp
op/fused/crossentropy.cpp
op/fused/crossentropy.hpp
op/fused/hard_sigmoid.cpp
op/fused/hard_sigmoid.hpp
op/fused/depth_to_space.cpp
......
......@@ -133,6 +133,7 @@ namespace ngraph
#include "ngraph/op/floor_mod.hpp"
#include "ngraph/op/fused/clamp.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/fused/crossentropy.hpp"
#include "ngraph/op/fused/depth_to_space.hpp"
#include "ngraph/op/fused/elu.hpp"
#include "ngraph/op/fused/fake_quantize.hpp"
......
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/op/op.hpp"
#include "ngraph/op/util/fused_op.hpp"
namespace ngraph
{
    namespace op
    {
        /// \brief Fused op computing the cross-entropy loss between an input
        ///        distribution and ground-truth labels.
        class CrossEntropy : public ngraph::op::util::FusedOp
        {
        public:
            NGRAPH_API
            static constexpr NodeTypeInfo type_info{"CrossEntropy", 0};
            const NodeTypeInfo& get_type_info() const override { return type_info; }
            CrossEntropy() = default;
            /// \brief CrossEntropy for computing loss
            /// \param arg1 Node that produces the input tensor
            /// \param arg2 Node that produces ground truth labels for the input
            /// \param soft_label flag indicating whether to interpret the given labels as soft
            /// labels
            /// \param ignore_index Specifies a target value that is ignored and does not contribute
            /// to the input gradient. Only valid if soft_label is set to false
            CrossEntropy(const Output<Node>& arg1,
                         const Output<Node>& arg2,
                         bool soft_label = false,
                         int64_t ignore_index = -100);

            virtual NodeVector decompose_op() const override;

            void pre_validate_and_infer_types() override;

            virtual std::shared_ptr<Node>
                copy_with_new_args(const NodeVector& new_args) const override;

            // Accessors for the attributes captured at construction time
            // (used by the serializer and by decompose_op).
            bool get_soft_label() const { return m_soft_label; }
            int64_t get_ignore_index() const { return m_ignore_index; }
        private:
            bool m_soft_label;
            int64_t m_ignore_index;
        };

        /// \brief Fused op computing the gradient of CrossEntropy with respect
        ///        to its input.
        class CrossEntropyBackprop : public util::FusedOp
        {
        public:
            NGRAPH_API
            static constexpr NodeTypeInfo type_info{"CrossEntropyBackprop", 0};
            const NodeTypeInfo& get_type_info() const override { return type_info; }
            CrossEntropyBackprop() = default;
            /// \brief Backprop for CrossEntropy
            /// \param input Node that produces tensor from the fprop
            /// \param labels Node that produces ground truth labels for input
            /// \param delta Node that produces the delta during bprop
            /// \param soft_label flag indicating whether to interpret the given labels as soft
            /// labels
            /// \param ignore_index Specifies a target value that is ignored and does not contribute
            /// to the input gradient. Only valid if soft_label is set to false
            CrossEntropyBackprop(const Output<Node>& input,
                                 const Output<Node>& labels,
                                 const Output<Node>& delta,
                                 bool soft_label = false,
                                 int64_t ignore_index = -100);

            virtual NodeVector decompose_op() const override;

            void pre_validate_and_infer_types() override;

            virtual std::shared_ptr<Node>
                copy_with_new_args(const NodeVector& new_args) const override;

            // Accessors for the attributes captured at construction time
            // (used by the serializer and by decompose_op).
            bool get_soft_label() const { return m_soft_label; }
            int64_t get_ignore_index() const { return m_ignore_index; }
        private:
            bool m_soft_label;
            int64_t m_ignore_index;
        };
    } // namespace op
} // namespace ngraph
......@@ -26,6 +26,8 @@ NGRAPH_OP(Clamp, ngraph::op)
NGRAPH_OP(ConvolutionBias, ngraph::op)
NGRAPH_OP(ConvolutionBiasAdd, ngraph::op)
NGRAPH_OP(ConvolutionBiasBackpropFiltersBias, ngraph::op)
NGRAPH_OP(CrossEntropy, ngraph::op)
NGRAPH_OP(CrossEntropyBackprop, ngraph::op)
NGRAPH_OP(DepthToSpace, ngraph::op)
NGRAPH_OP(Elu, ngraph::op)
NGRAPH_OP(FakeQuantize, ngraph::op)
......
......@@ -147,6 +147,7 @@ pad_reflect_1d_multi_reflect
pad_reflect_2d
pad_reflect_2d_with_neg
pad_symmetric
cross_entropy_with_one_hot
# No double precision FP support in PlaidML
sum_trivial_in_double
......
......@@ -73,6 +73,7 @@
#include "ngraph/op/floor_mod.hpp"
#include "ngraph/op/fused/clamp.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/fused/crossentropy.hpp"
#include "ngraph/op/fused/depth_to_space.hpp"
#include "ngraph/op/fused/elu.hpp"
#include "ngraph/op/fused/fake_quantize.hpp"
......@@ -1390,6 +1391,21 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
node = make_shared<op::Cosh>(args[0]);
break;
}
case OP_TYPEID::CrossEntropy:
{
auto soft_label = node_js.at("soft_label");
auto ignore_index = node_js.at("ignore_index");
node = make_shared<op::CrossEntropy>(args[0], args[1], soft_label, ignore_index);
break;
}
case OP_TYPEID::CrossEntropyBackprop:
{
auto soft_label = node_js.at("soft_label");
auto ignore_index = node_js.at("ignore_index");
node = make_shared<op::CrossEntropyBackprop>(
args[0], args[1], args[2], soft_label, ignore_index);
break;
}
case OP_TYPEID::DepthToSpace:
{
auto mode = node_js.at("mode").get<op::DepthToSpace::DepthToSpaceMode>();
......@@ -3279,6 +3295,20 @@ json JSONSerializer::serialize_node(const Node& n)
}
case OP_TYPEID::Cosh: { break;
}
case OP_TYPEID::CrossEntropy:
{
auto tmp = static_cast<const op::CrossEntropy*>(&n);
node["soft_label"] = tmp->get_soft_label();
node["ignore_index"] = tmp->get_ignore_index();
break;
}
case OP_TYPEID::CrossEntropyBackprop:
{
auto tmp = static_cast<const op::CrossEntropyBackprop*>(&n);
node["soft_label"] = tmp->get_soft_label();
node["ignore_index"] = tmp->get_ignore_index();
break;
}
case OP_TYPEID::Dequantize:
{
auto tmp = static_cast<const op::Dequantize*>(&n);
......
......@@ -2548,3 +2548,47 @@ NGRAPH_TEST(${BACKEND_NAME}, gru_cell_activation_function)
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, cross_entropy_with_soft_labels)
{
    // CrossEntropy fprop with soft_label = true: labels have the same shape
    // as the input and are used directly as a probability distribution.
    const Shape data_shape{2, 4};
    const Shape label_shape{2, 4};
    auto data_param = make_shared<op::Parameter>(element::f32, data_shape);
    auto label_param = make_shared<op::Parameter>(element::i32, label_shape);
    auto ce_node = make_shared<op::CrossEntropy>(data_param, label_param, true);
    auto func =
        make_shared<Function>(NodeVector{ce_node}, ParameterVector{data_param, label_param});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Input/label/output tensors.
    auto data_tv = backend->create_tensor(element::f32, data_shape);
    copy_data(data_tv, vector<float>{0.25f, 0.25f, 0.25f, 0.25f, 0.01f, 0.01f, 0.01f, 0.96f});
    auto label_tv = backend->create_tensor(element::i32, label_shape);
    copy_data(label_tv, vector<int32_t>{0, 0, 0, 1, 0, 0, 0, 1});
    auto result_tv = backend->create_tensor(element::f32, Shape{2, 1});

    auto exec = backend->compile(func);
    exec->call_with_validate({result_tv}, {data_tv, label_tv});

    // Per-row loss: -sum(label * log(input)), e.g. -log(0.25) = 1.38629.
    const vector<float> expected{1.38629f, 0.040822f};
    EXPECT_TRUE(test::all_close_f(read_vector<float>(result_tv), expected, 23));
}
NGRAPH_TEST(${BACKEND_NAME}, cross_entropy_with_one_hot)
{
    // CrossEntropy fprop with soft_label = false: labels hold class indices
    // (trailing dim 1) and are one-hot encoded inside the decomposition.
    const Shape data_shape{2, 4};
    const Shape label_shape{2, 1};
    auto data_param = make_shared<op::Parameter>(element::f32, data_shape);
    auto label_param = make_shared<op::Parameter>(element::i32, label_shape);
    auto ce_node = make_shared<op::CrossEntropy>(data_param, label_param, false);
    auto func =
        make_shared<Function>(NodeVector{ce_node}, ParameterVector{data_param, label_param});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Input/label/output tensors.
    auto data_tv = backend->create_tensor(element::f32, data_shape);
    copy_data(data_tv, vector<float>{0.25f, 0.25f, 0.25f, 0.25f, 0.01f, 0.01f, 0.01f, 0.96f});
    auto label_tv = backend->create_tensor(element::i32, label_shape);
    copy_data(label_tv, vector<int32_t>{1, 1});
    auto result_tv = backend->create_tensor(element::f32, Shape{2, 1});

    auto exec = backend->compile(func);
    exec->call_with_validate({result_tv}, {data_tv, label_tv});

    // Per-row loss: -log(input[row][label]), e.g. -log(0.01) = 4.60517.
    const vector<float> expected{1.38629f, 4.60517f};
    EXPECT_TRUE(test::all_close_f(read_vector<float>(result_tv), expected, 23));
}
......@@ -853,3 +853,42 @@ TEST(core_fusion, softmax_crossentropy)
test_softmax_crossentropy(Shape{41, 37}, Shape{41, 37}, true, -1);
test_softmax_crossentropy(Shape{41, 37}, Shape{41, 1}, false, 5);
}
// Builds a CrossEntropy graph, executes it on the CPU backend with random
// inputs, and verifies that the fused op's decomposition introduced the
// expected helper ops (OneHot for hard labels, NotEqual mask for ignore_index).
void test_crossentropy(Shape input_shape, Shape label_shape, bool soft_label, int64_t ignore_index)
{
    auto data_param = std::make_shared<op::Parameter>(element::f64, input_shape);
    auto label_param = std::make_shared<op::Parameter>(element::i64, label_shape);
    auto ce =
        std::make_shared<op::CrossEntropy>(data_param, label_param, soft_label, ignore_index);
    auto cpu_f = make_shared<Function>(ce, ParameterVector{data_param, label_param});

    // Fill every parameter with uniform random values in [-1, 1].
    test::Uniform<double> rng(-1.0, 1.0);
    vector<vector<double>> args;
    for (const shared_ptr<op::Parameter>& param : cpu_f->get_parameters())
    {
        vector<double> values(shape_size(param->get_shape()));
        rng.initialize(values);
        args.push_back(values);
    }
    auto cpu_results = execute(cpu_f, args, "CPU");

    // With hard (non-soft) labels the decomposition one-hot encodes them.
    if (!soft_label)
    {
        ASSERT_EQ(count_ops_of_type<op::OneHot>(cpu_f), 1);
    }
    // A non-negative ignore_index (hard labels only) adds a NotEqual mask.
    if (!soft_label && ignore_index >= 0)
    {
        ASSERT_EQ(count_ops_of_type<op::NotEqual>(cpu_f), 1);
    }
}
// Exercises test_crossentropy over soft/hard labels and 2-D/4-D shapes.
TEST(core_fusion, crossentropy)
{
    // soft labels: label shape matches input shape; ignore_index unused (-1)
    test_crossentropy(Shape{41, 37}, Shape{41, 37}, true, -1);
    // hard labels: trailing label dim is 1; ignore_index = 5 enables masking
    test_crossentropy(Shape{41, 37}, Shape{41, 1}, false, 5);
    // multi-dimensional variants of the two cases above
    test_crossentropy(Shape{10, 2, 4, 10}, Shape{10, 2, 4, 1}, false, 5);
    test_crossentropy(Shape{4, 3, 2, 4}, Shape{4, 3, 2, 4}, true, -1);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment