Commit a1a8a7e3 authored by Pruthvi, committed by Scott Cyphers

Implementation of CrossEntropy and CrossEntropyBackprop as fused ops (#3818)

* Implementation of CrossEntropy and CrossEntropyBackprop as fused ops

* - Unit test case for CE fprop
- Fix bug in decompose_op

* WIP: debug PDPD unit test failure

* Fixed broadcasting issue

* Fix broadcast issue for multi-dimensional tensors

* Utilities to restore the original tensor shape

* i) Style fix ii) Rename variables

* i) Unit test for multiple dimensions ii) Refactor create_mask into a separate function

* Fixed unit tests

* Fix style

* Set the output element type and a dynamic shape in pre_validate_and_infer_types

* Disable the cross_entropy_with_one_hot unit test on PlaidML

* Add CE ops to fused_op_tbl

* Add serializer support for CE and CE Backprop
parent 19e2434a
@@ -337,6 +337,8 @@ set (SRC
op/fused/clamp.hpp
op/fused/conv_fused.cpp
op/fused/conv_fused.hpp
op/fused/crossentropy.cpp
op/fused/crossentropy.hpp
op/fused/hard_sigmoid.cpp
op/fused/hard_sigmoid.hpp
op/fused/depth_to_space.cpp
......
@@ -133,6 +133,7 @@ namespace ngraph
#include "ngraph/op/floor_mod.hpp"
#include "ngraph/op/fused/clamp.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/fused/crossentropy.hpp"
#include "ngraph/op/fused/depth_to_space.hpp"
#include "ngraph/op/fused/elu.hpp"
#include "ngraph/op/fused/fake_quantize.hpp"
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/fused/crossentropy.hpp"
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/log.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/negative.hpp"
#include "ngraph/op/not_equal.hpp"
#include "ngraph/op/one_hot.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/op/sum.hpp"
#include "ngraph/op/util/broadcasting.hpp"
using namespace std;
using namespace ngraph;
constexpr NodeTypeInfo op::CrossEntropy::type_info;
op::CrossEntropy::CrossEntropy(const Output<Node>& arg1,
const Output<Node>& arg2,
bool soft_label,
int64_t ignore_index)
: FusedOp({arg1, arg2})
, m_soft_label(soft_label)
, m_ignore_index(ignore_index)
{
constructor_validate_and_infer_types();
}
static AxisVector get_axis_vector(size_t rank)
{
AxisVector axis_vector;
for (size_t i = 0; i < rank; i++)
{
axis_vector.push_back(i);
}
return axis_vector;
}
static Shape get_result_shape(Shape& target_shape, int start, int end)
{
Shape result;
for (size_t i = start; i < end; i++)
{
result.push_back(target_shape[i]);
}
return result;
}
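// Collapse all leading dimensions of a tensor into one, yielding a rank-2 view
// of shape {d0 * ... * d(k-1), dk}; tensors that are already 2D are returned
// unchanged. (Descriptive comment added for clarity.)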
static Output<Node> get_2d_tensor(Output<Node> node)
{
if (node.get_shape().size() == 2)
{
return node;
}
Shape node_shape = node.get_shape();
size_t rank = node_shape.size();
Shape result_shape{(shape_size(node_shape) / node_shape[rank - 1]), node_shape[rank - 1]};
auto reshape = std::make_shared<ngraph::op::Reshape>(node, get_axis_vector(rank), result_shape);
return reshape;
}
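// Reshape a rank-2 result back so that its leading dimensions match those of
// the original tensor, keeping the result's own last dimension.
// (Descriptive comment added for clarity.)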
static std::shared_ptr<Node> expand_shape(std::shared_ptr<Node> result, Output<Node> original)
{
Shape result_shape = result->get_shape();
Shape original_shape = original.get_shape();
if (result_shape == original_shape && result_shape.size() == 2)
{
return result;
}
size_t original_rank = original_shape.size();
size_t result_rank = result_shape.size();
// expand the first dimension of the computed result to match the original tensor shape
Shape new_shape = get_result_shape(original_shape, 0, original_rank - 1);
// restore the last dimension of computed result
new_shape.push_back(result_shape[result_rank - 1]);
if (new_shape.size() != original_shape.size())
{
throw ngraph_error(
"CrossEntropy shape size mismatch in restoring the original tensor shape");
}
auto reshape = std::make_shared<ngraph::op::Reshape>(result, AxisVector{0, 1}, new_shape);
return reshape;
}
// create a {0,1} mask from ignore_index: 1 where labels != ignore_index,
// converted to the input's element type
static std::shared_ptr<ngraph::Node>
create_mask(Output<Node> labels, Output<Node> input, int64_t ignore_index)
{
auto mask_constant =
ngraph::op::Constant::create(labels.get_element_type(), labels.get_shape(), {ignore_index});
auto not_equal = std::make_shared<ngraph::op::NotEqual>(labels, mask_constant);
auto convert = std::make_shared<ngraph::op::Convert>(not_equal, input.get_element_type());
return convert;
}
NodeVector op::CrossEntropy::decompose_op() const
{
// we will reshape the labels and input tensor to 2d
auto input_to_normalize = get_2d_tensor(input_value(0));
auto labels = get_2d_tensor(input_value(1));
auto reduction_axis = input_to_normalize.get_shape().size() - 1;
auto create_xe = [&](const Output<Node>& one_hot, const Output<Node>& input) {
auto node_log = std::make_shared<ngraph::op::Log>(input);
auto node_mul = one_hot * node_log;
auto node_sum = std::make_shared<ngraph::op::Sum>(
node_mul, AxisSet{static_cast<size_t>(reduction_axis)});
return -node_sum;
};
// mask
std::shared_ptr<ngraph::Node> mask = create_mask(labels, input_to_normalize, m_ignore_index);
if (m_soft_label)
{
// insert dtype conversion if required
if (labels.get_element_type() != input_to_normalize.get_element_type())
{
labels = std::make_shared<ngraph::op::Convert>(labels,
input_to_normalize.get_element_type());
}
if (labels.get_shape()[reduction_axis] == 1)
{
auto reshape_labels = std::make_shared<ngraph::op::Reshape>(
labels, AxisVector{0, 1}, Shape{labels.get_shape().at(0)});
labels = std::make_shared<ngraph::op::Broadcast>(
reshape_labels,
input_to_normalize.get_shape(),
AxisSet{input_to_normalize.get_shape().size() - 1});
}
auto xe = create_xe(labels, input_to_normalize);
auto reshape_xe = std::make_shared<ngraph::op::Reshape>(
xe, AxisVector{0}, Shape{xe->get_shape().at(0), 1});
return {expand_shape(reshape_xe, input_value(0))};
}
else
{
// one-hot encode the labels when soft_label is false
size_t one_hot_axis = input_to_normalize.get_shape().size() - 1;
auto reshape_labels =
make_shared<op::Reshape>(labels, AxisVector{0, 1}, Shape{labels.get_shape().at(0)});
auto one_hot_labels = std::make_shared<ngraph::op::OneHot>(
reshape_labels, input_to_normalize.get_shape(), one_hot_axis);
auto convert_one_hot = std::make_shared<ngraph::op::Convert>(
one_hot_labels, input_to_normalize.get_element_type());
// calculate loss
auto xe = create_xe(convert_one_hot, input_to_normalize);
auto reshape_xe = std::make_shared<ngraph::op::Reshape>(
xe, AxisVector{0}, Shape{xe->get_shape().at(0), 1});
if (m_ignore_index > 0)
{
return {reshape_xe * mask};
}
return {expand_shape(reshape_xe, input_value(0))};
}
}
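// In summary, the decomposition above computes, per row of the flattened 2D
// input (a sketch of the math; the notation is illustrative, not quoted from
// the original sources):
//   loss_i = -sum_j labels_ij * log(input_ij)
// where labels is either the soft-label tensor or the one-hot encoding of the
// class indices, and rows whose label equals ignore_index are zeroed via the
// mask when soft_label is false and ignore_index > 0.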
shared_ptr<Node> op::CrossEntropy::copy_with_new_args(const NodeVector& new_args) const
{
check_new_args_count(this, new_args);
return make_shared<CrossEntropy>(new_args.at(0), new_args.at(1), m_soft_label, m_ignore_index);
}
void op::CrossEntropy::pre_validate_and_infer_types()
{
element::Type input_element_type = get_input_element_type(0);
NODE_VALIDATION_CHECK(this,
input_element_type.is_dynamic() || input_element_type.is_real(),
"Argument element type must be f16, bf16, f32, f64 or dynamic (got ",
input_element_type,
").");
set_output_type(0, get_input_element_type(0), PartialShape::dynamic());
if (is_dynamic())
{
return;
}
}
constexpr NodeTypeInfo op::CrossEntropyBackprop::type_info;
op::CrossEntropyBackprop::CrossEntropyBackprop(const Output<Node>& input,
const Output<Node>& labels,
const Output<Node>& delta,
bool soft_label,
int64_t ignore_index)
: FusedOp({input, labels, delta})
, m_soft_label(soft_label)
, m_ignore_index(ignore_index)
{
constructor_validate_and_infer_types();
}
void op::CrossEntropyBackprop::pre_validate_and_infer_types()
{
element::Type input_element_type = get_input_element_type(0);
NODE_VALIDATION_CHECK(this,
input_element_type.is_dynamic() || input_element_type.is_real(),
"Argument element type must be f16, bf16, f32, f64 or dynamic (got ",
input_element_type,
").");
set_output_type(0, get_input_element_type(0), PartialShape::dynamic());
}
shared_ptr<Node> op::CrossEntropyBackprop::copy_with_new_args(const NodeVector& new_args) const
{
check_new_args_count(this, new_args);
return make_shared<CrossEntropyBackprop>(
new_args.at(0), new_args.at(1), new_args.at(2), m_soft_label, m_ignore_index);
}
NodeVector op::CrossEntropyBackprop::decompose_op() const
{
auto input = get_2d_tensor(input_value(0));
auto labels = get_2d_tensor(input_value(1));
auto delta = get_2d_tensor(input_value(2));
auto rank = input.get_shape().size();
size_t one_hot_axis = delta.get_shape().size() - 1;
// always reduces the sum on the last axis
auto reduction_axis = delta.get_shape().size() - 1;
// mask
std::shared_ptr<ngraph::Node> mask = nullptr;
// remove the trailing singleton dimension from delta
auto delta_reshape = std::make_shared<ngraph::op::Reshape>(
delta, AxisVector{0, 1}, Shape{delta.get_shape().at(0)});
auto delta_bcast = std::make_shared<ngraph::op::Broadcast>(
delta_reshape, input.get_shape(), AxisSet{rank - 1});
if (!m_soft_label)
{
// mask out positions whose label equals ignore_index
if (m_ignore_index > 0)
{
mask = create_mask(labels, input, m_ignore_index);
mask = std::make_shared<ngraph::op::Reshape>(
mask, AxisVector{0, 1}, Shape{mask->get_shape().at(0)});
mask =
std::make_shared<ngraph::op::Broadcast>(mask, input.get_shape(), AxisSet{rank - 1});
}
if (labels.get_shape()[reduction_axis] == 1)
{
labels =
make_shared<op::Reshape>(labels, AxisVector{0, 1}, Shape{labels.get_shape().at(0)});
}
// one hot encoding of labels
auto one_hot =
std::make_shared<ngraph::op::OneHot>(labels, input.get_shape(), one_hot_axis);
labels = std::make_shared<ngraph::op::Convert>(one_hot, input.get_element_type());
}
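// Gradient of the loss with respect to the input (a sketch of the math; not
// quoted from the original sources):
//   d(loss)/d(input_ij) = -labels_ij * delta_i / input_ij
// with the ignore_index mask applied below in the hard-label case.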
std::shared_ptr<ngraph::Node> xe_grad =
std::make_shared<ngraph::op::Divide>(-labels * delta_bcast, input);
if (!m_soft_label && m_ignore_index > 0)
{
xe_grad = xe_grad * mask;
}
return {expand_shape(xe_grad, input_value(0))};
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/op/op.hpp"
#include "ngraph/op/util/fused_op.hpp"
namespace ngraph
{
namespace op
{
class CrossEntropy : public ngraph::op::util::FusedOp
{
public:
NGRAPH_API
static constexpr NodeTypeInfo type_info{"CrossEntropy", 0};
const NodeTypeInfo& get_type_info() const override { return type_info; }
CrossEntropy() = default;
/// \brief CrossEntropy for computing loss
/// \param arg1 Node that produces the input tensor
/// \param arg2 Node that produces the ground truth labels for the input
/// \param soft_label flag indicating whether to interpret the given labels as soft
/// labels
/// \param ignore_index Specifies a target value that is ignored and does not contribute
/// to the input gradient. Only valid if soft_label is set to false.
CrossEntropy(const Output<Node>& arg1,
const Output<Node>& arg2,
bool soft_label = false,
int64_t ignore_index = -100);
virtual NodeVector decompose_op() const override;
void pre_validate_and_infer_types() override;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
bool get_soft_label() const { return m_soft_label; }
int64_t get_ignore_index() const { return m_ignore_index; }
private:
bool m_soft_label;
int64_t m_ignore_index;
};
class CrossEntropyBackprop : public util::FusedOp
{
public:
NGRAPH_API
static constexpr NodeTypeInfo type_info{"CrossEntropyBackprop", 0};
const NodeTypeInfo& get_type_info() const override { return type_info; }
CrossEntropyBackprop() = default;
/// \brief Backprop for CrossEntropy
/// \param input Node that produces the tensor from the fprop
/// \param labels Node that produces the ground truth labels for the input
/// \param delta Node that produces the delta during bprop
/// \param soft_label flag indicating whether to interpret the given labels as soft
/// labels
/// \param ignore_index Specifies a target value that is ignored and does not contribute
/// to the input gradient. Only valid if soft_label is set to false.
CrossEntropyBackprop(const Output<Node>& input,
const Output<Node>& labels,
const Output<Node>& delta,
bool soft_label = false,
int64_t ignore_index = -100);
virtual NodeVector decompose_op() const override;
void pre_validate_and_infer_types() override;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
bool get_soft_label() const { return m_soft_label; }
int64_t get_ignore_index() const { return m_ignore_index; }
private:
bool m_soft_label;
int64_t m_ignore_index;
};
} // namespace op
} // namespace ngraph
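For orientation, a minimal usage sketch of the new op (the shapes and the function name are hypothetical, chosen to mirror the unit tests added below; this is not code from the repository):

#include "ngraph/ngraph.hpp"
#include "ngraph/op/fused/crossentropy.hpp"

using namespace ngraph;

// Build a tiny graph computing the fused cross-entropy loss for a batch of
// 8 samples over 10 classes, with hard (index) labels.
std::shared_ptr<Function> make_cross_entropy_graph()
{
    auto input = std::make_shared<op::Parameter>(element::f32, Shape{8, 10});
    auto labels = std::make_shared<op::Parameter>(element::i32, Shape{8, 1});
    auto ce = std::make_shared<op::CrossEntropy>(input,
                                                 labels,
                                                 /*soft_label=*/false,
                                                 /*ignore_index=*/-100);
    return std::make_shared<Function>(NodeVector{ce}, ParameterVector{input, labels});
}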
@@ -26,6 +26,8 @@ NGRAPH_OP(Clamp, ngraph::op)
NGRAPH_OP(ConvolutionBias, ngraph::op)
NGRAPH_OP(ConvolutionBiasAdd, ngraph::op)
NGRAPH_OP(ConvolutionBiasBackpropFiltersBias, ngraph::op)
NGRAPH_OP(CrossEntropy, ngraph::op)
NGRAPH_OP(CrossEntropyBackprop, ngraph::op)
NGRAPH_OP(DepthToSpace, ngraph::op)
NGRAPH_OP(Elu, ngraph::op)
NGRAPH_OP(FakeQuantize, ngraph::op)
......
@@ -147,6 +147,7 @@ pad_reflect_1d_multi_reflect
pad_reflect_2d
pad_reflect_2d_with_neg
pad_symmetric
cross_entropy_with_one_hot
# No double precision FP support in PlaidML
sum_trivial_in_double
......
@@ -73,6 +73,7 @@
#include "ngraph/op/floor_mod.hpp"
#include "ngraph/op/fused/clamp.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/fused/crossentropy.hpp"
#include "ngraph/op/fused/depth_to_space.hpp"
#include "ngraph/op/fused/elu.hpp"
#include "ngraph/op/fused/fake_quantize.hpp"
@@ -1390,6 +1391,21 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
node = make_shared<op::Cosh>(args[0]);
break;
}
case OP_TYPEID::CrossEntropy:
{
auto soft_label = node_js.at("soft_label");
auto ignore_index = node_js.at("ignore_index");
node = make_shared<op::CrossEntropy>(args[0], args[1], soft_label, ignore_index);
break;
}
case OP_TYPEID::CrossEntropyBackprop:
{
auto soft_label = node_js.at("soft_label");
auto ignore_index = node_js.at("ignore_index");
node = make_shared<op::CrossEntropyBackprop>(
args[0], args[1], args[2], soft_label, ignore_index);
break;
}
case OP_TYPEID::DepthToSpace:
{
auto mode = node_js.at("mode").get<op::DepthToSpace::DepthToSpaceMode>();
@@ -3279,6 +3295,20 @@ json JSONSerializer::serialize_node(const Node& n)
}
case OP_TYPEID::Cosh: { break;
}
case OP_TYPEID::CrossEntropy:
{
auto tmp = static_cast<const op::CrossEntropy*>(&n);
node["soft_label"] = tmp->get_soft_label();
node["ignore_index"] = tmp->get_ignore_index();
break;
}
case OP_TYPEID::CrossEntropyBackprop:
{
auto tmp = static_cast<const op::CrossEntropyBackprop*>(&n);
node["soft_label"] = tmp->get_soft_label();
node["ignore_index"] = tmp->get_ignore_index();
break;
}
case OP_TYPEID::Dequantize:
{
auto tmp = static_cast<const op::Dequantize*>(&n);
......
@@ -2548,3 +2548,47 @@ NGRAPH_TEST(${BACKEND_NAME}, gru_cell_activation_function)
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, cross_entropy_with_soft_labels)
{
Shape tensor_shape{2, 4};
auto input = make_shared<op::Parameter>(element::f32, tensor_shape);
auto labels = make_shared<op::Parameter>(element::i32, Shape{2, 4});
auto cross_entropy = make_shared<op::CrossEntropy>(input, labels, true);
auto f0 = make_shared<Function>(NodeVector{cross_entropy}, ParameterVector{input, labels});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, tensor_shape);
copy_data(a, vector<float>{0.25f, 0.25f, 0.25f, 0.25f, 0.01f, 0.01f, 0.01f, 0.96f});
auto b = backend->create_tensor(element::i32, Shape{2, 4});
copy_data(b, vector<int32_t>{0, 0, 0, 1, 0, 0, 0, 1});
auto result0 = backend->create_tensor(element::f32, Shape{2, 1});
auto handle = backend->compile(f0);
handle->call_with_validate({result0}, {a, b});
vector<float> expected{1.38629f, 0.040822f};
auto result = read_vector<float>(result0);
EXPECT_TRUE(test::all_close_f(result, expected, 23));
}
NGRAPH_TEST(${BACKEND_NAME}, cross_entropy_with_one_hot)
{
Shape tensor_shape{2, 4};
auto input = make_shared<op::Parameter>(element::f32, tensor_shape);
auto labels = make_shared<op::Parameter>(element::i32, Shape{2, 1});
auto cross_entropy = make_shared<op::CrossEntropy>(input, labels, false);
auto f0 = make_shared<Function>(NodeVector{cross_entropy}, ParameterVector{input, labels});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, tensor_shape);
copy_data(a, vector<float>{0.25f, 0.25f, 0.25f, 0.25f, 0.01f, 0.01f, 0.01f, 0.96f});
auto b = backend->create_tensor(element::i32, Shape{2, 1});
copy_data(b, vector<int32_t>{1, 1});
auto result0 = backend->create_tensor(element::f32, Shape{2, 1});
auto handle = backend->compile(f0);
handle->call_with_validate({result0}, {a, b});
vector<float> expected{1.38629f, 4.60517f};
auto result = read_vector<float>(result0);
EXPECT_TRUE(test::all_close_f(result, expected, 23));
}
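As a sanity check on the expected values above (the arithmetic here is added for illustration, not taken from the source): with a hard label of class 1, the per-row losses are -ln(0.25) ≈ 1.38629 and -ln(0.01) ≈ 4.60517; with soft labels {0, 0, 0, 1}, the second row's loss is -ln(0.96) ≈ 0.040822.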
@@ -853,3 +853,42 @@ TEST(core_fusion, softmax_crossentropy)
test_softmax_crossentropy(Shape{41, 37}, Shape{41, 37}, true, -1);
test_softmax_crossentropy(Shape{41, 37}, Shape{41, 1}, false, 5);
}
void test_crossentropy(Shape input_shape, Shape label_shape, bool soft_label, int64_t ignore_index)
{
auto input = std::make_shared<op::Parameter>(element::f64, input_shape);
auto labels = std::make_shared<op::Parameter>(element::i64, label_shape);
auto sm_ce = std::make_shared<op::CrossEntropy>(input, labels, soft_label, ignore_index);
auto cpu_f = make_shared<Function>(sm_ce, ParameterVector{input, labels});
test::Uniform<double> rng(-1.0, 1.0);
vector<vector<double>> args;
for (shared_ptr<op::Parameter> param : cpu_f->get_parameters())
{
vector<double> tensor_val(shape_size(param->get_shape()));
rng.initialize(tensor_val);
args.push_back(tensor_val);
}
auto cpu_results = execute(cpu_f, args, "CPU");
// if soft_label is false, the decomposition should contain exactly one OneHot op for the labels
if (!soft_label)
{
size_t onehot = count_ops_of_type<op::OneHot>(cpu_f);
ASSERT_EQ(onehot, 1);
}
if (ignore_index >= 0 && !soft_label)
// check for the mask
{
size_t not_equal = count_ops_of_type<op::NotEqual>(cpu_f);
ASSERT_EQ(not_equal, 1);
}
}
TEST(core_fusion, crossentropy)
{
test_crossentropy(Shape{41, 37}, Shape{41, 37}, true, -1);
test_crossentropy(Shape{41, 37}, Shape{41, 1}, false, 5);
test_crossentropy(Shape{10, 2, 4, 10}, Shape{10, 2, 4, 1}, false, 5);
test_crossentropy(Shape{4, 3, 2, 4}, Shape{4, 3, 2, 4}, true, -1);
}