Commit a1a8a7e3 authored by Pruthvi's avatar Pruthvi Committed by Scott Cyphers

Implementation of CrossEntropy and CrossEntropyBackprop as fused ops (#3818)

* - Implementation of CrossEntropy and CrossEntropyBackprop as fused ops

* - unit test case for CE fprop
- fix bug in decompose_op

* WIP debug PDPD unit test failure

* fixed broadcasting issue

* - fix broadcast issue for multi-dim tensor

* utilities to restore the original tensor shape

* i) style-fix ii) rename variables

* - unit test for multiple dimensions ii) refactor create_mask into a separate function

* - fixed unit tests

* fix style

* set output element type to dynamic in pre_validate and infer shape

* disable ce with one hot unit test on PlaidML

* add CE op to fused_op_tbl

* - add serializer support for CE and CE Backprop
parent 19e2434a
......@@ -337,6 +337,8 @@ set (SRC
op/fused/clamp.hpp
op/fused/conv_fused.cpp
op/fused/conv_fused.hpp
op/fused/crossentropy.cpp
op/fused/crossentropy.hpp
op/fused/hard_sigmoid.cpp
op/fused/hard_sigmoid.hpp
op/fused/depth_to_space.cpp
......
......@@ -133,6 +133,7 @@ namespace ngraph
#include "ngraph/op/floor_mod.hpp"
#include "ngraph/op/fused/clamp.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/fused/crossentropy.hpp"
#include "ngraph/op/fused/depth_to_space.hpp"
#include "ngraph/op/fused/elu.hpp"
#include "ngraph/op/fused/fake_quantize.hpp"
......
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/op/op.hpp"
#include "ngraph/op/util/fused_op.hpp"
namespace ngraph
{
    namespace op
    {
        /// \brief Fused op computing the cross-entropy loss between an input
        ///        distribution and ground-truth labels.
        class CrossEntropy : public ngraph::op::util::FusedOp
        {
        public:
            NGRAPH_API
            static constexpr NodeTypeInfo type_info{"CrossEntropy", 0};
            const NodeTypeInfo& get_type_info() const override { return type_info; }
            CrossEntropy() = default;
            /// \brief CrossEntropy for computing loss
            /// \param arg1 Node that produces the input tensor
            /// \param arg2 Node that produces ground truth labels for the input
            /// \param soft_label flag indicating whether to interpret the given labels as soft
            /// labels
            /// \param ignore_index Specifies a target value that is ignored and does not contribute
            /// to the input gradient. Only valid if soft_label is set to false
            CrossEntropy(const Output<Node>& arg1,
                         const Output<Node>& arg2,
                         bool soft_label = false,
                         int64_t ignore_index = -100);

            virtual NodeVector decompose_op() const override;

            void pre_validate_and_infer_types() override;

            virtual std::shared_ptr<Node>
                copy_with_new_args(const NodeVector& new_args) const override;

            // Accessors for the attributes captured at construction time
            // (used by the serializer and by decompose_op).
            bool get_soft_label() const { return m_soft_label; }
            int64_t get_ignore_index() const { return m_ignore_index; }
        private:
            bool m_soft_label;
            int64_t m_ignore_index;
        };

        /// \brief Fused op computing the gradient of CrossEntropy with respect
        ///        to its input.
        class CrossEntropyBackprop : public util::FusedOp
        {
        public:
            NGRAPH_API
            static constexpr NodeTypeInfo type_info{"CrossEntropyBackprop", 0};
            const NodeTypeInfo& get_type_info() const override { return type_info; }
            CrossEntropyBackprop() = default;
            /// \brief Backprop for CrossEntropy
            /// \param input Node that produces tensor from the fprop
            /// \param labels Node that produces ground truth labels for input
            /// \param delta Node that produces the delta during bprop
            /// \param soft_label flag indicating whether to interpret the given labels as soft
            /// labels
            /// \param ignore_index Specifies a target value that is ignored and does not contribute
            /// to the input gradient. Only valid if soft_label is set to false
            CrossEntropyBackprop(const Output<Node>& input,
                                 const Output<Node>& labels,
                                 const Output<Node>& delta,
                                 bool soft_label = false,
                                 int64_t ignore_index = -100);

            virtual NodeVector decompose_op() const override;

            void pre_validate_and_infer_types() override;

            virtual std::shared_ptr<Node>
                copy_with_new_args(const NodeVector& new_args) const override;

            // Accessors for the attributes captured at construction time
            // (used by the serializer and by decompose_op).
            bool get_soft_label() const { return m_soft_label; }
            int64_t get_ignore_index() const { return m_ignore_index; }
        private:
            bool m_soft_label;
            int64_t m_ignore_index;
        };
    } // namespace op
} // namespace ngraph
......@@ -26,6 +26,8 @@ NGRAPH_OP(Clamp, ngraph::op)
NGRAPH_OP(ConvolutionBias, ngraph::op)
NGRAPH_OP(ConvolutionBiasAdd, ngraph::op)
NGRAPH_OP(ConvolutionBiasBackpropFiltersBias, ngraph::op)
NGRAPH_OP(CrossEntropy, ngraph::op)
NGRAPH_OP(CrossEntropyBackprop, ngraph::op)
NGRAPH_OP(DepthToSpace, ngraph::op)
NGRAPH_OP(Elu, ngraph::op)
NGRAPH_OP(FakeQuantize, ngraph::op)
......
......@@ -147,6 +147,7 @@ pad_reflect_1d_multi_reflect
pad_reflect_2d
pad_reflect_2d_with_neg
pad_symmetric
cross_entropy_with_one_hot
# No double precision FP support in PlaidML
sum_trivial_in_double
......
......@@ -73,6 +73,7 @@
#include "ngraph/op/floor_mod.hpp"
#include "ngraph/op/fused/clamp.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/fused/crossentropy.hpp"
#include "ngraph/op/fused/depth_to_space.hpp"
#include "ngraph/op/fused/elu.hpp"
#include "ngraph/op/fused/fake_quantize.hpp"
......@@ -1390,6 +1391,21 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
node = make_shared<op::Cosh>(args[0]);
break;
}
case OP_TYPEID::CrossEntropy:
{
auto soft_label = node_js.at("soft_label");
auto ignore_index = node_js.at("ignore_index");
node = make_shared<op::CrossEntropy>(args[0], args[1], soft_label, ignore_index);
break;
}
case OP_TYPEID::CrossEntropyBackprop:
{
auto soft_label = node_js.at("soft_label");
auto ignore_index = node_js.at("ignore_index");
node = make_shared<op::CrossEntropyBackprop>(
args[0], args[1], args[2], soft_label, ignore_index);
break;
}
case OP_TYPEID::DepthToSpace:
{
auto mode = node_js.at("mode").get<op::DepthToSpace::DepthToSpaceMode>();
......@@ -3279,6 +3295,20 @@ json JSONSerializer::serialize_node(const Node& n)
}
case OP_TYPEID::Cosh: { break;
}
case OP_TYPEID::CrossEntropy:
{
auto tmp = static_cast<const op::CrossEntropy*>(&n);
node["soft_label"] = tmp->get_soft_label();
node["ignore_index"] = tmp->get_ignore_index();
break;
}
case OP_TYPEID::CrossEntropyBackprop:
{
auto tmp = static_cast<const op::CrossEntropyBackprop*>(&n);
node["soft_label"] = tmp->get_soft_label();
node["ignore_index"] = tmp->get_ignore_index();
break;
}
case OP_TYPEID::Dequantize:
{
auto tmp = static_cast<const op::Dequantize*>(&n);
......
......@@ -2548,3 +2548,47 @@ NGRAPH_TEST(${BACKEND_NAME}, gru_cell_activation_function)
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, cross_entropy_with_soft_labels)
{
    // CrossEntropy fprop with soft_label = true: labels have the same shape
    // as the input and are used directly as a probability distribution.
    const Shape data_shape{2, 4};
    const Shape label_shape{2, 4};
    auto data_param = make_shared<op::Parameter>(element::f32, data_shape);
    auto label_param = make_shared<op::Parameter>(element::i32, label_shape);
    auto ce_node = make_shared<op::CrossEntropy>(data_param, label_param, true);
    auto func =
        make_shared<Function>(NodeVector{ce_node}, ParameterVector{data_param, label_param});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Input/label/output tensors.
    auto data_tv = backend->create_tensor(element::f32, data_shape);
    copy_data(data_tv, vector<float>{0.25f, 0.25f, 0.25f, 0.25f, 0.01f, 0.01f, 0.01f, 0.96f});
    auto label_tv = backend->create_tensor(element::i32, label_shape);
    copy_data(label_tv, vector<int32_t>{0, 0, 0, 1, 0, 0, 0, 1});
    auto result_tv = backend->create_tensor(element::f32, Shape{2, 1});

    auto exec = backend->compile(func);
    exec->call_with_validate({result_tv}, {data_tv, label_tv});

    // Per-row loss: -sum(label * log(input)), e.g. -log(0.25) = 1.38629.
    const vector<float> expected{1.38629f, 0.040822f};
    EXPECT_TRUE(test::all_close_f(read_vector<float>(result_tv), expected, 23));
}
NGRAPH_TEST(${BACKEND_NAME}, cross_entropy_with_one_hot)
{
    // CrossEntropy fprop with soft_label = false: labels hold class indices
    // (trailing dim 1) and are one-hot encoded inside the decomposition.
    const Shape data_shape{2, 4};
    const Shape label_shape{2, 1};
    auto data_param = make_shared<op::Parameter>(element::f32, data_shape);
    auto label_param = make_shared<op::Parameter>(element::i32, label_shape);
    auto ce_node = make_shared<op::CrossEntropy>(data_param, label_param, false);
    auto func =
        make_shared<Function>(NodeVector{ce_node}, ParameterVector{data_param, label_param});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Input/label/output tensors.
    auto data_tv = backend->create_tensor(element::f32, data_shape);
    copy_data(data_tv, vector<float>{0.25f, 0.25f, 0.25f, 0.25f, 0.01f, 0.01f, 0.01f, 0.96f});
    auto label_tv = backend->create_tensor(element::i32, label_shape);
    copy_data(label_tv, vector<int32_t>{1, 1});
    auto result_tv = backend->create_tensor(element::f32, Shape{2, 1});

    auto exec = backend->compile(func);
    exec->call_with_validate({result_tv}, {data_tv, label_tv});

    // Per-row loss: -log(input[row][label]), e.g. -log(0.01) = 4.60517.
    const vector<float> expected{1.38629f, 4.60517f};
    EXPECT_TRUE(test::all_close_f(read_vector<float>(result_tv), expected, 23));
}
......@@ -853,3 +853,42 @@ TEST(core_fusion, softmax_crossentropy)
test_softmax_crossentropy(Shape{41, 37}, Shape{41, 37}, true, -1);
test_softmax_crossentropy(Shape{41, 37}, Shape{41, 1}, false, 5);
}
// Builds a CrossEntropy graph, executes it on the CPU backend with random
// inputs, and verifies that the fused op's decomposition introduced the
// expected helper ops (OneHot for hard labels, NotEqual mask for ignore_index).
void test_crossentropy(Shape input_shape, Shape label_shape, bool soft_label, int64_t ignore_index)
{
    auto data_param = std::make_shared<op::Parameter>(element::f64, input_shape);
    auto label_param = std::make_shared<op::Parameter>(element::i64, label_shape);
    auto ce =
        std::make_shared<op::CrossEntropy>(data_param, label_param, soft_label, ignore_index);
    auto cpu_f = make_shared<Function>(ce, ParameterVector{data_param, label_param});

    // Fill every parameter with uniform random values in [-1, 1].
    test::Uniform<double> rng(-1.0, 1.0);
    vector<vector<double>> args;
    for (const shared_ptr<op::Parameter>& param : cpu_f->get_parameters())
    {
        vector<double> values(shape_size(param->get_shape()));
        rng.initialize(values);
        args.push_back(values);
    }
    auto cpu_results = execute(cpu_f, args, "CPU");

    // With hard (non-soft) labels the decomposition one-hot encodes them.
    if (!soft_label)
    {
        ASSERT_EQ(count_ops_of_type<op::OneHot>(cpu_f), 1);
    }
    // A non-negative ignore_index (hard labels only) adds a NotEqual mask.
    if (!soft_label && ignore_index >= 0)
    {
        ASSERT_EQ(count_ops_of_type<op::NotEqual>(cpu_f), 1);
    }
}
// Exercises test_crossentropy over soft/hard labels and 2-D/4-D shapes.
TEST(core_fusion, crossentropy)
{
    // soft labels: label shape matches input shape; ignore_index unused (-1)
    test_crossentropy(Shape{41, 37}, Shape{41, 37}, true, -1);
    // hard labels: trailing label dim is 1; ignore_index = 5 enables masking
    test_crossentropy(Shape{41, 37}, Shape{41, 1}, false, 5);
    // multi-dimensional variants of the two cases above
    test_crossentropy(Shape{10, 2, 4, 10}, Shape{10, 2, 4, 1}, false, 5);
    test_crossentropy(Shape{4, 3, 2, 4}, Shape{4, 3, 2, 4}, true, -1);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment