Unverified Commit 8933fed0 authored by Scott Cyphers's avatar Scott Cyphers Committed by GitHub

Cyphers/fp16 (#2845)

* Basic float16

* Basic fp16 support

* typo
parent e8b5d11b
...@@ -403,6 +403,9 @@ set (SRC ...@@ -403,6 +403,9 @@ set (SRC
strides.cpp strides.cpp
strides.hpp strides.hpp
type/bfloat16.cpp type/bfloat16.cpp
type/bfloat16.hpp
type/float16.cpp
type/float16.hpp
type/element_type.cpp type/element_type.cpp
util.cpp util.cpp
util.hpp util.hpp
......
...@@ -571,6 +571,7 @@ private: ...@@ -571,6 +571,7 @@ private:
case element::Type_t::undefined: case element::Type_t::undefined:
case element::Type_t::dynamic: case element::Type_t::dynamic:
case element::Type_t::bf16: case element::Type_t::bf16:
case element::Type_t::f16:
ss << "unsupported element type " << type << " op Convert"; ss << "unsupported element type " << type << " op Convert";
throw std::runtime_error(ss.str()); throw std::runtime_error(ss.str());
} }
......
...@@ -27,6 +27,7 @@ using namespace std; ...@@ -27,6 +27,7 @@ using namespace std;
NGRAPH_API const element::Type element::dynamic(element::Type_t::dynamic); NGRAPH_API const element::Type element::dynamic(element::Type_t::dynamic);
NGRAPH_API const element::Type element::boolean(element::Type_t::boolean); NGRAPH_API const element::Type element::boolean(element::Type_t::boolean);
NGRAPH_API const element::Type element::bf16(element::Type_t::bf16); NGRAPH_API const element::Type element::bf16(element::Type_t::bf16);
NGRAPH_API const element::Type element::f16(element::Type_t::f16);
NGRAPH_API const element::Type element::f32(element::Type_t::f32); NGRAPH_API const element::Type element::f32(element::Type_t::f32);
NGRAPH_API const element::Type element::f64(element::Type_t::f64); NGRAPH_API const element::Type element::f64(element::Type_t::f64);
NGRAPH_API const element::Type element::i8(element::Type_t::i8); NGRAPH_API const element::Type element::i8(element::Type_t::i8);
...@@ -72,6 +73,7 @@ static const map<element::Type_t, const TypeInfo>& get_type_info_map() ...@@ -72,6 +73,7 @@ static const map<element::Type_t, const TypeInfo>& get_type_info_map()
{element::Type_t::dynamic, TypeInfo(0, false, false, false, "dynamic", "dynamic")}, {element::Type_t::dynamic, TypeInfo(0, false, false, false, "dynamic", "dynamic")},
{element::Type_t::boolean, TypeInfo(8, false, true, false, "char", "boolean")}, {element::Type_t::boolean, TypeInfo(8, false, true, false, "char", "boolean")},
{element::Type_t::bf16, TypeInfo(16, true, true, false, "bfloat16", "bf16")}, {element::Type_t::bf16, TypeInfo(16, true, true, false, "bfloat16", "bf16")},
{element::Type_t::f16, TypeInfo(16, true, true, false, "float16", "f16")},
{element::Type_t::f32, TypeInfo(32, true, true, false, "float", "f32")}, {element::Type_t::f32, TypeInfo(32, true, true, false, "float", "f32")},
{element::Type_t::f64, TypeInfo(64, true, true, false, "double", "f64")}, {element::Type_t::f64, TypeInfo(64, true, true, false, "double", "f64")},
{element::Type_t::i8, TypeInfo(8, false, true, true, "int8_t", "i8")}, {element::Type_t::i8, TypeInfo(8, false, true, true, "int8_t", "i8")},
...@@ -91,6 +93,7 @@ std::vector<const element::Type*> element::Type::get_known_types() ...@@ -91,6 +93,7 @@ std::vector<const element::Type*> element::Type::get_known_types()
std::vector<const element::Type*> rc = {&element::dynamic, std::vector<const element::Type*> rc = {&element::dynamic,
&element::boolean, &element::boolean,
&element::bf16, &element::bf16,
&element::f16,
&element::f32, &element::f32,
&element::f64, &element::f64,
&element::i8, &element::i8,
...@@ -164,6 +167,11 @@ namespace ngraph ...@@ -164,6 +167,11 @@ namespace ngraph
return boolean; return boolean;
} }
template <> template <>
const Type& from<ngraph::float16>()
{
return f16;
}
template <>
const Type& from<float>() const Type& from<float>()
{ {
return f32; return f32;
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "ngraph/except.hpp" #include "ngraph/except.hpp"
#include "ngraph/ngraph_visibility.hpp" #include "ngraph/ngraph_visibility.hpp"
#include "ngraph/type/bfloat16.hpp" #include "ngraph/type/bfloat16.hpp"
#include "ngraph/type/float16.hpp"
namespace ngraph namespace ngraph
{ {
...@@ -40,6 +41,7 @@ namespace ngraph ...@@ -40,6 +41,7 @@ namespace ngraph
dynamic, dynamic,
boolean, boolean,
bf16, bf16,
f16,
f32, f32,
f64, f64,
i8, i8,
...@@ -124,6 +126,7 @@ namespace ngraph ...@@ -124,6 +126,7 @@ namespace ngraph
extern NGRAPH_API const Type dynamic; extern NGRAPH_API const Type dynamic;
extern NGRAPH_API const Type boolean; extern NGRAPH_API const Type boolean;
extern NGRAPH_API const Type bf16; extern NGRAPH_API const Type bf16;
extern NGRAPH_API const Type f16;
extern NGRAPH_API const Type f32; extern NGRAPH_API const Type f32;
extern NGRAPH_API const Type f64; extern NGRAPH_API const Type f64;
extern NGRAPH_API const Type i8; extern NGRAPH_API const Type i8;
...@@ -166,6 +169,8 @@ namespace ngraph ...@@ -166,6 +169,8 @@ namespace ngraph
const Type& from<uint64_t>(); const Type& from<uint64_t>();
template <> template <>
const Type& from<ngraph::bfloat16>(); const Type& from<ngraph::bfloat16>();
template <>
const Type& from<ngraph::float16>();
std::ostream& operator<<(std::ostream& out, const ngraph::element::Type& obj); std::ostream& operator<<(std::ostream& out, const ngraph::element::Type& obj);
} }
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// Contains logic derived from TensorFlow's bfloat16 implementation
// https://github.com/tensorflow/tensorflow/blob/d354efc/tensorflow/core/lib/bfloat16/bfloat16.h
// Copyright notice from original source file is as follows.
//*******************************************************************************
// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//==============================================================================
#include <cmath>
#include <iostream>
#include <limits>
#include "ngraph/type/float16.hpp"
using namespace std;
using namespace ngraph;
static_assert(sizeof(float16) == 2, "class float16 must be exactly 2 bytes");
// Convert an f32 to the truncated binary16 encoding.
// Handles: sign, 5-bit exponent (bias 15), 10-bit fraction, flush-to-zero
// below 2^-24, denorm production in [2^-24, 2^-14), and saturation of
// exponents above 15 to infinity.
float16::float16(float value)
{
    // Pun the float into its raw bits so the fields can be sliced out.
    union {
        float fv;
        uint32_t iv;
    };
    fv = value;
    uint32_t sign = iv & 0x80000000;
    uint32_t biased_exp = (iv & 0x7F800000) >> 23;
    uint32_t raw_frac = (iv & 0x007FFFFF);
    int32_t exp = biased_exp - 127;
    // Below 2^(-14 - frac_size) == 2^-24 even an f16 denorm cannot represent it.
    int32_t min_exp = -14 - frac_size;
    if (biased_exp == 0 || exp < min_exp)
    {
        // f32 zero/denorm, or magnitude below the smallest f16 denorm:
        // flush to signed zero. The fraction must be cleared as well,
        // otherwise stray f32 fraction bits would leak into the encoding.
        biased_exp = 0;
        raw_frac = 0;
    }
    else if (biased_exp == 0xFF)
    {
        // Infinity or NaN.
        biased_exp = 0x1F;
        uint32_t frac_hi = raw_frac >> (23 - frac_size);
        // A NaN whose payload lives entirely in the discarded low bits must
        // stay a NaN rather than collapsing to infinity: force the quiet bit.
        raw_frac = (raw_frac != 0 && frac_hi == 0) ? 0x0200 : frac_hi;
    }
    else if (exp < -14)
    {
        // f16 denorm: make the implicit leading 1 explicit, then express the
        // 24-bit significand in units of 2^-24. For exp in [-24, -15] the
        // required shift is -exp - 1 (14..23 bits).
        biased_exp = 0;
        raw_frac |= 0x00800000;
        raw_frac = raw_frac >> (-exp - 1);
    }
    else if (exp > 15)
    {
        // Too large for a finite f16: saturate to infinity.
        biased_exp = 0x1F;
        raw_frac = 0;
    }
    else
    {
        // Normal case: truncate the fraction and re-bias the exponent.
        raw_frac = raw_frac >> (23 - frac_size);
        biased_exp = exp + exp_bias;
    }
    m_value = (sign >> 16) | (biased_exp << frac_size) | raw_frac;
}
// Render the value by widening to f32 and using the standard formatter.
std::string float16::to_string() const
{
    const float widened = static_cast<float>(*this);
    return std::to_string(widened);
}
// Storage footprint in bytes of the underlying 16-bit representation.
size_t float16::size() const
{
    return sizeof(uint16_t);
}
// Equality defers to f32 semantics, so NaN compares unequal to everything.
bool float16::operator==(const float16& other) const
{
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wfloat-equal"
    const float lhs = static_cast<float>(*this);
    const float rhs = static_cast<float>(other);
    return lhs == rhs;
#pragma clang diagnostic pop
}
// Ordering defined by the widened f32 values.
bool float16::operator<(const float16& other) const
{
    const float lhs = static_cast<float>(*this);
    const float rhs = static_cast<float>(other);
    return lhs < rhs;
}
// Ordering defined by the widened f32 values.
bool float16::operator<=(const float16& other) const
{
    const float lhs = static_cast<float>(*this);
    const float rhs = static_cast<float>(other);
    return lhs <= rhs;
}
// Ordering defined by the widened f32 values.
bool float16::operator>(const float16& other) const
{
    const float lhs = static_cast<float>(*this);
    const float rhs = static_cast<float>(other);
    return lhs > rhs;
}
// Ordering defined by the widened f32 values.
bool float16::operator>=(const float16& other) const
{
    const float lhs = static_cast<float>(*this);
    const float rhs = static_cast<float>(other);
    return lhs >= rhs;
}
// Widen the binary16 encoding to an f32 (exact: every f16 is representable).
float16::operator float() const
{
    // Pun the assembled bits back into a float.
    union {
        uint32_t i_val;
        float f_val;
    };
    // 5-bit biased exponent and 10-bit fraction of the half.
    uint32_t exp = 0x1F & (m_value >> frac_size);
    // Re-bias from the f16 bias (15) to the f32 bias (127).
    uint32_t fexp = exp + 127 - 15;
    uint32_t frac = m_value & 0x03FF;
    if (exp == 0)
    {
        if (frac == 0)
        {
            // Signed zero: force the f32 exponent field to zero too.
            fexp = 0;
        }
        else
        {
            // f16 denorm: normalize by shifting the fraction up until the
            // implicit leading 1 (bit 10) appears, adjusting the f32
            // exponent by one for each shift.
            fexp++;
            while (0 == (frac & 0x0400))
            {
                fexp--;
                frac = frac << 1;
            }
            // Drop the now-implicit leading 1.
            frac &= 0x03FF;
        }
    }
    else if (exp == 0x1F)
    {
        // Infinity / NaN: all-ones f32 exponent; the fraction carries over.
        fexp = 0xFF;
    }
    // Move the 10 fraction bits into the top of the 23-bit f32 fraction field.
    frac = frac << (23 - frac_size);
    i_val = static_cast<uint32_t>((m_value & 0x8000)) << 16 | (fexp << 23) | frac;
    return f_val;
}
// Expose the raw IEEE 754 binary16 encoding.
uint16_t float16::to_bits() const
{
    const uint16_t raw = m_value;
    return raw;
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cmath>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <vector>
#define ROUND_MODE_TO_NEAREST_EVEN
namespace ngraph
{
// IEEE 754 binary16 ("half precision") value type. Stores the raw 16-bit
// encoding; conversion and comparison are implemented by widening to float
// (see float16.cpp).
class float16
{
public:
    // All-zero bit pattern, i.e. +0.0.
    constexpr float16()
        : m_value{0}
    {
    }
    // binary16 layout: 1 sign bit, 5 exponent bits (bias 15), 10 fraction bits.
    static uint32_t constexpr frac_size = 10;
    static uint32_t constexpr exp_size = 5;
    static uint32_t constexpr exp_bias = 15;
    // Assemble a value from its fields; each argument is masked to its width.
    float16(uint32_t sign, uint32_t biased_exponent, uint32_t fraction)
        : m_value((sign & 0x01) << 15 | (biased_exponent & 0x1F) << 10 | (fraction & 0x03FF))
    {
    }
    // Narrowing conversion from f32 (implicit by design, mirroring bfloat16).
    float16(float value);
    std::string to_string() const;
    size_t size() const;
    // Comparisons defer to float semantics (so NaN != NaN).
    bool operator==(const float16& other) const;
    bool operator!=(const float16& other) const { return !(*this == other); }
    bool operator<(const float16& other) const;
    bool operator<=(const float16& other) const;
    bool operator>(const float16& other) const;
    bool operator>=(const float16& other) const;
    operator float() const;
    // Reinterpret a raw 16-bit pattern as a float16 (no numeric conversion).
    static constexpr float16 from_bits(uint16_t bits) { return float16(bits, true); }
    // Raw IEEE 754 binary16 encoding.
    uint16_t to_bits() const;
    friend std::ostream& operator<<(std::ostream& out, const float16& obj)
    {
        out << static_cast<float>(obj);
        return out;
    }

private:
    // Tag-dispatched raw-bits constructor backing from_bits(); the bool
    // parameter only disambiguates from the numeric-field constructor.
    constexpr float16(uint16_t x, bool)
        : m_value{x}
    {
    }
    // Helper union for float <-> bit-pattern punning.
    // NOTE(review): appears unused in the visible code -- candidate for removal.
    union F32 {
        F32(float val)
            : f{val}
        {
        }
        F32(uint32_t val)
            : i{val}
        {
        }
        float f;
        uint32_t i;
    };
    uint16_t m_value;
};
}
namespace std
{
/// numeric_limits traits for the binary16 type, following the IEEE 754
/// half-precision format: 11 significand bits (1 implicit), exponent
/// range [-14, 15], max finite value 65504.
template <>
class numeric_limits<ngraph::float16>
{
public:
    static constexpr bool is_specialized = true;
    /// Smallest positive *normal* value: 2^-14 (biased exponent 1, zero fraction).
    static constexpr ngraph::float16 min() noexcept
    {
        return ngraph::float16::from_bits(0x0400);
    }
    /// Largest finite value: 65504 (biased exponent 30, all-ones fraction).
    static constexpr ngraph::float16 max() noexcept
    {
        return ngraph::float16::from_bits(0x7BFF);
    }
    /// Most negative finite value: -65504.
    static constexpr ngraph::float16 lowest() noexcept
    {
        return ngraph::float16::from_bits(0xFBFF);
    }
    static constexpr int digits = 11;
    static constexpr int digits10 = 3;
    static constexpr bool is_signed = true;
    static constexpr bool is_integer = false;
    static constexpr bool is_exact = false;
    static constexpr int radix = 2;
    /// Machine epsilon: 2^-10 (biased exponent 15 - 10 = 5, zero fraction).
    static constexpr ngraph::float16 epsilon() noexcept
    {
        return ngraph::float16::from_bits(0x1400);
    }
    /// Largest rounding error for round-to-nearest: 0.5.
    static constexpr ngraph::float16 round_error() noexcept
    {
        return ngraph::float16::from_bits(0x3800);
    }
    static constexpr int min_exponent = -13;
    static constexpr int min_exponent10 = -4;
    static constexpr int max_exponent = 16;
    static constexpr int max_exponent10 = 4;
    static constexpr bool has_infinity = true;
    static constexpr bool has_quiet_NaN = true;
    static constexpr bool has_signaling_NaN = true;
    // binary16 has subnormals and the conversion code preserves them.
    static constexpr float_denorm_style has_denorm = denorm_present;
    static constexpr bool has_denorm_loss = false;
    static constexpr ngraph::float16 infinity() noexcept
    {
        return ngraph::float16::from_bits(0x7C00);
    }
    static constexpr ngraph::float16 quiet_NaN() noexcept
    {
        return ngraph::float16::from_bits(0x7FFF);
    }
    static constexpr ngraph::float16 signaling_NaN() noexcept
    {
        return ngraph::float16::from_bits(0x7DFF);
    }
    /// Smallest positive subnormal: 2^-24 (only the low fraction bit set).
    static constexpr ngraph::float16 denorm_min() noexcept
    {
        return ngraph::float16::from_bits(0x0001);
    }
    // Not claiming IEC 559 conformance: arithmetic is done by widening to float.
    static constexpr bool is_iec559 = false;
    // The set of values is finite, so the type is bounded.
    static constexpr bool is_bounded = true;
    static constexpr bool is_modulo = false;
    static constexpr bool traps = false;
    static constexpr bool tinyness_before = false;
    static constexpr float_round_style round_style = round_to_nearest;
};
}
...@@ -43,6 +43,7 @@ set(SRC ...@@ -43,6 +43,7 @@ set(SRC
cse.cpp cse.cpp
element_type.cpp element_type.cpp
file_util.cpp file_util.cpp
float16.cpp
includes.cpp includes.cpp
input_output_assign.cpp input_output_assign.cpp
main.cpp main.cpp
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <climits>
#include <random>
#include "gtest/gtest.h"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/type/float16.hpp"
#include "util/float_util.hpp"
using namespace std;
using namespace ngraph;
// Round-trip checks between the "s eeeee ff ffff ffff" bit-string helpers and
// float16: parse a bit string, compare against float16 built from a literal,
// format it back, and widen to float.
TEST(float16, conversions)
{
    float16 f16;
    const char* source_string;
    std::string f16_string;

    // 1.f: exponent 0 (biased 01111), zero fraction.
    source_string = "0 01111 00 0000 0000";
    f16 = test::bits_to_float16(source_string);
    EXPECT_EQ(f16, float16(1.0));
    f16_string = test::float16_to_bits(f16);
    EXPECT_STREQ(source_string, f16_string.c_str());
    EXPECT_EQ(static_cast<float>(f16), 1.0);

    // -1.f: same encoding with the sign bit set.
    source_string = "1 01111 00 0000 0000";
    f16 = test::bits_to_float16(source_string);
    EXPECT_EQ(f16, float16(-1.0));
    f16_string = test::float16_to_bits(f16);
    EXPECT_STREQ(source_string, f16_string.c_str());
    EXPECT_EQ(static_cast<float>(f16), -1.0);

    // 0.f: all-zero bit pattern.
    source_string = "0 00000 00 0000 0000";
    f16 = test::bits_to_float16(source_string);
    EXPECT_EQ(f16, float16(0.0));
    f16_string = test::float16_to_bits(f16);
    EXPECT_STREQ(source_string, f16_string.c_str());
    EXPECT_EQ(static_cast<float>(f16), 0.0);

    // 1.5f: exponent 0 with the top fraction bit set.
    source_string = "0 01111 10 0000 0000";
    f16 = test::bits_to_float16(source_string);
    EXPECT_EQ(f16, float16(1.5));
    f16_string = test::float16_to_bits(f16);
    EXPECT_STREQ(source_string, f16_string.c_str());
    EXPECT_EQ(static_cast<float>(f16), 1.5);
}
...@@ -55,6 +55,29 @@ std::string ngraph::test::bfloat16_to_bits(bfloat16 f) ...@@ -55,6 +55,29 @@ std::string ngraph::test::bfloat16_to_bits(bfloat16 f)
return formatted; return formatted;
} }
// Render an f16 as a human-readable bit string: "s eeeee ff ffff ffff".
std::string ngraph::test::float16_to_bits(float16 f)
{
    const std::string raw = std::bitset<16>(f.to_bits()).to_string();
    std::string formatted;
    formatted.reserve(20); // 16 bits + 4 separators
    // Sign bit.
    formatted.push_back(raw[0]);
    formatted.push_back(' ');
    // Five exponent bits.
    formatted.append(raw, 1, 5);
    formatted.push_back(' ');
    // Ten fraction bits, grouped 2-4-4.
    formatted.append(raw, 6, 2);
    formatted.push_back(' ');
    formatted.append(raw, 8, 4);
    formatted.push_back(' ');
    formatted.append(raw, 12, 4);
    return formatted;
}
std::string ngraph::test::float_to_bits(float f) std::string ngraph::test::float_to_bits(float f)
{ {
FloatUnion fu{f}; FloatUnion fu{f};
...@@ -116,6 +139,20 @@ ngraph::bfloat16 ngraph::test::bits_to_bfloat16(const std::string& s) ...@@ -116,6 +139,20 @@ ngraph::bfloat16 ngraph::test::bits_to_bfloat16(const std::string& s)
return bfloat16::from_bits(static_cast<uint16_t>(bs.to_ulong())); return bfloat16::from_bits(static_cast<uint16_t>(bs.to_ulong()));
} }
// Parse a (possibly space-separated) 16-character bit string into a float16.
ngraph::float16 ngraph::test::bits_to_float16(const std::string& s)
{
    // Strip the spacing emitted by float16_to_bits.
    std::string bits;
    for (char c : s)
    {
        if (!::isspace(c))
        {
            bits.push_back(c);
        }
    }
    if (bits.size() != 16)
    {
        throw ngraph_error("Input length must be 16");
    }
    // Interpret the remaining characters as the raw binary16 encoding.
    std::bitset<16> bs(bits);
    return float16::from_bits(static_cast<uint16_t>(bs.to_ulong()));
}
float ngraph::test::bits_to_float(const std::string& s) float ngraph::test::bits_to_float(const std::string& s)
{ {
std::string unformatted = s; std::string unformatted = s;
......
...@@ -44,6 +44,8 @@ namespace ngraph ...@@ -44,6 +44,8 @@ namespace ngraph
std::string bfloat16_to_bits(bfloat16 f); std::string bfloat16_to_bits(bfloat16 f);
std::string float16_to_bits(float16 f);
std::string float_to_bits(float f); std::string float_to_bits(float f);
std::string double_to_bits(double d); std::string double_to_bits(double d);
...@@ -53,5 +55,7 @@ namespace ngraph ...@@ -53,5 +55,7 @@ namespace ngraph
float bits_to_float(const std::string& s); float bits_to_float(const std::string& s);
double bits_to_double(const std::string& s); double bits_to_double(const std::string& s);
float16 bits_to_float16(const std::string& s);
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment