bfloat16 testing and fixes (#2693)

* add test file * add new float_util files * Add unit tests for bfloat. Fix bfloat rounding code since it was incorrectly rounding. * add more tests * cleanup * change truncate to be endian agnostic * prep work for constexpr ctors * ready for constexpr * constexpr ctor for bfloat16 * more bfloating * write constexpr isnan since it is not constexpr on macOS * remove cast operator * add benchmark test and cleanup * use aligned buffers for benchmark tests * fix numbers printed in benchmark * remove union and use cast operator * all tests passing * cleanup

bfloat16 testing and fixes (#2693)
* add test file * add new float_util files * Add unit tests for bfloat. Fix bfloat rounding code since it was incorrectly rounding. * add more tests * cleanup * change truncate to be endian agnostic * prep work for constexpr ctors * ready for constexpr * constexpr ctor for bfloat16 * more bfloating * write constexpr isnan since it is not constexpr on macOS * remove cast operator * add benchmark test and cleanup * use aligned buffers for benchmark tests * fix numbers printed in benchmark * remove union and use cast operator * all tests passing * cleanup
667d8f9e · Robert Kimball · Scott Cyphers · 69486262 · 667d8f9e · 667d8f9e
Commit 667d8f9e authored Apr 04, 2019 by Robert Kimball Committed by Scott Cyphers Apr 04, 2019
8 changed files
--- a/src/ngraph/type/bfloat16.cpp
+++ b/src/ngraph/type/bfloat16.cpp
@@ -33,14 +33,16 @@

 #include <cmath>
+#include <cstring>
 #include <iostream>
+#include <limits>

 #include "ngraph/type/bfloat16.hpp"

 using namespace std;
 using namespace ngraph;

-// A value represents NaN in bfloat16
-static const uint16_t BF16_NAN_VALUE = 0x7FC0;
+// Guard the 2-byte layout: m_value is the only data member, and anything that grows the
+// object (for example a virtual member function) would trip this check.
+static_assert(sizeof(bfloat16) == 2, "class bfloat16 must be exactly 2 bytes");
+
+// 0x7FC0 is 0 11111111 1000000 in sign/exponent/mantissa order: all-ones exponent with a
+// non-zero mantissa, i.e. a NaN encoding.
+uint16_t bfloat16::BF16_NAN_VALUE = 0x7FC0;

 bool float_isnan(const float& x)
 {
@@ -63,33 +65,6 @@ std::vector<bfloat16> bfloat16::from_float_vector(const std::vector<float>& v_f3
    return v_bf16;
 }

-bfloat16::bfloat16(float value, bool rounding)
-{
-    if (float_isnan(value))
-    {
-        m_value = BF16_NAN_VALUE;
-    }
-    else if (!rounding)
-    {
-        // Truncate off 16 LSB, no rounding
-        // Treat system as little endian (Intel x86 family)
-        uint16_t* u16_ptr = reinterpret_cast<uint16_t*>(&value);
-        m_value = u16_ptr[1];
-    }
-    else
-    {
-        // Rounding with round-nearest-to-even to create bfloat16
-        // from float. Refer to TF implementation explanation:
-        // https://github.com/tensorflow/tensorflow/blob/d354efc/tensorflow/core/lib/bfloat16/bfloat16.h#L199
-        uint32_t* u32_ptr = reinterpret_cast<uint32_t*>(&value);
-        uint32_t u32_value = *u32_ptr;
-        uint32_t lsb = (u32_value >> 16) & 1;
-        uint32_t rounding_bias = 0x7fff + lsb;
-        u32_value += rounding_bias;
-        m_value = static_cast<uint16_t>(u32_value >> 16);
-    }
-}
-
 std::string bfloat16::to_string() const
 {
    return std::to_string(static_cast<float>(*this));
@@ -130,12 +105,9 @@ bool bfloat16::operator>=(const bfloat16& other) const

 bfloat16::operator float() const
 {
-    // float result = 0;
-    // uint16_t* u16_ptr = reinterpret_cast<uint16_t*>(&result);
-
-    // // Treat the system as little endian (Intel x86 family)
-    // u16_ptr[1] = m_value;
-    return static_cast<float>(static_cast<uint32_t>(m_value) << 16);
+    // Widen to float: the stored 16 bits become the upper half of a 32-bit pattern and the
+    // lower half is zero-filled.
+    const uint32_t bits = static_cast<uint32_t>(m_value) << 16;
+    // Copy the bytes instead of dereferencing a reinterpret_cast<const float*>: reading a
+    // uint32_t object through a float lvalue violates the strict-aliasing rule.
+    float result;
+    std::memcpy(&result, &bits, sizeof(result));
+    return result;
 }

 bfloat16::operator double() const
@@ -143,7 +115,7 @@ bfloat16::operator double() const
    return static_cast<float>(m_value);
 }

-std::ostream& operator<<(std::ostream& out, const bfloat16& obj)
+// Return the raw 16-bit encoding held in m_value, without any conversion.
+uint16_t bfloat16::to_bits() const
 {
-    return (out << static_cast<float>(obj));
+    return m_value;
 }
--- a/src/ngraph/type/bfloat16.hpp
+++ b/src/ngraph/type/bfloat16.hpp
@@ -14,27 +14,42 @@
 // limitations under the License.
 //*****************************************************************************

-//================================================================================================
-// bfloat16 type
-//================================================================================================
-
 #pragma once

+#include <cmath>
 #include <iostream>
 #include <memory>
 #include <string>
 #include <vector>

+#define ROUND_MODE_TO_NEAREST_EVEN
+
 namespace ngraph
 {
    class bfloat16
    {
    public:
-        bfloat16() {}
-        bfloat16(float value, bool rounding = false);
-        bfloat16(const bfloat16&) = default;
-        bfloat16& operator=(const bfloat16&) = default;
-        virtual ~bfloat16() {}
+        // Default construction clears every bit of the stored encoding.
+        bfloat16()
+            : m_value{0}
+        {
+        }
+        // Convert from float. Exactly one ROUND_MODE_* macro selects, at preprocessing time,
+        // which static helper produces the 16-bit encoding that initializes m_value; if none
+        // is defined the #error below stops the build.
+        // The constructor is not explicit, so values convertible to float convert implicitly.
+        bfloat16(float value)
+            : m_value
+        {
+#if defined ROUND_MODE_TO_NEAREST
+            round_to_nearest(value)
+#elif defined ROUND_MODE_TO_NEAREST_EVEN
+            round_to_nearest_even(value)
+#elif defined ROUND_MODE_TRUNCATE
+            truncate(value)
+#else
+#error                                                                                             \
+    "ROUNDING_MODE must be one of ROUND_MODE_TO_NEAREST, ROUND_MODE_TO_NEAREST_EVEN, or ROUND_MODE_TRUNCATE"
+#endif
+        }
+        {
+        }
+
        std::string to_string() const;
        size_t size() const;
        bool operator==(const bfloat16& other) const;
@@ -48,10 +63,49 @@ namespace ngraph

        static std::vector<float> to_float_vector(const std::vector<bfloat16>&);
        static std::vector<bfloat16> from_float_vector(const std::vector<float>&);
+        // Build a bfloat16 straight from a 16-bit encoding; no rounding is involved.
+        static bfloat16 from_bits(uint16_t bits)
+        {
+            // The bool argument only selects the private (uint16_t, bool) constructor.
+            return bfloat16(bits, false);
+        }
+        uint16_t to_bits() const;
+        // Stream the value after converting it to float.
+        friend std::ostream& operator<<(std::ostream& out, const bfloat16& obj)
+        {
+            return out << static_cast<float>(obj);
+        }

-        friend std::ostream& operator<<(std::ostream&, const bfloat16&);
+#define cu32(x) (F32(x).i)
+
+        // Convert a float to a 16-bit bfloat16 encoding, rounding to the nearest
+        // representable value and resolving exact ties toward the even encoding.
+        //
+        // NOTE(review): NaN inputs are not special-cased; the bias can carry out of a NaN
+        // pattern with a large mantissa. The constructor this replaced checked for NaN first.
+        static uint16_t round_to_nearest_even(float x)
+        {
+            const uint32_t bits = cu32(x);
+            // Lowest bit that survives the shift (bit 16 of the float encoding).
+            const uint32_t lsb = (bits >> 16) & 1;
+            // Bias by 0x7FFF + lsb: a discarded fraction above one half always carries into
+            // bit 16, while an exact half (0x8000) carries only when the kept value is odd.
+            // Biasing odd values alone would truncate e.g. 0x3F84C000 to 0x3F84.
+            return static_cast<uint16_t>((bits + 0x7FFFu + lsb) >> 16);
+        }

+        // Convert a float to a 16-bit bfloat16 encoding by adding half of the discarded
+        // range (0x8000) to the float's bit pattern and keeping the upper 16 bits.
+        static uint16_t round_to_nearest(float x)
+        {
+            const uint32_t biased = cu32(x) + 0x8000;
+            return static_cast<uint16_t>(biased >> 16);
+        }
+
+        // Convert a float to a 16-bit bfloat16 encoding by discarding the low 16 bits of
+        // the float's bit pattern.
+        static uint16_t truncate(float x)
+        {
+            const uint32_t bits = cu32(x);
+            return static_cast<uint16_t>(bits >> 16);
+        }
    private:
-        uint16_t m_value{0};
+        // Overlays a float and a uint32_t in the same storage. The cu32 macro constructs
+        // one from a float and reads member i to obtain the float's bit pattern.
+        union F32 {
+            // Store a float; f becomes the initialized member.
+            F32(float val)
+                : f{val}
+            {
+            }
+            // Store a 32-bit pattern; i becomes the initialized member.
+            F32(uint32_t val)
+                : i{val}
+            {
+            }
+            float f;
+            uint32_t i;
+        };
+        // Construct directly from a 16-bit encoding; from_bits() is the public way in.
+        // Kept private because the signature is awkward: the unnamed bool is never read and
+        // exists only so this overload cannot be mistaken for the float constructor.
+        bfloat16(uint16_t value, bool)
+            : m_value{value}
+        {
+        }
+
+        uint16_t m_value;
+
+        static uint16_t BF16_NAN_VALUE;
    };
 }
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -30,6 +30,7 @@ set(SRC
    algebraic_simplification.cpp
    all_close_f.cpp
    assertion.cpp
+    bfloat16.cpp
    build_graph.cpp
    builder_autobroadcast.cpp
    constant_folding.cpp

--- a/test/all_close_f.cpp
+++ b/test/all_close_f.cpp
--- a/test/bfloat16.cpp
+++ b/test/bfloat16.cpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include <random>
+
+#include "gtest/gtest.h"
+
+#include "ngraph/runtime/aligned_buffer.hpp"
+#include "ngraph/type/bfloat16.hpp"
+#include "util/float_util.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+// Render a value as "0x" followed by the value streamed in hex, zero-padded to two
+// characters per byte of T.
+template <typename T>
+string to_hex(T value)
+{
+    stringstream formatter;
+    formatter << "0x";
+    // setw only affects the next insertion, so it must sit directly in front of value.
+    formatter << hex << setfill('0') << setw(sizeof(T) * 2) << value;
+    return formatter.str();
+}
+
+//***********************
+// NOTE
+//***********************
+// This test uses exact comparisons of floating point values. It is testing for bit-exact
+// creation and truncation/rounding of bfloat16 values.
+TEST(bfloat16, conversions)
+{
+    // Bit layouts are written as "sign  exponent  mantissa". Each case decodes a layout,
+    // compares it with the value built by the float constructor, then formats it back and
+    // expects the original layout.
+
+    // 1.0: biased exponent 127, empty mantissa.
+    {
+        const string layout = "0  01111111  000 0000";
+        const bfloat16 decoded = test::bits_to_bfloat16(layout);
+        EXPECT_EQ(decoded, bfloat16(1.0));
+        const string round_trip = test::bfloat16_to_bits(decoded);
+        EXPECT_STREQ(layout.c_str(), round_trip.c_str());
+    }
+
+    // 1.03125: same exponent with mantissa 0000100 (1 + 4/128).
+    {
+        const string layout = "0  01111111  000 0100";
+        const bfloat16 decoded = test::bits_to_bfloat16(layout);
+        EXPECT_EQ(decoded, bfloat16(1.03125));
+        const string round_trip = test::bfloat16_to_bits(decoded);
+        EXPECT_STREQ(layout.c_str(), round_trip.c_str());
+    }
+}
+
+TEST(bfloat16, round_to_nearest)
+{
+    // Decode a 32-bit "sign  exponent  mantissa" layout and round it with the function
+    // under test, returning the resulting 16-bit encoding.
+    auto rounded = [](const string& layout) -> uint16_t {
+        return bfloat16::round_to_nearest(test::bits_to_float(layout));
+    };
+
+    // Discarded half is exactly 0x8000: rounds up from 0x3F84 to 0x3F85.
+    EXPECT_EQ(rounded("0  01111111  000 0100 1000 0000 0000 0000"), 0x3F85);
+
+    // Discarded half is zero: the upper 16 bits are returned unchanged.
+    EXPECT_EQ(rounded("0  01111111  000 0100 0000 0000 0000 0000"), 0x3F84);
+
+    // All-ones mantissa plus a half: the carry propagates into the exponent.
+    EXPECT_EQ(rounded("0  01111111  111 1111 1000 0000 0000 0000"), 0x4000);
+
+    // 1.9921875f, the next representable number which should not round up
+    EXPECT_EQ(rounded("0  01111111  111 1111 0000 0000 0000 0000"), 0x3FFF);
+}
+
+TEST(bfloat16, round_to_nearest_even)
+{
+    // Decode a 32-bit "sign  exponent  mantissa" layout and round it with the function
+    // under test, returning the resulting 16-bit encoding.
+    auto rounded = [](const string& layout) -> uint16_t {
+        return bfloat16::round_to_nearest_even(test::bits_to_float(layout));
+    };
+
+    // Exact tie above an even encoding: stays at 0x3F84.
+    EXPECT_EQ(rounded("0  01111111  000 0100 1000 0000 0000 0000"), 0x3F84);
+
+    // Exact tie above an odd encoding: moves up to the even 0x3F86.
+    EXPECT_EQ(rounded("0  01111111  000 0101 1000 0000 0000 0000"), 0x3F86);
+
+    // Nothing discarded: the odd encoding 0x3F85 is returned unchanged.
+    EXPECT_EQ(rounded("0  01111111  000 0101 0000 0000 0000 0000"), 0x3F85);
+
+    // Exact tie above the all-ones mantissa: the carry reaches the exponent.
+    EXPECT_EQ(rounded("0  01111111  111 1111 1000 0000 0000 0000"), 0x4000);
+
+    // Nothing discarded: 0x3FFF is returned unchanged.
+    EXPECT_EQ(rounded("0  01111111  111 1111 0000 0000 0000 0000"), 0x3FFF);
+}
+
+TEST(bfloat16, to_float)
+{
+    // Each case decodes a 16-bit "sign  exponent  mantissa" layout and checks the exact
+    // float produced by the conversion operator.
+
+    // 1.0: biased exponent 127, empty mantissa.
+    {
+        const bfloat16 one = test::bits_to_bfloat16("0  01111111  000 0000");
+        const float widened = static_cast<float>(one);
+        EXPECT_EQ(widened, 1.0f);
+    }
+
+    // 1.03125: same exponent with mantissa 0000100 (1 + 4/128).
+    {
+        const bfloat16 value = test::bits_to_bfloat16("0  01111111  000 0100");
+        const float widened = static_cast<float>(value);
+        EXPECT_EQ(widened, 1.03125f);
+    }
+}
+
+// Times four ways of converting a large float buffer to bfloat16: the constructor and the
+// three static conversion helpers. Results are only logged; nothing is asserted.
+TEST(benchmark, bfloat16)
+{
+    const size_t buffer_size = 128 * 3 * 224 * 224;
+    ngraph::runtime::AlignedBuffer data(buffer_size * sizeof(float), 4096);
+    float* f = static_cast<float*>(data.get_ptr());
+    // Fixed seed so every run converts the same input values.
+    mt19937 rng(2112);
+    uniform_real_distribution<float> distribution(-300, 300);
+    for (size_t i = 0; i < buffer_size; ++i)
+    {
+        f[i] = distribution(rng);
+    }
+    NGRAPH_INFO << "buffer size " << buffer_size << " floats or " << data.size() << " bytes";
+
+    {
+        ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096);
+        bfloat16* p = static_cast<bfloat16*>(bf_data.get_ptr());
+        stopwatch timer;
+        timer.start();
+        for (size_t i = 0; i < buffer_size; ++i)
+        {
+            p[i] = bfloat16(f[i]);
+        }
+        timer.stop();
+        NGRAPH_INFO << "float to bfloat16 ctor                  " << timer.get_milliseconds()
+                    << "ms";
+    }
+
+    // The static helpers return raw uint16_t encodings. Wrap them with from_bits():
+    // assigning the uint16_t directly would convert it to float numerically and run it
+    // through the rounding constructor, storing a wrong value and timing the wrong work.
+    {
+        ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096);
+        bfloat16* p = static_cast<bfloat16*>(bf_data.get_ptr());
+        stopwatch timer;
+        timer.start();
+        for (size_t i = 0; i < buffer_size; ++i)
+        {
+            p[i] = bfloat16::from_bits(bfloat16::truncate(f[i]));
+        }
+        timer.stop();
+        NGRAPH_INFO << "float to bfloat16 truncate              " << timer.get_milliseconds()
+                    << "ms";
+    }
+
+    {
+        ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096);
+        bfloat16* p = static_cast<bfloat16*>(bf_data.get_ptr());
+        stopwatch timer;
+        timer.start();
+        for (size_t i = 0; i < buffer_size; ++i)
+        {
+            p[i] = bfloat16::from_bits(bfloat16::round_to_nearest(f[i]));
+        }
+        timer.stop();
+        NGRAPH_INFO << "float to bfloat16 round to nearest      " << timer.get_milliseconds()
+                    << "ms";
+    }
+
+    {
+        ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096);
+        bfloat16* p = static_cast<bfloat16*>(bf_data.get_ptr());
+        stopwatch timer;
+        timer.start();
+        for (size_t i = 0; i < buffer_size; ++i)
+        {
+            p[i] = bfloat16::from_bits(bfloat16::round_to_nearest_even(f[i]));
+        }
+        timer.stop();
+        NGRAPH_INFO << "float to bfloat16 round to nearest even " << timer.get_milliseconds()
+                    << "ms";
+    }
+}
--- a/test/util/CMakeLists.txt
+++ b/test/util/CMakeLists.txt
@@ -17,6 +17,7 @@
 set (SRC
    autodiff/backprop_function.cpp
    all_close_f.cpp
+    float_util.cpp
    test_tools.cpp
    test_control.cpp
 )

--- a/test/util/float_util.cpp
+++ b/test/util/float_util.cpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include "util/float_util.hpp"
+
+// Overlays a float and a uint32_t in the same storage.
+// NOTE(review): util/float_util.hpp, included above, declares unions with these same names
+// inside ngraph::test. Lookups from the ngraph::test:: functions below presumably resolve
+// to those, which would leave these global copies unused - confirm and drop one set.
+union FloatUnion {
+    FloatUnion() { i = 0; }
+    FloatUnion(float val) { f = val; }
+    FloatUnion(uint32_t val) { i = val; }
+    float f;
+    uint32_t i;
+};
+
+// Overlays a double and a uint64_t in the same storage.
+union DoubleUnion {
+    DoubleUnion() { i = 0; }
+    DoubleUnion(double val) { d = val; }
+    DoubleUnion(uint64_t val) { i = val; }
+    double d;
+    uint64_t i;
+};
+
+// Format the 16-bit encoding of a bfloat16 as "S  EEEEEEEE  MMM MMMM": sign bit, 8 exponent
+// bits, and 7 mantissa bits split into groups of 3 and 4.
+std::string ngraph::test::bfloat16_to_bits(bfloat16 f)
+{
+    // Most significant bit first, so index 0 is the sign.
+    const std::string raw = std::bitset<16>(f.to_bits()).to_string();
+    std::string formatted;
+    // 16 digits plus five separator spaces.
+    formatted.reserve(21);
+    // Sign
+    formatted.push_back(raw[0]);
+    formatted.append("  ");
+    // Exponent
+    formatted.append(raw, 1, 8);
+    formatted.append("  ");
+    // Mantissa: three leading bits, then the single remaining group of four.
+    formatted.append(raw, 9, 3);
+    formatted.push_back(' ');
+    formatted.append(raw, 12, 4);
+    return formatted;
+}
+
+// Format the 32-bit encoding of a float as "S  EEEEEEEE  MMM MMMM MMMM MMMM MMMM MMMM":
+// sign bit, 8 exponent bits, then the 23 mantissa bits as one group of 3 and five of 4.
+std::string ngraph::test::float_to_bits(float f)
+{
+    const FloatUnion punned{f};
+    // Most significant bit first, so index 0 is the sign.
+    const std::string raw = std::bitset<32>(punned.i).to_string();
+
+    std::string formatted;
+    formatted.reserve(41);
+    // Sign
+    formatted.push_back(raw[0]);
+    formatted.append("  ");
+    // Exponent
+    formatted.append(raw, 1, 8);
+    formatted.append("  ");
+    // Mantissa
+    formatted.append(raw, 9, 3);
+    for (size_t group = 12; group < raw.size(); group += 4)
+    {
+        formatted.push_back(' ');
+        formatted.append(raw, group, 4);
+    }
+    return formatted;
+}
+
+// Format the 64-bit encoding of a double as the sign bit, two spaces, the 11 exponent bits,
+// two spaces, then the 52 mantissa bits in space-separated groups of 4.
+std::string ngraph::test::double_to_bits(double d)
+{
+    const DoubleUnion punned{d};
+    // Most significant bit first, so index 0 is the sign.
+    const std::string raw = std::bitset<64>(punned.i).to_string();
+
+    std::string formatted;
+    formatted.reserve(80);
+    // Sign
+    formatted.push_back(raw[0]);
+    formatted.append("  ");
+    // Exponent
+    formatted.append(raw, 1, 11);
+    formatted.push_back(' ');
+    // Mantissa: each group brings its own leading space, which completes the two-space gap
+    // after the exponent.
+    for (size_t group = 12; group < raw.size(); group += 4)
+    {
+        formatted.push_back(' ');
+        formatted.append(raw, group, 4);
+    }
+    return formatted;
+}
+
+// Parse a string of 16 binary digits, with optional whitespace between groups, into the
+// bfloat16 that has that encoding. The first digit is the sign bit.
+// Throws ngraph_error unless exactly 16 non-whitespace characters remain.
+ngraph::bfloat16 ngraph::test::bits_to_bfloat16(const std::string& s)
+{
+    std::string unformatted = s;
+    // isspace is only defined for values representable as unsigned char (or EOF), so
+    // convert before the call instead of passing a possibly negative char.
+    unformatted.erase(
+        std::remove_if(unformatted.begin(),
+                       unformatted.end(),
+                       [](char c) { return ::isspace(static_cast<unsigned char>(c)) != 0; }),
+        unformatted.end());
+
+    if (unformatted.size() != 16)
+    {
+        throw ngraph_error("Input length must be 16");
+    }
+    std::bitset<16> bs(unformatted);
+    return bfloat16::from_bits(static_cast<uint16_t>(bs.to_ulong()));
+}
+
+// Parse a string of 32 binary digits, with optional whitespace between groups, into the
+// float that has that encoding. The first digit is the sign bit.
+// Throws ngraph_error unless exactly 32 non-whitespace characters remain.
+float ngraph::test::bits_to_float(const std::string& s)
+{
+    std::string unformatted = s;
+    // isspace is only defined for values representable as unsigned char (or EOF), so
+    // convert before the call instead of passing a possibly negative char.
+    unformatted.erase(
+        std::remove_if(unformatted.begin(),
+                       unformatted.end(),
+                       [](char c) { return ::isspace(static_cast<unsigned char>(c)) != 0; }),
+        unformatted.end());
+
+    if (unformatted.size() != 32)
+    {
+        throw ngraph_error("Input length must be 32");
+    }
+    std::bitset<32> bs(unformatted);
+    FloatUnion fu;
+    fu.i = static_cast<uint32_t>(bs.to_ulong());
+    return fu.f;
+}
+
+// Parse a string of 64 binary digits, with optional whitespace between groups, into the
+// double that has that encoding. The first digit is the sign bit.
+// Throws ngraph_error unless exactly 64 non-whitespace characters remain.
+double ngraph::test::bits_to_double(const std::string& s)
+{
+    std::string unformatted = s;
+    // isspace is only defined for values representable as unsigned char (or EOF), so
+    // convert before the call instead of passing a possibly negative char.
+    unformatted.erase(
+        std::remove_if(unformatted.begin(),
+                       unformatted.end(),
+                       [](char c) { return ::isspace(static_cast<unsigned char>(c)) != 0; }),
+        unformatted.end());
+
+    if (unformatted.size() != 64)
+    {
+        throw ngraph_error("Input length must be 64");
+    }
+    std::bitset<64> bs(unformatted);
+    DoubleUnion du;
+    du.i = static_cast<uint64_t>(bs.to_ullong());
+    return du.d;
+}
--- a/test/util/float_util.hpp
+++ b/test/util/float_util.hpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include <algorithm>
+#include <bitset>
+#include <cmath>
+#include <limits>
+#include <sstream>
+
+#include "ngraph/ngraph.hpp"
+
+// NOTE(review): no #pragma once or include guard is visible in this header, and it defines
+// types, so including it twice in one translation unit would fail - confirm and add one.
+namespace ngraph
+{
+    namespace test
+    {
+        // Overlays a float and a uint32_t in the same storage.
+        union FloatUnion {
+            FloatUnion()
+                : i{0}
+            {
+            }
+            FloatUnion(float val)
+                : f{val}
+            {
+            }
+            FloatUnion(uint32_t val)
+                : i{val}
+            {
+            }
+            float f;
+            uint32_t i;
+        };
+
+        // Overlays a double and a uint64_t in the same storage.
+        union DoubleUnion {
+            DoubleUnion()
+                : i{0}
+            {
+            }
+            DoubleUnion(double val)
+                : d{val}
+            {
+            }
+            DoubleUnion(uint64_t val)
+                : i{val}
+            {
+            }
+            double d;
+            uint64_t i;
+        };
+
+        // Formatters: render a value's encoding as grouped binary digits, sign bit first.
+        std::string bfloat16_to_bits(bfloat16 f);
+
+        std::string float_to_bits(float f);
+
+        std::string double_to_bits(double d);
+
+        // Parsers: the inverse direction, from binary digits (whitespace allowed) to a value.
+        bfloat16 bits_to_bfloat16(const std::string& s);
+
+        float bits_to_float(const std::string& s);
+
+        double bits_to_double(const std::string& s);
+    }
+}