Commit 667d8f9e authored by Robert Kimball's avatar Robert Kimball Committed by Scott Cyphers

bfloat16 testing and fixes (#2693)

* add test file

* add new float_util files

* Add unit tests for bfloat. Fix bfloat rounding code since it was incorrectly rounding.

* add more tests

* cleanup

* change truncate to be endian agnostic

* prep work for constexpr ctors

* ready for constexpr

* constexpr ctor for bfloat16

* more bfloating

* write constexpr isnan since it is not constexpr on Macos

* remove cast operator

* add benchmark test and cleanup

* use aligned buffers for benchmark tests

* fix numbers printed in benchmark

* remove union and use cast operator

* all tests passing

* cleanup
parent 69486262
......@@ -33,14 +33,16 @@
#include <cmath>
#include <iostream>
#include <limits>
#include "ngraph/type/bfloat16.hpp"
using namespace std;
using namespace ngraph;
// Bit pattern used to represent NaN in bfloat16
static const uint16_t BF16_NAN_VALUE = 0x7FC0;
static_assert(sizeof(bfloat16) == 2, "class bfloat16 must be exactly 2 bytes");
uint16_t bfloat16::BF16_NAN_VALUE = 0x7FC0;
bool float_isnan(const float& x)
{
......@@ -63,33 +65,6 @@ std::vector<bfloat16> bfloat16::from_float_vector(const std::vector<float>& v_f3
return v_bf16;
}
// Builds a bfloat16 from a 32-bit float.
//
// value:    the float to convert.
// rounding: when false the low 16 bits of the float encoding are dropped; when true the
//           encoding is rounded to nearest, ties to even, before the low bits are dropped.
//
// NaN inputs always produce BF16_NAN_VALUE.
bfloat16::bfloat16(float value, bool rounding)
{
    if (float_isnan(value))
    {
        m_value = BF16_NAN_VALUE;
        return;
    }

    // Both paths read the encoding as a single 32-bit word and keep its upper half. Shifting
    // the word (rather than indexing 16-bit halves of the float) gives the same answer on
    // little- and big-endian hosts.
    uint32_t u32_value = *reinterpret_cast<uint32_t*>(&value);
    if (rounding)
    {
        // Rounding with round-nearest-to-even to create bfloat16
        // from float. Refer to TF implementation explanation:
        // https://github.com/tensorflow/tensorflow/blob/d354efc/tensorflow/core/lib/bfloat16/bfloat16.h#L199
        uint32_t lsb = (u32_value >> 16) & 1;
        uint32_t rounding_bias = 0x7fff + lsb;
        u32_value += rounding_bias;
    }
    m_value = static_cast<uint16_t>(u32_value >> 16);
}
std::string bfloat16::to_string() const
{
return std::to_string(static_cast<float>(*this));
......@@ -130,12 +105,9 @@ bool bfloat16::operator>=(const bfloat16& other) const
// Widens the stored 16 bits back to a 32-bit float: m_value becomes the upper half of a
// 32-bit word (lower half zero) and that word is read back as a float's bit pattern.
bfloat16::operator float() const
{
    // The word must be reinterpreted, not numerically converted: static_cast<float> of the
    // shifted integer would return the integer's magnitude instead of the encoded value.
    uint32_t tmp = (static_cast<uint32_t>(m_value) << 16);
    const float* f = reinterpret_cast<const float*>(&tmp);
    return *f;
}
bfloat16::operator double() const
......@@ -143,7 +115,7 @@ bfloat16::operator double() const
return static_cast<float>(m_value);
}
std::ostream& operator<<(std::ostream& out, const bfloat16& obj)
uint16_t bfloat16::to_bits() const
{
return (out << static_cast<float>(obj));
return m_value;
}
......@@ -14,27 +14,42 @@
// limitations under the License.
//*****************************************************************************
//================================================================================================
// bfloat16 type
//================================================================================================
#pragma once
#include <cmath>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#define ROUND_MODE_TO_NEAREST_EVEN
namespace ngraph
{
class bfloat16
{
public:
bfloat16() {}
bfloat16(float value, bool rounding = false);
bfloat16(const bfloat16&) = default;
bfloat16& operator=(const bfloat16&) = default;
virtual ~bfloat16() {}
// Default construction yields the all-zero bit pattern.
bfloat16()
    : m_value(0)
{
}
// Converts a float to bfloat16 bits. The conversion routine is chosen at compile time by
// whichever ROUND_MODE_* macro is defined, checked in this order: ROUND_MODE_TO_NEAREST ->
// round_to_nearest, ROUND_MODE_TO_NEAREST_EVEN -> round_to_nearest_even,
// ROUND_MODE_TRUNCATE -> truncate. If none is defined the #error below stops the build.
// The constructor is not explicit, so a float converts to bfloat16 implicitly.
bfloat16(float value)
: m_value
{
#if defined ROUND_MODE_TO_NEAREST
round_to_nearest(value)
#elif defined ROUND_MODE_TO_NEAREST_EVEN
round_to_nearest_even(value)
#elif defined ROUND_MODE_TRUNCATE
truncate(value)
#else
#error \
"ROUNDING_MODE must be one of ROUND_MODE_TO_NEAREST, ROUND_MODE_TO_NEAREST_EVEN, or ROUND_MODE_TRUNCATE"
#endif
}
{
}
std::string to_string() const;
size_t size() const;
bool operator==(const bfloat16& other) const;
......@@ -48,10 +63,49 @@ namespace ngraph
static std::vector<float> to_float_vector(const std::vector<bfloat16>&);
static std::vector<bfloat16> from_float_vector(const std::vector<float>&);
// Wraps an already-encoded 16-bit pattern in a bfloat16 without any numeric conversion.
// The bool argument only selects the private (uint16_t, bool) constructor; its value is unused.
static bfloat16 from_bits(uint16_t bits) { return bfloat16(bits, false); }
uint16_t to_bits() const;
// Streams the value as its float equivalent and returns the stream for chaining.
friend std::ostream& operator<<(std::ostream& out, const bfloat16& obj)
{
    return out << static_cast<float>(obj);
}
friend std::ostream& operator<<(std::ostream&, const bfloat16&);
#define cu32(x) (F32(x).i)
// Converts a float to bfloat16 bits using round-to-nearest, ties-to-even.
//
// x: the float to convert.
// Returns the 16-bit bfloat16 encoding.
static uint16_t round_to_nearest_even(float x)
{
    const uint32_t bits = cu32(x);

    // NaN (all exponent bits set, non-zero mantissa) must not go through the rounding add:
    // the carry could run into the exponent/sign and produce zero or infinity. Keep the
    // upper bits and force the top mantissa bit so the result is still a NaN.
    if ((bits & 0x7fffffff) > 0x7f800000)
    {
        return static_cast<uint16_t>((bits >> 16) | 0x0040);
    }

    // Adding 0x7fff rounds everything strictly above the halfway point up and leaves
    // everything below it alone. Adding the kept LSB on top makes an exact tie round up
    // only when the kept LSB is odd, i.e. towards the even neighbour.
    const uint32_t lsb = (bits >> 16) & 1;
    return static_cast<uint16_t>((bits + 0x7fff + lsb) >> 16);
}
// Converts a float to bfloat16 bits using round-to-nearest where an exact tie rounds the
// magnitude up.
//
// x: the float to convert.
// Returns the 16-bit bfloat16 encoding.
static uint16_t round_to_nearest(float x)
{
    const uint32_t bits = cu32(x);

    // NaN must bypass the rounding add: with a large payload the carry would overflow into
    // the sign bit and turn the NaN into a zero. Keep the upper bits and force the top
    // mantissa bit so the result is still a NaN.
    if ((bits & 0x7fffffff) > 0x7f800000)
    {
        return static_cast<uint16_t>((bits >> 16) | 0x0040);
    }

    // Adding half of the discarded range carries into the kept bits at or above the midpoint.
    return static_cast<uint16_t>((bits + 0x8000) >> 16);
}
// Converts a float to bfloat16 bits by dropping the low 16 bits of the float encoding.
//
// x: the float to convert.
// Returns the 16-bit bfloat16 encoding.
static uint16_t truncate(float x)
{
    const uint32_t bits = cu32(x);
    const uint16_t upper = static_cast<uint16_t>(bits >> 16);

    // A NaN whose payload lives only in the discarded bits would truncate to infinity;
    // force the top mantissa bit so it stays a NaN.
    if ((bits & 0x7fffffff) > 0x7f800000)
    {
        return static_cast<uint16_t>(upper | 0x0040);
    }
    return upper;
}
private:
uint16_t m_value{0};
// Overlays a float and a 32-bit unsigned integer in the same storage so the bit pattern of
// one can be read through the other.
union F32 {
    float f;
    uint32_t i;

    // Store a float value.
    F32(float val)
        : f(val)
    {
    }

    // Store a raw 32-bit pattern.
    F32(uint32_t val)
        : i(val)
    {
    }
};
// Raw-bits constructor: stores `value` unchanged as the encoding. It is kept private
// because it is ugly; the unnamed bool parameter exists only so this signature cannot be
// confused with the float constructor.
bfloat16(uint16_t value, bool)
    : m_value(value)
{
}
uint16_t m_value;
static uint16_t BF16_NAN_VALUE;
};
}
......@@ -30,6 +30,7 @@ set(SRC
algebraic_simplification.cpp
all_close_f.cpp
assertion.cpp
bfloat16.cpp
build_graph.cpp
builder_autobroadcast.cpp
constant_folding.cpp
......
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <random>
#include "gtest/gtest.h"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/type/bfloat16.hpp"
#include "util/float_util.hpp"
using namespace std;
using namespace ngraph;
// Renders `value` as lowercase hexadecimal with a "0x" prefix, zero-padded to two digits
// per byte of T.
template <typename T>
string to_hex(T value)
{
    const int digit_count = static_cast<int>(sizeof(T) * 2);
    stringstream formatter;
    formatter << "0x";
    formatter << hex << setfill('0') << setw(digit_count) << value;
    return formatter.str();
}
//***********************
// NOTE
//***********************
// This test uses exact comparisons of floating point values. It is testing for bit-exact
// creation and truncation/rounding of bfloat16 values.
// Checks that a bit string decodes to the expected bfloat16 and that the decoded value
// formats back to the identical bit string.
TEST(bfloat16, conversions)
{
    auto verify_round_trip = [](const string& source_bits, const bfloat16& expected_value) {
        const bfloat16 decoded = test::bits_to_bfloat16(source_bits);
        EXPECT_EQ(decoded, expected_value);
        const string encoded = test::bfloat16_to_bits(decoded);
        EXPECT_STREQ(source_bits.c_str(), encoded.c_str());
    };

    // 1.f, the ground-truth value
    verify_round_trip("0 01111111 000 0000", bfloat16(1.0));

    // 1.03125f, the exact upper bound
    verify_round_trip("0 01111111 000 0100", bfloat16(1.03125));
}
// Bit-exact checks of bfloat16::round_to_nearest on inputs given as 32-bit float patterns.
TEST(bfloat16, round_to_nearest)
{
    string fstring;
    float fvalue;
    uint16_t bf_round;

    // Discarded half is exactly 0x8000 (a tie): the kept bits are bumped from 0x3F84 to 0x3F85
    fstring = "0 01111111 000 0100 1000 0000 0000 0000";
    fvalue = test::bits_to_float(fstring);
    bf_round = bfloat16::round_to_nearest(fvalue);
    EXPECT_EQ(bf_round, 0x3F85);

    // Discarded half is zero: the kept bits are returned unchanged
    fstring = "0 01111111 000 0100 0000 0000 0000 0000";
    fvalue = test::bits_to_float(fstring);
    bf_round = bfloat16::round_to_nearest(fvalue);
    EXPECT_EQ(bf_round, 0x3F84);

    // A tie on an all-ones mantissa: the carry propagates into the exponent (0x3FFF -> 0x4000)
    fstring = "0 01111111 111 1111 1000 0000 0000 0000";
    fvalue = test::bits_to_float(fstring);
    bf_round = bfloat16::round_to_nearest(fvalue);
    EXPECT_EQ(bf_round, 0x4000);

    // 1.9921875f, the next representable number which should not round up
    fstring = "0 01111111 111 1111 0000 0000 0000 0000";
    fvalue = test::bits_to_float(fstring);
    bf_round = bfloat16::round_to_nearest(fvalue);
    EXPECT_EQ(bf_round, 0x3FFF);
}
// Bit-exact checks of bfloat16::round_to_nearest_even. Each case pairs a 32-bit float
// pattern with the 16-bit encoding the rounding must produce.
TEST(bfloat16, round_to_nearest_even)
{
    struct RoundingCase
    {
        const char* float_bits;
        uint16_t expected_bits;
    };

    const RoundingCase cases[] = {
        {"0 01111111 000 0100 1000 0000 0000 0000", 0x3F84},
        {"0 01111111 000 0101 1000 0000 0000 0000", 0x3F86},
        {"0 01111111 000 0101 0000 0000 0000 0000", 0x3F85},
        {"0 01111111 111 1111 1000 0000 0000 0000", 0x4000},
        {"0 01111111 111 1111 0000 0000 0000 0000", 0x3FFF},
    };

    for (const RoundingCase& current : cases)
    {
        const float input = test::bits_to_float(current.float_bits);
        const uint16_t rounded = bfloat16::round_to_nearest_even(input);
        EXPECT_EQ(rounded, current.expected_bits);
    }
}
// Checks that decoding a bfloat16 bit string and casting it to float gives the exact value.
TEST(bfloat16, to_float)
{
    struct WideningCase
    {
        const char* bf16_bits;
        float expected;
    };

    const WideningCase cases[] = {
        // 1.f, the ground-truth value
        {"0 01111111 000 0000", 1.0f},
        // 1.03125f, the exact upper bound
        {"0 01111111 000 0100", 1.03125f},
    };

    for (const WideningCase& current : cases)
    {
        const bfloat16 decoded = test::bits_to_bfloat16(current.bf16_bits);
        const float widened = static_cast<float>(decoded);
        EXPECT_EQ(widened, current.expected);
    }
}
TEST(benchmark, bfloat16)
{
size_t buffer_size = 128 * 3 * 224 * 224;
ngraph::runtime::AlignedBuffer data(buffer_size * sizeof(float), 4096);
float* f = static_cast<float*>(data.get_ptr());
// vector<float> data(buffer_size);
mt19937 rng(2112);
uniform_real_distribution<float> distribution(-300, 300);
for (size_t i = 0; i < buffer_size; ++i)
{
f[i] = distribution(rng);
}
NGRAPH_INFO << "buffer size " << buffer_size << " floats or " << data.size() << " bytes";
{
ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096);
bfloat16* p = static_cast<bfloat16*>(bf_data.get_ptr());
stopwatch timer;
timer.start();
for (size_t i = 0; i < buffer_size; ++i)
{
p[i] = bfloat16(f[i]);
}
timer.stop();
NGRAPH_INFO << "float to bfloat16 ctor " << timer.get_milliseconds()
<< "ms";
}
{
ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096);
bfloat16* p = static_cast<bfloat16*>(bf_data.get_ptr());
stopwatch timer;
timer.start();
for (size_t i = 0; i < buffer_size; ++i)
{
p[i] = bfloat16::truncate(f[i]);
}
timer.stop();
NGRAPH_INFO << "float to bfloat16 truncate " << timer.get_milliseconds()
<< "ms";
}
{
ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096);
bfloat16* p = static_cast<bfloat16*>(bf_data.get_ptr());
stopwatch timer;
timer.start();
for (size_t i = 0; i < buffer_size; ++i)
{
p[i] = bfloat16::round_to_nearest(f[i]);
}
timer.stop();
NGRAPH_INFO << "float to bfloat16 round to nearest " << timer.get_milliseconds()
<< "ms";
}
{
ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096);
bfloat16* p = static_cast<bfloat16*>(bf_data.get_ptr());
stopwatch timer;
timer.start();
for (size_t i = 0; i < buffer_size; ++i)
{
p[i] = bfloat16::round_to_nearest_even(f[i]);
}
timer.stop();
NGRAPH_INFO << "float to bfloat16 round to nearest even " << timer.get_milliseconds()
<< "ms";
}
}
......@@ -17,6 +17,7 @@
set (SRC
autodiff/backprop_function.cpp
all_close_f.cpp
float_util.cpp
test_tools.cpp
test_control.cpp
)
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "util/float_util.hpp"
// Overlays a float and a 32-bit unsigned integer in the same storage.
union FloatUnion {
    float f;
    uint32_t i;

    // Defaults to an all-zero bit pattern.
    FloatUnion()
        : i(0)
    {
    }
    FloatUnion(float val)
        : f(val)
    {
    }
    FloatUnion(uint32_t val)
        : i(val)
    {
    }
};
// Overlays a double and a 64-bit unsigned integer in the same storage.
union DoubleUnion {
    double d;
    uint64_t i;

    // Defaults to an all-zero bit pattern.
    DoubleUnion()
        : i(0)
    {
    }
    DoubleUnion(double val)
        : d(val)
    {
    }
    DoubleUnion(uint64_t val)
        : i(val)
    {
    }
};
// Formats the 16 raw bits of `f` as "S EEEEEEEE MMM MMMM": the sign bit, the 8 exponent
// bits, then the 7 mantissa bits split into a group of 3 and a group of 4.
std::string ngraph::test::bfloat16_to_bits(bfloat16 f)
{
    const std::string raw = std::bitset<16>(f.to_bits()).to_string();

    std::string grouped;
    grouped.reserve(41);
    // Sign
    grouped += raw[0];
    grouped += ' ';
    // Exponent
    grouped.append(raw, 1, 8);
    grouped += ' ';
    // Mantissa
    grouped.append(raw, 9, 3);
    grouped += ' ';
    grouped.append(raw, 12, 4);
    return grouped;
}
std::string ngraph::test::float_to_bits(float f)
{
FloatUnion fu{f};
std::stringstream ss;
ss << std::bitset<32>(fu.i);
std::string unformatted = ss.str();
std::string formatted;
formatted.reserve(41);
// Sign
formatted.push_back(unformatted[0]);
formatted.append(" ");
// Exponent
formatted.append(unformatted, 1, 8);
formatted.append(" ");
// Mantissa
formatted.append(unformatted, 9, 3);
for (int i = 12; i < 32; i += 4)
{
formatted.push_back(' ');
formatted.append(unformatted, i, 4);
}
return formatted;
}
std::string ngraph::test::double_to_bits(double d)
{
DoubleUnion du{d};
std::stringstream ss;
ss << std::bitset<64>(du.i);
std::string unformatted = ss.str();
std::string formatted;
formatted.reserve(80);
// Sign
formatted.push_back(unformatted[0]);
formatted.append(" ");
// Exponent
formatted.append(unformatted, 1, 11);
formatted.push_back(' ');
// Mantissa
for (int i = 12; i < 64; i += 4)
{
formatted.push_back(' ');
formatted.append(unformatted, i, 4);
}
return formatted;
}
namespace
{
    // Returns a copy of `s` with every whitespace character removed.
    std::string strip_whitespace(const std::string& s)
    {
        std::string unformatted = s;
        // isspace requires a value representable as unsigned char, so convert first;
        // passing a negative char straight through is undefined behaviour.
        unformatted.erase(
            std::remove_if(unformatted.begin(),
                           unformatted.end(),
                           [](char c) { return ::isspace(static_cast<unsigned char>(c)) != 0; }),
            unformatted.end());
        return unformatted;
    }
}

// Parses a string of '0'/'1' characters (whitespace ignored) into a bfloat16 with exactly
// that bit pattern. Throws ngraph_error unless exactly 16 non-whitespace characters remain.
ngraph::bfloat16 ngraph::test::bits_to_bfloat16(const std::string& s)
{
    const std::string unformatted = strip_whitespace(s);
    if (unformatted.size() != 16)
    {
        throw ngraph_error("Input length must be 16");
    }
    std::bitset<16> bs(unformatted);
    return bfloat16::from_bits(static_cast<uint16_t>(bs.to_ulong()));
}

// Parses a string of '0'/'1' characters (whitespace ignored) into a float with exactly
// that bit pattern. Throws ngraph_error unless exactly 32 non-whitespace characters remain.
float ngraph::test::bits_to_float(const std::string& s)
{
    const std::string unformatted = strip_whitespace(s);
    if (unformatted.size() != 32)
    {
        throw ngraph_error("Input length must be 32");
    }
    std::bitset<32> bs(unformatted);
    FloatUnion fu;
    fu.i = static_cast<uint32_t>(bs.to_ulong());
    return fu.f;
}

// Parses a string of '0'/'1' characters (whitespace ignored) into a double with exactly
// that bit pattern. Throws ngraph_error unless exactly 64 non-whitespace characters remain.
double ngraph::test::bits_to_double(const std::string& s)
{
    const std::string unformatted = strip_whitespace(s);
    if (unformatted.size() != 64)
    {
        throw ngraph_error("Input length must be 64");
    }
    std::bitset<64> bs(unformatted);
    DoubleUnion du;
    du.i = static_cast<uint64_t>(bs.to_ullong());
    return du.d;
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <algorithm>
#include <bitset>
#include <cmath>
#include <limits>
#include <sstream>
#include "ngraph/ngraph.hpp"
namespace ngraph
{
namespace test
{
// Overlays a float and a 32-bit unsigned integer in the same storage.
union FloatUnion {
    float f;
    uint32_t i;

    // Defaults to an all-zero bit pattern.
    FloatUnion()
        : i(0)
    {
    }
    FloatUnion(float val)
        : f(val)
    {
    }
    FloatUnion(uint32_t val)
        : i(val)
    {
    }
};
// Overlays a double and a 64-bit unsigned integer in the same storage.
union DoubleUnion {
    double d;
    uint64_t i;

    // Defaults to an all-zero bit pattern.
    DoubleUnion()
        : i(0)
    {
    }
    DoubleUnion(double val)
        : d(val)
    {
    }
    DoubleUnion(uint64_t val)
        : i(val)
    {
    }
};
// Formats the 16 raw bits of a bfloat16 as sign, 8 exponent bits and mantissa groups
// separated by spaces, e.g. "0 01111111 000 0000".
std::string bfloat16_to_bits(bfloat16 f);
// Formats the 32 raw bits of a float: sign, 8 exponent bits, then the mantissa as a group
// of 3 bits followed by groups of 4, separated by spaces.
std::string float_to_bits(float f);
// Formats the 64 raw bits of a double: sign, 11 exponent bits, then the mantissa in groups
// of 4 bits.
std::string double_to_bits(double d);
// Inverse of bfloat16_to_bits. Whitespace in `s` is ignored; throws ngraph_error unless
// exactly 16 characters remain.
bfloat16 bits_to_bfloat16(const std::string& s);
// Inverse of float_to_bits. Whitespace in `s` is ignored; throws ngraph_error unless
// exactly 32 characters remain.
float bits_to_float(const std::string& s);
// Inverse of double_to_bits. Whitespace in `s` is ignored; throws ngraph_error unless
// exactly 64 characters remain.
double bits_to_double(const std::string& s);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment