Unverified Commit 8933fed0 authored by Scott Cyphers's avatar Scott Cyphers Committed by GitHub

Cyphers/fp16 (#2845)

* Basic float16

* Basic fp16 support

* typo
parent e8b5d11b
...@@ -403,6 +403,9 @@ set (SRC ...@@ -403,6 +403,9 @@ set (SRC
strides.cpp strides.cpp
strides.hpp strides.hpp
type/bfloat16.cpp type/bfloat16.cpp
type/bfloat16.hpp
type/float16.cpp
type/float16.hpp
type/element_type.cpp type/element_type.cpp
util.cpp util.cpp
util.hpp util.hpp
......
...@@ -571,6 +571,7 @@ private: ...@@ -571,6 +571,7 @@ private:
case element::Type_t::undefined: case element::Type_t::undefined:
case element::Type_t::dynamic: case element::Type_t::dynamic:
case element::Type_t::bf16: case element::Type_t::bf16:
case element::Type_t::f16:
ss << "unsupported element type " << type << " op Convert"; ss << "unsupported element type " << type << " op Convert";
throw std::runtime_error(ss.str()); throw std::runtime_error(ss.str());
} }
......
...@@ -27,6 +27,7 @@ using namespace std; ...@@ -27,6 +27,7 @@ using namespace std;
NGRAPH_API const element::Type element::dynamic(element::Type_t::dynamic); NGRAPH_API const element::Type element::dynamic(element::Type_t::dynamic);
NGRAPH_API const element::Type element::boolean(element::Type_t::boolean); NGRAPH_API const element::Type element::boolean(element::Type_t::boolean);
NGRAPH_API const element::Type element::bf16(element::Type_t::bf16); NGRAPH_API const element::Type element::bf16(element::Type_t::bf16);
NGRAPH_API const element::Type element::f16(element::Type_t::f16);
NGRAPH_API const element::Type element::f32(element::Type_t::f32); NGRAPH_API const element::Type element::f32(element::Type_t::f32);
NGRAPH_API const element::Type element::f64(element::Type_t::f64); NGRAPH_API const element::Type element::f64(element::Type_t::f64);
NGRAPH_API const element::Type element::i8(element::Type_t::i8); NGRAPH_API const element::Type element::i8(element::Type_t::i8);
...@@ -72,6 +73,7 @@ static const map<element::Type_t, const TypeInfo>& get_type_info_map() ...@@ -72,6 +73,7 @@ static const map<element::Type_t, const TypeInfo>& get_type_info_map()
{element::Type_t::dynamic, TypeInfo(0, false, false, false, "dynamic", "dynamic")}, {element::Type_t::dynamic, TypeInfo(0, false, false, false, "dynamic", "dynamic")},
{element::Type_t::boolean, TypeInfo(8, false, true, false, "char", "boolean")}, {element::Type_t::boolean, TypeInfo(8, false, true, false, "char", "boolean")},
{element::Type_t::bf16, TypeInfo(16, true, true, false, "bfloat16", "bf16")}, {element::Type_t::bf16, TypeInfo(16, true, true, false, "bfloat16", "bf16")},
{element::Type_t::f16, TypeInfo(16, true, true, false, "float16", "f16")},
{element::Type_t::f32, TypeInfo(32, true, true, false, "float", "f32")}, {element::Type_t::f32, TypeInfo(32, true, true, false, "float", "f32")},
{element::Type_t::f64, TypeInfo(64, true, true, false, "double", "f64")}, {element::Type_t::f64, TypeInfo(64, true, true, false, "double", "f64")},
{element::Type_t::i8, TypeInfo(8, false, true, true, "int8_t", "i8")}, {element::Type_t::i8, TypeInfo(8, false, true, true, "int8_t", "i8")},
...@@ -91,6 +93,7 @@ std::vector<const element::Type*> element::Type::get_known_types() ...@@ -91,6 +93,7 @@ std::vector<const element::Type*> element::Type::get_known_types()
std::vector<const element::Type*> rc = {&element::dynamic, std::vector<const element::Type*> rc = {&element::dynamic,
&element::boolean, &element::boolean,
&element::bf16, &element::bf16,
&element::f16,
&element::f32, &element::f32,
&element::f64, &element::f64,
&element::i8, &element::i8,
...@@ -164,6 +167,11 @@ namespace ngraph ...@@ -164,6 +167,11 @@ namespace ngraph
return boolean; return boolean;
} }
template <> template <>
const Type& from<ngraph::float16>()
{
return f16;
}
template <>
const Type& from<float>() const Type& from<float>()
{ {
return f32; return f32;
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "ngraph/except.hpp" #include "ngraph/except.hpp"
#include "ngraph/ngraph_visibility.hpp" #include "ngraph/ngraph_visibility.hpp"
#include "ngraph/type/bfloat16.hpp" #include "ngraph/type/bfloat16.hpp"
#include "ngraph/type/float16.hpp"
namespace ngraph namespace ngraph
{ {
...@@ -40,6 +41,7 @@ namespace ngraph ...@@ -40,6 +41,7 @@ namespace ngraph
dynamic, dynamic,
boolean, boolean,
bf16, bf16,
f16,
f32, f32,
f64, f64,
i8, i8,
...@@ -124,6 +126,7 @@ namespace ngraph ...@@ -124,6 +126,7 @@ namespace ngraph
extern NGRAPH_API const Type dynamic; extern NGRAPH_API const Type dynamic;
extern NGRAPH_API const Type boolean; extern NGRAPH_API const Type boolean;
extern NGRAPH_API const Type bf16; extern NGRAPH_API const Type bf16;
extern NGRAPH_API const Type f16;
extern NGRAPH_API const Type f32; extern NGRAPH_API const Type f32;
extern NGRAPH_API const Type f64; extern NGRAPH_API const Type f64;
extern NGRAPH_API const Type i8; extern NGRAPH_API const Type i8;
...@@ -166,6 +169,8 @@ namespace ngraph ...@@ -166,6 +169,8 @@ namespace ngraph
const Type& from<uint64_t>(); const Type& from<uint64_t>();
template <> template <>
const Type& from<ngraph::bfloat16>(); const Type& from<ngraph::bfloat16>();
template <>
const Type& from<ngraph::float16>();
std::ostream& operator<<(std::ostream& out, const ngraph::element::Type& obj); std::ostream& operator<<(std::ostream& out, const ngraph::element::Type& obj);
} }
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// Contains logic derived from TensorFlow's bfloat16 implementation
// https://github.com/tensorflow/tensorflow/blob/d354efc/tensorflow/core/lib/bfloat16/bfloat16.h
// Copyright notice from original source file is as follows.
//*******************************************************************************
// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//==============================================================================
#include <cmath>
#include <iostream>
#include <limits>
#include "ngraph/type/float16.hpp"
using namespace std;
using namespace ngraph;
static_assert(sizeof(float16) == 2, "class float16 must be exactly 2 bytes");
// Convert an f32 to the truncated binary16 encoding.
// Handles: sign, 5-bit exponent (bias 15), 10-bit fraction, flush-to-zero
// below 2^-24, denorm production in [2^-24, 2^-14), and saturation of
// exponents above 15 to infinity.
float16::float16(float value)
{
    // Pun the float into its raw bits so the fields can be sliced out.
    union {
        float fv;
        uint32_t iv;
    };
    fv = value;
    uint32_t sign = iv & 0x80000000;
    uint32_t biased_exp = (iv & 0x7F800000) >> 23;
    uint32_t raw_frac = (iv & 0x007FFFFF);
    int32_t exp = biased_exp - 127;
    // Below 2^(-14 - frac_size) == 2^-24 even an f16 denorm cannot represent it.
    int32_t min_exp = -14 - frac_size;
    if (biased_exp == 0 || exp < min_exp)
    {
        // f32 zero/denorm, or magnitude below the smallest f16 denorm:
        // flush to signed zero. The fraction must be cleared as well,
        // otherwise stray f32 fraction bits would leak into the encoding.
        biased_exp = 0;
        raw_frac = 0;
    }
    else if (biased_exp == 0xFF)
    {
        // Infinity or NaN.
        biased_exp = 0x1F;
        uint32_t frac_hi = raw_frac >> (23 - frac_size);
        // A NaN whose payload lives entirely in the discarded low bits must
        // stay a NaN rather than collapsing to infinity: force the quiet bit.
        raw_frac = (raw_frac != 0 && frac_hi == 0) ? 0x0200 : frac_hi;
    }
    else if (exp < -14)
    {
        // f16 denorm: make the implicit leading 1 explicit, then express the
        // 24-bit significand in units of 2^-24. For exp in [-24, -15] the
        // required shift is -exp - 1 (14..23 bits).
        biased_exp = 0;
        raw_frac |= 0x00800000;
        raw_frac = raw_frac >> (-exp - 1);
    }
    else if (exp > 15)
    {
        // Too large for a finite f16: saturate to infinity.
        biased_exp = 0x1F;
        raw_frac = 0;
    }
    else
    {
        // Normal case: truncate the fraction and re-bias the exponent.
        raw_frac = raw_frac >> (23 - frac_size);
        biased_exp = exp + exp_bias;
    }
    m_value = (sign >> 16) | (biased_exp << frac_size) | raw_frac;
}
// Render the value by widening to f32 and using the standard formatter.
std::string float16::to_string() const
{
    const float widened = static_cast<float>(*this);
    return std::to_string(widened);
}
// Storage footprint in bytes of the underlying 16-bit representation.
size_t float16::size() const
{
    return sizeof(uint16_t);
}
// Equality defers to f32 semantics, so NaN compares unequal to everything.
bool float16::operator==(const float16& other) const
{
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wfloat-equal"
    const float lhs = static_cast<float>(*this);
    const float rhs = static_cast<float>(other);
    return lhs == rhs;
#pragma clang diagnostic pop
}
// Ordering defined by the widened f32 values.
bool float16::operator<(const float16& other) const
{
    const float lhs = static_cast<float>(*this);
    const float rhs = static_cast<float>(other);
    return lhs < rhs;
}
// Ordering defined by the widened f32 values.
bool float16::operator<=(const float16& other) const
{
    const float lhs = static_cast<float>(*this);
    const float rhs = static_cast<float>(other);
    return lhs <= rhs;
}
// Ordering defined by the widened f32 values.
bool float16::operator>(const float16& other) const
{
    const float lhs = static_cast<float>(*this);
    const float rhs = static_cast<float>(other);
    return lhs > rhs;
}
// Ordering defined by the widened f32 values.
bool float16::operator>=(const float16& other) const
{
    const float lhs = static_cast<float>(*this);
    const float rhs = static_cast<float>(other);
    return lhs >= rhs;
}
// Widen the binary16 encoding to an f32 (exact: every f16 is representable).
float16::operator float() const
{
    // Pun the assembled bits back into a float.
    union {
        uint32_t i_val;
        float f_val;
    };
    // 5-bit biased exponent and 10-bit fraction of the half.
    uint32_t exp = 0x1F & (m_value >> frac_size);
    // Re-bias from the f16 bias (15) to the f32 bias (127).
    uint32_t fexp = exp + 127 - 15;
    uint32_t frac = m_value & 0x03FF;
    if (exp == 0)
    {
        if (frac == 0)
        {
            // Signed zero: force the f32 exponent field to zero too.
            fexp = 0;
        }
        else
        {
            // f16 denorm: normalize by shifting the fraction up until the
            // implicit leading 1 (bit 10) appears, adjusting the f32
            // exponent by one for each shift.
            fexp++;
            while (0 == (frac & 0x0400))
            {
                fexp--;
                frac = frac << 1;
            }
            // Drop the now-implicit leading 1.
            frac &= 0x03FF;
        }
    }
    else if (exp == 0x1F)
    {
        // Infinity / NaN: all-ones f32 exponent; the fraction carries over.
        fexp = 0xFF;
    }
    // Move the 10 fraction bits into the top of the 23-bit f32 fraction field.
    frac = frac << (23 - frac_size);
    i_val = static_cast<uint32_t>((m_value & 0x8000)) << 16 | (fexp << 23) | frac;
    return f_val;
}
// Expose the raw IEEE 754 binary16 encoding.
uint16_t float16::to_bits() const
{
    const uint16_t raw = m_value;
    return raw;
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cmath>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <vector>
#define ROUND_MODE_TO_NEAREST_EVEN
namespace ngraph
{
// IEEE 754 binary16 ("half precision") value type. Stores the raw 16-bit
// encoding; conversion and comparison are implemented by widening to float
// (see float16.cpp).
class float16
{
public:
    // All-zero bit pattern, i.e. +0.0.
    constexpr float16()
        : m_value{0}
    {
    }
    // binary16 layout: 1 sign bit, 5 exponent bits (bias 15), 10 fraction bits.
    static uint32_t constexpr frac_size = 10;
    static uint32_t constexpr exp_size = 5;
    static uint32_t constexpr exp_bias = 15;
    // Assemble a value from its fields; each argument is masked to its width.
    float16(uint32_t sign, uint32_t biased_exponent, uint32_t fraction)
        : m_value((sign & 0x01) << 15 | (biased_exponent & 0x1F) << 10 | (fraction & 0x03FF))
    {
    }
    // Narrowing conversion from f32 (implicit by design, mirroring bfloat16).
    float16(float value);
    std::string to_string() const;
    size_t size() const;
    // Comparisons defer to float semantics (so NaN != NaN).
    bool operator==(const float16& other) const;
    bool operator!=(const float16& other) const { return !(*this == other); }
    bool operator<(const float16& other) const;
    bool operator<=(const float16& other) const;
    bool operator>(const float16& other) const;
    bool operator>=(const float16& other) const;
    operator float() const;
    // Reinterpret a raw 16-bit pattern as a float16 (no numeric conversion).
    static constexpr float16 from_bits(uint16_t bits) { return float16(bits, true); }
    // Raw IEEE 754 binary16 encoding.
    uint16_t to_bits() const;
    friend std::ostream& operator<<(std::ostream& out, const float16& obj)
    {
        out << static_cast<float>(obj);
        return out;
    }

private:
    // Tag-dispatched raw-bits constructor backing from_bits(); the bool
    // parameter only disambiguates from the numeric-field constructor.
    constexpr float16(uint16_t x, bool)
        : m_value{x}
    {
    }
    // Helper union for float <-> bit-pattern punning.
    // NOTE(review): appears unused in the visible code -- candidate for removal.
    union F32 {
        F32(float val)
            : f{val}
        {
        }
        F32(uint32_t val)
            : i{val}
        {
        }
        float f;
        uint32_t i;
    };
    uint16_t m_value;
};
}
namespace std
{
/// numeric_limits traits for the binary16 type, following the IEEE 754
/// half-precision format: 11 significand bits (1 implicit), exponent
/// range [-14, 15], max finite value 65504.
template <>
class numeric_limits<ngraph::float16>
{
public:
    static constexpr bool is_specialized = true;
    /// Smallest positive *normal* value: 2^-14 (biased exponent 1, zero fraction).
    static constexpr ngraph::float16 min() noexcept
    {
        return ngraph::float16::from_bits(0x0400);
    }
    /// Largest finite value: 65504 (biased exponent 30, all-ones fraction).
    static constexpr ngraph::float16 max() noexcept
    {
        return ngraph::float16::from_bits(0x7BFF);
    }
    /// Most negative finite value: -65504.
    static constexpr ngraph::float16 lowest() noexcept
    {
        return ngraph::float16::from_bits(0xFBFF);
    }
    static constexpr int digits = 11;
    static constexpr int digits10 = 3;
    static constexpr bool is_signed = true;
    static constexpr bool is_integer = false;
    static constexpr bool is_exact = false;
    static constexpr int radix = 2;
    /// Machine epsilon: 2^-10 (biased exponent 15 - 10 = 5, zero fraction).
    static constexpr ngraph::float16 epsilon() noexcept
    {
        return ngraph::float16::from_bits(0x1400);
    }
    /// Largest rounding error for round-to-nearest: 0.5.
    static constexpr ngraph::float16 round_error() noexcept
    {
        return ngraph::float16::from_bits(0x3800);
    }
    static constexpr int min_exponent = -13;
    static constexpr int min_exponent10 = -4;
    static constexpr int max_exponent = 16;
    static constexpr int max_exponent10 = 4;
    static constexpr bool has_infinity = true;
    static constexpr bool has_quiet_NaN = true;
    static constexpr bool has_signaling_NaN = true;
    // binary16 has subnormals and the conversion code preserves them.
    static constexpr float_denorm_style has_denorm = denorm_present;
    static constexpr bool has_denorm_loss = false;
    static constexpr ngraph::float16 infinity() noexcept
    {
        return ngraph::float16::from_bits(0x7C00);
    }
    static constexpr ngraph::float16 quiet_NaN() noexcept
    {
        return ngraph::float16::from_bits(0x7FFF);
    }
    static constexpr ngraph::float16 signaling_NaN() noexcept
    {
        return ngraph::float16::from_bits(0x7DFF);
    }
    /// Smallest positive subnormal: 2^-24 (only the low fraction bit set).
    static constexpr ngraph::float16 denorm_min() noexcept
    {
        return ngraph::float16::from_bits(0x0001);
    }
    // Not claiming IEC 559 conformance: arithmetic is done by widening to float.
    static constexpr bool is_iec559 = false;
    // The set of values is finite, so the type is bounded.
    static constexpr bool is_bounded = true;
    static constexpr bool is_modulo = false;
    static constexpr bool traps = false;
    static constexpr bool tinyness_before = false;
    static constexpr float_round_style round_style = round_to_nearest;
};
}
...@@ -43,6 +43,7 @@ set(SRC ...@@ -43,6 +43,7 @@ set(SRC
cse.cpp cse.cpp
element_type.cpp element_type.cpp
file_util.cpp file_util.cpp
float16.cpp
includes.cpp includes.cpp
input_output_assign.cpp input_output_assign.cpp
main.cpp main.cpp
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <climits>
#include <random>
#include "gtest/gtest.h"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/type/float16.hpp"
#include "util/float_util.hpp"
using namespace std;
using namespace ngraph;
// Round-trip checks between the "s eeeee ff ffff ffff" bit-string helpers and
// float16: parse a bit string, compare against float16 built from a literal,
// format it back, and widen to float.
TEST(float16, conversions)
{
    float16 f16;
    const char* source_string;
    std::string f16_string;

    // 1.f: exponent 0 (biased 01111), zero fraction.
    source_string = "0 01111 00 0000 0000";
    f16 = test::bits_to_float16(source_string);
    EXPECT_EQ(f16, float16(1.0));
    f16_string = test::float16_to_bits(f16);
    EXPECT_STREQ(source_string, f16_string.c_str());
    EXPECT_EQ(static_cast<float>(f16), 1.0);

    // -1.f: same encoding with the sign bit set.
    source_string = "1 01111 00 0000 0000";
    f16 = test::bits_to_float16(source_string);
    EXPECT_EQ(f16, float16(-1.0));
    f16_string = test::float16_to_bits(f16);
    EXPECT_STREQ(source_string, f16_string.c_str());
    EXPECT_EQ(static_cast<float>(f16), -1.0);

    // 0.f: all-zero bit pattern.
    source_string = "0 00000 00 0000 0000";
    f16 = test::bits_to_float16(source_string);
    EXPECT_EQ(f16, float16(0.0));
    f16_string = test::float16_to_bits(f16);
    EXPECT_STREQ(source_string, f16_string.c_str());
    EXPECT_EQ(static_cast<float>(f16), 0.0);

    // 1.5f: exponent 0 with the top fraction bit set.
    source_string = "0 01111 10 0000 0000";
    f16 = test::bits_to_float16(source_string);
    EXPECT_EQ(f16, float16(1.5));
    f16_string = test::float16_to_bits(f16);
    EXPECT_STREQ(source_string, f16_string.c_str());
    EXPECT_EQ(static_cast<float>(f16), 1.5);
}
...@@ -55,6 +55,29 @@ std::string ngraph::test::bfloat16_to_bits(bfloat16 f) ...@@ -55,6 +55,29 @@ std::string ngraph::test::bfloat16_to_bits(bfloat16 f)
return formatted; return formatted;
} }
// Render an f16 as a human-readable bit string: "s eeeee ff ffff ffff".
std::string ngraph::test::float16_to_bits(float16 f)
{
    const std::string raw = std::bitset<16>(f.to_bits()).to_string();
    std::string formatted;
    formatted.reserve(20); // 16 bits + 4 separators
    // Sign bit.
    formatted.push_back(raw[0]);
    formatted.push_back(' ');
    // Five exponent bits.
    formatted.append(raw, 1, 5);
    formatted.push_back(' ');
    // Ten fraction bits, grouped 2-4-4.
    formatted.append(raw, 6, 2);
    formatted.push_back(' ');
    formatted.append(raw, 8, 4);
    formatted.push_back(' ');
    formatted.append(raw, 12, 4);
    return formatted;
}
std::string ngraph::test::float_to_bits(float f) std::string ngraph::test::float_to_bits(float f)
{ {
FloatUnion fu{f}; FloatUnion fu{f};
...@@ -116,6 +139,20 @@ ngraph::bfloat16 ngraph::test::bits_to_bfloat16(const std::string& s) ...@@ -116,6 +139,20 @@ ngraph::bfloat16 ngraph::test::bits_to_bfloat16(const std::string& s)
return bfloat16::from_bits(static_cast<uint16_t>(bs.to_ulong())); return bfloat16::from_bits(static_cast<uint16_t>(bs.to_ulong()));
} }
// Parse a (possibly space-separated) 16-character bit string into a float16.
ngraph::float16 ngraph::test::bits_to_float16(const std::string& s)
{
    // Strip the spacing emitted by float16_to_bits.
    std::string bits;
    for (char c : s)
    {
        if (!::isspace(c))
        {
            bits.push_back(c);
        }
    }
    if (bits.size() != 16)
    {
        throw ngraph_error("Input length must be 16");
    }
    // Interpret the remaining characters as the raw binary16 encoding.
    std::bitset<16> bs(bits);
    return float16::from_bits(static_cast<uint16_t>(bs.to_ulong()));
}
float ngraph::test::bits_to_float(const std::string& s) float ngraph::test::bits_to_float(const std::string& s)
{ {
std::string unformatted = s; std::string unformatted = s;
......
...@@ -44,6 +44,8 @@ namespace ngraph ...@@ -44,6 +44,8 @@ namespace ngraph
std::string bfloat16_to_bits(bfloat16 f); std::string bfloat16_to_bits(bfloat16 f);
std::string float16_to_bits(float16 f);
std::string float_to_bits(float f); std::string float_to_bits(float f);
std::string double_to_bits(double d); std::string double_to_bits(double d);
...@@ -53,5 +55,7 @@ namespace ngraph ...@@ -53,5 +55,7 @@ namespace ngraph
float bits_to_float(const std::string& s); float bits_to_float(const std::string& s);
double bits_to_double(const std::string& s); double bits_to_double(const std::string& s);
float16 bits_to_float16(const std::string& s);
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment