Commit c5d7791b authored by Tomoaki Teshima

brush up fp16 implementation

  * DRY
  * switch to Cv32suf and remove fp32Int32
  * add Cv16suf
parent e884bbab
...@@ -307,11 +307,32 @@ enum CpuFeatures { ...@@ -307,11 +307,32 @@ enum CpuFeatures {
/* 2*pi and the natural logarithm of 2, written with more digits than a
   double can hold so the literal rounds to the nearest representable value. */
#define CV_2PI 6.283185307179586476925286766559
#define CV_LOG2 0.69314718055994530941723212145818
/* Overlay of a short, an optional native __fp16, and a bit-field view whose
   members are named sign / exponent / significand (1 + 5 + 10 = 16 bits). */
typedef union Cv16suf
{
    short i;
/* This guard is equivalent to the original
   ((4 <= __GNUC__ && 7 <= __GNUC__) || 5 <= __GNUC__), which reduces to
   __GNUC__ >= 5.
   NOTE(review): the "7 <= __GNUC__" term looks like it was meant to test
   __GNUC_MINOR__ (i.e. GCC 4.7+) -- confirm before widening the guard. */
#if ( defined(__arm__) || defined(__aarch64__) ) && defined(__GNUC__) && ( __GNUC__ >= 5 )
    __fp16 h;
#endif
    /* Bit-field allocation order is implementation-defined in C.
       NOTE(review): presumably relies on the first-declared field landing in
       the low-order bits -- verify on big-endian targets. */
    struct _fp16Format
    {
        unsigned significand : 10;
        unsigned exponent    : 5;
        unsigned sign        : 1;
    } fmt;
}
Cv16suf;
/* Overlay of an int, an unsigned, a float, and a bit-field view whose
   members are named sign / exponent / significand (1 + 8 + 23 = 32 bits). */
typedef union Cv32suf
{
    int i;
    unsigned u;
    float f;
    /* Bit-field allocation order is implementation-defined in C.
       NOTE(review): presumably relies on the first-declared field landing in
       the low-order bits -- verify on big-endian targets. */
    struct _fp32Format
    {
        unsigned int significand : 23;
        unsigned int exponent : 8;
        unsigned int sign : 1;
    } fmt;
}
Cv32suf;
......
...@@ -4367,39 +4367,13 @@ const unsigned int kShiftSignificand = 13; ...@@ -4367,39 +4367,13 @@ const unsigned int kShiftSignificand = 13;
// 0x3ff selects the low 10 bits, the width of the fp16 significand field.
const unsigned int kMaskFp16Significand = 0x3ff;
// Biases subtracted from the stored exponent fields of fp16 and fp32 values.
const unsigned int kBiasFp16Exponent = 15;
const unsigned int kBiasFp32Exponent = 127;
// Overlay of an int, a float, and a bit-field view whose members are named
// sign / exponent / significand (1 + 8 + 23 = 32 bits).
union fp32Int32
{
    int   i;
    float f;
    // Bit-field allocation order is implementation-defined.
    // NOTE(review): presumably relies on the first-declared field landing in
    // the low-order bits -- verify on big-endian targets.
    struct _fp32Format
    {
        unsigned significand : 23;
        unsigned exponent    : 8;
        unsigned sign        : 1;
    } fmt;
};
#endif #endif
// Overlay of a short, an optional native __fp16, and a bit-field view whose
// members are named sign / exponent / significand (1 + 5 + 10 = 16 bits).
union fp16Int16
{
    short i;
// This guard is equivalent to the original
// ((4 <= __GNUC__ && 7 <= __GNUC__) || 5 <= __GNUC__), which reduces to
// __GNUC__ >= 5.
// NOTE(review): the "7 <= __GNUC__" term looks like it was meant to test
// __GNUC_MINOR__ (i.e. GCC 4.7+) -- confirm before widening the guard.
#if ( defined(__arm__) || defined(__aarch64__) ) && defined(__GNUC__) && ( __GNUC__ >= 5 )
    __fp16 h;
#endif
    // Bit-field allocation order is implementation-defined.
    // NOTE(review): presumably relies on the first-declared field landing in
    // the low-order bits -- verify on big-endian targets.
    struct _fp16Format
    {
        unsigned significand : 10;
        unsigned exponent    : 5;
        unsigned sign        : 1;
    } fmt;
};
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) #if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
static float convertFp16SW(short fp16) static float convertFp16SW(short fp16)
{ {
// Fp16 -> Fp32 // Fp16 -> Fp32
fp16Int16 a; Cv16suf a;
a.i = fp16; a.i = fp16;
return (float)a.h; return (float)a.h;
} }
...@@ -4407,12 +4381,12 @@ static float convertFp16SW(short fp16) ...@@ -4407,12 +4381,12 @@ static float convertFp16SW(short fp16)
static float convertFp16SW(short fp16) static float convertFp16SW(short fp16)
{ {
// Fp16 -> Fp32 // Fp16 -> Fp32
fp16Int16 b; Cv16suf b;
b.i = fp16; b.i = fp16;
int exponent = b.fmt.exponent - kBiasFp16Exponent; int exponent = b.fmt.exponent - kBiasFp16Exponent;
int significand = b.fmt.significand; int significand = b.fmt.significand;
fp32Int32 a; Cv32suf a;
a.i = 0; a.i = 0;
a.fmt.sign = b.fmt.sign; // sign bit a.fmt.sign = b.fmt.sign; // sign bit
if( exponent == 16 ) if( exponent == 16 )
...@@ -4461,7 +4435,7 @@ static float convertFp16SW(short fp16) ...@@ -4461,7 +4435,7 @@ static float convertFp16SW(short fp16)
static short convertFp16SW(float fp32) static short convertFp16SW(float fp32)
{ {
// Fp32 -> Fp16 // Fp32 -> Fp16
fp16Int16 a; Cv16suf a;
a.h = (__fp16)fp32; a.h = (__fp16)fp32;
return a.i; return a.i;
} }
...@@ -4469,12 +4443,12 @@ static short convertFp16SW(float fp32) ...@@ -4469,12 +4443,12 @@ static short convertFp16SW(float fp32)
static short convertFp16SW(float fp32) static short convertFp16SW(float fp32)
{ {
// Fp32 -> Fp16 // Fp32 -> Fp16
fp32Int32 a; Cv32suf a;
a.f = fp32; a.f = fp32;
int exponent = a.fmt.exponent - kBiasFp32Exponent; int exponent = a.fmt.exponent - kBiasFp32Exponent;
int significand = a.fmt.significand; int significand = a.fmt.significand;
fp16Int16 result; Cv16suf result;
result.i = 0; result.i = 0;
unsigned int absolute = a.i & 0x7fffffff; unsigned int absolute = a.i & 0x7fffffff;
if( 0x477ff000 <= absolute ) if( 0x477ff000 <= absolute )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment