Commit fbfd3158 authored by Tomoaki Teshima

fix corner case when number is small

parent 15f8bc6f
...@@ -4471,17 +4471,31 @@ static short convertFp16SW(float fp32) ...@@ -4471,17 +4471,31 @@ static short convertFp16SW(float fp32)
fp16Int16 result; fp16Int16 result;
result.i = 0; result.i = 0;
if( 0x477ff000 <= ( a.i & 0x7fffffff ) ) unsigned int absolute = a.i & 0x7fffffff;
if( 0x477ff000 <= absolute )
{ {
// Inf in Fp16 // Inf in Fp16
result.i = result.i | 0x7C00; result.i = result.i | 0x7C00;
if( exponent == 128 && significand != 0 ) if( exponent == 128 && significand != 0 )
{ {
// NaN // NaN
result.i = (short)(result.i | 0x200 | (significand >> kShiftSignificand)); result.i = (short)( result.i | 0x200 | ( significand >> kShiftSignificand ) );
} }
} }
else if ( ( a.i & 0x7fffffff ) <= 0x387fe000 ) else if ( absolute < 0x33000001 )
{
// too small for fp16
result.i = 0;
}
else if ( absolute < 0x33c00000 )
{
result.i = 1;
}
else if ( absolute < 0x34200001 )
{
result.i = 2;
}
else if ( absolute < 0x387fe000 )
{ {
// subnormal in Fp16 // subnormal in Fp16
int fp16Significand = significand | 0x800000; int fp16Significand = significand | 0x800000;
...@@ -4489,8 +4503,9 @@ static short convertFp16SW(float fp32) ...@@ -4489,8 +4503,9 @@ static short convertFp16SW(float fp32)
fp16Significand = fp16Significand >> bitShift; fp16Significand = fp16Significand >> bitShift;
// special cases to round up // special cases to round up
int threshold = 0x8000 + ( ( fp16Significand & 1 ) ? 0 : 1 ); bitShift = exponent + 24;
if( threshold <= ( significand & 0xffff ) ) unsigned int threshold = ( ( 0x400000 >> bitShift ) | ( ( ( significand & ( 0x800000 >> bitShift ) ) >> ( 126 - a.fmt.exponent ) ) ^ 1 ) );
if( threshold <= ( significand & ( 0xffffff >> ( exponent + 25 ) ) ) )
{ {
fp16Significand++; fp16Significand++;
} }
...@@ -4500,7 +4515,7 @@ static short convertFp16SW(float fp32) ...@@ -4500,7 +4515,7 @@ static short convertFp16SW(float fp32)
{ {
// usual situation // usual situation
// exponent // exponent
result.fmt.exponent = (exponent + kBiasFp16Exponent); result.fmt.exponent = ( exponent + kBiasFp16Exponent );
// significand; // significand;
short fp16Significand = (short)(significand >> kShiftSignificand); short fp16Significand = (short)(significand >> kShiftSignificand);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment