Commit 0b1a386e authored by Nishant Patel, committed by Scott Cyphers

Modify convert op to convert from bf16 to float and vice-versa (#3689)

* Modify convert op to convert from bf16 to float and vice-versa

* Add a check

* Remove conversion routines and use the convert op for casting

* disable unit tests for plaidml
parent 9d45682c
...@@ -45,6 +45,11 @@ namespace ngraph ...@@ -45,6 +45,11 @@ namespace ngraph
SELECT_KERNEL( SELECT_KERNEL(
kernel, args[0].get_element_type(), runtime::cpu::kernel::convert_to_bool) kernel, args[0].get_element_type(), runtime::cpu::kernel::convert_to_bool)
} }
else if (args[0].get_element_type() == element::bf16 &&
out[0].get_element_type() == element::f32)
{
kernel = runtime::cpu::kernel::convert_to_float32<bfloat16>;
}
else if (out[0].get_element_type() == element::f32) else if (out[0].get_element_type() == element::f32)
{ {
SELECT_KERNEL(kernel, SELECT_KERNEL(kernel,
...@@ -97,6 +102,11 @@ namespace ngraph ...@@ -97,6 +102,11 @@ namespace ngraph
SELECT_KERNEL( SELECT_KERNEL(
kernel, args[0].get_element_type(), runtime::cpu::kernel::convert_to_u64) kernel, args[0].get_element_type(), runtime::cpu::kernel::convert_to_u64)
} }
else if (args[0].get_element_type() == element::f32 &&
out[0].get_element_type() == element::bf16)
{
kernel = runtime::cpu::kernel::convert_to_bf16<float>;
}
else else
{ {
throw ngraph_error("Cannot convert from an invalid input element type"); throw ngraph_error("Cannot convert from an invalid input element type");
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <unsupported/Eigen/CXX11/Tensor> #include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/cpu_executor.hpp" #include "ngraph/runtime/cpu/cpu_executor.hpp"
#include "ngraph/runtime/reference/convert.hpp"
namespace ngraph namespace ngraph
{ {
...@@ -110,6 +111,12 @@ namespace ngraph ...@@ -110,6 +111,12 @@ namespace ngraph
{ {
convert<InputElementType, bool>(input, output, count, arena); convert<InputElementType, bool>(input, output, count, arena);
} }
// Wrapper that fixes the output element type to bfloat16: every argument is
// forwarded unchanged to convert<InputElementType, bfloat16>.
//
// InputElementType : element type forwarded as the first template argument
//                    of convert<>.
// input, output    : untyped buffers passed straight through.
//                    NOTE(review): presumably each holds at least `count`
//                    elements of the respective type -- confirm in convert().
// count            : passed through; presumably the number of elements.
// arena            : passed through; meaning is defined by convert() -- TODO confirm.
template <typename InputElementType>
void convert_to_bf16(void* input, void* output, size_t count, int arena)
{
convert<InputElementType, bfloat16>(input, output, count, arena);
}
} }
} }
} }
......
...@@ -9,3 +9,7 @@ model_matmul_integer_4d_no_zero_point ...@@ -9,3 +9,7 @@ model_matmul_integer_4d_no_zero_point
fake_quantize fake_quantize
fake_quantize_with_clip fake_quantize_with_clip
fake_quantize_with_clip_across_channels fake_quantize_with_clip_across_channels
# casting not supported on interpreter
convert_float32_bf16
convert_bf16_float32
...@@ -294,3 +294,7 @@ layer_norm_bprop_affine ...@@ -294,3 +294,7 @@ layer_norm_bprop_affine
# shapes with zeros dimensions like (5, 0, 5) not supported in PlaidML backend # shapes with zeros dimensions like (5, 0, 5) not supported in PlaidML backend
dyn_replace_slice dyn_replace_slice
# bf16 test cases not supported
convert_float32_bf16
convert_bf16_float32
...@@ -110,3 +110,62 @@ NGRAPH_TEST(${BACKEND_NAME}, convert_float32_bool) ...@@ -110,3 +110,62 @@ NGRAPH_TEST(${BACKEND_NAME}, convert_float32_bool)
handle->call_with_validate({result}, {a}); handle->call_with_validate({result}, {a});
EXPECT_EQ((vector<char>{0, 1, 1, 0, 1, 1, 1, 1, 1}), read_vector<char>(result)); EXPECT_EQ((vector<char>{0, 1, 1, 0, 1, 1, 1, 1, 1}), read_vector<char>(result));
} }
// Runs a single Convert(f32 -> bf16) node on the backend under test and checks
// the output tensor element by element against the expected bf16 values.
NGRAPH_TEST(${BACKEND_NAME}, convert_float32_bf16)
{
    Shape tensor_shape{1, 1, 3, 5};
    vector<float> f32_values = {
        0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f};

    // Graph: Parameter(f32) -> Convert(bf16)
    auto param = make_shared<op::Parameter>(element::f32, tensor_shape);
    auto to_bf16 = make_shared<op::Convert>(param, element::bf16);
    auto func = make_shared<Function>(NodeVector{to_bf16}, ParameterVector{param});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Input tensor filled with the f32 data; output tensor has the same shape in bf16.
    auto input_tensor = backend->create_tensor(element::f32, tensor_shape);
    copy_data(input_tensor, f32_values);
    auto output_tensor = backend->create_tensor(element::bf16, tensor_shape);

    auto executable = backend->compile(func);
    executable->call_with_validate({output_tensor}, {input_tensor});

    const vector<bfloat16> expected_bf16{
        0.5, 1.5, 0.5, 2.5, 1.5, 0.5, 3.5, 2.5, 0.5, 0.5, 2.5, 0.5, 0.5, 0.5, 1.5};
    EXPECT_EQ(expected_bf16, read_vector<bfloat16>(output_tensor));
}
// Runs a single Convert(bf16 -> f32) node on the backend under test and checks
// the output tensor element by element against the expected f32 values.
NGRAPH_TEST(${BACKEND_NAME}, convert_bf16_float32)
{
    Shape tensor_shape{1, 1, 3, 5};
    vector<bfloat16> bf16_values = {
        0.5, 1.5, 0.5, 2.5, 1.5, 0.5, 3.5, 2.5, 0.5, 0.5, 2.5, 0.5, 0.5, 0.5, 1.5};

    // Graph: Parameter(bf16) -> Convert(f32)
    auto param = make_shared<op::Parameter>(element::bf16, tensor_shape);
    auto to_f32 = make_shared<op::Convert>(param, element::f32);
    auto func = make_shared<Function>(NodeVector{to_f32}, ParameterVector{param});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Input tensor filled with the bf16 data; output tensor has the same shape in f32.
    auto input_tensor = backend->create_tensor(element::bf16, tensor_shape);
    copy_data(input_tensor, bf16_values);
    auto output_tensor = backend->create_tensor(element::f32, tensor_shape);

    auto executable = backend->compile(func);
    executable->call_with_validate({output_tensor}, {input_tensor});

    const vector<float> expected_f32{
        0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f};
    EXPECT_EQ(expected_f32, read_vector<float>(output_tensor));
}
...@@ -2169,55 +2169,36 @@ TEST(cpu_test, max_pool_bf16) ...@@ -2169,55 +2169,36 @@ TEST(cpu_test, max_pool_bf16)
vector<float> a_data = { vector<float> a_data = {
0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}; 0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f};
// allocate memory for destination auto A = make_shared<op::Parameter>(element::f32, shape_a);
int size = a_data.size() * sizeof(float) / 2; auto A_bf16 = make_shared<op::Convert>(A, element::bf16);
void* bf16_dst = std::malloc(size);
// convert float data to bfloat16
ngraph::test::float_to_bf16(a_data.data(), bf16_dst, a_data.size());
auto A = make_shared<op::Parameter>(element::bf16, shape_a);
auto QMP = make_shared<ngraph::op::MaxPool>( auto QMP = make_shared<ngraph::op::MaxPool>(
A, window_shape, window_movement_strides, padding_below, padding_above); A_bf16, window_shape, window_movement_strides, padding_below, padding_above);
auto f = make_shared<Function>(NodeVector{QMP}, ParameterVector{A}); auto f = make_shared<Function>(NodeVector{QMP}, ParameterVector{A});
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output // Create some tensors for input/output
auto a = backend->create_tensor(element::bf16, shape_a); auto a = backend->create_tensor(element::f32, shape_a);
a->write(bf16_dst, size); copy_data(a, a_data);
auto result = backend->create_tensor(element::bf16, shape_r); auto result = backend->create_tensor(element::bf16, shape_r);
auto handle = backend->compile(f); auto handle = backend->compile(f);
handle->call_with_validate({result}, {a}); handle->call_with_validate({result}, {a});
// convert the output back to float EXPECT_EQ((vector<bfloat16>{3.5, 3.5, 2.5, 3.5, 3.5, 2.5}), read_vector<bfloat16>(result));
void* fp_dst = malloc(shape_size(shape_r) * 4);
ngraph::test::bf16_to_float(
static_pointer_cast<runtime::cpu::CPUTensorView>(result)->get_data_ptr(),
fp_dst,
shape_size(shape_r));
auto b = backend->create_tensor(element::f32, shape_r);
b->write(fp_dst, shape_size(shape_r) * 4);
EXPECT_EQ((vector<float>{3.5f, 3.5f, 2.5f, 3.5f, 3.5f, 2.5f}), read_vector<float>(b));
} }
TEST(cpu_test, convolution_simple_bf16) TEST(cpu_test, convolution_simple_bf16)
{ {
Shape shape_a{1, 2, 2, 2}; Shape shape_a{1, 2, 2, 2};
auto A = make_shared<op::Parameter>(element::bf16, shape_a); auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_b{2, 2, 1, 1}; Shape shape_b{2, 2, 1, 1};
auto B = make_shared<op::Parameter>(element::bf16, shape_b); auto B = make_shared<op::Parameter>(element::f32, shape_b);
Shape shape_r{1, 2, 2, 2}; Shape shape_r{1, 2, 2, 2};
vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
vector<float> weights = {3.0f, 3.0f, 3.0f, 3.0f}; vector<float> weights = {3.0f, 3.0f, 3.0f, 3.0f};
int input_size = input.size() * sizeof(float) / 2; auto A_bf16 = make_shared<op::Convert>(A, element::bf16);
int weights_size = weights.size() * sizeof(float) / 2; auto B_bf16 = make_shared<op::Convert>(B, element::bf16);
void* bf16_input_dst = std::malloc(input_size); auto conv1 = make_shared<op::Convolution>(A_bf16,
void* bf16_weights_dst = std::malloc(weights_size); B_bf16,
// convert float data to bfloat16
ngraph::test::float_to_bf16(input.data(), bf16_input_dst, input.size());
ngraph::test::float_to_bf16(weights.data(), bf16_weights_dst, weights.size());
auto conv1 = make_shared<op::Convolution>(A,
B,
Strides{1, 1}, Strides{1, 1},
Strides{1, 1}, Strides{1, 1},
CoordinateDiff{0, 0}, CoordinateDiff{0, 0},
...@@ -2229,24 +2210,15 @@ TEST(cpu_test, convolution_simple_bf16) ...@@ -2229,24 +2210,15 @@ TEST(cpu_test, convolution_simple_bf16)
auto backend = runtime::Backend::create("CPU"); auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output // Create some tensors for input/output
auto a = backend->create_tensor(element::bf16, shape_a); auto a = backend->create_tensor(element::f32, shape_a);
a->write(bf16_input_dst, input_size); copy_data(a, input);
auto b = backend->create_tensor(element::bf16, shape_b); auto b = backend->create_tensor(element::f32, shape_b);
b->write(bf16_weights_dst, weights_size); copy_data(b, weights);
auto result = backend->create_tensor(element::bf16, shape_r); auto result = backend->create_tensor(element::bf16, shape_r);
vector<float> expected_result{18.0f, 24.0f, 30.0f, 36.0f, 18.0f, 24.0f, 30.0f, 36.0f};
auto handle = backend->compile(f); auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b}); handle->call_with_validate({result}, {a, b});
// convert the output back to float EXPECT_EQ((vector<bfloat16>{18.0, 24.0, 30.0, 36.0, 18.0, 24.0, 30.0, 36.0}),
void* fp_dst = malloc(shape_size(shape_r) * 4); read_vector<bfloat16>(result));
ngraph::test::bf16_to_float(
static_pointer_cast<runtime::cpu::CPUTensorView>(result)->get_data_ptr(),
fp_dst,
shape_size(shape_r));
auto c = backend->create_tensor(element::f32, shape_r);
c->write(fp_dst, shape_size(shape_r) * 4);
EXPECT_TRUE(test::all_close_f(vector<float>{expected_result}, read_vector<float>(c)));
} }
#endif #endif
...@@ -184,35 +184,3 @@ double ngraph::test::bits_to_double(const std::string& s) ...@@ -184,35 +184,3 @@ double ngraph::test::bits_to_double(const std::string& s)
du.i = static_cast<uint64_t>(bs.to_ullong()); du.i = static_cast<uint64_t>(bs.to_ullong());
return du.d; return du.d;
} }
//
// Truncating float32 -> bfloat16 conversion: keeps the upper 16 bits of each
// 32-bit pattern (sign, exponent and the top 7 mantissa bits) and drops the
// lower 16 mantissa bits without rounding.
//
//                  f32 Mantissa
//             <---------------------->
//             bf16 Mantissa
//   S    E    <------>
//   0|00011110|0101010|1000011111111000
//
//   Right shift by 16 gives   0|00011110|0101010
//
// src  : buffer of `size` 32-bit values, read as raw bit patterns.
// dst  : buffer receiving `size` 16-bit values.
// size : number of elements to convert; a value <= 0 converts nothing.
//
// The bits are handled as unsigned so that the shift is well defined for
// inputs whose sign bit is set (right-shifting a negative signed int is
// implementation-defined before C++20).
// NOTE(review): src is still read through an integer pointer; if callers pass
// float storage, std::memcpy would be the strictly conforming way to read it.
void ngraph::test::float_to_bf16(void* src, void* dst, int size)
{
    const uint32_t* f32_bits = static_cast<const uint32_t*>(src);
    char16_t* bf16_bits = static_cast<char16_t*>(dst);
    // Counting up against `size` (instead of counting down to zero) makes a
    // negative size a no-op rather than a runaway loop.
    for (int i = 0; i < size; ++i)
    {
        bf16_bits[i] = static_cast<char16_t>(f32_bits[i] >> 16);
    }
}
// Widening bfloat16 -> float32 conversion: places each 16-bit pattern in the
// upper half of a 32-bit word and zero-fills the lower 16 bits.
//
// src  : buffer of `size` 16-bit values, read as raw bit patterns.
// dst  : buffer receiving `size` 32-bit values.
// size : number of elements to convert; a value <= 0 converts nothing.
//
// The value is widened to an unsigned 32-bit integer before shifting: doing
// the shift in (signed) int overflows whenever bit 15 -- the bf16 sign bit --
// is set, which is undefined behaviour before C++20.
void ngraph::test::bf16_to_float(void* src, void* dst, int size)
{
    const char16_t* bf16_bits = static_cast<const char16_t*>(src);
    uint32_t* f32_bits = static_cast<uint32_t*>(dst);
    // Counting up against `size` makes a negative size a no-op rather than a
    // runaway loop.
    for (int i = 0; i < size; ++i)
    {
        f32_bits[i] = static_cast<uint32_t>(bf16_bits[i]) << 16;
    }
}
...@@ -57,9 +57,5 @@ namespace ngraph ...@@ -57,9 +57,5 @@ namespace ngraph
double bits_to_double(const std::string& s); double bits_to_double(const std::string& s);
float16 bits_to_float16(const std::string& s); float16 bits_to_float16(const std::string& s);
void float_to_bf16(void* src, void* dst, int size);
void bf16_to_float(void* src, void* dst, int size);
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment