Commit 7ea34fc2 authored by Nishant Patel, committed by Scott Cyphers

Simple bf16 test case for CPU backend (#3598)

* test bfloat16 with CPU maxpool

* Move test to cpu_test

* convert result from bfloat16 to float

* Modify result op to support bfloat16

* Correct comment

* Add a simple convolution test case

* initialize floats with float literals

* Guard bf16 tests with MKLDNN_VERSION_MAJOR >= 1
parent 61c9d2b0
@@ -350,7 +350,31 @@ namespace ngraph
             template <>
             void Builder::BUILDER_DECL(ngraph::op::Result)
             {
-                BUILD_UNARY_ELEMWISE_FUNCTOR(runtime::cpu::kernel::result);
+                if (args[0].get_element_type() == element::bf16)
+                {
+                    auto& functors = external_function->get_functors();
+                    std::function<void(void*, void*, size_t, int)> kernel;
+                    kernel = ngraph::runtime::cpu::kernel::result<bfloat16>;
+                    auto element_count = out[0].get_size();
+                    auto arg0_buffer_index =
+                        external_function->get_buffer_index(args[0].get_name());
+                    auto out0_buffer_index =
+                        external_function->get_buffer_index(out[0].get_name());
+                    auto functor = [&, kernel, element_count, arg0_buffer_index, out0_buffer_index](
+                        CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
+                        kernel(ctx->buffer_data[arg0_buffer_index],
+                               ctx->buffer_data[out0_buffer_index],
+                               element_count,
+                               ectx->arena);
+                    };
+                    functors.emplace_back(functor);
+                }
+                else
+                {
+                    BUILD_UNARY_ELEMWISE_FUNCTOR(runtime::cpu::kernel::result);
+                }
             }
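The kernel bound above has the signature void(void*, void*, size_t, int). As a rough sketch of what such a kernel boils down to, assuming it is a plain element-wise copy (the actual ngraph::runtime::cpu::kernel::result implementation may differ, and result_sketch is a hypothetical name):

#include <cstddef>
#include <cstring>

// Hypothetical stand-in for ngraph::runtime::cpu::kernel::result<T>:
// copies `count` elements of type T from `input` to `output`; the final
// argument selects an execution arena in nGraph and is unused here.
template <typename T>
void result_sketch(void* input, void* output, size_t count, int /*arena*/)
{
    std::memcpy(output, input, count * sizeof(T));
}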
@@ -204,13 +204,15 @@ namespace ngraph
                 // Data
                 if (node->get_input_element_type(0) != element::f32 &&
                     node->get_input_element_type(0) != element::i8 &&
-                    node->get_input_element_type(0) != element::u8)
+                    node->get_input_element_type(0) != element::u8 &&
+                    node->get_input_element_type(0) != element::bf16)
                 {
                     return false;
                 }
                 // Weights
                 if (node->get_input_element_type(1) != element::f32 &&
-                    node->get_input_element_type(1) != element::i8)
+                    node->get_input_element_type(1) != element::i8 &&
+                    node->get_input_element_type(1) != element::bf16)
                 {
                     return false;
                 }
@@ -218,7 +220,8 @@ namespace ngraph
                 if (node->get_output_element_type(0) != element::f32 &&
                     node->get_output_element_type(0) != element::i8 &&
                     node->get_output_element_type(0) != element::u8 &&
-                    node->get_output_element_type(0) != element::i32)
+                    node->get_output_element_type(0) != element::i32 &&
+                    node->get_output_element_type(0) != element::bf16)
                 {
                     return false;
                 }
@@ -408,7 +408,8 @@ namespace ngraph
                 (arg0_rank == 5 && max_pool->get_window_shape().size() == 3)) &&
                 (node->get_input_element_type(0) == element::f32 ||
                  node->get_input_element_type(0) == element::u8 ||
-                 node->get_input_element_type(0) == element::i8))
+                 node->get_input_element_type(0) == element::i8 ||
+                 node->get_input_element_type(0) == element::bf16))
             {
                 runtime::cpu::mkldnn_utils::assign_mkldnn_kernel(node);
             }
@@ -25,6 +25,7 @@
 #include "ngraph/ngraph.hpp"
 #include "util/all_close.hpp"
 #include "util/all_close_f.hpp"
+#include "util/float_util.hpp"
 #include "util/ndarray.hpp"
 #include "util/random.hpp"
 #include "util/test_control.hpp"
@@ -49,6 +49,7 @@
 #include "util/all_close_f.hpp"
 #include "util/autodiff/backprop_function.hpp"
 #include "util/autodiff/numeric_compare.hpp"
+#include "util/float_util.hpp"
 #include "util/ndarray.hpp"
 #include "util/random.hpp"
 #include "util/test_tools.hpp"
@@ -2153,3 +2154,99 @@ TEST(cpu_test, tensor_copy_from_different_layout)
     EXPECT_EQ((vector<uint8_t>{1, 4, 2, 5, 3, 6}), read_vector<uint8_t>(b));
 }
+
+#if MKLDNN_VERSION_MAJOR >= 1
+TEST(cpu_test, max_pool_bf16)
+{
+    Shape shape_a{1, 1, 3, 5};
+    Shape window_shape{2, 3};
+    auto window_movement_strides = Strides{1, 1};
+    Shape padding_below{0, 0};
+    Shape padding_above{0, 0};
+    Shape shape_r{1, 1, 2, 3};
+
+    // input data
+    vector<float> a_data = {
+        0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f};
+
+    // allocate memory for the bf16 copy (half the size of the f32 data)
+    int size = a_data.size() * sizeof(float) / 2;
+    void* bf16_dst = std::malloc(size);
+    // convert float data to bfloat16
+    ngraph::test::float_to_bf16(a_data.data(), bf16_dst, a_data.size());
+
+    auto A = make_shared<op::Parameter>(element::bf16, shape_a);
+    auto QMP = make_shared<ngraph::op::MaxPool>(
+        A, window_shape, window_movement_strides, padding_below, padding_above);
+    auto f = make_shared<Function>(NodeVector{QMP}, ParameterVector{A});
+    auto backend = runtime::Backend::create("CPU");
+
+    // Create some tensors for input/output
+    auto a = backend->create_tensor(element::bf16, shape_a);
+    a->write(bf16_dst, size);
+    auto result = backend->create_tensor(element::bf16, shape_r);
+    auto handle = backend->compile(f);
+    handle->call_with_validate({result}, {a});
+
+    // convert the output back to float
+    void* fp_dst = std::malloc(shape_size(shape_r) * sizeof(float));
+    ngraph::test::bf16_to_float(
+        static_pointer_cast<runtime::cpu::CPUTensorView>(result)->get_data_ptr(),
+        fp_dst,
+        shape_size(shape_r));
+    auto b = backend->create_tensor(element::f32, shape_r);
+    b->write(fp_dst, shape_size(shape_r) * sizeof(float));
+    EXPECT_EQ((vector<float>{3.5f, 3.5f, 2.5f, 3.5f, 3.5f, 2.5f}), read_vector<float>(b));
+
+    std::free(bf16_dst);
+    std::free(fp_dst);
+}
+
+TEST(cpu_test, convolution_simple_bf16)
+{
+    Shape shape_a{1, 2, 2, 2};
+    auto A = make_shared<op::Parameter>(element::bf16, shape_a);
+    Shape shape_b{2, 2, 1, 1};
+    auto B = make_shared<op::Parameter>(element::bf16, shape_b);
+    Shape shape_r{1, 2, 2, 2};
+    vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+    vector<float> weights = {3.0f, 3.0f, 3.0f, 3.0f};
+
+    // allocate memory for the bf16 copies (half the size of the f32 data)
+    int input_size = input.size() * sizeof(float) / 2;
+    int weights_size = weights.size() * sizeof(float) / 2;
+    void* bf16_input_dst = std::malloc(input_size);
+    void* bf16_weights_dst = std::malloc(weights_size);
+    // convert float data to bfloat16
+    ngraph::test::float_to_bf16(input.data(), bf16_input_dst, input.size());
+    ngraph::test::float_to_bf16(weights.data(), bf16_weights_dst, weights.size());
+
+    auto conv1 = make_shared<op::Convolution>(A,
+                                              B,
+                                              Strides{1, 1},
+                                              Strides{1, 1},
+                                              CoordinateDiff{0, 0},
+                                              CoordinateDiff{0, 0},
+                                              Strides{1, 1});
+    auto f = make_shared<Function>(conv1, ParameterVector{A, B});
+    auto backend = runtime::Backend::create("CPU");
+
+    // Create some tensors for input/output
+    auto a = backend->create_tensor(element::bf16, shape_a);
+    a->write(bf16_input_dst, input_size);
+    auto b = backend->create_tensor(element::bf16, shape_b);
+    b->write(bf16_weights_dst, weights_size);
+    auto result = backend->create_tensor(element::bf16, shape_r);
+    vector<float> expected_result{18.0f, 24.0f, 30.0f, 36.0f, 18.0f, 24.0f, 30.0f, 36.0f};
+    auto handle = backend->compile(f);
+    handle->call_with_validate({result}, {a, b});
+
+    // convert the output back to float
+    void* fp_dst = std::malloc(shape_size(shape_r) * sizeof(float));
+    ngraph::test::bf16_to_float(
+        static_pointer_cast<runtime::cpu::CPUTensorView>(result)->get_data_ptr(),
+        fp_dst,
+        shape_size(shape_r));
+    auto c = backend->create_tensor(element::f32, shape_r);
+    c->write(fp_dst, shape_size(shape_r) * sizeof(float));
+    EXPECT_TRUE(test::all_close_f(expected_result, read_vector<float>(c)));
+
+    std::free(bf16_input_dst);
+    std::free(bf16_weights_dst);
+    std::free(fp_dst);
+}
+#endif
@@ -184,3 +184,35 @@ double ngraph::test::bits_to_double(const std::string& s)
     du.i = static_cast<uint64_t>(bs.to_ullong());
     return du.d;
 }
+
+// A bf16 number is an f32 number with the low 16 mantissa bits dropped:
+//
+//    S     E         f32 mantissa (23 bits)
+//    0|00011110|0101010|1000011111111000
+//              <------->
+//           bf16 mantissa (7 bits)
+//
+// 1. Right shift the number by 16, which gives  0|00011110|0101010
+// 2. Logical AND with 0xffff:                 & 1|11111111|1111111
+//                                               -------------------
+//                                               0|00011110|0101010
+void ngraph::test::float_to_bf16(void* src, void* dst, int size)
+{
+    // Reinterpret each f32 as a 32-bit integer and keep only its high 16 bits.
+    uint32_t* a = static_cast<uint32_t*>(src);
+    uint16_t* b = static_cast<uint16_t*>(dst);
+    for (; size != 0; b++, size--, a++)
+    {
+        *b = static_cast<uint16_t>((a[0] >> 16) & 0xffff);
+    }
+}
+
+void ngraph::test::bf16_to_float(void* src, void* dst, int size)
+{
+    // Widen each bf16 value back to f32 by zero-filling the low 16 mantissa bits.
+    uint16_t* a = static_cast<uint16_t*>(src);
+    uint32_t* b = static_cast<uint32_t*>(dst);
+    for (; size != 0; a++, b++, size--)
+    {
+        *b = static_cast<uint32_t>(a[0]) << 16;
+    }
+}
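To see what the truncating conversion does to a concrete value, here is a small self-contained illustration; it repeats the same bit manipulation inline rather than calling the helpers above, so it compiles on its own:

#include <cstdint>
#include <cstring>
#include <iostream>

int main()
{
    float value = 1.7f; // bit pattern 0x3FD9999A
    uint32_t bits;
    std::memcpy(&bits, &value, sizeof(bits)); // type-pun without aliasing issues

    uint16_t bf16 = static_cast<uint16_t>(bits >> 16);    // truncate to bf16: 0x3FD9
    uint32_t widened = static_cast<uint32_t>(bf16) << 16; // back to f32 bits: 0x3FD90000

    float restored;
    std::memcpy(&restored, &widened, sizeof(restored));
    std::cout << value << " -> " << restored << "\n"; // prints "1.7 -> 1.69531"
}

The dropped 16 mantissa bits cost precision (1.7 comes back as 1.6953125), which is why the tests above compare against values that are exactly representable in bf16 or use all_close_f.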
@@ -57,5 +57,9 @@ namespace ngraph
             double bits_to_double(const std::string& s);
             float16 bits_to_float16(const std::string& s);
+
+            void float_to_bf16(void* src, void* dst, int size);
+            void bf16_to_float(void* src, void* dst, int size);
         }
     }