Commit 62e1bc26 authored by Sergey Shalnov, committed by Scott Cyphers

IntelGPU backend: Double datatype workaround implemented (#2435)

parent f75e10c3
@@ -1332,7 +1332,8 @@ shared_ptr<runtime::Executable>
             arguments_check(op, 5, 1);
 
-            if (get_input_shape(op, 2).size() != 4)
+            if ((get_input_shape(op, 2).size() != 4) ||
+                (get_input_type(op) != ngraph::element::f32))
             {
                 do_batch_norm_operation(topology,
                                         get_output_name(op),
@@ -1364,7 +1365,8 @@ shared_ptr<runtime::Executable>
                 static_pointer_cast<op::BatchNormTraining>(op);
             const double eps = bnorm->get_eps_value();
 
-            if (get_input_shape(op, 2).size() != 4)
+            if ((get_input_shape(op, 2).size() != 4) ||
+                (get_input_type(op) != ngraph::element::f32))
             {
                 string mean_name;
                 string variance_name;
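With both checks in place, the clDNN built-in batch-norm primitive is used only for 4-dimensional f32 inputs; everything else, including the f64 case this commit targets, now falls through to the backend's generated OpenCL kernels. A hedged restatement of the guard as a standalone predicate (get_input_shape and get_input_type are the helpers used in the diff; the wrapper function itself is hypothetical):

    // Hypothetical predicate: true only when the built-in clDNN path applies.
    static bool use_builtin_cldnn_batchnorm(const shared_ptr<Node>& op)
    {
        return (get_input_shape(op, 2).size() == 4) &&
               (get_input_type(op) == ngraph::element::f32);
    }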
...
@@ -54,32 +54,19 @@ bool runtime::intelgpu::IntelGPULayout::
 cldnn::data_types
     runtime::intelgpu::IntelGPULayout::get_cldnn_type(const element::Type& element_type)
 {
-    if ((element_type == ngraph::element::i8) || (element_type == ngraph::element::boolean))
+    switch (element_type.get_type_enum())
     {
-        return cldnn::data_types::i8;
-    }
-    else if (element_type == ngraph::element::u8)
-    {
-        return cldnn::data_types::u8;
-    }
-    else if (element_type == ngraph::element::i32)
-    {
-        return cldnn::data_types::i32;
-    }
-    else if (element_type == ngraph::element::i64)
-    {
-        return cldnn::data_types::i64;
-    }
-    else if (element_type == ngraph::element::f32)
-    {
-        return cldnn::data_types::f32;
-    }
-    else
-    {
-        ostringstream os;
-        os << "IntelGPULayout::get_cldnn_type: Unknown type " << element_type;
-        throw invalid_argument(os.str());
+    case element::Type_t::i8:
+    case element::Type_t::boolean: return cldnn::data_types::i8;
+    case element::Type_t::u8: return cldnn::data_types::u8;
+    case element::Type_t::i32: return cldnn::data_types::i32;
+    case element::Type_t::i64: return cldnn::data_types::i64;
+    case element::Type_t::f32: return cldnn::data_types::f32;
     }
+
+    ostringstream os;
+    os << "IntelGPULayout::get_cldnn_type: Unknown type " << element_type;
+    throw invalid_argument(os.str());
 }
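Unsupported element types now simply fall out of the switch into the shared error path at the bottom, so the diagnostic lives in one place instead of a trailing else. A minimal usage sketch (hypothetical call, assuming the headers this file already includes):

    // f32 maps directly; f64 is deliberately absent from the switch and would
    // reach the throw above, which is why create_cldnn_layout() must intercept
    // f64 before ever calling this function.
    const cldnn::data_types t =
        runtime::intelgpu::IntelGPULayout::get_cldnn_type(ngraph::element::f32);
    // t == cldnn::data_types::f32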
 cldnn::tensor runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(const Shape& element_shape)
@@ -131,13 +118,27 @@ cldnn::layout runtime::intelgpu::IntelGPULayout::create_cldnn_layout(
     const cldnn::tensor tensor = create_cldnn_tensor(element_shape);
     cldnn::data_types data_type;
 
-    if ((element_type == ngraph::element::i16) || (element_type == ngraph::element::u16))
+    switch (element_type.get_type_enum())
     {
+    case element::Type_t::i16:
+    case element::Type_t::u16:
+    {
         data_type = cldnn::data_types::f16;
+        break;
     }
-    else
+    case element::Type_t::u32:
     {
-        data_type = get_cldnn_type(element_type);
+        data_type = cldnn::data_types::i32;
+        break;
+    }
+    case element::Type_t::u64:
+    case element::Type_t::f64:
+    {
+        data_type = cldnn::data_types::i64;
+        break;
+    }
+    default: { data_type = get_cldnn_type(element_type);
+    }
     }
 
     return cldnn::layout(data_type, format, tensor);
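This switch is the workaround named in the commit title: clDNN has no f64 data type, so f64 and u64 tensors are declared to clDNN as i64, and u32 as i32. That appears safe at the layout level because the layout only has to describe element width and buffer size to clDNN, while the custom OpenCL kernels below access the same bytes with their true type. A one-line sanity check of the width argument:

    #include <cstdint>
    // The substitution relies only on matching element widths (8 and 4 bytes):
    static_assert(sizeof(double) == sizeof(std::int64_t), "f64 fits an i64 layout");
    static_assert(sizeof(std::uint64_t) == sizeof(std::int64_t), "u64 fits an i64 layout");
    static_assert(sizeof(std::uint32_t) == sizeof(std::int32_t), "u32 fits an i32 layout");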
...
@@ -64,11 +64,12 @@ void runtime::intelgpu::do_create_mean(cldnn::topology& topology,
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, channel_shape);
     const string entry_point_name = "create_mean_" + output_name;
     const size_t output_counts = shape_size<Shape>(input_shape) / input_shape.at(channel_axis);
+    const string kernel_data_type = get_opencl_type_name(output_type);
     codegen::CodeWriter writer;
 
-    writer << "__kernel void " << entry_point_name << "( const __global float input"
-           << array_dims(input_shape) << ", __global float output" << array_dims(channel_shape)
-           << ")\n";
+    writer << "__kernel void " << entry_point_name << "( const __global " << kernel_data_type
+           << " input" << array_dims(input_shape) << ", __global " << kernel_data_type << " output"
+           << array_dims(channel_shape) << ")\n";
     writer.block_begin();
     {   // Main function body
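The new kernel_data_type string comes from get_opencl_type_name(), which already exists elsewhere in the IntelGPU backend and is not part of this diff. A plausible sketch of such a helper, with the exact mapping being an assumption (OpenCL spells 64-bit integers long/ulong rather than int64_t):

    // Sketch only, not the real implementation from the backend.
    string get_opencl_type_name(const element::Type& ngraph_type)
    {
        switch (ngraph_type.get_type_enum())
        {
        case element::Type_t::i64: return "long";
        case element::Type_t::u64: return "ulong";
        default: return ngraph_type.c_type_string(); // "float", "double", ...
        }
    }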
@@ -78,7 +79,7 @@ void runtime::intelgpu::do_create_mean(cldnn::topology& topology,
                << input_shape.at(channel_axis) << "; ++i" << channel_axis << ")\n";
         writer.block_begin();
         {
-            writer << "float sum = 0.0f;\n";
+            writer << kernel_data_type << " sum = 0.0f;\n";
             size_t var_idx = 0;
             // Main loops
             for (auto const& i : input_shape)
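Per channel c, the generated kernel accumulates a sum over all non-channel indices, and output_counts, computed above as the product of the non-channel dimensions, presumably scales that sum into the mean, i.e. mean[c] = sum / output_counts. The only change here is that the accumulator and kernel arguments now use the real element type, so an f64 graph gets a double accumulator instead of silently truncating to float.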
@@ -139,11 +140,13 @@ void runtime::intelgpu::do_create_variance(cldnn::topology& topology,
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, channel_shape);
     const string entry_point_name = "create_variance_" + output_name;
     const size_t output_counts = shape_size<Shape>(input_shape) / input_shape.at(channel_axis);
+    const string kernel_data_type = get_opencl_type_name(output_type);
     codegen::CodeWriter writer;
 
-    writer << "__kernel void " << entry_point_name << "( const __global float input"
-           << array_dims(input_shape) << ", const __global float mean" << array_dims(channel_shape)
-           << ", __global float output" << array_dims(channel_shape) << ")\n";
+    writer << "__kernel void " << entry_point_name << "( const __global " << kernel_data_type
+           << " input" << array_dims(input_shape) << ", const __global " << kernel_data_type
+           << " mean" << array_dims(channel_shape) << ", __global " << kernel_data_type << " output"
+           << array_dims(channel_shape) << ")\n";
     writer.block_begin();
     {   // Main function body
@@ -153,7 +156,7 @@ void runtime::intelgpu::do_create_variance(cldnn::topology& topology,
                << input_shape.at(channel_axis) << "; ++i" << channel_axis << ")\n";
         writer.block_begin();
         {
-            writer << "float sum = 0.0f;\n";
+            writer << kernel_data_type << " sum = 0.0f;\n";
             size_t var_idx = 0;
             // Main loops
@@ -168,8 +171,8 @@ void runtime::intelgpu::do_create_variance(cldnn::topology& topology,
                 ++var_idx;
             }
 
-            writer << "float mean_diff = input" << access_dims(input_shape) << " - mean[i"
-                   << channel_axis << "];\n";
+            writer << kernel_data_type << " mean_diff = input" << access_dims(input_shape)
+                   << " - mean[i" << channel_axis << "];\n";
             writer << "sum += mean_diff * mean_diff;\n";
 
             var_idx = 0;
@@ -217,14 +220,17 @@ void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, input_shape);
     const vector<size_t> gws(input_shape.begin(), input_shape.begin() + 2);
     const string entry_point_name = "batch_norm_" + output_name;
+    const string kernel_data_type = get_opencl_type_name(output_type);
     codegen::CodeWriter writer;
 
     // The kernel name and parameters
     writer << "__attribute__((reqd_work_group_size(1,1,1)))\n"
-           << "__kernel void " << entry_point_name
-           << "(const __global float *input0, const __global float *input1,"
-           << " const __global float *input2, const __global float *input3,"
-           << " const __global float *input4, __global float *output)\n";
+           << "__kernel void " << entry_point_name << "(const __global " << kernel_data_type
+           << " *input0, const __global " << kernel_data_type << " *input1,"
+           << " const __global " << kernel_data_type << " *input2, const __global "
+           << kernel_data_type << " *input3,"
+           << " const __global " << kernel_data_type << " *input4, __global " << kernel_data_type
+           << " *output)\n";
     writer.block_begin();
     {   // Main function body
@@ -234,11 +240,12 @@ void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
                << "); /* channel_axis trip count " << input_shape.at(channel_axis) << "*/\n";
 
         // Invariants for the rest of the loops
-        writer << "const float gamma = input1[i" << channel_axis << "];\n"
-               << "const float beta = input2[i" << channel_axis << "];\n"
-               << "const float mean = input3[i" << channel_axis << "];\n"
-               << "const float variance = input4[i" << channel_axis << "];\n"
-               << "const float var_sqrt = (gamma / sqrt(variance + " << eps << "));\n";
+        writer << "const " << kernel_data_type << " gamma = input1[i" << channel_axis << "];\n"
+               << "const " << kernel_data_type << " beta = input2[i" << channel_axis << "];\n"
+               << "const " << kernel_data_type << " mean = input3[i" << channel_axis << "];\n"
+               << "const " << kernel_data_type << " variance = input4[i" << channel_axis << "];\n"
+               << "const " << kernel_data_type << " var_sqrt = (gamma / sqrt(variance + " << eps
+               << "));\n";
 
         writer << "const uint i0 = get_global_id(0);"
                << " /* batch axis trip count " << input_shape.at(0) << "*/\n";
@@ -285,14 +292,16 @@ void runtime::intelgpu::do_create_variance_back(cldnn::topology& topology,
     const Shape channel_shape = get_channel_shape(input_shape, "create_variance_back");
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, channel_shape);
     const string entry_point_name = "create_variance_back_" + output_name;
+    const string kernel_data_type = get_opencl_type_name(output_type);
     codegen::CodeWriter writer;
     vector<size_t> gws;
 
-    writer << "__kernel void " << entry_point_name << "(const __global float input"
-           << array_dims(input_shape) << ", const __global float delta" << array_dims(input_shape)
-           << ", const __global float mean" << array_dims(channel_shape)
-           << ", const __global float variance" << array_dims(channel_shape)
-           << ", __global float output" << array_dims(channel_shape) << ")\n";
+    writer << "__kernel void " << entry_point_name << "(const __global " << kernel_data_type
+           << " input" << array_dims(input_shape) << ", const __global " << kernel_data_type
+           << " delta" << array_dims(input_shape) << ", const __global " << kernel_data_type
+           << " mean" << array_dims(channel_shape) << ", const __global " << kernel_data_type
+           << " variance" << array_dims(channel_shape) << ", __global " << kernel_data_type
+           << " output" << array_dims(channel_shape) << ")\n";
     writer.block_begin();
     {   // Main function body
@@ -302,10 +311,12 @@ void runtime::intelgpu::do_create_variance_back(cldnn::topology& topology,
         writer << "\nconst uint i" << channel_axis << " = get_global_id(" << channel_axis
                << "); /* channel_axis trip count " << input_shape.at(channel_axis) << "*/\n";
         gws.push_back(input_shape.at(channel_axis));
 
-        writer << "const float mean_loc = mean[i" << channel_axis << "];\n"
-               << "const float variance_loc = variance[i" << channel_axis << "];\n"
-               << "const float var_sqrt = 1.0f / sqrt(variance_loc + " << eps << ");\n";
-        writer << "float sum = 0.0f;\n";
+        writer << "const " << kernel_data_type << " mean_loc = mean[i" << channel_axis << "];\n"
+               << "const " << kernel_data_type << " variance_loc = variance[i" << channel_axis
+               << "];\n"
+               << "const " << kernel_data_type << " var_sqrt = 1.0f / sqrt(variance_loc + " << eps
+               << ");\n";
+        writer << kernel_data_type << " sum = 0.0f;\n";
 
         // Main loops
         writer << "for (uint i0 = 0; i0 < " << input_shape.at(0) << "; ++i0)\n";
@@ -317,8 +328,10 @@ void runtime::intelgpu::do_create_variance_back(cldnn::topology& topology,
         writer << "for (uint i3 = 0; i3 < " << input_shape.at(3) << "; ++i3)\n";
         writer.block_begin();
         {
-            writer << "const float input_loc = input" << access_dims(input_shape) << ";\n";
-            writer << "const float delta_loc = delta" << access_dims(input_shape) << ";\n";
+            writer << "const " << kernel_data_type << " input_loc = input"
+                   << access_dims(input_shape) << ";\n";
+            writer << "const " << kernel_data_type << " delta_loc = delta"
+                   << access_dims(input_shape) << ";\n";
             writer << "sum += (input_loc - mean_loc) * var_sqrt * delta_loc;\n";
         }
         writer.block_end();
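The accumulated value is the sum over the non-channel axes of (input - mean) * var_sqrt * delta, i.e. the sum of xhat * delta with xhat the normalized input, which is the per-channel gamma gradient of batch norm. As in the other kernels, only the scalar type of the locals changes here.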
@@ -360,17 +373,19 @@ void runtime::intelgpu::do_batch_norm_backprop_operation(cldnn::topology& topolo
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(type, shape);
     const string entry_point_name = "batch_norm_backprop_" + output_name;
     const size_t r_axes_size = shape_size(shape) / shape_size(channel_shape);
+    const string kernel_data_type = get_opencl_type_name(type);
     codegen::CodeWriter writer;
     vector<size_t> gws;
 
-    writer << "__kernel void " << entry_point_name << "(const __global float input"
-           << array_dims(shape) << ", const __global float delta" << array_dims(shape)
-           << ", const __global float mean" << array_dims(channel_shape)
-           << ", const __global float variance" << array_dims(channel_shape)
-           << ", const __global float gamma" << array_dims(channel_shape)
-           << ", const __global float gamma_backprop" << array_dims(channel_shape)
-           << ", const __global float beta_backprop" << array_dims(channel_shape)
-           << ", __global float output" << array_dims(shape) << ")\n";
+    writer << "__kernel void " << entry_point_name << "(const __global " << kernel_data_type
+           << " input" << array_dims(shape) << ", const __global " << kernel_data_type << " delta"
+           << array_dims(shape) << ", const __global " << kernel_data_type << " mean"
+           << array_dims(channel_shape) << ", const __global " << kernel_data_type << " variance"
+           << array_dims(channel_shape) << ", const __global " << kernel_data_type << " gamma"
+           << array_dims(channel_shape) << ", const __global " << kernel_data_type
+           << " gamma_backprop" << array_dims(channel_shape) << ", const __global "
+           << kernel_data_type << " beta_backprop" << array_dims(channel_shape) << ", __global "
+           << kernel_data_type << " output" << array_dims(shape) << ")\n";
     writer.block_begin();
     {   // Main function body
@@ -378,10 +393,11 @@ void runtime::intelgpu::do_batch_norm_backprop_operation(cldnn::topology& topolo
         // Main loops
        gws = runtime::intelgpu::generate_loops(writer, shape, true);
 
-        writer << "float stddev = sqrt(variance[i" << channel_axis << "] + " << eps << ");\n";
-        writer << "float xhat = (input" << access_dims(shape) << " - mean[i" << channel_axis
-               << "]) / stddev;\n";
-        writer << "float norma = gamma[i" << channel_axis << "] / stddev;\n";
+        writer << kernel_data_type << " stddev = sqrt(variance[i" << channel_axis << "] + " << eps
+               << ");\n";
+        writer << kernel_data_type << " xhat = (input" << access_dims(shape) << " - mean[i"
+               << channel_axis << "]) / stddev;\n";
+        writer << kernel_data_type << " norma = gamma[i" << channel_axis << "] / stddev;\n";
 
         writer << "output" << access_dims(shape) << " = norma * (delta" << access_dims(shape)
                << " - (xhat * gamma_backprop[i" << channel_axis << "] + beta_backprop[i"
...
 all_2x2x3_eliminate_dims_0_1
-argmin_trivial_in_double
 avg_pool_2d_2channel_2image_padded_only_above_do_not_include_in_computation
 avg_pool_2d_2channel_2image_padded_only_above_include_in_computation
 avg_pool_3d_uneven_strided_padded
@@ -30,10 +29,7 @@ embedding_lookup_10x1_arbitrary
 embedding_lookup_10x1_arbitrary_index_type_int
 embedding_lookup_4x5_reverse
 generate_mask
-max_3d_to_scalar_double
 max_pool_3d
-numeric_double_inf
-numeric_double_nan
 quantize
 quantize_axes
 quantize_clamp_int32
@@ -69,9 +65,6 @@ shape_of_matrix
 shape_of_scalar
 shape_of_vector
 softmax_axis_3d_double
-sum_stable_acc_double
-sum_stable_simple_double
-sum_trivial_in_double
 topk_1d_max_all
 topk_1d_max_one
 topk_1d_max_partial
...
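The manifest changes un-skip the double-precision unit tests that the workaround makes pass on this backend: argmin_trivial_in_double, max_3d_to_scalar_double, numeric_double_inf, numeric_double_nan, sum_stable_acc_double, sum_stable_simple_double, and sum_trivial_in_double. softmax_axis_3d_double remains skipped.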