Commit cd76c79f authored by Sergey Shalnov, committed by Scott Cyphers

IntelGPU backend: Fix Convert, Pad and Pool (#2837)

parent 1ec803ee
@@ -190,6 +190,11 @@ static void do_cldnn_unary(cldnn::topology& topology,
     topology.add(cldnn_unary);
 }
 
+static bool has_non_zero(const Shape& shape)
+{
+    return accumulate(shape.begin(), shape.end(), 0);
+}
+
 static void
 do_custom_unary(cldnn::topology& topology, const shared_ptr<Node>& op, const string& operation)
 {
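The new `has_non_zero` helper exists because `empty()` only tests whether the padding vector has any entries at all: a padding of `{0, 0}` is non-empty yet means "no padding", so the pooling cases below previously took the slow fallback path even when no real padding was requested. Summing the unsigned dimensions with `accumulate` is zero exactly when every entry is zero. A minimal standalone sketch of the same predicate, assuming `Shape` behaves like ngraph's `vector<size_t>`-derived shape type:

```cpp
// Standalone sketch, not the backend source: Shape here stands in for
// ngraph's vector<size_t>-derived Shape type.
#include <cassert>
#include <numeric>
#include <vector>

using Shape = std::vector<size_t>;

static bool has_non_zero(const Shape& shape)
{
    // The sum of unsigned dims is non-zero iff at least one dim is non-zero.
    return std::accumulate(shape.begin(), shape.end(), size_t{0}) != 0;
}

int main()
{
    assert(!has_non_zero(Shape{0, 0})); // non-empty, but semantically "no padding"
    assert(has_non_zero(Shape{1, 0}));
    assert(!has_non_zero(Shape{})); // rank-0 shape
}
```

The committed helper accumulates into an `int` and relies on the implicit conversion to `bool`; the sketch just makes that comparison explicit.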
@@ -802,7 +807,8 @@ shared_ptr<runtime::Executable>
                 if ((op->get_input_shape(0).size() > 4) ||
                     (op->get_output_element_type(0) != element::f32) ||
-                    !max_pool->get_padding_below().empty() || !max_pool->get_padding_above().empty())
+                    has_non_zero(max_pool->get_padding_below()) ||
+                    has_non_zero(max_pool->get_padding_above()))
                 {
                     const shared_ptr<Node> def_val = max_pool->get_default_value();
                     const shared_ptr<op::Constant> def_const =
@@ -869,7 +875,8 @@ shared_ptr<runtime::Executable>
                 if ((op->get_input_shape(0).size() > 4) ||
                     (op->get_output_element_type(0) != element::f32) ||
                     avg_pool->get_include_padding_in_avg_computation() ||
-                    !avg_pool->get_padding_below().empty() || !avg_pool->get_padding_above().empty())
+                    has_non_zero(avg_pool->get_padding_below()) ||
+                    has_non_zero(avg_pool->get_padding_above()))
                 {
                     const shared_ptr<Node> def_val = avg_pool->get_default_value();
                     const shared_ptr<op::Constant> def_const =
@@ -1194,13 +1201,29 @@ shared_ptr<runtime::Executable>
             break;
         }
         case OP_TYPEID::Ceiling:
         {
+            if (!op->get_input_element_type(0).is_real())
+            {
+                do_equal_propagation(
+                    topology, op->get_input_tensor_name(0), op->get_output_tensor_name(0));
+            }
+            else
+            {
                 do_custom_unary(topology, op, "ceil(input_var)");
+            }
             break;
         }
         case OP_TYPEID::Floor:
         {
+            if (!op->get_input_element_type(0).is_real())
+            {
+                do_equal_propagation(
+                    topology, op->get_input_tensor_name(0), op->get_output_tensor_name(0));
+            }
+            else
+            {
                 do_custom_unary(topology, op, "floor(input_var)");
+            }
             break;
         }
         case OP_TYPEID::Sign:
...
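The Ceiling/Floor change above handles integer tensors by forwarding the input unchanged (via `do_equal_propagation`) rather than emitting a kernel: OpenCL's `floor`/`ceil` built-ins are defined for floating-point arguments, and on integral values both operations are the identity anyway. A tiny standalone check of that identity (plain C++, not backend code):

```cpp
// Verifies the identity this commit relies on: flooring or ceiling an
// integral value, even after a round trip through floating point,
// returns the value itself, so no GPU kernel is needed.
#include <cassert>
#include <cmath>

int main()
{
    for (int x = -5; x <= 5; ++x)
    {
        assert(static_cast<int>(std::floor(static_cast<double>(x))) == x);
        assert(static_cast<int>(std::ceil(static_cast<double>(x))) == x);
    }
}
```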
@@ -337,7 +337,12 @@ void runtime::intelgpu::do_pad_operation(cldnn::topology& topology,
     Shape pad_interior(pad_below.size(), 0);
 
     // The kernel name and parameters
-    gen_func_def(writer, entry_point_name, {2, "float"}, {input_shape, {1}}, "float", output_shape);
+    gen_func_def(writer,
+                 entry_point_name,
+                 {2, get_opencl_type_name(output_type)},
+                 {input_shape, {1}},
+                 get_opencl_type_name(output_type),
+                 output_shape);
     writer.block_begin();
     {
@@ -1677,6 +1682,31 @@ void runtime::intelgpu::do_one_hot_operation(cldnn::topology& topology,
     topology.add(op_one_hot);
 }
 
+static string emit_convert_bool(const string& input_type)
+{
+    CodeWriter writer;
+
+    writer << "bool convert_bool(const " << input_type << " input)";
+    writer.block_begin();
+    {
+        writer << "if (input)\n";
+        writer.block_begin();
+        {
+            writer << "return 1;\n";
+        }
+        writer.block_end();
+        writer << "else\n";
+        writer.block_begin();
+        {
+            writer << "return 0;\n";
+        }
+        writer.block_end();
+    }
+    writer.block_end();
+
+    return writer.get_code();
+}
+
 void runtime::intelgpu::do_convert_operation(cldnn::topology& topology,
                                              const string& input_name,
                                              const Shape& input_shape,
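For reference, `emit_convert_bool` above builds a small OpenCL helper as a string. A plain-C++ sketch of the text it produces (exact whitespace depends on `CodeWriter`, so the indentation here is illustrative):

```cpp
#include <iostream>
#include <string>

// Reimplementation without CodeWriter, only to show the emitted helper text.
static std::string emit_convert_bool(const std::string& input_type)
{
    return "bool convert_bool(const " + input_type + " input)\n"
           "{\n"
           "    if (input)\n"
           "    {\n"
           "        return 1;\n"
           "    }\n"
           "    else\n"
           "    {\n"
           "        return 0;\n"
           "    }\n"
           "}\n";
}

int main()
{
    // For a float input this prints an OpenCL helper that maps any
    // non-zero value to 1 and zero to 0 before it is stored as bool.
    std::cout << emit_convert_bool("float");
}
```

The next hunk prepends this helper to the kernel source whenever the conversion target is `element::Type_t::boolean`.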
@@ -1692,6 +1722,11 @@ void runtime::intelgpu::do_convert_operation(cldnn::topology& topology,
     CodeWriter writer;
     vector<size_t> gws;
 
+    if (output_type == element::Type_t::boolean)
+    {
+        writer << emit_convert_bool(input_type_name);
+    }
+
     gen_func_def(
         writer, entry_point_name, {input_type_name}, {input_shape}, output_type_name, output_shape);
@@ -1764,8 +1799,12 @@ void runtime::intelgpu::do_sigmoid_backprop_operation(cldnn::topology& topology,
     CodeWriter writer;
     vector<size_t> gws;
 
-    gen_func_def(
-        writer, entry_point_name, {2, "float"}, {input_shape, delta_shape}, "float", output_shape);
+    gen_func_def(writer,
+                 entry_point_name,
+                 {2, get_opencl_type_name(output_type)},
+                 {input_shape, delta_shape},
+                 get_opencl_type_name(output_type),
+                 output_shape);
     writer.block_begin();
     {
@@ -1935,8 +1974,12 @@ void runtime::intelgpu::do_reshape_operation(cldnn::topology& topology,
 {
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
     const string entry_point_name = "reshape_" + output_name;
-    const string& input_type_name = get_opencl_type_name(input_type);
-    const string& output_type_name = get_opencl_type_name(output_type);
+
+    // Workaround for the OpenCL bool data type; layout must match the CPU backend
+    const string& input_type_name =
+        (input_type == element::Type_t::boolean) ? "char" : get_opencl_type_name(input_type);
+    const string& output_type_name =
+        (output_type == element::Type_t::boolean) ? "char" : get_opencl_type_name(output_type);
     const size_t dst_shape_size = shape_size(output_shape);
     CodeWriter writer;
...
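The reshape hunk swaps `bool` for `char` in the generated kernel signatures. This is presumably because OpenCL C does not allow `bool` as a kernel argument type and gives it an implementation-defined size, while ngraph's `element::boolean` occupies one byte per element on the CPU backend; declaring the buffers as `char` keeps the two backends' layouts compatible, as the in-code comment says. A hedged sketch of the mapping, with `ElemType` and `opencl_type_name` as hypothetical stand-ins for ngraph's `element::Type_t` and `get_opencl_type_name`:

```cpp
#include <iostream>
#include <string>

// Hypothetical stand-ins for ngraph's element::Type_t and
// get_opencl_type_name; only the boolean special case matters here.
enum class ElemType { boolean, f32, i32 };

static std::string opencl_type_name(ElemType t)
{
    switch (t)
    {
    case ElemType::boolean: return "bool";
    case ElemType::f32: return "float";
    case ElemType::i32: return "int";
    }
    return "";
}

// Boolean buffers are declared as char in the kernel signature so each
// element occupies one byte, matching the CPU backend's representation.
static std::string kernel_arg_type(ElemType t)
{
    return (t == ElemType::boolean) ? "char" : opencl_type_name(t);
}

int main()
{
    std::cout << kernel_arg_type(ElemType::boolean) << "\n"; // char
    std::cout << kernel_arg_type(ElemType::f32) << "\n";     // float
}
```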
-all_2x2x3_eliminate_dims_0_1
 backwards_batch_norm_training
 backwards_dot_scalar_tensor
 backwards_dot_tensor_scalar
@@ -22,9 +21,6 @@ shape_of_5d
 shape_of_matrix
 shape_of_scalar
 shape_of_vector
-floor_int32
-convert_int32_bool
-convert_float32_bool
 prelu
 prelu_shared_slope
 prelu_negative_slope
...