Commit f8632ea0 authored by Sergey Shalnov, committed by Sang Ik Lee

IntelGPU backend: Max and Avg pool fix (#2482)

parent fd0ed37c
......@@ -765,14 +765,40 @@ shared_ptr<runtime::Executable>
}
case OP_TYPEID::MaxPool:
{
    arguments_check(op, 1, 1);

    const shared_ptr<op::MaxPool> max_pool = static_pointer_cast<op::MaxPool>(op);

    // NOTE(review): the rendered diff left the old unconditional
    // do_pooling_operation() call above this if/else; it must not run in
    // addition to the branch below, so it is removed here.
    //
    // Use the custom generated kernel for the cases the generic cldnn pooling
    // primitive path does not cover: rank > 4 tensors, non-f32 output, or any
    // explicit padding below/above.
    if ((get_input_shape(op).size() > 4) || (get_output_type(op) != element::f32) ||
        !max_pool->get_padding_below().empty() || !max_pool->get_padding_above().empty())
    {
        // Default value of the op (seed for the accumulator in padded regions),
        // extracted as a string so it can be pasted into the OpenCL source.
        const shared_ptr<Node> def_val = max_pool->get_default_value();
        const shared_ptr<op::Constant> def_const = static_pointer_cast<op::Constant>(def_val);
        const vector<std::string>& values = def_const->get_value_strings();

        do_max_avg_pool_operation(topology,
                                  get_input_name(op),
                                  get_input_shape(op),
                                  get_output_name(op),
                                  get_output_shape(op),
                                  get_output_type(op),
                                  max_pool->get_window_shape(),
                                  max_pool->get_window_movement_strides(),
                                  max_pool->get_padding_below(),
                                  false, // include_padding: never for max pool
                                  values.at(0),
                                  true); // is_max_pool
    }
    else
    {
        do_pooling_operation(topology,
                             op,
                             max_pool->get_window_shape(),
                             max_pool->get_window_movement_strides(),
                             max_pool->get_padding_below(),
                             cldnn::pooling_mode::max);
    }
    break;
}
case OP_TYPEID::MaxPoolBackprop:
......@@ -804,17 +830,45 @@ shared_ptr<runtime::Executable>
}
case OP_TYPEID::AvgPool:
{
    arguments_check(op, 1, 1);

    const shared_ptr<op::AvgPool> avg_pool = static_pointer_cast<op::AvgPool>(op);

    // NOTE(review): the rendered diff left the old unconditional mode
    // selection + do_pooling_operation() call above this if/else; it must not
    // run in addition to the branch below, so it is removed here.
    //
    // Use the custom generated kernel for the cases the generic cldnn pooling
    // primitive path does not cover: rank > 4 tensors, non-f32 output,
    // padding included in the average, or any explicit padding below/above.
    if ((get_input_shape(op).size() > 4) || (get_output_type(op) != element::f32) ||
        avg_pool->get_include_padding_in_avg_computation() ||
        !avg_pool->get_padding_below().empty() || !avg_pool->get_padding_above().empty())
    {
        // Default value of the op (seed for the accumulator in padded regions),
        // extracted as a string so it can be pasted into the OpenCL source.
        const shared_ptr<Node> def_val = avg_pool->get_default_value();
        const shared_ptr<op::Constant> def_const = static_pointer_cast<op::Constant>(def_val);
        const vector<std::string>& values = def_const->get_value_strings();

        do_max_avg_pool_operation(topology,
                                  get_input_name(op),
                                  get_input_shape(op),
                                  get_output_name(op),
                                  get_output_shape(op),
                                  get_output_type(op),
                                  avg_pool->get_window_shape(),
                                  avg_pool->get_window_movement_strides(),
                                  avg_pool->get_padding_below(),
                                  avg_pool->get_include_padding_in_avg_computation(),
                                  values.at(0),
                                  false); // is_max_pool
    }
    else
    {
        const cldnn::pooling_mode mode = avg_pool->get_include_padding_in_avg_computation()
                                             ? cldnn::pooling_mode::average
                                             : cldnn::pooling_mode::average_no_padding;

        do_pooling_operation(topology,
                             op,
                             avg_pool->get_window_shape(),
                             avg_pool->get_window_movement_strides(),
                             avg_pool->get_padding_below(),
                             mode);
    }
    break;
}
case OP_TYPEID::AvgPoolBackprop:
......@@ -825,8 +879,8 @@ shared_ptr<runtime::Executable>
static_pointer_cast<op::AvgPoolBackprop>(op);
do_avg_pool_backprop_operation(topology,
get_input_name(op, 0),
get_input_shape(op, 0),
get_input_name(op),
get_input_shape(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
......
......@@ -471,26 +471,31 @@ void runtime::intelgpu::do_max_pool_backprop_operation(cldnn::topology& topology
const Shape& pad_below)
{
const string entry_point_name = "op_max_pool_backprop_" + output_name;
const string type_name = get_opencl_type_name(output_type);
const Shape delta_data(delta_shape.cbegin() + 2, delta_shape.cend());
const Shape output_data(output_shape.cbegin() + 2, output_shape.cend());
codegen::CodeWriter writer;
vector<size_t> gws;
// The kernel name and parameters
gen_func_def(
writer, entry_point_name, {2, "float"}, {input_shape, delta_shape}, "float", output_shape);
gen_func_def(writer,
entry_point_name,
{2, type_name},
{input_shape, delta_shape},
type_name,
output_shape);
writer.block_begin();
{
// Main loop over delta input array
writer << "const uint i0 = get_global_id(0);";
gws.push_back(delta_shape.at(0));
writer << "// for (uint i0 = 0; i0 < " << delta_shape.at(0) << "; ++i0)\n";
writer << "/*trip count " << delta_shape.at(0) << "*/\n";
writer.block_begin();
{
writer << "const uint i1 = get_global_id(1);";
gws.push_back(delta_shape.at(1));
writer << "// for (uint i1 = 0; i1 < " << delta_shape.at(1) << "; ++i1)\n";
writer << "/*trip count " << delta_shape.at(1) << "*/\n";
writer.block_begin();
{
// Initialization output
......@@ -509,7 +514,7 @@ void runtime::intelgpu::do_max_pool_backprop_operation(cldnn::topology& topology
{
writer << "[j" << i << "]";
}
writer << " = 0.0f;\n";
writer << " = 0;\n";
// Closing brackets for Initialization loop
for (auto const& i : output_data)
......@@ -533,7 +538,9 @@ void runtime::intelgpu::do_max_pool_backprop_operation(cldnn::topology& topology
{
writer << "uint save_i" << i + 2 << " = 0;\n";
}
writer << "float max_elem = FLT_MIN;\n"
writer << type_name
<< " max_elem = " << get_opencl_type_min_max_value(output_type, true)
<< ";\n"
<< "uint elem_exists = 0;\n";
// Loop over window shape
......@@ -541,7 +548,7 @@ void runtime::intelgpu::do_max_pool_backprop_operation(cldnn::topology& topology
gen_window_loop(writer, output_shape, win_shape, win_stride, pad_below, true);
{
writer << "const float max_local = input0[i0][i1]";
writer << "const " << type_name << " max_local = input0[i0][i1]";
// additional dimensions for input
for (size_t i = 0; i < win_shape.size(); ++i)
{
......@@ -606,6 +613,169 @@ void runtime::intelgpu::do_max_pool_backprop_operation(cldnn::topology& topology
topology.add(op_max_pool_backprop);
}
// Emits a custom OpenCL kernel implementing MaxPool (is_max_pool == true) or
// AvgPool (is_max_pool == false) and adds it to the topology.
//
// The generated kernel parallelizes over the batch (N) and channel (C)
// dimensions via get_global_id(0/1); the remaining output spatial dimensions
// and the pooling window are iterated with generated nested loops.
//
// Parameters:
//   input_name/input_shape   - producer primitive name and full NC... shape
//   output_name/output_shape - result primitive name and full NC... shape
//   output_type              - element type; mapped to an OpenCL scalar type
//   win_shape/win_stride     - pooling window and its movement strides
//   pad_below                - implicit below-padding, subtracted from the index
//   include_padding          - AvgPool only: count padded positions (with
//                              def_val) in the average
//   def_val                  - the op's default value as a literal string,
//                              pasted verbatim into the generated source
//   is_max_pool              - selects max-reduce vs. sum/average codegen
void runtime::intelgpu::do_max_avg_pool_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const Shape& win_shape,
const Shape& win_stride,
const Shape& pad_below,
bool include_padding,
const string& def_val,
bool is_max_pool)
{
const string entry_point_name = "op_pool_" + to_string(is_max_pool) + "_" + output_name;
const string type_name = get_opencl_type_name(output_type);
// Max starts from the most negative float; average starts from the op's
// default value string. NOTE(review): "-FLT_MAX" assumes a float-family
// output type — confirm for integral types.
const string init_accumulator = is_max_pool ? "-FLT_MAX" : def_val;
codegen::CodeWriter writer;
vector<size_t> gws;
// Spatial-only shapes: strip the leading N and C dimensions.
const Shape input_data(input_shape.cbegin() + 2, input_shape.cend());
const Shape output_data(output_shape.cbegin() + 2, output_shape.cend());
// The kernel name and parameters
gen_func_def(writer, entry_point_name, {type_name}, {input_shape}, type_name, output_shape);
writer.block_begin();
{ // Main function body
// Debug header in the generated source describing this instantiation.
writer << "//Window:" << win_shape << " Stride: " << win_stride << "\n"
<< "//padding included:" << include_padding << "\n"
<< "//init value:" << def_val << "\n\n";
// N and C are covered by the global work size (gws), not by loops.
writer << "const uint N_dim = get_global_id(0);/*trip count " << input_shape.at(0)
<< "*/\n";
gws.push_back(output_shape.at(0));
writer << "const uint C_dim = get_global_id(1);/*trip count " << input_shape.at(1)
<< "*/\n";
gws.push_back(output_shape.at(1));
// Loops over output dimensions
size_t var_idx = 0;
for (auto i = output_data.begin(); i != output_data.end(); ++i)
{
writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << *i << "; ++i"
<< var_idx << ")\n";
writer.block_begin();
++var_idx;
}
// One accumulator and a count of contributing elements per output position.
writer << type_name << " accumulator = " << init_accumulator << ";\n"
<< "uint element_count = 0;\n\n";
// Loop over window
writer << "// Over window iterations\n";
var_idx = 0;
for (auto const i : win_shape)
{
writer << "for (uint f" << var_idx << " = 0; f" << var_idx << " < " << i << "; ++f"
<< var_idx << ")\n";
writer.block_begin();
// input_idx = i*stride + f - pad_below; it is unsigned, so positions in
// the below-padding wrap to huge values and fail the "< dim" test below,
// which serves as the only bounds check.
writer << "uint input_idx" << var_idx << " = (i" << var_idx << " * "
<< win_stride.at(var_idx) << " /*win_stride*/"
<< ") + (f" << var_idx << ")"
<< " - " << pad_below.at(var_idx) << " /*pad_below*/;\n";
++var_idx;
}
// Generate conditionals
writer << "if (";
var_idx = 0;
for (auto const& i : input_data)
{
if (var_idx)
{
writer << " && ";
}
writer << "(input_idx" << var_idx << " < " << i << ")";
++var_idx;
}
writer << ")\n";
writer.block_begin();
{
// Output element calculation
if (is_max_pool)
{
writer << "accumulator = max(accumulator, input0[N_dim][C_dim]"
<< access_dims(win_shape, "input_idx") << ");\n";
}
else
{
writer << "accumulator += input0[N_dim][C_dim]"
<< access_dims(win_shape, "input_idx") << ";\n";
}
writer << "++element_count;\n";
}
writer.block_end();
// When padding participates in the average, out-of-bounds window positions
// contribute def_val and are counted in the divisor.
if (include_padding)
{
writer << "else\n";
writer.block_begin();
{
// Output element calculation
writer << "accumulator += " << def_val << ";\n"
<< "++element_count;\n";
}
writer.block_end();
}
// End of conditional generation
// Closing brackets for window loop
for (auto const& i : win_shape)
{
writer.block_end();
}
// Store: average divides by the element count; a window with no valid
// elements falls through to the initial accumulator value.
writer << "\nif (element_count)\n";
writer.block_begin();
{
writer << "output[N_dim][C_dim]" << access_dims(output_data) << " = accumulator";
if (!is_max_pool)
{
writer << " / element_count";
}
writer << ";\n";
}
writer.block_end();
writer << "else\n";
writer.block_begin();
{
writer << "output[N_dim][C_dim]" << access_dims(output_data) << " = "
<< init_accumulator << ";\n";
}
writer.block_end();
// Closing brackets for output dimensions
for (const auto i : output_data)
{
writer.block_end();
}
} // Main function body
writer.block_end();
// Wrap the generated source as a cldnn custom primitive; gws carries the
// N and C trip counts chosen above.
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const cldnn::custom_gpu_primitive op_avg_pool(output_name,
{input_name},
{writer.get_code()},
entry_point_name,
get_kernel_args(1, 1),
"",
layout,
gws);
topology.add(op_avg_pool);
}
void runtime::intelgpu::do_avg_pool_backprop_operation(cldnn::topology& topology,
const string& delta_name,
const Shape& delta_shape,
......@@ -618,6 +788,7 @@ void runtime::intelgpu::do_avg_pool_backprop_operation(cldnn::topology& topology
const bool include_padding)
{
const string entry_point_name = "op_avg_pool_backprop_" + output_name;
const string type_name = get_opencl_type_name(output_type);
codegen::CodeWriter writer;
vector<size_t> gws;
......@@ -627,22 +798,22 @@ void runtime::intelgpu::do_avg_pool_backprop_operation(cldnn::topology& topology
size_t win_elems_size = shape_size<Shape>(win_shape);
// The kernel name and parameters
gen_func_def(writer, entry_point_name, {"float"}, {delta_shape}, "float", output_shape);
gen_func_def(writer, entry_point_name, {type_name}, {delta_shape}, type_name, output_shape);
writer.block_begin();
{
writer << "size_t win_elems_size = " << win_elems_size << ";\n";
writer << "float computed_val = 0.0f;\n";
writer << type_name << " computed_val = 0.0;\n";
// Main loop over delta input array
writer << "const uint i0 = get_global_id(0);";
gws.push_back(delta_shape.at(0));
writer << "// for (uint i0 = 0; i0 < " << delta_shape.at(0) << "; ++i0)\n";
writer << "/*trip count " << delta_shape.at(0) << "*/\n";
writer.block_begin();
{
writer << "const uint i1 = get_global_id(1);";
gws.push_back(delta_shape.at(1));
writer << "// for (uint i1 = 0; i1 < " << delta_shape.at(1) << "; ++i1)\n";
writer << "/*trip count " << delta_shape.at(1) << "*/\n";
writer.block_begin();
{
// Initialization output
......@@ -661,7 +832,7 @@ void runtime::intelgpu::do_avg_pool_backprop_operation(cldnn::topology& topology
{
writer << "[j" << i << "]";
}
writer << " = 0.0f;\n";
writer << " = 0;\n";
// Closing brackets for Initialization loop
for (auto const& i : output_data)
......
......@@ -58,6 +58,19 @@ namespace ngraph
const Shape& win_stride,
const Shape& pad_below);
// Generates a custom OpenCL kernel for MaxPool (is_max_pool == true) or
// AvgPool (is_max_pool == false) and adds it to the topology. Used for the
// cases the generic cldnn pooling primitive does not cover. def_val is the
// op's default value rendered as a string literal for the generated source;
// include_padding (AvgPool only) counts padded positions in the average.
void do_max_avg_pool_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const Shape& win_shape,
const Shape& win_stride,
const Shape& pad_below,
bool include_padding,
const std::string& def_val,
bool is_max_pool);
void do_avg_pool_backprop_operation(cldnn::topology& topology,
const std::string& delta_name,
const Shape& delta_shape,
......
all_2x2x3_eliminate_dims_0_1
avg_pool_2d_2channel_2image_padded_only_above_do_not_include_in_computation
avg_pool_2d_2channel_2image_padded_only_above_include_in_computation
avg_pool_3d_uneven_strided_padded
backwards_batch_norm_training
backwards_dot_scalar_tensor
backwards_dot_tensor_scalar
backwards_dot_tensor_vector
backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_replace_slice
backwards_reverse_sequence_n3_c2_h3
backwards_reverse_sequence_n4d2c3h2w2
......@@ -18,7 +13,6 @@ embedding_lookup_10x1_arbitrary
embedding_lookup_10x1_arbitrary_index_type_int
embedding_lookup_4x5_reverse
generate_mask
max_pool_3d
replace_slice_3d
replace_slice_3d_strided
replace_slice_3d_strided_different_strides
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment