Commit b7ae2c14 authored by shssf's avatar shssf Committed by Robert Kimball

IntelGPU backend: Pad operation optimization (#1542)

parent 4d64be33
......@@ -764,7 +764,6 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
arguments_check(op, 2, 1);
const shared_ptr<op::Pad> pad = static_pointer_cast<op::Pad>(op);
const Shape& pad_above = pad->get_padding_above();
const Shape& pad_below = pad->get_padding_below();
const Shape& pad_interior = pad->get_padding_interior();
......
......@@ -201,7 +201,8 @@ void runtime::intelgpu::do_pad_operation(cldnn::topology& topology,
const Shape& pad_below,
const Shape& pad_interior)
{
const string entry_point_name = "op_pad_kernel_" + output_name;
const string entry_point_name = "op_pad_" + output_name;
const size_t cldnn_gws_lim = 3;
codegen::CodeWriter writer;
vector<size_t> gws;
......@@ -210,30 +211,65 @@ void runtime::intelgpu::do_pad_operation(cldnn::topology& topology,
writer.block_begin();
{
// Loop for Broadcast scalar over full output tensor
gws = generate_loops(writer, output_shape, true);
writer << "output" << access_dims(output_shape) << " = input1[0];\n";
// Closing brackets for Broadcast loop
generate_loops(writer, output_shape, false);
// Loop for Copy input matrix into output matrix with padding.
// Padding include "pad_below" and "pad_interior" according nGraph documentation
size_t var_idx = 0;
for (auto const& i : input_shape)
for (auto const& i : output_shape)
{
if (var_idx < cldnn_gws_lim)
{
writer << "\nconst uint i" << var_idx << " = get_global_id(" << var_idx
<< "); /*trip count " << i << "*/\n";
gws.push_back(i);
}
else
{
writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
<< var_idx << ")\n";
}
writer.block_begin();
writer << "uint input_idx" << var_idx << " = i" << var_idx << " - "
<< pad_below.at(var_idx) << " /*pad_below*/;\n";
writer << "uint input_idx_interior" << var_idx << " = input_idx" << var_idx << " / ("
<< pad_interior.at(var_idx) << " /*pad_interior*/ + 1);\n";
++var_idx;
}
writer << "output" << access_dims_strided(input_shape, pad_below, pad_interior, true)
<< " = input0" << access_dims(input_shape) << ";\n";
// Generate padding conditionals
writer << "\n// Since we use unsigned indexes we don't need "
<< "(input_idxX >= 0) extra check\n"
<< "if (";
var_idx = 0;
for (auto const& i : input_shape)
{
if (var_idx)
{
writer << " && ";
}
writer << "(input_idx_interior" << var_idx << " < " << i << ") && ((input_idx"
<< var_idx << " % (" << pad_interior.at(var_idx) << " + 1)) == 0)";
++var_idx;
}
writer << ")\n";
writer.block_begin();
{
writer << "output" << access_dims(output_shape) << " = input0"
<< access_dims(input_shape, "input_idx_interior") << ";\n";
}
writer.block_end();
writer << "else\n";
writer.block_begin();
{
writer << "output" << access_dims(output_shape) << " = input1[0];\n";
} // End of padding conditionals
writer.block_end();
// Closing brackets for main Copy loop
for (auto const& i : input_shape)
for (auto const& i : output_shape)
{
writer.block_end();
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment