Commit 428f567d authored by shssf, committed by Robert Kimball

IntelGPU backend: Convolution workaround operations completed (#1412)

parent 652840ab
......@@ -765,11 +765,16 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
const CoordinateDiff& pad_below = conv_op->get_padding_below();
const CoordinateDiff& pad_above = conv_op->get_padding_above();
// clDNN fails when the filter size is 1
const Shape filter_data(get_input_shape(op, 1).cbegin() + 2,
get_input_shape(op, 1).cend());
const size_t filter_size = shape_size(filter_data);
// clDNN has quite limited support for the Convolution operation.
// The following checks decide when to fall back to the custom kernel workaround.
if ((win_stride.size() > 2) || (pad_below.size() > 2 || pad_above.size() > 2) ||
(pad_below.at(0) != pad_above.at(0) || pad_below.at(1) != pad_above.at(1)) ||
(win_dilation.size() > 2) ||
(win_dilation.size() > 2) || (filter_size < 2) ||
(data_dilation.size() > 2 || data_dilation.at(0) != 1 || data_dilation.at(1) != 1))
{
do_convolution_operation(topology,
......@@ -899,7 +904,10 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
}
}
instance.ocl_network = make_shared<cldnn::network>(*ocl_engine, topology);
cldnn::build_options network_build_options(cldnn::build_option::optimize_data(true));
instance.ocl_network =
make_shared<cldnn::network>(*ocl_engine, topology, network_build_options);
return true;
}
......
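The checks above gate the clDNN convolution primitive: anything beyond 2D strides/padding/dilation, asymmetric padding, a filter with fewer than two elements, or non-unit data dilation is routed to the custom generated OpenCL kernel instead. A minimal sketch of that decision as a standalone predicate, assuming plain std::vector parameters and a hypothetical helper name (needs_custom_convolution_kernel), could look like this:

#include <cstddef>
#include <vector>

// Hypothetical helper mirroring the conditions in the backend: returns true when
// the custom generated OpenCL kernel must be used instead of the clDNN primitive.
static bool needs_custom_convolution_kernel(const std::vector<std::size_t>& win_stride,
                                            const std::vector<std::ptrdiff_t>& pad_below,
                                            const std::vector<std::ptrdiff_t>& pad_above,
                                            const std::vector<std::size_t>& win_dilation,
                                            const std::vector<std::size_t>& data_dilation,
                                            std::size_t filter_size)
{
    return (win_stride.size() > 2) || (pad_below.size() > 2) || (pad_above.size() > 2) ||
           (pad_below.at(0) != pad_above.at(0)) || (pad_below.at(1) != pad_above.at(1)) ||
           (win_dilation.size() > 2) || (filter_size < 2) || (data_dilation.size() > 2) ||
           (data_dilation.at(0) != 1) || (data_dilation.at(1) != 1);
}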
......@@ -59,34 +59,35 @@ static string array_dim(const Shape& dimentions, const string& var = "i", bool i
// input channel axes for both input data and filters are 1
// output channel axes for filters is 0
// output channel axis for output data is 1
//
// Example (Convolution):
// data[ 2, 1, 3, 5, 8 ]
// filter[ 2, 1, 2, 2, 3 ]
// output[ 2, 2, 2, 4, 6 ]
// which is interpreted as
// data[ batch, data_channel, 3, 5, 8 ]
// filter[ output_channel, data_channel, 2, 2, 3 ]
// output[ batch, output_channel, 2, 4, 6 ]
//
// Example (ConvolutionBackpropFilters):
// data[ 2, 1, 3, 5 ]
// filter[ 2, 2, 2, 4 ]
// output[ 2, 1, 2, 2 ]
// which is interpreted as
// data[ data_channel, batch, 3, 5 ]
// filter[ data_channel, output_channel, 2, 4 ]
// output[ output_channel, batch, 2, 2 ]
//
// Example (ConvolutionBackpropData):
// data[ 2, 2, 2, 4 ]
// filter[ 2, 1, 2, 2 ]
// output[ 2, 1, 3, 5 ]
// pad_below[ 1, 1 ]
// pad_above[ 1, 1 ]
// which is interpreted as
// data[ batch, data_channel, 2, 4 ]
// filter[ data_channel, output_channel, 2, 2 ]
// output[ batch, output_channel, 3, 5 ]
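// In each case the spatial output size follows the usual convolution relation
// (assuming unit data dilation, as in the examples above, and floor division):
//   out = (in + pad_below + pad_above - (filter - 1) * win_dilation - 1) / win_stride + 1
// e.g. for the Convolution example: 3 - 2 + 1 = 2, 5 - 2 + 1 = 4, 8 - 3 + 1 = 6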
void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
......@@ -107,7 +108,6 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
const string& output_order,
bool reverse_filter)
{
const string& default_pad_value = "0.0f";
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const string entry_point_name = "convolution_" + output_name;
const Shape input_data(input_shape.cbegin() + 2, input_shape.cend());
......@@ -148,12 +148,12 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << *i
<< "; ++i" << var_idx << ")\n";
writer.block_begin();
++var_idx;
}
writer << "float result = 0.0f;\n";
writer << "\n// Loop over input_channel\n"
writer << "float result = 0.0f;\n\n"
<< "// Loop over input_channel\n"
<< "for (uint input_channel = 0; input_channel < "
<< input_shape.at(input_channel_axis_data) << "; ++input_channel)\n";
writer.block_begin();
......@@ -168,17 +168,20 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
writer << "for (uint f" << var_idx << " = 0; f" << var_idx << " < " << i
<< "; ++f" << var_idx << ")\n";
writer.block_begin();
writer << "int input_idx" << var_idx << " = (i" << var_idx << " * "
writer << "uint input_idx" << var_idx << " = (i" << var_idx << " * "
<< win_stride.at(var_idx) << " /*win_stride*/"
<< ") + (f" << var_idx << " * " << win_dilation.at(var_idx)
<< " /*win_dilation*/) - " << pad_below.at(var_idx)
<< " /*pad_below*/;\n";
<< " /*win_dilation*/)"
<< " - " << pad_below.at(var_idx) << " /*pad_below*/;\n";
writer << "uint input_idx_data_dilation" << var_idx << " = input_idx"
<< var_idx << " / " << data_dilation.at(var_idx)
<< " /*data_dilation*/;\n";
++var_idx;
}
// Get the input value
writer << "float input_pad = " << default_pad_value << ";\n";
// Generate dilation conditionals
writer << "if (";
var_idx = 0;
......@@ -189,8 +192,9 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
writer << " && ";
}
writer << "(((i" << var_idx << " + f" << var_idx << ") % "
<< data_dilation.at(var_idx) << ") == 0)";
writer << "(((input_idx" << var_idx << ") % "
<< data_dilation.at(var_idx) << " /*data_dilation*/) == 0)";
++var_idx;
}
writer << ") /*data_dilation. If we are in a dilation gap"
......@@ -198,7 +202,9 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
writer.block_begin();
{
// Generate other conditionals
writer << "if (";
writer << "//Since we use unsigned indexes we don't need "
<< "(input_idx_data_dilationX >= 0) extra check\n"
<< "if (";
var_idx = 0;
for (auto const& i : input_data)
{
......@@ -207,15 +213,20 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
writer << " && ";
}
writer << "((input_idx" << var_idx << " >= 0) && (input_idx"
<< var_idx << " < " << i << "))";
writer << "(input_idx_data_dilation" << var_idx << " < " << i
<< ")";
++var_idx;
}
writer << ")\n";
writer.block_begin();
{
writer << "input_pad = " << input_order
<< array_dim(input_data, "input_idx") << ";\n";
writer << "float input_elem = " << input_order
<< array_dim(input_data, "input_idx_data_dilation") << ";\n";
// Output element calculation
writer << "result += input_elem * " << filter_order
<< array_dim(filter_data, "f", reverse_filter) << ";\n";
}
writer.block_end();
//End of other conditional generation
......@@ -223,10 +234,6 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
writer.block_end();
//End of dilation conditional generation
// Output element calculation
writer << "result += input_pad * " << filter_order
<< array_dim(filter_data, "f", reverse_filter) << ";\n";
// Closing brackets for filter loop
for (auto const& i : filter_data)
{
......
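The generated kernel loops over the output coordinates and, for every filter tap, maps back to an input coordinate via the window stride, window dilation and pad_below, skips taps that fall into data-dilation gaps, and relies on unsigned arithmetic so that a conceptually negative index wraps to a huge value and is rejected by the single upper-bound check. A rough one-dimensional C++ analogue of that arithmetic is sketched below; the function name and flat-array signature are illustrative assumptions, not part of the backend.

#include <cstddef>
#include <vector>

// Illustrative 1-D analogue of the per-tap index arithmetic emitted by
// do_convolution_operation. Returns the accumulated value for one output element.
static float convolve_one_output_element(const std::vector<float>& input,
                                         const std::vector<float>& filter,
                                         std::size_t out_index,
                                         std::size_t win_stride,
                                         std::size_t win_dilation,
                                         std::size_t pad_below,
                                         std::size_t data_dilation)
{
    float result = 0.0f;
    for (std::size_t f = 0; f < filter.size(); ++f)
    {
        // Same expression as the generated "input_idx" line; computed in
        // unsigned arithmetic, so a conceptually negative index wraps around.
        const std::size_t input_idx = out_index * win_stride + f * win_dilation - pad_below;

        // Skip taps that land in a data-dilation gap.
        if (input_idx % data_dilation != 0)
        {
            continue;
        }
        const std::size_t input_idx_data_dilation = input_idx / data_dilation;

        // A single upper-bound check suffices: wrapped (negative) indexes are
        // larger than any real input size, mirroring the comment in the kernel.
        if (input_idx_data_dilation < input.size())
        {
            result += input.at(input_idx_data_dilation) * filter.at(f);
        }
    }
    return result;
}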
......@@ -45,22 +45,6 @@ concat_matrix_int64
constant_multi_use
convert_int32_bool
convert_int32_float32
convolution_2d_1item_1o1i_data_dilated
convolution_2d_1item_2o1i_data_dilated
convolution_2d_1item_2o2i_data_dilated
convolution_2d_1item_5o3i_data_dilated
convolution_2d_2item_5o3i_data_dilated
convolution_2d_2items_strided
convolution_2d_2items_strided_padded
convolution_2d_2items_strided_padded_same
convolution_2d_8item_large_5o3i_data_dilated
convolution_2d_8item_large_5o3i_uneven_filter_data_dilated
convolution_2d_8item_large_5o3i_uneven_filter_uneven_data_dilation_data_dilated
convolution_3d_1item_large_5o3i_padded_uneven_filter_uneven_data_dilation_data_dilated
convolution_3d_2item_large_5o3i_padded_strided_uneven_filter_uneven_data_dilation_data_dilated
convolution_3d_2item_large_5o3i_padded_strided_uneven_filter_uneven_data_dilation_filter_dilated_data_dilated
convolution_3d_2item_large_5o3i_uneven_filter_uneven_data_dilation_data_dilated
convolution_outlining
divide_by_zero_int32
dot_matrix_vector_int64
floor
......