Commit 578f7d8f authored by Sergey Shalnov, committed by Robert Kimball

IntelGPU backend: Convolution support for double and minor code cleanup (#2479)

* IntelGPU backend: Convolution support for double and minor code cleanup

* PR2479: custom kernel selection fix
parent f64b0e0c
@@ -847,7 +847,7 @@ shared_ptr<runtime::Executable>
 {
     do_equal_propagation(topology, get_input_name(op), get_output_name(op));
 }
-else if ((get_output_shape(op).size() <= 4) &&
+else if ((get_output_shape(op).size() <= 4) && (shape_size(get_output_shape(op)) > 0) &&
          ((get_input_type(op) == element::f32) || (get_input_type(op) == element::i32)))
 {
     const size_t shift = 4 - get_output_shape(op).size();
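The new `shape_size(get_output_shape(op)) > 0` clause keeps zero-element tensors off this fast path: in nGraph, `shape_size` is the product of a shape's dimensions, so any zero-length axis makes it zero. A minimal sketch of the amended check, with `shape_size` re-implemented locally for illustration and `fits_reorder_fast_path` a hypothetical name (the real condition additionally requires an f32 or i32 element type, as the context lines show):

```cpp
#include <cstddef>
#include <vector>

using Shape = std::vector<std::size_t>;

// Sketch only: nGraph's shape_size is the product of the dimensions,
// so a shape with any zero-length axis has zero elements overall.
std::size_t shape_size(const Shape& shape)
{
    std::size_t count = 1;
    for (std::size_t dim : shape)
    {
        count *= dim; // one zero dimension zeroes the whole product
    }
    return count;
}

// Mirrors the amended condition: rank at most 4 and a non-empty tensor.
bool fits_reorder_fast_path(const Shape& output_shape)
{
    return (output_shape.size() <= 4) && (shape_size(output_shape) > 0);
}
```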
@@ -1434,7 +1434,8 @@ shared_ptr<runtime::Executable>
 // following are the checks to go with workaround
 if ((win_stride.size() > 2) || (pad_below.size() > 2) || (pad_above.size() > 2) ||
     (win_dilation.size() > 2) || (data_dilation.size() > 2) ||
-    (data_dilation.at(0) != 1) || (data_dilation.at(1) != 1))
+    (data_dilation.at(0) != 1) || (data_dilation.at(1) != 1) ||
+    (get_output_type(op) != element::f32))
 {
     do_convolution_operation(topology,
                              get_input_name(op, 0),
@@ -1509,7 +1510,7 @@ shared_ptr<runtime::Executable>
 if ((win_stride.size() > 2) || (win_stride.at(0) != 1) || (win_stride.at(1) != 1) ||
     (pad_below.size() > 2) || (pad_above.size() > 2) || (data_dilation.size() > 2) ||
     (data_dilation.at(0) != 1) || (data_dilation.at(1) != 1) ||
-    (win_dilation.size() > 2))
+    (win_dilation.size() > 2) || (get_output_type(op) != element::f32))
 {
     do_convolution_operation(topology,
                              get_input_name(op, 0),
@@ -1608,7 +1609,8 @@ shared_ptr<runtime::Executable>
 if ((win_stride.size() > 2) || (win_stride.at(0) != 1) || (win_stride.at(1) != 1) ||
     (pad_below.size() > 2) || (pad_above.size() > 2) || (data_dilation.size() > 2) ||
     (data_dilation.at(0) != 1) || (data_dilation.at(1) != 1) ||
-    (win_dilation.size() > 2) || (win_dilation.at(0) != 1) || (win_dilation.at(1) != 1))
+    (win_dilation.size() > 2) || (win_dilation.at(0) != 1) ||
+    (win_dilation.at(1) != 1) || (get_output_type(op) != element::f32))
 {
     do_convolution_operation(topology,
                              get_input_name(op, 1),
...
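All three hunks above add the same `get_output_type(op) != element::f32` clause: the clDNN convolution primitive path handles only f32, so any other element type (now including double) is routed to the generated OpenCL kernel. A hedged distillation of the first of the three conditions (the other two add analogous unit-stride/unit-dilation checks); `ConvParams`, `ElemType`, and `must_use_custom_kernel` are illustrative names, not the backend's API:

```cpp
#include <cstddef>
#include <vector>

// Illustrative stand-ins for the nGraph/clDNN types used above.
enum class ElemType { f32, f64, i32 };

struct ConvParams
{
    std::vector<std::size_t> win_stride;
    std::vector<std::size_t> pad_below;
    std::vector<std::size_t> pad_above;
    std::vector<std::size_t> win_dilation;
    std::vector<std::size_t> data_dilation;
};

// True when the op must use the generated OpenCL kernel instead of the
// clDNN primitive: more than two spatial dimensions, non-unit data
// dilation, or (after this commit) any element type other than f32.
// Like the original checks, this assumes at least two spatial dims.
bool must_use_custom_kernel(const ConvParams& p, ElemType output_type)
{
    return (p.win_stride.size() > 2) || (p.pad_below.size() > 2) ||
           (p.pad_above.size() > 2) || (p.win_dilation.size() > 2) ||
           (p.data_dilation.size() > 2) || (p.data_dilation.at(0) != 1) ||
           (p.data_dilation.at(1) != 1) || (output_type != ElemType::f32);
}
```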
@@ -109,6 +109,7 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
                                                  bool reverse_filter)
 {
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
+    const string kernel_type_name = get_opencl_type_name(output_type);
     const string entry_point_name = "convolution_" + output_name;
     const Shape input_data(input_shape.cbegin() + 2, input_shape.cend());
     const Shape filter_data(filter_shape.cbegin() + 2, filter_shape.cend());
@@ -116,9 +117,10 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
     codegen::CodeWriter writer;
     vector<size_t> gws;

-    writer << "__kernel void " << entry_point_name << "(const __global float input"
-           << array_dims(input_shape) << ", const __global float filter" << array_dims(filter_shape)
-           << ", __global float output" << array_dims(output_shape) << ")\n";
+    writer << "__kernel void " << entry_point_name << "(const __global " << kernel_type_name
+           << " input" << array_dims(input_shape) << ", const __global " << kernel_type_name
+           << " filter" << array_dims(filter_shape) << ", __global " << kernel_type_name
+           << " output" << array_dims(output_shape) << ")\n";
     writer.block_begin();
     { // Main function body
@@ -152,7 +154,7 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
         ++var_idx;
     }

-    writer << "float result = 0.0f;\n\n"
+    writer << kernel_type_name << " result = 0.0;\n\n"
            << "// Loop over input_channel\n"
            << "for (uint input_channel = 0; input_channel < "
            << input_shape.at(input_channel_axis_data) << "; ++input_channel)\n";
@@ -221,7 +223,7 @@ void runtime::intelgpu::do_convolution_operation(cldnn::topology& topology,
         writer << ")\n";
         writer.block_begin();
         {
-            writer << "float input_elem = " << input_order
+            writer << kernel_type_name << " input_elem = " << input_order
                    << array_dim(input_data, "input_idx_data_dilation") << ";\n";

             // Output element calculation
...
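The net effect of this file's hunks: the generated convolution kernel's element type is no longer hard-coded to `float` but taken from `get_opencl_type_name(output_type)`, and the accumulator initializer drops the `f` suffix (`0.0` is a valid initializer for both float and double). A hypothetical standalone sketch of the resulting signature, with `[N]`/`[K]`/`[M]` standing in for the per-dimension bounds that `array_dims()` expands:

```cpp
#include <iostream>
#include <string>

// Hypothetical helper mirroring the string assembly above; not the
// backend's actual code.
std::string make_kernel_signature(const std::string& type_name,
                                  const std::string& entry_point)
{
    return "__kernel void " + entry_point + "(const __global " + type_name +
           " input[N], const __global " + type_name + " filter[K], __global " +
           type_name + " output[M])";
}

int main()
{
    // With element::f64 the backend now emits a double-typed kernel.
    std::cout << make_kernel_signature("double", "convolution_output0") << "\n";
}
```

One caveat worth noting: OpenCL kernels that use double also require the cl_khr_fp64 extension to be enabled; presumably `get_opencl_type_name` or the surrounding kernel-compilation path handles that outside this diff.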
@@ -105,149 +105,3 @@ void runtime::intelgpu::do_all_any_op(cldnn::topology& topology,
                                       {1});
     topology.add(op_all_any);
 }
-
-static void get_custom_func_name(codegen::CodeWriter& writer,
-                                 vector<shared_ptr<Function>>& func,
-                                 const string& func_name,
-                                 const string& type_name)
-{
-    if (func.size() != 1)
-    {
-        throw invalid_argument("IntelGPU Custom_Call operation. Custom function number: " +
-                               to_string(func.size()) + " expected: 1");
-    }
-
-    writer << type_name << " " << func_name << "(const " << type_name << " input0, const "
-           << type_name << " input1)\n";
-    writer.block_begin();
-    {
-        for (shared_ptr<Node> op : func.at(0)->get_ordered_ops())
-        {
-            if ((op->description() != "Parameter") && (op->description() != "Result"))
-            {
-                if (op->description() == "Multiply")
-                {
-                    writer << "return input0 * input1;\n";
-                }
-                else if (op->description() == "Add")
-                {
-                    writer << "return input0 + input1;\n";
-                }
-                else if (op->description() == "Maximum")
-                {
-                    writer << "return max(input0, input1);\n";
-                }
-                else if (op->description() == "Minimum")
-                {
-                    writer << "return min(input0, input1);\n";
-                }
-                else if (op->description() == "And")
-                {
-                    writer << "return input0 && input1;\n";
-                }
-                else if (op->description() == "Or")
-                {
-                    writer << "return input0 || input1;\n";
-                }
-                else if (op->description() == "Equal")
-                {
-                    writer << "return input0 == input1;\n";
-                }
-                else if (op->description() == "NotEqual")
-                {
-                    writer << "return input0 != input1;\n";
-                }
-                else
-                {
-                    writer << "UNIMPLEMENTED_FUNCTION_INTELGPU: " << op->description() << "\n";
-                }
-            }
-        }
-    } // End of function bracket
-    writer.block_end();
-}
-
-void runtime::intelgpu::do_reduce_func_call(cldnn::topology& topology,
-                                            const string& input0_name,
-                                            const Shape& input0_shape,
-                                            const string& input1_name,
-                                            const Shape& input1_shape,
-                                            const string& output_name,
-                                            const Shape& output_shape,
-                                            const element::Type& output_type,
-                                            const AxisSet& axis,
-                                            vector<shared_ptr<Function>>& func)
-{
-    const string entry_point_name = "reduce_func_call_" + output_name;
-    const string aux_point_name = "aux_call_" + output_name;
-    const string kernel_type_name = get_opencl_type_name(output_type);
-    const size_t input_size = shape_size<Shape>(input0_shape);
-    codegen::CodeWriter writer;
-
-    get_custom_func_name(writer, func, aux_point_name, kernel_type_name);
-
-    // The kernel name and parameters
-    gen_func_def(writer,
-                 entry_point_name,
-                 {2, kernel_type_name},
-                 {input0_shape, {1}},
-                 kernel_type_name,
-                 output_shape);
-
-    writer.block_begin();
-    {
-        // Initialization loop
-        size_t var_idx = 0;
-        for (auto const& i : output_shape)
-        {
-            writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
-                   << var_idx << ")\n";
-            writer.block_begin();
-            ++var_idx;
-        }
-
-        writer << "output" << access_dims(output_shape) << " = input1" << access_dims(input1_shape)
-               << ";\n";
-
-        // Closing brackets for initialization loop
-        for (auto const& i : output_shape)
-        {
-            writer.block_end();
-        }
-
-        if (input_size && !input0_shape.empty())
-        {
-            // Main operation loop
-            var_idx = 0;
-            for (auto const& i : input0_shape)
-            {
-                writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
-                       << var_idx << ")\n";
-                writer.block_begin();
-                ++var_idx;
-            }
-
-            writer << "output" << access_dims(input0_shape, "i", axis) << " = " << aux_point_name
-                   << "(output" << access_dims(input0_shape, "i", axis) << ", input0"
-                   << access_dims(input0_shape) << ");\n";
-
-            // Closing brackets for loop
-            for (auto const& i : input0_shape)
-            {
-                writer.block_end();
-            }
-        }
-    } // End of function bracket
-    writer.block_end();
-
-    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
-    const cldnn::custom_gpu_primitive op_product(output_name,
-                                                 {input0_name, input1_name},
-                                                 {writer.get_code()},
-                                                 entry_point_name,
-                                                 get_kernel_args(2, 1),
-                                                 "",
-                                                 layout,
-                                                 {1});
-    topology.add(op_product);
-}
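This deletion looks like the "minor code cleanup" half of the commit title: `do_reduce_func_call` and its helper `get_custom_func_name` are removed wholesale, along with the header declaration below. For reference, the deleted generator produced a two-phase reduction kernel: an initialization pass copying the init value (`input1`) into every output element, then a pass over `input0` folding each element into its output slot through the synthesized `aux_call_...` combiner. A hedged reconstruction of the kind of source it emitted, for an Add combiner and made-up shapes (reduce a [4][8] float input over axis 1):

```cpp
// Illustrative only: the shapes, names, and combiner below are invented
// to show the pattern, not captured from a real build.
const char* generated_kernel = R"(
float aux_call_output0(const float input0, const float input1)
{
    return input0 + input1;
}

__kernel void reduce_func_call_output0(const __global float input0[4][8],
                                       const __global float input1[1],
                                       __global float output[4])
{
    // Initialization loop: seed every output element with the init value.
    for (uint i0 = 0; i0 < 4; ++i0)
    {
        output[i0] = input1[0];
    }

    // Main operation loop: fold each input element into its output slot.
    for (uint i0 = 0; i0 < 4; ++i0)
    {
        for (uint i1 = 0; i1 < 8; ++i1)
        {
            output[i0] = aux_call_output0(output[i0], input0[i0][i1]);
        }
    }
}
)";
```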
@@ -19,7 +19,6 @@
 #include <CPP/topology.hpp>

 #include "ngraph/axis_set.hpp"
-#include "ngraph/function.hpp"
 #include "ngraph/shape.hpp"

 namespace ngraph
@@ -37,17 +36,6 @@ namespace ngraph
                                const AxisSet& axis,
                                const std::string& operation,
                                const std::string& init_val);
-
-            void do_reduce_func_call(cldnn::topology& topology,
-                                     const std::string& input0_name,
-                                     const Shape& input0_shape,
-                                     const std::string& input1_name,
-                                     const Shape& input1_shape,
-                                     const std::string& output_name,
-                                     const Shape& output_shape,
-                                     const element::Type& output_type,
-                                     const AxisSet& axis,
-                                     std::vector<std::shared_ptr<Function>>& func);
         }
     }
 }