Commit 176e105b authored by shssf, committed by Scott Cyphers

IntelGPU backend: Broadcast operation optimization (#1450)

* IntelGPU backend: Broadcast operation optimization

* PR1450 style check fixed
parent 0593746d
@@ -525,16 +525,6 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
         {
             do_equal_propagation(topology, get_input_name(op), get_output_name(op));
         }
-        else if (get_input_shape(op).empty())
-        {
-            do_bcast_sum_operation_scalar(topology,
-                                          get_input_name(op),
-                                          get_input_shape(op),
-                                          get_output_name(op),
-                                          get_output_shape(op),
-                                          get_output_type(op),
-                                          true);
-        }
         else
         {
             do_bcast_sum_operation(topology,
@@ -558,16 +548,6 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
         {
             do_equal_propagation(topology, get_input_name(op), get_output_name(op));
         }
-        else if (get_output_shape(op).empty())
-        {
-            do_bcast_sum_operation_scalar(topology,
-                                          get_input_name(op),
-                                          get_input_shape(op),
-                                          get_output_name(op),
-                                          get_output_shape(op),
-                                          get_output_type(op),
-                                          false);
-        }
         else
         {
             do_bcast_sum_operation(topology,
...
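Note on the two branches deleted above: they special-cased rank-0 tensors (an empty Shape) by routing them to do_bcast_sum_operation_scalar. They can go because the generic do_bcast_sum_operation now degrades gracefully for empty shapes: zero dimensions means zero generated loops and a single work item. Below is a minimal standalone sketch of why the degenerate case is safe, using simplified stand-ins for nGraph's Shape and shape_size (assumptions, not the backend's own code):

#include <cstddef>
#include <iostream>
#include <vector>

using Shape = std::vector<std::size_t>;

// Element count of a tensor: the product of its dimensions. For a
// rank-0 (scalar) tensor the vector is empty and the empty product is 1.
std::size_t shape_size(const Shape& shape)
{
    std::size_t count = 1;
    for (const std::size_t dim : shape)
    {
        count *= dim; // the loop body never runs for a scalar
    }
    return count;
}

int main()
{
    std::cout << shape_size(Shape{}) << "\n";     // prints 1: one element
    std::cout << shape_size(Shape{4, 3}) << "\n"; // prints 12
}

The same reasoning applies to loop generation: iterating over an empty shape emits no loops at all, so the kernel body collapses to a single scalar assignment.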
@@ -28,54 +28,6 @@
 using namespace std;
 using namespace ngraph;
-void runtime::intelgpu::do_bcast_sum_operation_scalar(cldnn::topology& topology,
-                                                      const string& input_name,
-                                                      const Shape& input_shape,
-                                                      const string& output_name,
-                                                      const Shape& output_shape,
-                                                      const element::Type& output_type,
-                                                      bool is_bcast)
-{
-    string function_name = is_bcast ? "broadcast_scalar" : "sum_scalar";
-    function_name += output_name;
-    const size_t input_count =
-        is_bcast ? shape_size<Shape>(output_shape) : shape_size<Shape>(input_shape);
-    codegen::CodeWriter writer;
-    writer << "__kernel void " << function_name
-           << "(const __global float* input, __global float* output)\n";
-    writer.block_begin();
-    {
-        writer << "float sum = 0.f;\n"
-               << "for (uint i = 0; i < COUNT; ++i)\n";
-        writer.block_begin();
-        if (is_bcast)
-        {
-            writer << "output[i] = input[0];\n";
-            writer.block_end();
-        }
-        else
-        {
-            writer << "sum += input[i];\n";
-            writer.block_end();
-            writer << "output[0] = sum;\n";
-        }
-    } // End of function bracket
-    writer.block_end();
-    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
-    const cldnn::custom_gpu_primitive op_scalar(output_name,
-                                                {input_name},
-                                                {writer.get_code()},
-                                                function_name,
-                                                get_kernel_args(1, 1),
-                                                string("-DCOUNT=" + to_string(input_count)),
-                                                layout,
-                                                {1});
-    topology.add(op_scalar);
-}
 void runtime::intelgpu::do_bcast_sum_operation(cldnn::topology& topology,
                                                const string& input_name,
                                                const Shape& input_shape,
@@ -85,37 +37,29 @@ void runtime::intelgpu::do_bcast_sum_operation(cldnn::topology& topology,
                                                const AxisSet& axis,
                                                bool is_bcast)
 {
-    string function_name = is_bcast ? "broadcast" : "sum";
+    string function_name = is_bcast ? "broadcast_" : "sum_";
     function_name += output_name;
     codegen::CodeWriter writer;
-    writer << "__kernel void " << function_name << "(const __global float input"
-           << array_dims(input_shape) << ", __global float output" << array_dims(output_shape)
-           << ")\n";
+    vector<size_t> gws;
+
+    runtime::intelgpu::gen_func_def(
+        writer, function_name, {"float"}, {input_shape}, "float", output_shape);
     writer.block_begin();
     {
         if (is_bcast)
         {
-            size_t var_idx = 0;
-            for (auto const& i : output_shape)
-            {
-                writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
-                       << var_idx << ")\n";
-                writer.block_begin();
-                ++var_idx;
-            }
-            writer << "output" << access_dims(output_shape) << " = input"
+            // Broadcast loops
+            gws = runtime::intelgpu::generate_loops(writer, output_shape, true);
+
+            writer << "output" << access_dims(output_shape) << " = input0"
                    << access_dims(output_shape, axis) << ";\n";
             // Closing brackets for Broadcast loop
-            for (auto const& i : output_shape)
-            {
-                writer.block_end();
-            }
+            runtime::intelgpu::generate_loops(writer, output_shape, false);
         }
         else
         {
+            gws = {1}; // non parallel version
             // Initialize destination output by zeroes
             size_t var_idx = 0;
             for (auto const& i : output_shape)
@@ -144,7 +88,7 @@ void runtime::intelgpu::do_bcast_sum_operation(cldnn::topology& topology,
                 ++var_idx;
             }
-            writer << "output" << access_dims(input_shape, axis) << " += input"
+            writer << "output" << access_dims(input_shape, axis) << " += input0"
                    << access_dims(input_shape) << ";\n";
             // Closing brackets for Sum loop
@@ -164,7 +108,7 @@ void runtime::intelgpu::do_bcast_sum_operation(cldnn::topology& topology,
                                                    get_kernel_args(1, 1),
                                                    "",
                                                    layout,
-                                                   {1});
+                                                   gws);
     topology.add(op_bcast_sum);
 }
...
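The rewritten body above delegates kernel-signature emission to gen_func_def and loop emission to generate_loops, which also returns the global work size (gws) that now replaces the fixed {1} handed to cldnn::custom_gpu_primitive; that is what lets the broadcast kernel run one work item per output element instead of a single serial thread. A standalone sketch of what such a loop generator plausibly looks like; the name generate_loops_sketch, the 3-dimension NDRange cutoff, and the exact emitted text are assumptions, not the backend's verbatim implementation:

#include <cstddef>
#include <iostream>
#include <sstream>
#include <vector>

using Shape = std::vector<std::size_t>;

// Opens the iteration space over `shape` when `begin` is true and returns
// the global work size; emits any matching closing braces when false.
// Assumption: like common OpenCL generators, the leading dimensions (up to
// the 3-dim NDRange limit) map onto get_global_id() work items, and any
// remaining dimensions become plain loops.
std::vector<std::size_t> generate_loops_sketch(std::ostringstream& out,
                                               const Shape& shape,
                                               bool begin)
{
    const std::size_t gws_limit = 3; // an OpenCL NDRange has at most 3 dims
    std::vector<std::size_t> gws;
    std::size_t var_idx = 0;
    for (const std::size_t dim : shape)
    {
        if (var_idx < gws_limit)
        {
            if (begin)
            {
                out << "const unsigned i" << var_idx << " = get_global_id("
                    << var_idx << ");\n";
                gws.push_back(dim); // parallel dim: no closing brace needed
            }
        }
        else if (begin)
        {
            out << "for (uint i" << var_idx << " = 0; i" << var_idx << " < "
                << dim << "; ++i" << var_idx << ")\n{\n";
        }
        else
        {
            out << "}\n"; // close a loop opened in the begin pass
        }
        ++var_idx;
    }
    if (begin && gws.empty())
    {
        gws.push_back(1); // rank-0 (scalar) case: a single work item
    }
    return gws;
}

int main()
{
    std::ostringstream body;
    const Shape output_shape{4, 3};
    const auto gws = generate_loops_sketch(body, output_shape, true);
    body << "output[i0][i1] = input0[i1];\n";
    generate_loops_sketch(body, output_shape, false);
    std::cout << body.str() << "// gws = {" << gws[0] << ", " << gws[1] << "}\n";
}

For output_shape {4, 3} this prints the two get_global_id lines plus the assignment and reports gws = {4, 3}, i.e. one work item per output element; for an empty shape it prints no loops and falls back to gws = {1}, which is why the scalar special case above could be deleted.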
@@ -27,8 +27,9 @@ namespace ngraph
 {
     namespace intelgpu
     {
-        // This implements Broadcast and Sum nGraph operations
-        // in case of input_shape is not empty
+        // This implements Broadcast and Sum nGraph operations.
+        // input_shape (bcast) or output_shape (sum) can be empty.
+        // If the shape is empty it means scalar
         void do_bcast_sum_operation(cldnn::topology& topology,
                                     const std::string& input_name,
                                     const Shape& input_shape,
@@ -38,16 +39,6 @@ namespace ngraph
                                     const AxisSet& axis,
                                     bool is_bcast);
-        // This implements Broadcast and Sum nGraph operations
-        // in case of input_shape is empty
-        void do_bcast_sum_operation_scalar(cldnn::topology& topology,
-                                           const std::string& input_name,
-                                           const Shape& input_shape,
-                                           const std::string& output_name,
-                                           const Shape& output_shape,
-                                           const element::Type& output_type,
-                                           bool is_bcast);
         // This implements Min and Max operations depends on is_min parameter
         void do_max_min_operation(cldnn::topology& topology,
                                   const std::string& input_name,
...
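To make the before/after concrete, here is a hedged illustration of the OpenCL source the two paths would emit for a broadcast. The "before" kernel follows the deleted do_bcast_sum_operation_scalar body (single work item, gws = {1}, COUNT supplied via -DCOUNT=<n>); the "after" kernel is a plausible rendering of what the unified gen_func_def/generate_loops path now generates for a {3} -> {4, 3} broadcast along axis 0. Kernel names, parameter spelling, and the work-item mapping are assumptions; the real generator appends the output primitive's name to "broadcast_scalar" or "broadcast_".

#include <iostream>

int main()
{
    // Before (from the deleted generator): gws = {1}, so one work item
    // walks every output element itself. The unused 'sum' mirrors the
    // deleted code, which declared it for both broadcast and sum modes.
    const char* scalar_kernel = R"(
__kernel void broadcast_scalar0(const __global float* input,
                                __global float* output)
{
    float sum = 0.f;
    for (uint i = 0; i < COUNT; ++i)
    {
        output[i] = input[0];
    }
})";

    // After (plausible output of the unified path for a {3} -> {4, 3}
    // broadcast along axis 0): gws = {4, 3}, one work item per element.
    const char* parallel_kernel = R"(
__kernel void broadcast_0(const __global float input0[3],
                          __global float output[4][3])
{
    const unsigned i0 = get_global_id(0);
    const unsigned i1 = get_global_id(1);
    output[i0][i1] = input0[i1];
})";

    std::cout << scalar_kernel << "\n" << parallel_kernel << "\n";
}

The optimization in this commit is the shift from the first form to the second: the broadcast work is spread across the NDRange rather than serialized in one thread, and the scalar case rides along as the degenerate empty-shape instance of the same generator.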