Commit 7d6a41f3 authored by shssf's avatar shssf Committed by Robert Kimball

IntelGPU backend: Slice operation (#1304)

parent c38c76a7
......@@ -46,6 +46,7 @@
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/pad.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/sum.hpp"
#include "ngraph/util.hpp"
......@@ -186,6 +187,39 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
do_equal_propagation(topology, input_name, output_name);
}
else if ("Slice" == op->description())
{
arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const Shape& input_shape = op->get_inputs().begin()->get_shape();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type =
op->get_outputs().begin()->get_tensor().get_element_type();
const shared_ptr<op::Slice> elem = static_pointer_cast<op::Slice>(op);
const Coordinate& lower_bounds = elem->get_lower_bounds();
const Coordinate& upper_bounds = elem->get_upper_bounds();
const Strides& strides = elem->get_strides();
if (input_shape.empty() || output_shape.empty() || lower_bounds.empty() ||
upper_bounds.empty() || strides.empty())
{
do_equal_propagation(topology, input_name, output_name);
}
else
{
do_slice_operation(topology,
input_name,
input_shape,
output_name,
output_shape,
output_type,
lower_bounds,
upper_bounds,
strides);
}
}
else if ("Add" == op->description())
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::sum);
......
......@@ -27,6 +27,7 @@
using namespace std;
using namespace ngraph;
// Kernel argument maps handed to cldnn::custom_gpu_primitive:
// one input tensor + one output tensor, and two inputs + one output.
static vector<cldnn_arg> parameters_1inp_1out = {{arg_input, 0}, {arg_output, 0}};
static vector<cldnn_arg> parameters_2inp_1out = {{arg_input, 0}, {arg_input, 1}, {arg_output, 0}};
static string array_dims(const Shape& dimentions)
......@@ -58,16 +59,22 @@ static string access_dims(const Shape& dimentions, const AxisSet& axis = {})
return buffer;
}
static string
access_dims_strided(const Shape& dimentions, const Shape& pad_below, const Shape& pad_interior)
static string access_dims_strided(const Shape& dimentions,
const Shape& pad_below,
const Shape& pad_interior,
bool is_pad_interior)
{
string buffer;
size_t var_idx = 0;
for (auto const& i : dimentions)
{
buffer += "[i" + to_string(var_idx) + " * (" + to_string(pad_interior.at(var_idx)) +
" + 1) + " + to_string(pad_below.at(var_idx)) + "]";
buffer += "[i" + to_string(var_idx) + " * (" + to_string(pad_interior.at(var_idx));
if (is_pad_interior)
{
buffer += " + 1";
}
buffer += ") + " + to_string(pad_below.at(var_idx)) + "]";
++var_idx;
}
......@@ -130,7 +137,7 @@ void runtime::intelgpu::do_pad_operation(cldnn::topology& topology,
++var_idx;
}
writer << "output" << access_dims_strided(input_shape, pad_below, pad_interior)
writer << "output" << access_dims_strided(input_shape, pad_below, pad_interior, true)
<< " = input" << access_dims(input_shape) << ";\n";
// Closing brackets for main Copy loop
......@@ -446,3 +453,53 @@ void runtime::intelgpu::do_dot_operation(cldnn::topology& topology,
layout);
topology.add(op_dot);
}
// Emits an OpenCL kernel implementing ngraph::op::Slice and registers it as a
// custom GPU primitive in the given topology.
//
// The generated kernel copies a strided sub-tensor of "input" into "output":
// for every output coordinate (i0, i1, ...) it reads
//     input[i0 * strides[0] + lower_bounds[0]][i1 * strides[1] + lower_bounds[1]]...
// upper_bounds is implied by output_shape (the loop extents) and is therefore
// not referenced in the generated code.
void runtime::intelgpu::do_slice_operation(cldnn::topology& topology,
                                           const string& input_name,
                                           const Shape& input_shape,
                                           const string& output_name,
                                           const Shape& output_shape,
                                           const element::Type& output_type,
                                           const Coordinate& lower_bounds,
                                           const Coordinate& upper_bounds,
                                           const Strides& strides)
{
    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
    const string entry_point_name = "slice_unknown";
    codegen::CodeWriter writer;

    // NOTE(review): the kernel hard-codes "float" buffers while the layout is
    // built from output_type — non-float element types would be misread.
    // TODO: derive the OpenCL scalar type from output_type.
    writer << "__kernel void " << entry_point_name << "(const __global float input"
           << array_dims(input_shape) << ", __global float output" << array_dims(output_shape)
           << ")\n";

    writer.block_begin();
    {
        // Open one loop per output dimension; the loop extent is the output
        // shape, so upper_bounds never appears explicitly.
        size_t var_idx = 0;
        for (auto const& i : output_shape)
        {
            writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
                   << var_idx << ")\n";
            writer.block_begin();
            ++var_idx;
        }

        // Strided, offset read from the input tensor (is_pad_interior=false:
        // strides are used verbatim, without the pad "+ 1" adjustment).
        writer << "output" << access_dims(output_shape) << " = input"
               << access_dims_strided(input_shape, lower_bounds, strides, false) << ";\n";

        // Close the per-dimension loops (counted loop avoids an unused
        // range-variable warning).
        for (size_t dim = 0; dim < output_shape.size(); ++dim)
        {
            writer.block_end();
        }
    }
    writer.block_end();

    const cldnn::custom_gpu_primitive op_slice(output_name,
                                               {input_name},
                                               {writer.get_code()},
                                               entry_point_name,
                                               parameters_1inp_1out,
                                               "",
                                               layout);
    topology.add(op_slice);
}
......@@ -19,7 +19,9 @@
#include <CPP/topology.hpp>
#include "ngraph/axis_set.hpp"
#include "ngraph/coordinate.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/strides.hpp"
#include "ngraph/type/element_type.hpp"
namespace ngraph
......@@ -46,6 +48,16 @@ namespace ngraph
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type);
// Generates and registers a custom OpenCL kernel performing the Slice
// operation: output = input[lower_bounds : upper_bounds : strides].
void do_slice_operation(cldnn::topology& topology,
                        const std::string& input_name,
                        const Shape& input_shape,
                        const std::string& output_name,
                        const Shape& output_shape,
                        const element::Type& output_type,
                        const Coordinate& lower_bounds,
                        const Coordinate& upper_bounds,
                        const Strides& strides);
}
}
}
......@@ -49,7 +49,6 @@ backwards_maxpool_n4c1h4w4_kh2kw2_sh1sw1
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_minimum
backwards_power
backwards_relu
backwards_replace_slice
backwards_reverse_3d_02
backwards_reverse_sequence_n3_c2_h3
......@@ -65,23 +64,12 @@ backwards_softmax_3d
backwards_softmax_all
backwards_softmax_axis
backwards_softmax_underflow
backwards_sum_m2s
backwards_sum_m2v_0
backwards_sum_m2v_1
backwards_sum_v2s
backwards_tan
batchnorm_bprop_n4c3h2w2
batchnorm_fprop_b1c2h2w2
batchnorm_fprop_b2c2h2w1
batch_norm_one_output
batch_norm_three_outputs
broadcast_algo_3d_stride_1
broadcast_algo_3d_stride_2
broadcast_algo_matrix_stride_1
broadcast_algo_matrix_stride_2
broadcast_algo_matrix_stride_3
broadcast_algo_vector_middle
broadcast_matrix_1
broadcast_vector_rowwise_int64
broadcast_vector_rowwise_reversed
ceiling
......@@ -149,8 +137,6 @@ floor
function_call
greater
greatereq
kahan_sum_3d_to_vector
kahan_sum_to_scalar
less
lesseq
lesseq_bool
......@@ -209,7 +195,6 @@ pad_exterior_2d_0x0
pad_exterior_2d_0x3
pad_exterior_2d_3x0
pad_interior_exterior_4d_2x0x3x2
power
product_3d_eliminate_zero_dim
product_3d_to_matrix_least_sig
product_3d_to_matrix_most_sig
......@@ -238,8 +223,6 @@ reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image
reduce_window_emulating_max_pool_2d_1channel_1image_strided
reduce_window_emulating_max_pool_2d_2channel_2image
relu_2Dbackprop
relu_4Dbackprop
replace_slice_3d
replace_slice_3d_strided
replace_slice_3d_strided_different_strides
......@@ -271,18 +254,9 @@ select_and_scatter_3d_without_overlap
select_and_scatter_without_overlap
select_and_scatter_with_overlap
sigmoid_bprop_n1c1h4
sigmoid_n1c1h2w2
sigmoid_n1c1h4
sign
sin
sinh
slice_3d
slice_3d_strided
slice_3d_strided_different_strides
slice_matrix
slice_matrix_strided
slice_scalar
slice_vector
softmax_all
softmax_axis
softmax_axis_2
......@@ -290,19 +264,9 @@ softmax_axis_3d
softmax_axis_3d_trivial
softmax_underflow
sum_3d_eliminate_zero_dim
sum_3d_to_matrix_least_sig
sum_3d_to_matrix_most_sig
sum_3d_to_scalar
sum_3d_to_vector
sum_5d_to_scalar
sum_matrix_cols_zero
sum_matrix_columns
sum_matrix_rows
sum_matrix_rows_zero
sum_matrix_to_scalar_zero_by_zero
sum_to_scalar
sum_trivial
sum_trivial_5d
sum_vector_zero
tan
tensor_constant_int64
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment