Commit 59a2d4dd authored by shssf's avatar shssf Committed by Scott Cyphers

IntelGPU backend: Tests updated. Code refactored. No algorithms changed. (#1362)

* IntelGPU backend: Tests updated. Code refactored. No algorithms changed.

* PR1362. debug code removed
parent b8de3b7d
...@@ -70,13 +70,42 @@ static void arguments_check(const shared_ptr<Node>& op, size_t input, size_t out ...@@ -70,13 +70,42 @@ static void arguments_check(const shared_ptr<Node>& op, size_t input, size_t out
} }
} }
// Returns the name of the tensor feeding input `num` of the given node.
static const string& get_input_name(const shared_ptr<Node>& op, size_t num = 0)
{
    const auto& input_tensor = op->get_inputs().at(num).get_tensor();
    return input_tensor.get_name();
}
// Returns the name of the tensor produced at output `num` of the given node.
static const string& get_output_name(const shared_ptr<Node>& op, size_t num = 0)
{
    const auto& output_tensor = op->get_outputs().at(num).get_tensor();
    return output_tensor.get_name();
}
// Returns the shape of the node's input at position `num`.
static const Shape& get_input_shape(const shared_ptr<Node>& op, size_t num = 0)
{
    const auto& node_input = op->get_inputs().at(num);
    return node_input.get_shape();
}
// Returns the shape of the node's output at position `num`.
static const Shape& get_output_shape(const shared_ptr<Node>& op, size_t num = 0)
{
    const auto& node_output = op->get_outputs().at(num);
    return node_output.get_shape();
}
// Returns the element type of the tensor feeding input `num` of the given node.
static const element::Type& get_input_type(const shared_ptr<Node>& op, size_t num = 0)
{
    const auto& input_tensor = op->get_inputs().at(num).get_tensor();
    return input_tensor.get_element_type();
}
// Returns the element type of the tensor produced at output `num` of the given node.
static const element::Type& get_output_type(const shared_ptr<Node>& op, size_t num = 0)
{
    const auto& output_tensor = op->get_outputs().at(num).get_tensor();
    return output_tensor.get_element_type();
}
static void argument_type_check(const element::Type& type) static void argument_type_check(const element::Type& type)
{ {
if (type != element::f32 && type != element::boolean) if (type != element::f32 && type != element::boolean)
{ {
ostringstream os; throw invalid_argument("Kernel data type \"" + type.c_type_string() +
os << "Kernel data type " << type << " is not supported"; "\" is not supported.");
throw invalid_argument(os.str());
} }
} }
...@@ -86,16 +115,8 @@ static void do_eltwise_operation(cldnn::topology& topology, ...@@ -86,16 +115,8 @@ static void do_eltwise_operation(cldnn::topology& topology,
{ {
arguments_check(op, 2, 1); arguments_check(op, 2, 1);
vector<cldnn::primitive_id> op_add_inputs; const cldnn::eltwise op_add(
for (const descriptor::Input& op_input : op->get_inputs()) get_output_name(op), {get_input_name(op, 0), get_input_name(op, 1)}, mode);
{
const string& element_name = op_input.get_tensor().get_name();
op_add_inputs.push_back(element_name);
}
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const cldnn::eltwise op_add(output_name, op_add_inputs, mode);
topology.add(op_add); topology.add(op_add);
} }
...@@ -106,10 +127,7 @@ static void do_unary_operation(cldnn::topology& topology, ...@@ -106,10 +127,7 @@ static void do_unary_operation(cldnn::topology& topology,
{ {
arguments_check(op, 1, 1); arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name(); const cldnn::activation cldnn_unary(get_output_name(op), get_input_name(op), mode, param);
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const cldnn::activation cldnn_unary(output_name, input_name, mode, param);
topology.add(cldnn_unary); topology.add(cldnn_unary);
} }
...@@ -123,11 +141,8 @@ static void do_pooling_operation(cldnn::topology& topology, ...@@ -123,11 +141,8 @@ static void do_pooling_operation(cldnn::topology& topology,
{ {
arguments_check(op, 1, 1); arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& out_shape = op->get_outputs().begin()->get_shape();
const cldnn::tensor output_size = const cldnn::tensor output_size =
runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(out_shape); runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(get_output_shape(op));
const cldnn::tensor input_offset = const cldnn::tensor input_offset =
runtime::intelgpu::IntelGPULayout::create_cldnn_offset(pad_below); runtime::intelgpu::IntelGPULayout::create_cldnn_offset(pad_below);
...@@ -136,7 +151,7 @@ static void do_pooling_operation(cldnn::topology& topology, ...@@ -136,7 +151,7 @@ static void do_pooling_operation(cldnn::topology& topology,
runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(pool_strides); runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(pool_strides);
const cldnn::pooling cldnn_pooling( const cldnn::pooling cldnn_pooling(
output_name, input_name, mode, size, stride, input_offset, output_size); get_output_name(op), get_input_name(op), mode, size, stride, input_offset, output_size);
topology.add(cldnn_pooling); topology.add(cldnn_pooling);
} }
...@@ -145,31 +160,19 @@ static void do_logical_operation(cldnn::topology& topology, ...@@ -145,31 +160,19 @@ static void do_logical_operation(cldnn::topology& topology,
const string& operation) const string& operation)
{ {
arguments_check(op, 2, 1); arguments_check(op, 2, 1);
argument_type_check(get_input_type(op, 0));
const string& inputA_name = op->get_inputs().at(0).get_tensor().get_name(); argument_type_check(get_input_type(op, 1));
const Shape& inputA_shape = op->get_inputs().at(0).get_shape();
const string& inputA_type =
op->get_inputs().at(0).get_tensor().get_element_type().c_type_string();
argument_type_check(op->get_inputs().at(0).get_tensor().get_element_type());
const string& inputB_name = op->get_inputs().at(1).get_tensor().get_name();
const Shape& inputB_shape = op->get_inputs().at(1).get_shape();
const string& inputB_type =
op->get_inputs().at(1).get_tensor().get_element_type().c_type_string();
argument_type_check(op->get_inputs().at(1).get_tensor().get_element_type());
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type = op->get_outputs().begin()->get_tensor().get_element_type();
runtime::intelgpu::do_logic_kernel(topology, runtime::intelgpu::do_logic_kernel(topology,
inputA_name, get_input_name(op, 0),
inputA_shape, get_input_shape(op, 0),
inputA_type, get_input_type(op, 0).c_type_string(),
inputB_name, get_input_name(op, 1),
inputB_shape, get_input_shape(op, 1),
inputB_type, get_input_type(op, 1).c_type_string(),
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type, get_output_type(op),
operation); operation);
} }
...@@ -246,10 +249,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -246,10 +249,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 1, 1); arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name(); do_equal_propagation(topology, get_input_name(op), get_output_name(op));
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
do_equal_propagation(topology, input_name, output_name);
} }
else if ("GetOutputElement" == op->description()) else if ("GetOutputElement" == op->description())
{ {
...@@ -257,39 +257,31 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -257,39 +257,31 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
const shared_ptr<op::GetOutputElement> elem = const shared_ptr<op::GetOutputElement> elem =
static_pointer_cast<op::GetOutputElement>(op); static_pointer_cast<op::GetOutputElement>(op);
const string& input_name = op->get_inputs().at(elem->get_n()).get_tensor().get_name();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
do_equal_propagation(topology, input_name, output_name); do_equal_propagation(topology, get_input_name(op, elem->get_n()), get_output_name(op));
} }
else if ("Slice" == op->description()) else if ("Slice" == op->description())
{ {
arguments_check(op, 1, 1); arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const Shape& input_shape = op->get_inputs().begin()->get_shape();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type =
op->get_outputs().begin()->get_tensor().get_element_type();
const shared_ptr<op::Slice> elem = static_pointer_cast<op::Slice>(op); const shared_ptr<op::Slice> elem = static_pointer_cast<op::Slice>(op);
const Coordinate& lower_bounds = elem->get_lower_bounds(); const Coordinate& lower_bounds = elem->get_lower_bounds();
const Coordinate& upper_bounds = elem->get_upper_bounds(); const Coordinate& upper_bounds = elem->get_upper_bounds();
const Strides& strides = elem->get_strides(); const Strides& strides = elem->get_strides();
if (input_shape.empty() || output_shape.empty() || lower_bounds.empty() || if (get_input_shape(op).empty() || get_output_shape(op).empty() ||
upper_bounds.empty() || strides.empty()) lower_bounds.empty() || upper_bounds.empty() || strides.empty())
{ {
do_equal_propagation(topology, input_name, output_name); do_equal_propagation(topology, get_input_name(op), get_output_name(op));
} }
else else
{ {
do_slice_operation(topology, do_slice_operation(topology,
input_name, get_input_name(op),
input_shape, get_input_shape(op),
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type, get_output_type(op),
lower_bounds, lower_bounds,
upper_bounds, upper_bounds,
strides); strides);
...@@ -299,54 +291,36 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -299,54 +291,36 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 3, 1); arguments_check(op, 3, 1);
const string& input0_name = op->get_inputs().at(0).get_tensor().get_name();
const Shape& input0_shape = op->get_inputs().at(0).get_shape();
const string& input1_name = op->get_inputs().at(1).get_tensor().get_name();
const Shape& input1_shape = op->get_inputs().at(1).get_shape();
const string& input2_name = op->get_inputs().at(2).get_tensor().get_name();
const Shape& input2_shape = op->get_inputs().at(2).get_shape();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type =
op->get_outputs().begin()->get_tensor().get_element_type();
do_select_operation(topology, do_select_operation(topology,
input0_name, get_input_name(op, 0),
input0_shape, get_input_shape(op, 0),
input1_name, get_input_name(op, 1),
input1_shape, get_input_shape(op, 1),
input2_name, get_input_name(op, 2),
input2_shape, get_input_shape(op, 2),
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type); get_output_type(op));
} }
else if ("Reverse" == op->description()) else if ("Reverse" == op->description())
{ {
arguments_check(op, 1, 1); arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().at(0).get_tensor().get_name();
const Shape& input_shape = op->get_inputs().at(0).get_shape();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type =
op->get_outputs().begin()->get_tensor().get_element_type();
const shared_ptr<op::Reverse> reverse_op = static_pointer_cast<op::Reverse>(op); const shared_ptr<op::Reverse> reverse_op = static_pointer_cast<op::Reverse>(op);
const AxisSet& reversed_axes = reverse_op->get_reversed_axes(); const AxisSet& reversed_axes = reverse_op->get_reversed_axes();
if (reversed_axes.empty()) if (reversed_axes.empty())
{ {
do_equal_propagation(topology, input_name, output_name); do_equal_propagation(topology, get_input_name(op), get_output_name(op));
} }
else else
{ {
do_reverse_operation(topology, do_reverse_operation(topology,
input_name, get_input_name(op),
input_shape, get_input_shape(op),
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type, get_output_type(op),
reversed_axes); reversed_axes);
} }
} }
...@@ -370,46 +344,33 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -370,46 +344,33 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 0, 1); arguments_check(op, 0, 1);
auto input_it = op->get_outputs().cbegin();
const descriptor::Tensor& output_tensor = input_it->get_tensor();
const string& output_name = output_tensor.get_name();
const shared_ptr<op::Constant> constant_inst = static_pointer_cast<op::Constant>(op); const shared_ptr<op::Constant> constant_inst = static_pointer_cast<op::Constant>(op);
void* memory_pointer = const_cast<void*>(constant_inst->get_data_ptr()); void* memory_pointer = const_cast<void*>(constant_inst->get_data_ptr());
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout( const cldnn::layout layout =
output_tensor.get_element_type(), input_it->get_shape()); IntelGPULayout::create_cldnn_layout(get_output_type(op), get_output_shape(op));
const cldnn::memory mem( const cldnn::memory mem(
cldnn::memory::attach<void>(layout, memory_pointer, layout.bytes_count())); cldnn::memory::attach<void>(layout, memory_pointer, layout.bytes_count()));
const cldnn::data op_const(output_name, mem); const cldnn::data op_const(get_output_name(op), mem);
topology.add(op_const); topology.add(op_const);
} }
else if ("Dot" == op->description()) else if ("Dot" == op->description())
{ {
arguments_check(op, 2, 1); arguments_check(op, 2, 1);
const string& inputA_name = op->get_inputs().at(0).get_tensor().get_name();
const Shape& inputA_shape = op->get_inputs().at(0).get_shape();
const string& inputB_name = op->get_inputs().at(1).get_tensor().get_name();
const Shape& inputB_shape = op->get_inputs().at(1).get_shape();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type =
op->get_outputs().begin()->get_tensor().get_element_type();
do_dot_operation(topology, do_dot_operation(topology,
inputA_name, get_input_name(op, 0),
inputA_shape, get_input_shape(op, 0),
inputB_name, get_input_name(op, 1),
inputB_shape, get_input_shape(op, 1),
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type); get_output_type(op));
} }
else if ("MaxPool" == op->description()) else if ("MaxPool" == op->description())
{ {
const shared_ptr<op::MaxPool> max_pool = static_pointer_cast<op::MaxPool>(op); const shared_ptr<op::MaxPool> max_pool = static_pointer_cast<op::MaxPool>(op);
const Shape& pool_shape = max_pool->get_window_shape(); const Shape& pool_shape = max_pool->get_window_shape();
const Strides& pool_strides = max_pool->get_window_movement_strides(); const Strides& pool_strides = max_pool->get_window_movement_strides();
const Shape& pad_below = max_pool->get_padding_below(); const Shape& pad_below = max_pool->get_padding_below();
...@@ -426,7 +387,6 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -426,7 +387,6 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
else if ("AvgPool" == op->description()) else if ("AvgPool" == op->description())
{ {
const shared_ptr<op::AvgPool> avg_pool = static_pointer_cast<op::AvgPool>(op); const shared_ptr<op::AvgPool> avg_pool = static_pointer_cast<op::AvgPool>(op);
const Shape& pool_shape = avg_pool->get_window_shape(); const Shape& pool_shape = avg_pool->get_window_shape();
const Strides& pool_strides = avg_pool->get_window_movement_strides(); const Strides& pool_strides = avg_pool->get_window_movement_strides();
const Shape& pad_below = avg_pool->get_padding_below(); const Shape& pad_below = avg_pool->get_padding_below();
...@@ -442,39 +402,31 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -442,39 +402,31 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 1, 1); arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const Shape& input_shape = op->get_inputs().begin()->get_shape();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type =
op->get_outputs().begin()->get_tensor().get_element_type();
const shared_ptr<op::Broadcast> broadcast = static_pointer_cast<op::Broadcast>(op); const shared_ptr<op::Broadcast> broadcast = static_pointer_cast<op::Broadcast>(op);
const AxisSet& axis = broadcast->get_broadcast_axes(); const AxisSet& axis = broadcast->get_broadcast_axes();
if (axis.empty()) if (axis.empty())
{ {
do_equal_propagation(topology, input_name, output_name); do_equal_propagation(topology, get_input_name(op), get_output_name(op));
} }
else if (input_shape.empty()) else if (get_input_shape(op).empty())
{ {
do_bcast_sum_operation_scalar(topology, do_bcast_sum_operation_scalar(topology,
input_name, get_input_name(op),
input_shape, get_input_shape(op),
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type, get_output_type(op),
true); true);
} }
else else
{ {
do_bcast_sum_operation(topology, do_bcast_sum_operation(topology,
input_name, get_input_name(op),
input_shape, get_input_shape(op),
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type, get_output_type(op),
axis, axis,
true); true);
} }
...@@ -483,39 +435,31 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -483,39 +435,31 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 1, 1); arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const Shape& input_shape = op->get_inputs().begin()->get_shape();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type =
op->get_outputs().begin()->get_tensor().get_element_type();
const shared_ptr<op::Sum> sum = static_pointer_cast<op::Sum>(op); const shared_ptr<op::Sum> sum = static_pointer_cast<op::Sum>(op);
const AxisSet& axis = sum->get_reduction_axes(); const AxisSet& axis = sum->get_reduction_axes();
if (axis.empty()) if (axis.empty())
{ {
do_equal_propagation(topology, input_name, output_name); do_equal_propagation(topology, get_input_name(op), get_output_name(op));
} }
else if (output_shape.empty()) else if (get_output_shape(op).empty())
{ {
do_bcast_sum_operation_scalar(topology, do_bcast_sum_operation_scalar(topology,
input_name, get_input_name(op),
input_shape, get_input_shape(op),
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type, get_output_type(op),
false); false);
} }
else else
{ {
do_bcast_sum_operation(topology, do_bcast_sum_operation(topology,
input_name, get_input_name(op),
input_shape, get_input_shape(op),
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type, get_output_type(op),
axis, axis,
false); false);
} }
...@@ -524,29 +468,21 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -524,29 +468,21 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 1, 1); arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const Shape& input_shape = op->get_inputs().begin()->get_shape();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type =
op->get_outputs().begin()->get_tensor().get_element_type();
const shared_ptr<op::Product> prod = static_pointer_cast<op::Product>(op); const shared_ptr<op::Product> prod = static_pointer_cast<op::Product>(op);
const AxisSet& axis = prod->get_reduction_axes(); const AxisSet& axis = prod->get_reduction_axes();
if (axis.empty()) if (axis.empty())
{ {
do_equal_propagation(topology, input_name, output_name); do_equal_propagation(topology, get_input_name(op), get_output_name(op));
} }
else else
{ {
do_product_operation(topology, do_product_operation(topology,
input_name, get_input_name(op),
input_shape, get_input_shape(op),
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type, get_output_type(op),
axis); axis);
} }
} }
...@@ -554,8 +490,6 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -554,8 +490,6 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 1, 1); arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const shared_ptr<op::Reshape> op_broadcast = static_pointer_cast<op::Reshape>(op); const shared_ptr<op::Reshape> op_broadcast = static_pointer_cast<op::Reshape>(op);
const AxisVector& broadcast_axes = op_broadcast->get_input_order(); const AxisVector& broadcast_axes = op_broadcast->get_input_order();
...@@ -574,7 +508,8 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -574,7 +508,8 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
permute_order.at(rindex - 1) = *i + scale; permute_order.at(rindex - 1) = *i + scale;
} }
const cldnn::permute cldnn_permute(output_name, input_name, permute_order); const cldnn::permute cldnn_permute(
get_output_name(op), get_input_name(op), permute_order);
topology.add(cldnn_permute); topology.add(cldnn_permute);
} }
else if ("Negative" == op->description()) else if ("Negative" == op->description())
...@@ -590,13 +525,12 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -590,13 +525,12 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 2, 1); arguments_check(op, 2, 1);
const string& input = op->get_inputs().at(0).get_tensor().get_name();
const string& input_grad = op->get_inputs().at(1).get_tensor().get_name();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const cldnn_activation_additional_params& param = {0.f, 0.f}; const cldnn_activation_additional_params& param = {0.f, 0.f};
const cldnn::activation_grad cldnn_activ_grad(get_output_name(op),
const cldnn::activation_grad cldnn_activ_grad( get_input_name(op, 1),
output_name, input_grad, input, activation_grad_relu, param); get_input_name(op, 0),
activation_grad_relu,
param);
topology.add(cldnn_activ_grad); topology.add(cldnn_activ_grad);
} }
else if ("Abs" == op->description()) else if ("Abs" == op->description())
...@@ -659,26 +593,18 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -659,26 +593,18 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 2, 1); arguments_check(op, 2, 1);
const string& input_name = op->get_inputs().at(0).get_tensor().get_name();
const Shape& input_shape = op->get_inputs().at(0).get_shape();
const string& scalar_name = op->get_inputs().at(1).get_tensor().get_name();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type =
op->get_outputs().begin()->get_tensor().get_element_type();
const shared_ptr<op::Pad> pad = static_pointer_cast<op::Pad>(op); const shared_ptr<op::Pad> pad = static_pointer_cast<op::Pad>(op);
const Shape& pad_above = pad->get_padding_above(); const Shape& pad_above = pad->get_padding_above();
const Shape& pad_below = pad->get_padding_below(); const Shape& pad_below = pad->get_padding_below();
const Shape& pad_interior = pad->get_padding_interior(); const Shape& pad_interior = pad->get_padding_interior();
do_pad_operation(topology, do_pad_operation(topology,
input_name, get_input_name(op, 0),
input_shape, get_input_shape(op),
scalar_name, get_input_name(op, 1),
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type, get_output_type(op),
pad_below, pad_below,
pad_interior); pad_interior);
} }
...@@ -686,39 +612,34 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -686,39 +612,34 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
const shared_ptr<op::BatchNorm> batch_norm = static_pointer_cast<op::BatchNorm>(op); const shared_ptr<op::BatchNorm> batch_norm = static_pointer_cast<op::BatchNorm>(op);
const double eps = batch_norm->get_eps_value(); const double eps = batch_norm->get_eps_value();
string mean_name;
string variance_name;
if (op->get_inputs().size() < 3 || op->get_outputs().empty()) if (op->get_inputs().size() < 3 || op->get_outputs().empty())
{ {
arguments_check(op, 3, 1); // throw exception in this case arguments_check(op, 3, 1); // throw exception in this case
} }
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type =
op->get_outputs().begin()->get_tensor().get_element_type();
const string& gamma_name = op->get_inputs().at(0).get_tensor().get_name();
const Shape& gamma_shape = op->get_inputs().at(0).get_shape();
const string& beta_name = op->get_inputs().at(1).get_tensor().get_name();
const string& input_name = op->get_inputs().at(2).get_tensor().get_name();
const Shape& input_shape = op->get_inputs().at(2).get_shape();
string mean_name;
string variance_name;
if (op->get_outputs().size() == 3) if (op->get_outputs().size() == 3)
{ {
arguments_check(op, 3, 3); arguments_check(op, 3, 3);
mean_name = op->get_outputs().at(1).get_tensor().get_name(); mean_name = get_output_name(op, 1);
variance_name = op->get_outputs().at(2).get_tensor().get_name(); variance_name = get_output_name(op, 2);
do_create_mean(topology,
mean_name,
get_input_shape(op),
get_output_type(op),
get_input_name(op, 2),
get_input_shape(op, 2));
do_create_mean(
topology, mean_name, gamma_shape, output_type, input_name, input_shape);
do_create_variance(topology, do_create_variance(topology,
variance_name, variance_name,
gamma_shape, get_input_shape(op),
output_type, get_output_type(op),
input_name, get_input_name(op, 2),
input_shape, get_input_shape(op, 2),
mean_name); mean_name);
} }
...@@ -728,20 +649,20 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -728,20 +649,20 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 5, 1); arguments_check(op, 5, 1);
mean_name = op->get_inputs().at(3).get_tensor().get_name(); mean_name = get_input_name(op, 3);
variance_name = op->get_inputs().at(4).get_tensor().get_name(); variance_name = get_input_name(op, 4);
} }
do_batch_norm_operation(topology, do_batch_norm_operation(topology,
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type, get_output_type(op),
eps, eps,
input_name, get_input_name(op, 2),
input_shape, get_input_shape(op, 2),
gamma_name, get_input_name(op, 0),
gamma_shape, get_input_shape(op, 0),
beta_name, get_input_name(op, 1),
mean_name, mean_name,
variance_name); variance_name);
} }
...@@ -754,12 +675,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -754,12 +675,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 2, 1); arguments_check(op, 2, 1);
const std::string& conv_name = op->get_outputs().begin()->get_tensor().get_name();
const std::string& image_name = op->get_inputs().at(0).get_tensor().get_name();
const std::string& weight_name = op->get_inputs().at(1).get_tensor().get_name();
const shared_ptr<op::Convolution> conv_op = static_pointer_cast<op::Convolution>(op); const shared_ptr<op::Convolution> conv_op = static_pointer_cast<op::Convolution>(op);
const Strides& conv_stride = conv_op->get_window_movement_strides(); const Strides& conv_stride = conv_op->get_window_movement_strides();
const Strides& conv_dilation = conv_op->get_window_dilation_strides(); const Strides& conv_dilation = conv_op->get_window_dilation_strides();
const CoordinateDiff& conv_padding_below = conv_op->get_padding_below(); const CoordinateDiff& conv_padding_below = conv_op->get_padding_below();
...@@ -809,31 +725,27 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -809,31 +725,27 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
const cldnn::tensor strides(1, 1, conv_stride.at(1), conv_stride.at(0)); const cldnn::tensor strides(1, 1, conv_stride.at(1), conv_stride.at(0));
const cldnn::tensor dilation(1, 1, conv_dilation.at(1), conv_dilation.at(0)); const cldnn::tensor dilation(1, 1, conv_dilation.at(1), conv_dilation.at(0));
const cldnn::convolution cldnn_conv( const cldnn::convolution cldnn_conv(get_output_name(op),
conv_name, image_name, {weight_name}, strides, input_offset, dilation); get_input_name(op, 0),
{get_input_name(op, 1)},
strides,
input_offset,
dilation);
topology.add(cldnn_conv); topology.add(cldnn_conv);
} }
else if ("Min" == op->description()) else if ("Min" == op->description())
{ {
arguments_check(op, 1, 1); arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const Shape& input_shape = op->get_inputs().begin()->get_shape();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type =
op->get_outputs().begin()->get_tensor().get_element_type();
const shared_ptr<op::Min> min_op = static_pointer_cast<op::Min>(op); const shared_ptr<op::Min> min_op = static_pointer_cast<op::Min>(op);
const AxisSet& axis = min_op->get_reduction_axes(); const AxisSet& axis = min_op->get_reduction_axes();
do_max_min_operation(topology, do_max_min_operation(topology,
input_name, get_input_name(op),
input_shape, get_input_shape(op),
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type, get_output_type(op),
axis, axis,
true); true);
} }
...@@ -841,31 +753,21 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -841,31 +753,21 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 1, 1); arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const Shape& input_shape = op->get_inputs().begin()->get_shape();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type =
op->get_outputs().begin()->get_tensor().get_element_type();
const shared_ptr<op::Max> max_op = static_pointer_cast<op::Max>(op); const shared_ptr<op::Max> max_op = static_pointer_cast<op::Max>(op);
const AxisSet& axis = max_op->get_reduction_axes(); const AxisSet& axis = max_op->get_reduction_axes();
do_max_min_operation(topology, do_max_min_operation(topology,
input_name, get_input_name(op),
input_shape, get_input_shape(op),
output_name, get_output_name(op),
output_shape, get_output_shape(op),
output_type, get_output_type(op),
axis, axis,
false); false);
} }
else else
{ {
ostringstream os; throw invalid_argument("IntelGPU: Unsupported operation \"" + op->description() + "\"");
os << "Unsupported operation \"" << op->description() << '\"';
throw invalid_argument(os.str());
} }
} }
......
...@@ -85,7 +85,8 @@ void runtime::intelgpu::do_bcast_sum_operation(cldnn::topology& topology, ...@@ -85,7 +85,8 @@ void runtime::intelgpu::do_bcast_sum_operation(cldnn::topology& topology,
const AxisSet& axis, const AxisSet& axis,
bool is_bcast) bool is_bcast)
{ {
const string function_name = is_bcast ? "broadcast" : "sum"; string function_name = is_bcast ? "broadcast" : "sum";
function_name += output_name;
codegen::CodeWriter writer; codegen::CodeWriter writer;
writer << "__kernel void " << function_name << "(const __global float input" writer << "__kernel void " << function_name << "(const __global float input"
......
...@@ -3,18 +3,7 @@ acos ...@@ -3,18 +3,7 @@ acos
aliased_output aliased_output
asin asin
atan atan
avg_pool_1d_1channel_1image
avg_pool_1d_1channel_2image
avg_pool_1d_2channel_2image
avg_pool_2d_1channel_1image_padded
avg_pool_2d_1channel_1image_strided
avg_pool_2d_2channel_2image
avg_pool_2d_2channel_2image_padded
avg_pool_2d_2channel_2image_padded_3x3
avg_pool_2d_2channel_2image_padded_3x3_strided
avg_pool_2d_2channel_2image_padded_3x3_strided_uneven
avg_pool_2d_2channel_2image_padded_only_above avg_pool_2d_2channel_2image_padded_only_above
avg_pool_2d_2channel_2image_padded_only_below
avg_pool_3d avg_pool_3d
backwards_abs backwards_abs
backwards_acos backwards_acos
...@@ -26,8 +15,7 @@ backwards_avgpool_n2_c2_hw2x2_win_2x2_str_1x1_padding_numeric ...@@ -26,8 +15,7 @@ backwards_avgpool_n2_c2_hw2x2_win_2x2_str_1x1_padding_numeric
backwards_avgpool_n2_c2_hw4x4 backwards_avgpool_n2_c2_hw4x4
backwards_avgpool_n2_c2_hw4x4_numeric backwards_avgpool_n2_c2_hw4x4_numeric
backwards_avgpool_n2_c2_hw4x4_win_2x2_str_1x1_numeric backwards_avgpool_n2_c2_hw4x4_win_2x2_str_1x1_numeric
backwards_broadcast0 backwards_batch_norm_three_outputs
backwards_broadcast1
backwards_ceiling backwards_ceiling
backwards_concat_axis_0 backwards_concat_axis_0
backwards_concat_axis_1 backwards_concat_axis_1
...@@ -35,7 +23,6 @@ backwards_concat_vector ...@@ -35,7 +23,6 @@ backwards_concat_vector
backwards_cos backwards_cos
backwards_cosh backwards_cosh
backwards_dot_scalar_tensor backwards_dot_scalar_tensor
backwards_dot_tensor2_tensor2
backwards_dot_tensor3_tensor3 backwards_dot_tensor3_tensor3
backwards_dot_tensor_scalar backwards_dot_tensor_scalar
backwards_dot_tensor_vector backwards_dot_tensor_vector
...@@ -50,7 +37,6 @@ backwards_maxpool_n4_c1_hw4_2x2_max ...@@ -50,7 +37,6 @@ backwards_maxpool_n4_c1_hw4_2x2_max
backwards_minimum backwards_minimum
backwards_power backwards_power
backwards_replace_slice backwards_replace_slice
backwards_reverse_3d_02
backwards_reverse_sequence_n3_c2_h3 backwards_reverse_sequence_n3_c2_h3
backwards_reverse_sequence_n4d2c3h2w2 backwards_reverse_sequence_n4d2c3h2w2
backwards_select backwards_select
...@@ -66,14 +52,10 @@ backwards_softmax_axis ...@@ -66,14 +52,10 @@ backwards_softmax_axis
backwards_softmax_underflow backwards_softmax_underflow
backwards_tan backwards_tan
batchnorm_bprop_n4c3h2w2 batchnorm_bprop_n4c3h2w2
batchnorm_fprop_b1c2h2w2
batchnorm_fprop_b2c2h2w1
batch_norm_one_output batch_norm_one_output
batch_norm_three_outputs batch_norm_three_outputs
broadcast_vector_rowwise_int64 broadcast_vector_rowwise_int64
broadcast_vector_rowwise_reversed
ceiling ceiling
computation_reuse
concat_2d_tensor concat_2d_tensor
concat_4d_tensor concat_4d_tensor
concat_5d concat_5d
...@@ -84,7 +66,6 @@ concat_vector ...@@ -84,7 +66,6 @@ concat_vector
concat_zero_length_1d_last concat_zero_length_1d_last
concat_zero_length_1d_middle concat_zero_length_1d_middle
concat_zero_length_4d_middle concat_zero_length_4d_middle
constant_equality_bool
constant_multi_use constant_multi_use
convert_float32_bool convert_float32_bool
convert_int32_bool convert_int32_bool
...@@ -124,61 +105,17 @@ convolution_outlining ...@@ -124,61 +105,17 @@ convolution_outlining
cos cos
cosh cosh
divide_by_zero_int32 divide_by_zero_int32
dot_0_0
dot_2x0_0
dot_matrix_0x2_2x0
dot_matrix_2x0_0x2
dot_matrix_3x2_2x0
dot_matrix_vector_int64 dot_matrix_vector_int64
dot_scalar_0x2
equal
exp exp
floor floor
function_call function_call
greater
greatereq
less
lesseq
lesseq_bool
log log
logical_and lrn
logical_or
max_3d_eliminate_zero_dim
max_3d_to_matrix_least_sig
max_3d_to_matrix_most_sig
max_3d_to_scalar
max_3d_to_vector
max_matrix_cols_zero
max_matrix_columns
max_matrix_rows
max_matrix_rows_zero
max_matrix_to_scalar_zero_by_zero
max_pool_3d max_pool_3d
max_to_scalar
max_trivial
max_trivial_5d
max_vector_zero
min_3d_eliminate_zero_dim
min_3d_to_matrix_least_sig
min_3d_to_matrix_most_sig
min_3d_to_scalar
min_3d_to_vector
minimum minimum
min_matrix_cols_zero
min_matrix_columns
min_matrix_rows
min_matrix_rows_zero
min_matrix_to_scalar_zero_by_zero
min_to_scalar
min_trivial
min_trivial_5d
min_vector_zero
not not
notequal
numeric_double_inf numeric_double_inf
numeric_double_nan numeric_double_nan
numeric_float_inf
numeric_float_nan
one_hot_matrix_0 one_hot_matrix_0
one_hot_scalar_0_in_3 one_hot_scalar_0_in_3
one_hot_scalar_1_in_3 one_hot_scalar_1_in_3
...@@ -191,24 +128,6 @@ one_hot_vector_1_barely_oob ...@@ -191,24 +128,6 @@ one_hot_vector_1_barely_oob
one_hot_vector_1_far_oob one_hot_vector_1_far_oob
one_hot_vector_1_fp one_hot_vector_1_fp
one_hot_vector_1_fp_nonint one_hot_vector_1_fp_nonint
pad_exterior_2d_0x0
pad_exterior_2d_0x3
pad_exterior_2d_3x0
pad_interior_exterior_4d_2x0x3x2
product_3d_eliminate_zero_dim
product_3d_to_matrix_least_sig
product_3d_to_matrix_most_sig
product_3d_to_scalar
product_3d_to_vector
product_matrix_cols_zero
product_matrix_columns
product_matrix_rows
product_matrix_rows_zero
product_matrix_to_scalar_zero_by_zero
product_to_scalar
product_trivial
product_trivial_5d
product_vector_zero
reduce_3d_to_vector reduce_3d_to_vector
reduce_matrix_cols_zero reduce_matrix_cols_zero
reduce_matrix_columns reduce_matrix_columns
...@@ -230,26 +149,10 @@ replace_slice_matrix ...@@ -230,26 +149,10 @@ replace_slice_matrix
replace_slice_scalar replace_slice_scalar
replace_slice_vector replace_slice_vector
reshape_6d reshape_6d
reverse_0d
reverse_1d_0
reverse_1d_nochange
reverse_2d_0
reverse_2d_01
reverse_2d_1
reverse_2d_nochange
reverse_3d_0
reverse_3d_01
reverse_3d_012
reverse_3d_02
reverse_3d_1
reverse_3d_12
reverse_3d_2
reverse_3d_nochange
reverse_sequence_n2c3h4w2 reverse_sequence_n2c3h4w2
reverse_sequence_n4c3h2w2 reverse_sequence_n4c3h2w2
reverse_sequence_n4d2c3h2w2 reverse_sequence_n4d2c3h2w2
scalar_constant_int64 scalar_constant_int64
select
select_and_scatter_3d_without_overlap select_and_scatter_3d_without_overlap
select_and_scatter_without_overlap select_and_scatter_without_overlap
select_and_scatter_with_overlap select_and_scatter_with_overlap
...@@ -263,11 +166,6 @@ softmax_axis_2 ...@@ -263,11 +166,6 @@ softmax_axis_2
softmax_axis_3d softmax_axis_3d
softmax_axis_3d_trivial softmax_axis_3d_trivial
softmax_underflow softmax_underflow
sum_3d_eliminate_zero_dim
sum_matrix_cols_zero
sum_matrix_rows_zero
sum_matrix_to_scalar_zero_by_zero
sum_vector_zero
tan tan
tensor_constant_int64 tensor_constant_int64
validate_call_input_type validate_call_input_type
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment