Commit 1f248d0f authored by Sergey Shalnov, committed by Robert Kimball

IntelGPU backend: Set of fusion operations (#2809)

* IntelGPU backend: Set of fusion operations

* Avoid test failures after PR merge
parent dc45b9db
......@@ -43,7 +43,6 @@
#include "ngraph/pass/algebraic_simplification.hpp"
#include "ngraph/pass/cse.hpp"
#include "ngraph/pass/fused_op_decomposition.hpp"
#include "ngraph/pass/get_output_element_elimination.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/nop_elimination.hpp"
......@@ -75,6 +74,7 @@
#include "ngraph/op/embedding_lookup.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/erf.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp"
......@@ -113,6 +113,7 @@ using intelgpu_space = runtime::intelgpu::IntelGPULayout;
#define NGRAPH_OP(a, b) a,
enum class OP_TYPEID
{
#include "ngraph/op/fused_op_tbl.hpp"
#include "ngraph/op/op_tbl.hpp"
};
#undef NGRAPH_OP
......@@ -125,6 +126,7 @@ static OP_TYPEID get_typeid(const string& s)
// ...
#define NGRAPH_OP(a, b) {#a, OP_TYPEID::a},
static const unordered_map<string, OP_TYPEID> typeid_map{
#include "ngraph/op/fused_op_tbl.hpp"
#include "ngraph/op/op_tbl.hpp"
};
#undef NGRAPH_OP
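The two includes above rely on the X-macro pattern: op_tbl.hpp and fused_op_tbl.hpp contain one NGRAPH_OP(name, namespace) entry per operation, and the including code redefines NGRAPH_OP so the same table expands first into the OP_TYPEID enumeration and then into the string-to-enum lookup map. A minimal standalone sketch of that pattern (MY_OP_TBL and its entries are illustrative stand-ins for the real header files):

```cpp
#include <string>
#include <unordered_map>

// Stand-in for op_tbl.hpp / fused_op_tbl.hpp: one NGRAPH_OP(name, namespace) entry per op.
#define MY_OP_TBL                     \
    NGRAPH_OP(Convolution, op)        \
    NGRAPH_OP(ConvolutionBias, op)    \
    NGRAPH_OP(ConvolutionBiasAdd, op)

// First expansion: the table becomes the enumeration.
#define NGRAPH_OP(a, b) a,
enum class OP_TYPEID
{
    MY_OP_TBL
};
#undef NGRAPH_OP

// Second expansion: the same table becomes the name -> enum lookup map.
#define NGRAPH_OP(a, b) {#a, OP_TYPEID::a},
static const std::unordered_map<std::string, OP_TYPEID> typeid_map{MY_OP_TBL};
#undef NGRAPH_OP
```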
......@@ -394,7 +396,6 @@ shared_ptr<runtime::Executable>
{
ngraph::pass::Manager pass_manager;
pass_manager.register_pass<ngraph::pass::FusedOpDecomposition>();
pass_manager.register_pass<ngraph::pass::NopElimination>();
pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>();
......@@ -413,13 +414,14 @@ shared_ptr<runtime::Executable>
for (shared_ptr<Node> op : func->get_ops())
{
const OP_TYPEID op_type_id = get_typeid(op->description());
// We want to check that every OP_TYPEID enumeration value is handled by this switch.
// These GCC flags enable compile-time checking so that a missing case
// produces a compilation error.
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
switch (get_typeid(op->description()))
switch (op_type_id)
{
case OP_TYPEID::Parameter:
{
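For reference, a minimal standalone sketch of the -Wswitch-enum technique described by the comment above (Color and describe are hypothetical names; only the pragma mechanism mirrors the backend code):

```cpp
enum class Color { Red, Green, Blue };

const char* describe(Color c)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
    // With these diagnostics promoted to errors, forgetting a Color case
    // below stops the build instead of silently compiling.
    switch (c)
    {
    case Color::Red: return "red";
    case Color::Green: return "green";
    case Color::Blue: return "blue";
    }
#pragma GCC diagnostic pop
    return "unknown";
}
```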
......@@ -1403,15 +1405,52 @@ shared_ptr<runtime::Executable>
break;
}
case OP_TYPEID::Convolution:
case OP_TYPEID::ConvolutionBias:
case OP_TYPEID::ConvolutionBiasAdd:
{
arguments_check(op, 2, 1);
// These convolution classes do not share a suitable common base class,
// so the parameters below are extracted per concrete type.
Strides win_stride;
Strides win_dilation;
Strides data_dilation;
CoordinateDiff pad_below;
CoordinateDiff pad_above;
const shared_ptr<op::Convolution> conv_op = static_pointer_cast<op::Convolution>(op);
const Strides& win_stride = conv_op->get_window_movement_strides();
const Strides& win_dilation = conv_op->get_window_dilation_strides();
const Strides& data_dilation = conv_op->get_data_dilation_strides();
const CoordinateDiff& pad_below = conv_op->get_padding_below();
const CoordinateDiff& pad_above = conv_op->get_padding_above();
if (op_type_id == OP_TYPEID::ConvolutionBias)
{
arguments_check(op, 3, 1);
const shared_ptr<op::ConvolutionBias> conv_op =
static_pointer_cast<op::ConvolutionBias>(op);
win_stride = conv_op->get_window_movement_strides();
win_dilation = conv_op->get_window_dilation_strides();
data_dilation = conv_op->get_data_dilation_strides();
pad_below = conv_op->get_padding_below();
pad_above = conv_op->get_padding_above();
}
else if (op_type_id == OP_TYPEID::ConvolutionBiasAdd)
{
arguments_check(op, 4, 1);
const shared_ptr<op::ConvolutionBiasAdd> conv_op =
static_pointer_cast<op::ConvolutionBiasAdd>(op);
win_stride = conv_op->get_window_movement_strides();
win_dilation = conv_op->get_window_dilation_strides();
data_dilation = conv_op->get_data_dilation_strides();
pad_below = conv_op->get_padding_below();
pad_above = conv_op->get_padding_above();
}
else
{
arguments_check(op, 2, 1);
const shared_ptr<op::Convolution> conv_op =
static_pointer_cast<op::Convolution>(op);
win_stride = conv_op->get_window_movement_strides();
win_dilation = conv_op->get_window_dilation_strides();
data_dilation = conv_op->get_data_dilation_strides();
pad_below = conv_op->get_padding_below();
pad_above = conv_op->get_padding_above();
}
// clDNN has quite limited support for the Convolution operation.
// The following checks select the custom-kernel workaround when clDNN cannot handle the parameters.
......@@ -1420,7 +1459,19 @@ shared_ptr<runtime::Executable>
(data_dilation.at(0) != 1) || (data_dilation.at(1) != 1) ||
(op->get_output_element_type(0) != element::f32))
{
kern.emit<op::Convolution>(conv_op);
if (op_type_id == OP_TYPEID::ConvolutionBias)
{
kern.emit<op::ConvolutionBias>(static_pointer_cast<op::ConvolutionBias>(op));
}
else if (op_type_id == OP_TYPEID::ConvolutionBiasAdd)
{
kern.emit<op::ConvolutionBiasAdd>(
static_pointer_cast<op::ConvolutionBiasAdd>(op));
}
else
{
kern.emit<op::Convolution>(static_pointer_cast<op::Convolution>(op));
}
}
else
{
......@@ -1450,16 +1501,61 @@ shared_ptr<runtime::Executable>
const cldnn::tensor strides(1, 1, win_stride.at(1), win_stride.at(0));
const cldnn::tensor dilation(1, 1, win_dilation.at(1), win_dilation.at(0));
const cldnn::convolution cldnn_conv(op->get_output_tensor_name(0),
op_input_name,
{op->get_input_tensor_name(1)},
strides,
input_offset,
dilation);
topology.add(cldnn_conv);
if (op_type_id == OP_TYPEID::ConvolutionBias)
{
const cldnn::convolution cldnn_conv_bias(op->get_output_tensor_name(0),
op_input_name,
{op->get_input_tensor_name(1)},
{op->get_input_tensor_name(2)},
strides,
input_offset,
dilation);
topology.add(cldnn_conv_bias);
}
else if (op_type_id == OP_TYPEID::ConvolutionBiasAdd)
{
// It is not obvious which cldnn::convolution constructor handles the fused add,
// so express it explicitly as two operations: a biased convolution followed by an eltwise sum.
const string intermediate_name =
op_input_name + op->get_output_tensor_name(0) + "_intermediate";
const cldnn::convolution cldnn_conv_bias(intermediate_name,
op_input_name,
{op->get_input_tensor_name(1)},
{op->get_input_tensor_name(2)},
strides,
input_offset,
dilation);
topology.add(cldnn_conv_bias);
const cldnn::eltwise cldnn_conv_bias_add(
op->get_output_tensor_name(0),
{intermediate_name, op->get_input_tensor_name(3)},
cldnn::eltwise_mode::sum);
topology.add(cldnn_conv_bias_add);
}
else
{
const cldnn::convolution cldnn_conv(op->get_output_tensor_name(0),
op_input_name,
{op->get_input_tensor_name(1)},
strides,
input_offset,
dilation);
topology.add(cldnn_conv);
}
}
break;
}
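The repeated per-type extraction in this case exists because Convolution, ConvolutionBias, and ConvolutionBiasAdd do not share a base class exposing these getters. A hedged sketch of one way the repetition could be factored out; ConvParams and extract_conv_params are hypothetical helpers, not part of this commit, and only assume the getters already used above:

```cpp
// Hypothetical helper: collects the parameters that the switch above
// extracts separately for each convolution variant.
struct ConvParams
{
    Strides win_stride;
    Strides win_dilation;
    Strides data_dilation;
    CoordinateDiff pad_below;
    CoordinateDiff pad_above;
};

template <typename ConvType>
ConvParams extract_conv_params(const std::shared_ptr<Node>& op)
{
    const std::shared_ptr<ConvType> conv = std::static_pointer_cast<ConvType>(op);
    return {conv->get_window_movement_strides(),
            conv->get_window_dilation_strides(),
            conv->get_data_dilation_strides(),
            conv->get_padding_below(),
            conv->get_padding_above()};
}

// Possible use inside the switch:
//   const ConvParams p = extract_conv_params<op::ConvolutionBias>(op);
```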
case OP_TYPEID::ConvolutionBiasBackpropFiltersBias:
{
arguments_check(op, 2, 2);
kern.emit<op::ConvolutionBiasBackpropFiltersBias>(
static_pointer_cast<op::ConvolutionBiasBackpropFiltersBias>(op));
break;
}
case OP_TYPEID::ConvolutionBackpropFilters:
{
arguments_check(op, 2, 1);
......@@ -1839,6 +1935,8 @@ shared_ptr<runtime::Executable>
case OP_TYPEID::DynBroadcast:
case OP_TYPEID::Passthrough:
case OP_TYPEID::DynPad:
case OP_TYPEID::PRelu:
default:
{
throw unsupported_op("Unsupported op '" + op->description() +
"' in IntelGPU back end.");
......
......@@ -97,6 +97,7 @@ void runtime::intelgpu::CustomKernels::queue_krnl(const krnl_info& krnl_info,
kr.m_lws);
stream.add(kernel_item);
#endif
++m_count_krnls;
}
}
......
......@@ -30,6 +30,7 @@
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp"
#include "ngraph/op/less.hpp"
......@@ -111,8 +112,6 @@ public:
krnl_info = build_krnl(op);
queue_krnl(krnl_info, op);
++m_count_krnls;
}
size_t get_custom_kernel_count() const { return m_count_krnls; }
......@@ -129,6 +128,9 @@ private:
krnl_info build_krnl(const std::shared_ptr<op::Convolution>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropData>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropFilters>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBias>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBiasAdd>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBiasBackpropFiltersBias>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Equal>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Greater>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::GreaterEq>& op) const;
......
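In the custom_kernels.hpp fragment above, emit<OpType>() appears to rely on C++ overload resolution: the statically typed shared_ptr passed in selects the matching build_krnl() overload, which is why one overload per supported op is declared. A hedged sketch of that dispatch pattern, with the method body inferred from the fragment rather than copied from it:

```cpp
// Assumed shape of the dispatch in runtime::intelgpu::CustomKernels
// (inferred from the header fragment; not the verbatim implementation).
template <typename OpType>
void emit(const std::shared_ptr<OpType>& op)
{
    // Overload resolution on shared_ptr<OpType> picks the matching build_krnl().
    krnl_info krnl = build_krnl(op);
    queue_krnl(krnl, op);
}

// Call site in the backend, as added by this commit:
//   kern.emit<op::ConvolutionBias>(static_pointer_cast<op::ConvolutionBias>(op));
```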
......@@ -25,6 +25,10 @@ shape_of_vector
floor_int32
convert_int32_bool
convert_float32_bool
prelu
prelu_shared_slope
prelu_negative_slope
group_conv
# Unsupported extra padding modes
pad_edge_1d
......
......@@ -55,6 +55,7 @@ using namespace std;
#define NGRAPH_OP(a, b) a,
enum class OP_TYPEID
{
#include "ngraph/op/fused_op_tbl.hpp"
#include "ngraph/op/op_tbl.hpp"
UNDEFINED_OP
};
......@@ -68,6 +69,7 @@ static OP_TYPEID get_typeid(const string& s)
// ...
#define NGRAPH_OP(a, b) {#a, OP_TYPEID::a},
static const unordered_map<string, OP_TYPEID> typeid_map{
#include "ngraph/op/fused_op_tbl.hpp"
#include "ngraph/op/op_tbl.hpp"
};
#undef NGRAPH_OP
......@@ -329,6 +331,8 @@ void print_node_parameters(ostringstream& writer, const shared_ptr<Node>& node)
break;
}
case OP_TYPEID::Convolution:
case OP_TYPEID::ConvolutionBias:
case OP_TYPEID::ConvolutionBiasAdd:
{
const shared_ptr<op::Convolution> conv_op = static_pointer_cast<op::Convolution>(node);
......@@ -340,6 +344,7 @@ void print_node_parameters(ostringstream& writer, const shared_ptr<Node>& node)
break;
}
case OP_TYPEID::ConvolutionBackpropFilters:
case OP_TYPEID::ConvolutionBiasBackpropFiltersBias:
{
const shared_ptr<op::ConvolutionBackpropFilters> conv_op_filt =
static_pointer_cast<op::ConvolutionBackpropFilters>(node);
......