Commit 1f248d0f authored by Sergey Shalnov's avatar Sergey Shalnov Committed by Robert Kimball

IntelGPU backend: Set of fusion operations (#2809)

* IntelGPU backend: Set of fusion operations

* avoid test failed after pr merge
parent dc45b9db
...@@ -43,7 +43,6 @@ ...@@ -43,7 +43,6 @@
#include "ngraph/pass/algebraic_simplification.hpp" #include "ngraph/pass/algebraic_simplification.hpp"
#include "ngraph/pass/cse.hpp" #include "ngraph/pass/cse.hpp"
#include "ngraph/pass/fused_op_decomposition.hpp"
#include "ngraph/pass/get_output_element_elimination.hpp" #include "ngraph/pass/get_output_element_elimination.hpp"
#include "ngraph/pass/manager.hpp" #include "ngraph/pass/manager.hpp"
#include "ngraph/pass/nop_elimination.hpp" #include "ngraph/pass/nop_elimination.hpp"
...@@ -75,6 +74,7 @@ ...@@ -75,6 +74,7 @@
#include "ngraph/op/embedding_lookup.hpp" #include "ngraph/op/embedding_lookup.hpp"
#include "ngraph/op/equal.hpp" #include "ngraph/op/equal.hpp"
#include "ngraph/op/erf.hpp" #include "ngraph/op/erf.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/get_output_element.hpp" #include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/greater.hpp" #include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp" #include "ngraph/op/greater_eq.hpp"
...@@ -113,6 +113,7 @@ using intelgpu_space = runtime::intelgpu::IntelGPULayout; ...@@ -113,6 +113,7 @@ using intelgpu_space = runtime::intelgpu::IntelGPULayout;
#define NGRAPH_OP(a, b) a, #define NGRAPH_OP(a, b) a,
enum class OP_TYPEID enum class OP_TYPEID
{ {
#include "ngraph/op/fused_op_tbl.hpp"
#include "ngraph/op/op_tbl.hpp" #include "ngraph/op/op_tbl.hpp"
}; };
#undef NGRAPH_OP #undef NGRAPH_OP
...@@ -125,6 +126,7 @@ static OP_TYPEID get_typeid(const string& s) ...@@ -125,6 +126,7 @@ static OP_TYPEID get_typeid(const string& s)
// ... // ...
#define NGRAPH_OP(a, b) {#a, OP_TYPEID::a}, #define NGRAPH_OP(a, b) {#a, OP_TYPEID::a},
static const unordered_map<string, OP_TYPEID> typeid_map{ static const unordered_map<string, OP_TYPEID> typeid_map{
#include "ngraph/op/fused_op_tbl.hpp"
#include "ngraph/op/op_tbl.hpp" #include "ngraph/op/op_tbl.hpp"
}; };
#undef NGRAPH_OP #undef NGRAPH_OP
...@@ -394,7 +396,6 @@ shared_ptr<runtime::Executable> ...@@ -394,7 +396,6 @@ shared_ptr<runtime::Executable>
{ {
ngraph::pass::Manager pass_manager; ngraph::pass::Manager pass_manager;
pass_manager.register_pass<ngraph::pass::FusedOpDecomposition>();
pass_manager.register_pass<ngraph::pass::NopElimination>(); pass_manager.register_pass<ngraph::pass::NopElimination>();
pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>(); pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>(); pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>();
...@@ -413,13 +414,14 @@ shared_ptr<runtime::Executable> ...@@ -413,13 +414,14 @@ shared_ptr<runtime::Executable>
for (shared_ptr<Node> op : func->get_ops()) for (shared_ptr<Node> op : func->get_ops())
{ {
const OP_TYPEID op_type_id = get_typeid(op->description());
// We want to check that every OP_TYPEID enumeration is included in the list. // We want to check that every OP_TYPEID enumeration is included in the list.
// These GCC flags enable compile-time checking so that if an enumeration // These GCC flags enable compile-time checking so that if an enumeration
// is not in the list an error is generated. // is not in the list an error is generated.
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum" #pragma GCC diagnostic error "-Wswitch-enum"
switch (get_typeid(op->description())) switch (op_type_id)
{ {
case OP_TYPEID::Parameter: case OP_TYPEID::Parameter:
{ {
...@@ -1403,15 +1405,52 @@ shared_ptr<runtime::Executable> ...@@ -1403,15 +1405,52 @@ shared_ptr<runtime::Executable>
break; break;
} }
case OP_TYPEID::Convolution: case OP_TYPEID::Convolution:
case OP_TYPEID::ConvolutionBias:
case OP_TYPEID::ConvolutionBiasAdd:
{
// since bad inheritance design of these classes
Strides win_stride;
Strides win_dilation;
Strides data_dilation;
CoordinateDiff pad_below;
CoordinateDiff pad_above;
if (op_type_id == OP_TYPEID::ConvolutionBias)
{
arguments_check(op, 3, 1);
const shared_ptr<op::ConvolutionBias> conv_op =
static_pointer_cast<op::ConvolutionBias>(op);
win_stride = conv_op->get_window_movement_strides();
win_dilation = conv_op->get_window_dilation_strides();
data_dilation = conv_op->get_data_dilation_strides();
pad_below = conv_op->get_padding_below();
pad_above = conv_op->get_padding_above();
}
else if (op_type_id == OP_TYPEID::ConvolutionBiasAdd)
{
arguments_check(op, 4, 1);
const shared_ptr<op::ConvolutionBiasAdd> conv_op =
static_pointer_cast<op::ConvolutionBiasAdd>(op);
win_stride = conv_op->get_window_movement_strides();
win_dilation = conv_op->get_window_dilation_strides();
data_dilation = conv_op->get_data_dilation_strides();
pad_below = conv_op->get_padding_below();
pad_above = conv_op->get_padding_above();
}
else
{ {
arguments_check(op, 2, 1); arguments_check(op, 2, 1);
const shared_ptr<op::Convolution> conv_op = static_pointer_cast<op::Convolution>(op); const shared_ptr<op::Convolution> conv_op =
const Strides& win_stride = conv_op->get_window_movement_strides(); static_pointer_cast<op::Convolution>(op);
const Strides& win_dilation = conv_op->get_window_dilation_strides(); win_stride = conv_op->get_window_movement_strides();
const Strides& data_dilation = conv_op->get_data_dilation_strides(); win_dilation = conv_op->get_window_dilation_strides();
const CoordinateDiff& pad_below = conv_op->get_padding_below(); data_dilation = conv_op->get_data_dilation_strides();
const CoordinateDiff& pad_above = conv_op->get_padding_above(); pad_below = conv_op->get_padding_below();
pad_above = conv_op->get_padding_above();
}
// clDNN has quite limited support for Convolution operation // clDNN has quite limited support for Convolution operation
// following are the checks to go with workaround // following are the checks to go with workaround
...@@ -1420,7 +1459,19 @@ shared_ptr<runtime::Executable> ...@@ -1420,7 +1459,19 @@ shared_ptr<runtime::Executable>
(data_dilation.at(0) != 1) || (data_dilation.at(1) != 1) || (data_dilation.at(0) != 1) || (data_dilation.at(1) != 1) ||
(op->get_output_element_type(0) != element::f32)) (op->get_output_element_type(0) != element::f32))
{ {
kern.emit<op::Convolution>(conv_op); if (op_type_id == OP_TYPEID::ConvolutionBias)
{
kern.emit<op::ConvolutionBias>(static_pointer_cast<op::ConvolutionBias>(op));
}
else if (op_type_id == OP_TYPEID::ConvolutionBiasAdd)
{
kern.emit<op::ConvolutionBiasAdd>(
static_pointer_cast<op::ConvolutionBiasAdd>(op));
}
else
{
kern.emit<op::Convolution>(static_pointer_cast<op::Convolution>(op));
}
} }
else else
{ {
...@@ -1450,6 +1501,42 @@ shared_ptr<runtime::Executable> ...@@ -1450,6 +1501,42 @@ shared_ptr<runtime::Executable>
const cldnn::tensor strides(1, 1, win_stride.at(1), win_stride.at(0)); const cldnn::tensor strides(1, 1, win_stride.at(1), win_stride.at(0));
const cldnn::tensor dilation(1, 1, win_dilation.at(1), win_dilation.at(0)); const cldnn::tensor dilation(1, 1, win_dilation.at(1), win_dilation.at(0));
if (op_type_id == OP_TYPEID::ConvolutionBias)
{
const cldnn::convolution cldnn_conv_bias(op->get_output_tensor_name(0),
op_input_name,
{op->get_input_tensor_name(1)},
{op->get_input_tensor_name(2)},
strides,
input_offset,
dilation);
topology.add(cldnn_conv_bias);
}
else if (op_type_id == OP_TYPEID::ConvolutionBiasAdd)
{
// Do not understand which cldnn::convolution::ctor() should be called
// make it clear by two operations
const string intermediate_name =
op_input_name + op->get_output_tensor_name(0) + "_intermediate";
const cldnn::convolution cldnn_conv_bias(intermediate_name,
op_input_name,
{op->get_input_tensor_name(1)},
{op->get_input_tensor_name(2)},
strides,
input_offset,
dilation);
topology.add(cldnn_conv_bias);
const cldnn::eltwise cldnn_conv_bias_add(
op->get_output_tensor_name(0),
{intermediate_name, op->get_input_tensor_name(3)},
cldnn::eltwise_mode::sum);
topology.add(cldnn_conv_bias_add);
}
else
{
const cldnn::convolution cldnn_conv(op->get_output_tensor_name(0), const cldnn::convolution cldnn_conv(op->get_output_tensor_name(0),
op_input_name, op_input_name,
{op->get_input_tensor_name(1)}, {op->get_input_tensor_name(1)},
...@@ -1458,6 +1545,15 @@ shared_ptr<runtime::Executable> ...@@ -1458,6 +1545,15 @@ shared_ptr<runtime::Executable>
dilation); dilation);
topology.add(cldnn_conv); topology.add(cldnn_conv);
} }
}
break;
}
case OP_TYPEID::ConvolutionBiasBackpropFiltersBias:
{
arguments_check(op, 2, 2);
kern.emit<op::ConvolutionBiasBackpropFiltersBias>(
static_pointer_cast<op::ConvolutionBiasBackpropFiltersBias>(op));
break; break;
} }
case OP_TYPEID::ConvolutionBackpropFilters: case OP_TYPEID::ConvolutionBackpropFilters:
...@@ -1839,6 +1935,8 @@ shared_ptr<runtime::Executable> ...@@ -1839,6 +1935,8 @@ shared_ptr<runtime::Executable>
case OP_TYPEID::DynBroadcast: case OP_TYPEID::DynBroadcast:
case OP_TYPEID::Passthrough: case OP_TYPEID::Passthrough:
case OP_TYPEID::DynPad: case OP_TYPEID::DynPad:
case OP_TYPEID::PRelu:
default:
{ {
throw unsupported_op("Unsupported op '" + op->description() + throw unsupported_op("Unsupported op '" + op->description() +
"' in IntelGPU back end."); "' in IntelGPU back end.");
......
...@@ -97,6 +97,7 @@ void runtime::intelgpu::CustomKernels::queue_krnl(const krnl_info& krnl_info, ...@@ -97,6 +97,7 @@ void runtime::intelgpu::CustomKernels::queue_krnl(const krnl_info& krnl_info,
kr.m_lws); kr.m_lws);
stream.add(kernel_item); stream.add(kernel_item);
#endif #endif
++m_count_krnls;
} }
} }
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "ngraph/op/broadcast.hpp" #include "ngraph/op/broadcast.hpp"
#include "ngraph/op/convolution.hpp" #include "ngraph/op/convolution.hpp"
#include "ngraph/op/equal.hpp" #include "ngraph/op/equal.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/greater.hpp" #include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp" #include "ngraph/op/greater_eq.hpp"
#include "ngraph/op/less.hpp" #include "ngraph/op/less.hpp"
...@@ -111,8 +112,6 @@ public: ...@@ -111,8 +112,6 @@ public:
krnl_info = build_krnl(op); krnl_info = build_krnl(op);
queue_krnl(krnl_info, op); queue_krnl(krnl_info, op);
++m_count_krnls;
} }
size_t get_custom_kernel_count() const { return m_count_krnls; } size_t get_custom_kernel_count() const { return m_count_krnls; }
...@@ -129,6 +128,9 @@ private: ...@@ -129,6 +128,9 @@ private:
krnl_info build_krnl(const std::shared_ptr<op::Convolution>& op) const; krnl_info build_krnl(const std::shared_ptr<op::Convolution>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropData>& op) const; krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropData>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropFilters>& op) const; krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropFilters>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBias>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBiasAdd>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBiasBackpropFiltersBias>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Equal>& op) const; krnl_info build_krnl(const std::shared_ptr<op::Equal>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Greater>& op) const; krnl_info build_krnl(const std::shared_ptr<op::Greater>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::GreaterEq>& op) const; krnl_info build_krnl(const std::shared_ptr<op::GreaterEq>& op) const;
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
using namespace ngraph::runtime::intelgpu;
// this is duplication of the runtime::intelgpu::access_dims // this is duplication of the runtime::intelgpu::access_dims
// needs to be merged but not at the same time as this new code // needs to be merged but not at the same time as this new code
...@@ -86,11 +87,14 @@ static string array_dim(const Shape& dimentions, const string& var = "i", bool i ...@@ -86,11 +87,14 @@ static string array_dim(const Shape& dimentions, const string& var = "i", bool i
// data[ batch, data_channel, 2, 4 ] // data[ batch, data_channel, 2, 4 ]
// filter[ data_channel, output_channel, 2, 2 ] // filter[ data_channel, output_channel, 2, 2 ]
// output[ batch, output_channel, 3, 5 ] // output[ batch, output_channel, 3, 5 ]
static runtime::intelgpu::CustomKernels::krnl_info static CustomKernels::krnl_info do_convolution_operation(const string& input_name,
do_convolution_operation(const string& input_name,
const Shape& input_shape, const Shape& input_shape,
const string& filter_name, const string& filter_name,
const Shape& filter_shape, const Shape& filter_shape,
const string& bias_name,
const Shape& bias_shape,
const string& shift_name,
const Shape& shift_shape,
const string& output_name, const string& output_name,
const Shape& output_shape, const Shape& output_shape,
const element::Type& output_type, const element::Type& output_type,
...@@ -106,39 +110,51 @@ static runtime::intelgpu::CustomKernels::krnl_info ...@@ -106,39 +110,51 @@ static runtime::intelgpu::CustomKernels::krnl_info
const string& output_order, const string& output_order,
bool reverse_filter) bool reverse_filter)
{ {
const string kernel_type_name = runtime::intelgpu::get_opencl_type_name(output_type); const string kernel_type_name = get_opencl_type_name(output_type);
const string entry_point_name = "convolution_" + output_name; const string entry_point_name = "convolution_" + output_name;
const Shape input_data(input_shape.cbegin() + 2, input_shape.cend()); const Shape input_data(input_shape.cbegin() + 2, input_shape.cend());
const Shape filter_data(filter_shape.cbegin() + 2, filter_shape.cend()); const Shape filter_data(filter_shape.cbegin() + 2, filter_shape.cend());
const Shape output_data(output_shape.cbegin() + 2, output_shape.cend()); const Shape output_data(output_shape.cbegin() + 2, output_shape.cend());
string acc_init = "0.0";
CodeWriter writer; CodeWriter writer;
vector<size_t> gws; vector<size_t> gws;
writer << "__kernel void " << entry_point_name << "(const __global " << kernel_type_name writer << "__kernel void " << entry_point_name << "(const __global " << kernel_type_name
<< " input" << runtime::intelgpu::array_dims(input_shape) << ", const __global " << " input" << array_dims(input_shape) << ", const __global " << kernel_type_name
<< kernel_type_name << " filter" << runtime::intelgpu::array_dims(filter_shape) << " filter" << array_dims(filter_shape);
<< ", __global " << kernel_type_name << " output"
<< runtime::intelgpu::array_dims(output_shape) << ")\n"; if (!bias_name.empty())
{
writer << ", const __global " << kernel_type_name << " bias" << array_dims(bias_shape);
}
if (!shift_name.empty())
{
writer << ", const __global " << kernel_type_name << " shift" << array_dims(shift_shape);
}
writer << ", __global " << kernel_type_name << " output" << array_dims(output_shape);
writer << ")\n";
writer.block_begin(); writer.block_begin();
{ // Main function body { // Main function body
writer << "const unsigned batch = get_global_id(0);\n"; writer << "const unsigned batch = get_global_id(0); /*batch trip count: "
<< output_shape.at(batch_axis_data) << "*/\n";
gws.push_back(output_shape.at(batch_axis_data)); gws.push_back(output_shape.at(batch_axis_data));
writer << "// for (uint batch = 0; batch < " << output_shape.at(batch_axis_data)
<< "; ++batch)\n";
writer.block_begin(); writer.block_begin();
{ {
writer << "const unsigned output_channel = get_global_id(1);\n"; writer
<< "const unsigned output_channel = get_global_id(1); /*output_channel trip count: "
<< output_shape.at(output_channel_axis_result) << "*/\n";
gws.push_back(output_shape.at(output_channel_axis_result)); gws.push_back(output_shape.at(output_channel_axis_result));
writer << "// for (uint output_channel = 0; output_channel < "
<< output_shape.at(output_channel_axis_result) << "; ++output_channel)\n";
writer.block_begin(); writer.block_begin();
{ {
// The first loop over output dimensions // The first loop over output dimensions
writer << "const unsigned i0 = get_global_id(2);\n"; writer << "const unsigned i0 = get_global_id(2); /*i0 trip count: "
<< output_data.at(0) << "*/\n";
gws.push_back(output_data.at(0)); gws.push_back(output_data.at(0));
writer << "// for (uint i0 = 0; i0 < " << output_data.at(0) << "; ++i0)\n";
writer.block_begin(); writer.block_begin();
{ {
// Loops over other output dimensions // Loops over other output dimensions
...@@ -152,7 +168,11 @@ static runtime::intelgpu::CustomKernels::krnl_info ...@@ -152,7 +168,11 @@ static runtime::intelgpu::CustomKernels::krnl_info
++var_idx; ++var_idx;
} }
writer << kernel_type_name << " result = 0.0;\n\n" if (!bias_name.empty())
{
acc_init = "bias[output_channel]";
}
writer << kernel_type_name << " result = " << acc_init << ";\n\n"
<< "// Loop over input_channel\n" << "// Loop over input_channel\n"
<< "for (uint input_channel = 0; input_channel < " << "for (uint input_channel = 0; input_channel < "
<< input_shape.at(input_channel_axis_data) << "; ++input_channel)\n"; << input_shape.at(input_channel_axis_data) << "; ++input_channel)\n";
...@@ -243,8 +263,12 @@ static runtime::intelgpu::CustomKernels::krnl_info ...@@ -243,8 +263,12 @@ static runtime::intelgpu::CustomKernels::krnl_info
writer.block_end(); writer.block_end();
writer << "// End input_channel loop\n"; writer << "// End input_channel loop\n";
writer << output_order << runtime::intelgpu::access_dims(output_data) writer << output_order << access_dims(output_data) << " = result";
<< " = result;\n"; if (!shift_name.empty())
{
writer << " + shift[batch][output_channel]" << access_dims(output_data);
}
writer << ";\n";
// Closing brackets for other output dimensions // Closing brackets for other output dimensions
for (auto i = output_data.begin() + 1; i != output_data.end(); ++i) for (auto i = output_data.begin() + 1; i != output_data.end(); ++i)
...@@ -261,24 +285,87 @@ static runtime::intelgpu::CustomKernels::krnl_info ...@@ -261,24 +285,87 @@ static runtime::intelgpu::CustomKernels::krnl_info
} // Main function body } // Main function body
writer.block_end(); writer.block_end();
const runtime::intelgpu::CustomKernelInfo krn_ret(output_name, vector<string> inputs = {input_name, filter_name};
output_shape,
output_type, if (!bias_name.empty())
{input_name, filter_name}, {
{writer.get_code()}, inputs.push_back(bias_name);
entry_point_name, }
gws);
if (!shift_name.empty())
{
inputs.push_back(shift_name);
}
const CustomKernelInfo krn_ret(
output_name, output_shape, output_type, inputs, {writer.get_code()}, entry_point_name, gws);
return {krn_ret}; return {krn_ret};
} }
runtime::intelgpu::CustomKernels::krnl_info CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Convolution>& op) const
runtime::intelgpu::CustomKernels::build_krnl(const shared_ptr<op::Convolution>& op) const {
return do_convolution_operation(op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_input_tensor_name(1),
op->get_input_shape(1),
string(),
{},
string(),
{},
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
op->get_padding_below(),
op->get_window_movement_strides(),
op->get_window_dilation_strides(),
op->get_data_dilation_strides(),
0,
1,
1,
"input[batch][input_channel]",
"filter[output_channel][input_channel]",
"output[batch][output_channel]",
false);
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::ConvolutionBias>& op) const
{
return do_convolution_operation(op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_input_tensor_name(1),
op->get_input_shape(1),
op->get_input_tensor_name(2),
op->get_input_shape(2),
string(),
{},
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
op->get_padding_below(),
op->get_window_movement_strides(),
op->get_window_dilation_strides(),
op->get_data_dilation_strides(),
0,
1,
1,
"input[batch][input_channel]",
"filter[output_channel][input_channel]",
"output[batch][output_channel]",
false);
}
CustomKernels::krnl_info
CustomKernels::build_krnl(const shared_ptr<op::ConvolutionBiasAdd>& op) const
{ {
return do_convolution_operation(op->get_input_tensor_name(0), return do_convolution_operation(op->get_input_tensor_name(0),
op->get_input_shape(0), op->get_input_shape(0),
op->get_input_tensor_name(1), op->get_input_tensor_name(1),
op->get_input_shape(1), op->get_input_shape(1),
op->get_input_tensor_name(2),
op->get_input_shape(2),
op->get_input_tensor_name(3),
op->get_input_shape(3),
op->get_output_tensor_name(0), op->get_output_tensor_name(0),
op->get_output_shape(0), op->get_output_shape(0),
op->get_output_element_type(0), op->get_output_element_type(0),
...@@ -295,13 +382,47 @@ runtime::intelgpu::CustomKernels::krnl_info ...@@ -295,13 +382,47 @@ runtime::intelgpu::CustomKernels::krnl_info
false); false);
} }
runtime::intelgpu::CustomKernels::krnl_info runtime::intelgpu::CustomKernels::build_krnl( CustomKernels::krnl_info
const shared_ptr<op::ConvolutionBackpropFilters>& op) const CustomKernels::build_krnl(const shared_ptr<op::ConvolutionBackpropFilters>& op) const
{ {
return do_convolution_operation(op->get_input_tensor_name(0), return do_convolution_operation(op->get_input_tensor_name(0),
op->get_input_shape(0), op->get_input_shape(0),
op->get_input_tensor_name(1), op->get_input_tensor_name(1),
op->get_input_shape(1), op->get_input_shape(1),
string(),
{},
string(),
{},
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
op->get_padding_below_forward(),
op->get_window_dilation_strides_forward(),
op->get_window_movement_strides_forward(),
op->get_data_dilation_strides_forward(),
1,
0,
0,
"input[input_channel][batch]",
"filter[input_channel][output_channel]",
"output[output_channel][batch]",
false);
}
CustomKernels::krnl_info
CustomKernels::build_krnl(const shared_ptr<op::ConvolutionBiasBackpropFiltersBias>& op) const
{
CustomKernels::krnl_info result;
CustomKernels::krnl_info filter =
do_convolution_operation(op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_input_tensor_name(1),
op->get_input_shape(1),
string(),
{},
string(),
{},
op->get_output_tensor_name(0), op->get_output_tensor_name(0),
op->get_output_shape(0), op->get_output_shape(0),
op->get_output_element_type(0), op->get_output_element_type(0),
...@@ -316,15 +437,34 @@ runtime::intelgpu::CustomKernels::krnl_info runtime::intelgpu::CustomKernels::bu ...@@ -316,15 +437,34 @@ runtime::intelgpu::CustomKernels::krnl_info runtime::intelgpu::CustomKernels::bu
"filter[input_channel][output_channel]", "filter[input_channel][output_channel]",
"output[output_channel][batch]", "output[output_channel][batch]",
false); false);
result.insert(result.end(), filter.begin(), filter.end());
AxisSet reduce_axes;
reduce_axes.insert(0);
for (size_t i = 2; i < op->get_output_shape(0).size(); i++)
{
reduce_axes.insert(i);
}
shared_ptr<op::Sum> bias_bprop_op = make_shared<op::Sum>(op->get_argument(1), reduce_axes);
CustomKernels::krnl_info bias_bprop = build_krnl(bias_bprop_op);
bias_bprop.at(0).m_name = op->get_output_tensor_name(1);
result.insert(result.end(), bias_bprop.begin(), bias_bprop.end());
return result;
} }
runtime::intelgpu::CustomKernels::krnl_info runtime::intelgpu::CustomKernels::build_krnl( CustomKernels::krnl_info
const shared_ptr<op::ConvolutionBackpropData>& op) const CustomKernels::build_krnl(const shared_ptr<op::ConvolutionBackpropData>& op) const
{ {
return do_convolution_operation(op->get_input_tensor_name(1), return do_convolution_operation(op->get_input_tensor_name(1),
op->get_input_shape(1), op->get_input_shape(1),
op->get_input_tensor_name(0), op->get_input_tensor_name(0),
op->get_input_shape(0), op->get_input_shape(0),
string(),
{},
string(),
{},
op->get_output_tensor_name(0), op->get_output_tensor_name(0),
op->get_output_shape(0), op->get_output_shape(0),
op->get_output_element_type(0), op->get_output_element_type(0),
......
...@@ -25,6 +25,10 @@ shape_of_vector ...@@ -25,6 +25,10 @@ shape_of_vector
floor_int32 floor_int32
convert_int32_bool convert_int32_bool
convert_float32_bool convert_float32_bool
prelu
prelu_shared_slope
prelu_negative_slope
group_conv
# Unsupported extra padding modes # Unsupported extra padding modes
pad_edge_1d pad_edge_1d
......
...@@ -55,6 +55,7 @@ using namespace std; ...@@ -55,6 +55,7 @@ using namespace std;
#define NGRAPH_OP(a, b) a, #define NGRAPH_OP(a, b) a,
enum class OP_TYPEID enum class OP_TYPEID
{ {
#include "ngraph/op/fused_op_tbl.hpp"
#include "ngraph/op/op_tbl.hpp" #include "ngraph/op/op_tbl.hpp"
UNDEFINED_OP UNDEFINED_OP
}; };
...@@ -68,6 +69,7 @@ static OP_TYPEID get_typeid(const string& s) ...@@ -68,6 +69,7 @@ static OP_TYPEID get_typeid(const string& s)
// ... // ...
#define NGRAPH_OP(a, b) {#a, OP_TYPEID::a}, #define NGRAPH_OP(a, b) {#a, OP_TYPEID::a},
static const unordered_map<string, OP_TYPEID> typeid_map{ static const unordered_map<string, OP_TYPEID> typeid_map{
#include "ngraph/op/fused_op_tbl.hpp"
#include "ngraph/op/op_tbl.hpp" #include "ngraph/op/op_tbl.hpp"
}; };
#undef NGRAPH_OP #undef NGRAPH_OP
...@@ -329,6 +331,8 @@ void print_node_parameters(ostringstream& writer, const shared_ptr<Node>& node) ...@@ -329,6 +331,8 @@ void print_node_parameters(ostringstream& writer, const shared_ptr<Node>& node)
break; break;
} }
case OP_TYPEID::Convolution: case OP_TYPEID::Convolution:
case OP_TYPEID::ConvolutionBias:
case OP_TYPEID::ConvolutionBiasAdd:
{ {
const shared_ptr<op::Convolution> conv_op = static_pointer_cast<op::Convolution>(node); const shared_ptr<op::Convolution> conv_op = static_pointer_cast<op::Convolution>(node);
...@@ -340,6 +344,7 @@ void print_node_parameters(ostringstream& writer, const shared_ptr<Node>& node) ...@@ -340,6 +344,7 @@ void print_node_parameters(ostringstream& writer, const shared_ptr<Node>& node)
break; break;
} }
case OP_TYPEID::ConvolutionBackpropFilters: case OP_TYPEID::ConvolutionBackpropFilters:
case OP_TYPEID::ConvolutionBiasBackpropFiltersBias:
{ {
const shared_ptr<op::ConvolutionBackpropFilters> conv_op_filt = const shared_ptr<op::ConvolutionBackpropFilters> conv_op_filt =
static_pointer_cast<op::ConvolutionBackpropFilters>(node); static_pointer_cast<op::ConvolutionBackpropFilters>(node);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment