Commit 780ef08c authored by Sergey Shalnov, committed by Robert Kimball

IntelGPU backend: support all types for eltwise and unary operations (#2460)

parent afcc4ca8
......@@ -170,30 +170,15 @@ static const element::Type& get_output_type(const shared_ptr<Node>& op, size_t n
static void do_eltwise_operation(cldnn::topology& topology,
const shared_ptr<Node>& op,
const string& custom_op,
bool function_operation,
cldnn::eltwise_mode mode)
{
arguments_check(op, 2, 1);
// Leave it here for some time
#if USE_INTELGPU_CUSTOM_KERNELS
if ((get_input_type(op) == element::i32 || get_input_type(op) == element::i64) &&
(mode == cldnn::eltwise_mode::min || mode == cldnn::eltwise_mode::max))
if (get_input_type(op) != element::f32 || get_input_type(op, 1) != element::f32 ||
get_output_type(op) != element::f32)
{
string custom_op;
if (mode == cldnn::eltwise_mode::min)
{
custom_op = "min";
}
else if (mode == cldnn::eltwise_mode::max)
{
custom_op = "max";
}
else
{
custom_op = "not_implemented_operation";
}
runtime::intelgpu::do_eltwise_kernel(topology,
get_input_name(op, 0),
get_input_shape(op, 0),
......@@ -203,31 +188,59 @@ static void do_eltwise_operation(cldnn::topology& topology,
get_output_name(op),
get_output_shape(op),
get_output_type(op),
custom_op);
custom_op,
function_operation);
}
else
{
const cldnn::eltwise op_add(
const cldnn::eltwise op_eltwise(
get_output_name(op), {get_input_name(op, 0), get_input_name(op, 1)}, mode);
topology.add(op_add);
topology.add(op_eltwise);
}
#else
}
const cldnn::eltwise op_eltwise(
get_output_name(op), {get_input_name(op, 0), get_input_name(op, 1)}, mode);
topology.add(op_eltwise);
#endif
static void do_cldnn_unary(cldnn::topology& topology,
const shared_ptr<Node>& op,
cldnn_activation_func mode,
const cldnn_activation_additional_params& param = {0.f, 0.f})
{
arguments_check(op, 1, 1);
const cldnn::activation cldnn_unary(get_output_name(op), get_input_name(op), mode, param);
topology.add(cldnn_unary);
}
static void
do_custom_unary(cldnn::topology& topology, const shared_ptr<Node>& op, const string& operation)
{
arguments_check(op, 1, 1);
runtime::intelgpu::do_custom_unary_operation(topology,
get_input_name(op),
get_input_shape(op),
get_input_type(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
operation);
}
static void do_unary_operation(cldnn::topology& topology,
static void do_universal_unary(cldnn::topology& topology,
const shared_ptr<Node>& op,
const string& operation,
cldnn_activation_func mode,
const cldnn_activation_additional_params& param = {0.f, 0.f})
{
arguments_check(op, 1, 1);
const cldnn::activation cldnn_unary(get_output_name(op), get_input_name(op), mode, param);
topology.add(cldnn_unary);
if (get_input_type(op) != element::f32)
{
do_custom_unary(topology, op, operation);
}
else
{
do_cldnn_unary(topology, op, mode, param);
}
}
static void do_pooling_operation(cldnn::topology& topology,
......@@ -608,7 +621,8 @@ shared_ptr<runtime::Executable>
// clDNN has limited support for the Softmax operation
// The following checks select when the custom kernel must be used
if ((shape_dim_count > 3) || ((shape_dim_count == 3) && (axes_size == 2)))
if ((shape_dim_count > 3) || ((shape_dim_count == 3) && (axes_size == 2)) ||
(get_input_type(op) != element::f32))
{
do_softmax_operation(topology,
get_input_name(op),
......@@ -644,27 +658,37 @@ shared_ptr<runtime::Executable>
}
case OP_TYPEID::Add:
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::sum);
do_eltwise_operation(topology, op, "+", false, cldnn::eltwise_mode::sum);
break;
}
case OP_TYPEID::Subtract:
{
do_eltwise_operation(topology, op, "-", false, cldnn::eltwise_mode::sub);
break;
}
case OP_TYPEID::Multiply:
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::prod);
do_eltwise_operation(topology, op, "*", false, cldnn::eltwise_mode::prod);
break;
}
case OP_TYPEID::Divide:
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::div);
do_eltwise_operation(topology, op, "/", false, cldnn::eltwise_mode::div);
break;
}
case OP_TYPEID::Maximum:
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::max);
do_eltwise_operation(topology, op, "max", true, cldnn::eltwise_mode::max);
break;
}
case OP_TYPEID::Minimum:
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::min);
do_eltwise_operation(topology, op, "min", true, cldnn::eltwise_mode::min);
break;
}
case OP_TYPEID::Power:
{
do_eltwise_operation(topology, op, "pow", true, cldnn::eltwise_mode::pow);
break;
}
case OP_TYPEID::Constant:
......@@ -945,29 +969,6 @@ shared_ptr<runtime::Executable>
}
break;
}
case OP_TYPEID::Negative:
{
if (get_input_type(op) == ngraph::element::i32)
{
// This is a workaround to enable GNMT in training mode.
// clDNN doesn't support i32 data type for activation primitive.
// Exception from clDNN: implementation_map for N5cldnn10activationE
// could not find any implementation to match key
do_negative_operation(topology,
get_input_name(op),
get_input_shape(op),
get_input_type(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op));
}
else
{
const cldnn_activation_additional_params param = {-1.f, 0.f};
do_unary_operation(topology, op, activation_linear, param);
}
break;
}
case OP_TYPEID::All:
{
arguments_check(op, 1, 1);
......@@ -1014,7 +1015,12 @@ shared_ptr<runtime::Executable>
}
case OP_TYPEID::Relu:
{
do_unary_operation(topology, op, activation_relu);
do_cldnn_unary(topology, op, activation_relu);
break;
}
case OP_TYPEID::Sigmoid:
{
do_cldnn_unary(topology, op, activation_logistic);
break;
}
case OP_TYPEID::ReluBackprop:
......@@ -1032,62 +1038,88 @@ shared_ptr<runtime::Executable>
}
case OP_TYPEID::Abs:
{
do_unary_operation(topology, op, activation_abs);
do_universal_unary(topology, op, "fabs", activation_abs);
break;
}
case OP_TYPEID::Sqrt:
{
do_unary_operation(topology, op, activation_sqrt);
do_universal_unary(topology, op, "sqrt", activation_sqrt);
break;
}
case OP_TYPEID::Tanh:
{
do_unary_operation(topology, op, activation_hyperbolic_tan);
do_universal_unary(topology, op, "tanh", activation_hyperbolic_tan);
break;
}
case OP_TYPEID::Sin:
{
do_unary_operation(topology, op, activation_sin);
do_universal_unary(topology, op, "sin", activation_sin);
break;
}
case OP_TYPEID::Asin:
{
do_unary_operation(topology, op, activation_asin);
do_universal_unary(topology, op, "asin", activation_asin);
break;
}
case OP_TYPEID::Sinh:
{
do_unary_operation(topology, op, activation_sinh);
do_universal_unary(topology, op, "sinh", activation_sinh);
break;
}
case OP_TYPEID::Cos:
{
do_unary_operation(topology, op, activation_cos);
do_universal_unary(topology, op, "cos", activation_cos);
break;
}
case OP_TYPEID::Acos:
{
do_unary_operation(topology, op, activation_acos);
do_universal_unary(topology, op, "acos", activation_acos);
break;
}
case OP_TYPEID::Cosh:
{
do_unary_operation(topology, op, activation_cosh);
do_universal_unary(topology, op, "cosh", activation_cosh);
break;
}
case OP_TYPEID::Log:
{
do_unary_operation(topology, op, activation_log);
do_universal_unary(topology, op, "log", activation_log);
break;
}
case OP_TYPEID::Exp:
{
do_unary_operation(topology, op, activation_exp);
do_universal_unary(topology, op, "exp", activation_exp);
break;
}
case OP_TYPEID::Sigmoid:
case OP_TYPEID::Negative:
{
const cldnn_activation_additional_params param = {-1.f, 0.f};
do_universal_unary(topology, op, "-", activation_linear, param);
break;
}
case OP_TYPEID::Atan:
{
do_custom_unary(topology, op, "atan");
break;
}
case OP_TYPEID::Ceiling:
{
do_custom_unary(topology, op, "ceil");
break;
}
case OP_TYPEID::Floor:
{
do_custom_unary(topology, op, "floor");
break;
}
case OP_TYPEID::Sign:
{
do_custom_unary(topology, op, "sign");
break;
}
case OP_TYPEID::Tan:
{
do_unary_operation(topology, op, activation_logistic);
do_custom_unary(topology, op, "tan");
break;
}
case OP_TYPEID::SigmoidBackprop:
......@@ -1156,81 +1188,6 @@ shared_ptr<runtime::Executable>
do_logical_operation(topology, op, " || ");
break;
}
case OP_TYPEID::Subtract:
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::sub);
break;
}
case OP_TYPEID::Power:
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::pow);
break;
}
case OP_TYPEID::Atan:
{
arguments_check(op, 1, 1);
do_custom_eltwise_operation(topology,
get_input_name(op),
get_input_shape(op),
get_input_type(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
CUSTOM_ELTWISE::Atan);
break;
}
case OP_TYPEID::Ceiling:
{
arguments_check(op, 1, 1);
do_custom_eltwise_operation(topology,
get_input_name(op),
get_input_shape(op),
get_input_type(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
CUSTOM_ELTWISE::Ceil);
break;
}
case OP_TYPEID::Floor:
{
arguments_check(op, 1, 1);
do_custom_eltwise_operation(topology,
get_input_name(op),
get_input_shape(op),
get_input_type(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
CUSTOM_ELTWISE::Floor);
break;
}
case OP_TYPEID::Sign:
{
arguments_check(op, 1, 1);
do_custom_eltwise_operation(topology,
get_input_name(op),
get_input_shape(op),
get_input_type(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
CUSTOM_ELTWISE::Sign);
break;
}
case OP_TYPEID::Tan:
{
arguments_check(op, 1, 1);
do_custom_eltwise_operation(topology,
get_input_name(op),
get_input_shape(op),
get_input_type(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
CUSTOM_ELTWISE::Tan);
break;
}
case OP_TYPEID::Pad:
{
arguments_check(op, 2, 1);
......
......@@ -1012,7 +1012,8 @@ void runtime::intelgpu::do_eltwise_kernel(cldnn::topology& topology,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const string& operation)
const string& operation,
bool function_operation)
{
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const string entry_point_name = "eltwise_" + output_name;
......@@ -1031,15 +1032,34 @@ void runtime::intelgpu::do_eltwise_kernel(cldnn::topology& topology,
// Main loops
gws = generate_loops(writer, output_shape, true);
writer << "output" << access_dims(output_shape) << " = " << operation << "(input0"
<< access_dims(input0_shape) << ", input1" << access_dims(input1_shape) << ");\n";
writer << "output" << access_dims(output_shape) << " = ";
if (function_operation)
{
string explicit_conversion;
// TODO: need a better workaround for this built-in
if (operation == "pow")
{
explicit_conversion = "convert_double";
}
writer << operation << "(" << explicit_conversion << "(input0"
<< access_dims(input0_shape) << "), " << explicit_conversion << "(input1"
<< access_dims(input1_shape) << "));";
}
else
{
writer << "(input0" << access_dims(input0_shape) << " " << operation << " input1"
<< access_dims(input1_shape) << ");";
}
writer << " // " << get_opencl_type_name(input0_type) << " "
<< get_opencl_type_name(output_type) << "\n";
// Closing brackets for main loops
generate_loops(writer, output_shape, false);
}
writer.block_end();
const cldnn::custom_gpu_primitive op_logical(output_name,
const cldnn::custom_gpu_primitive op_eltwise(output_name,
{input0_name, input1_name},
{writer.get_code()},
entry_point_name,
......@@ -1047,7 +1067,7 @@ void runtime::intelgpu::do_eltwise_kernel(cldnn::topology& topology,
"",
layout,
gws);
topology.add(op_logical);
topology.add(op_eltwise);
}
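For illustration, here is a rough sketch of the OpenCL C this writer emits for the two branches. Everything below is hypothetical: the kernel and argument names stand in for "eltwise_" + output_name and the signature produced by gen_func_def, the plain for loop stands in for whatever generate_loops produces, the shapes are an arbitrary 1-D size of 4, and the pow variant assumes the device exposes fp64 (cl_khr_fp64) for convert_double.

// Hypothetical infix branch: function_operation == false, operation == "+"
__kernel void eltwise_add(const __global float input0[4],
                          const __global float input1[4],
                          __global float output[4])
{
    for (uint i0 = 0; i0 < 4; ++i0)
    {
        output[i0] = (input0[i0] + input1[i0]); // float float
    }
}

// Hypothetical function branch: function_operation == true, operation == "pow";
// both operands pass through convert_double, per the workaround above
__kernel void eltwise_pow(const __global int input0[4],
                          const __global int input1[4],
                          __global int output[4])
{
    for (uint i0 = 0; i0 < 4; ++i0)
    {
        output[i0] = pow(convert_double(input0[i0]),
                         convert_double(input1[i0])); // int int
    }
}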
void runtime::intelgpu::do_reverse_operation(cldnn::topology& topology,
......@@ -1268,16 +1288,17 @@ void runtime::intelgpu::do_sigmoid_backprop_operation(cldnn::topology& topology,
topology.add(op_sigmoid_backprop);
}
void runtime::intelgpu::do_custom_eltwise_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const CUSTOM_ELTWISE operation_name)
void runtime::intelgpu::do_custom_unary_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const string& operation_name)
{
const string entry_point_name = "op_custom_eltwise_" + output_name;
const string entry_point_name = "op_custom_unary_" + output_name;
const string intermediate_type = input_type.size() < 8 ? "float" : "double";
codegen::CodeWriter writer;
vector<size_t> gws;
......@@ -1290,39 +1311,19 @@ void runtime::intelgpu::do_custom_eltwise_operation(cldnn::topology& topology,
writer.block_begin();
{
gws = generate_loops(writer, output_shape, true);
writer << "output" << access_dims(output_shape) << " = ";
switch (operation_name)
{
case CUSTOM_ELTWISE::Atan:
{
writer << "atan";
break;
}
case CUSTOM_ELTWISE::Ceil:
{
writer << "ceil";
break;
}
case CUSTOM_ELTWISE::Floor:
{
if (input_type.is_real())
{
writer << "floor";
}
break;
}
case CUSTOM_ELTWISE::Sign:
{
writer << "sign";
break;
}
case CUSTOM_ELTWISE::Tan:
{
writer << "tan";
break;
}
}
writer << "(input0" << access_dims(input_shape) << ");\n";
// Convert the input to the intermediate floating-point type
writer << intermediate_type << " input_var = convert_" << intermediate_type
<< "(input0" << access_dims(input_shape) << ");\n";
// Apply the operation in that intermediate type
writer << intermediate_type << " output_var = " << operation_name
<< "(input_var); // Type: " << get_opencl_type_name(input_type) << "\n";
// Convert the result to the destination type
writer << "output" << access_dims(output_shape) << " = convert_"
<< get_opencl_type_name(output_type) << "(output_var);\n";
generate_loops(writer, output_shape, false);
}
writer.block_end();
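As a concrete, hypothetical example of what the code above generates: for Sign on a 1-D int64 tensor, input_type.size() is 8, so the intermediate type is double, and the emitted kernel would look roughly like the sketch below. The names, shape, and loop structure are illustrative, i64 is assumed to map to long via get_opencl_type_name, and fp64 support is assumed on the device.

__kernel void op_custom_unary_out(const __global long input0[4],
                                  __global long output[4])
{
    for (uint i0 = 0; i0 < 4; ++i0)
    {
        double input_var = convert_double(input0[i0]); // to the intermediate type
        double output_var = sign(input_var); // Type: long
        output[i0] = convert_long(output_var); // to the destination type
    }
}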
......@@ -1419,46 +1420,6 @@ void runtime::intelgpu::do_arg_max_min_operation(cldnn::topology& topology,
topology.add(op_arg_max_min);
}
void runtime::intelgpu::do_negative_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type)
{
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const string entry_point_name = "negative_" + output_name;
const string& input_type_name = get_opencl_type_name(input_type);
const string& output_type_name = get_opencl_type_name(output_type);
codegen::CodeWriter writer;
vector<size_t> gws;
gen_func_def(
writer, entry_point_name, {input_type_name}, {input_shape}, output_type_name, output_shape);
writer.block_begin();
{
gws = generate_loops(writer, output_shape, true);
writer << "output" << access_dims(output_shape) << " = - (input0"
<< access_dims(input_shape) << ");\n";
generate_loops(writer, output_shape, false);
}
writer.block_end();
const cldnn::custom_gpu_primitive op_negative(output_name,
{input_name},
{writer.get_code()},
entry_point_name,
get_kernel_args(1, 1),
"",
layout,
gws);
topology.add(op_negative);
}
void runtime::intelgpu::do_reshape_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
......
......@@ -119,7 +119,8 @@ namespace ngraph
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const std::string& operation);
const std::string& operation,
bool function_operation);
void do_reverse_operation(cldnn::topology& topology,
const std::string& input_name,
......@@ -162,23 +163,14 @@ namespace ngraph
const Shape& output_shape,
const element::Type& output_type);
enum class CUSTOM_ELTWISE
{
Atan,
Ceil,
Floor,
Sign,
Tan
};
void do_custom_eltwise_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const CUSTOM_ELTWISE operation_name);
void do_custom_unary_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const std::string& operation_name);
void do_arg_max_min_operation(cldnn::topology& topology,
const std::string& input_name,
......@@ -190,14 +182,6 @@ namespace ngraph
const size_t reduction_axis,
const bool is_max);
void do_negative_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type);
void do_reshape_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
......
......@@ -65,9 +65,10 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
codegen::CodeWriter writer1;
vector<size_t> gws;
writer0 << "__kernel void " << entry_point_middle_name << "(const __global float input"
<< array_dims(input_shape) << ", __global float output" << array_dims(input_shape, axes)
<< ")\n";
writer0 << "__kernel void " << entry_point_middle_name << "(const __global "
<< get_opencl_type_name(input_type) << " input" << array_dims(input_shape)
<< ", __global " << get_opencl_type_name(output_type) << " output"
<< array_dims(input_shape, axes) << ")\n";
writer0.block_begin();
{
......@@ -90,10 +91,11 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
gws);
topology.add(op_softmax_middle);
writer1 << "__kernel void " << entry_point_name << "(const __global float input0"
<< array_dims(input_shape) << ", const __global float input1"
<< array_dims(input_shape, axes) << ", __global float output"
<< array_dims(output_shape) << ")\n";
writer1 << "__kernel void " << entry_point_name << "(const __global "
<< get_opencl_type_name(input_type) << " input0" << array_dims(input_shape)
<< ", const __global " << get_opencl_type_name(input_type) << " input1"
<< array_dims(input_shape, axes) << ", __global " << get_opencl_type_name(output_type)
<< " output" << array_dims(output_shape) << ")\n";
writer1.block_begin();
{
......
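Only the generated kernel signatures change in this file; the kernel bodies are elided in the diff. Under the same illustrative assumptions as above (hypothetical names, an input shape of 2x3x4 reduced over axes that leave a single output dimension, i64 mapping to long), the first kernel's signature would now come out roughly as:

__kernel void softmax_middle_out(const __global long input[2][3][4],
                                 __global long output[3])
{
    /* body elided in the diff */
}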
......@@ -13,9 +13,6 @@ backwards_reverse_sequence_n3_c2_h3
backwards_reverse_sequence_n4d2c3h2w2
backwards_slice
batch_norm_bprop_n4c3h2w2
batch_norm_inference_0eps_f64
batch_norm_inference_f64
batch_norm_training_0eps_f64
dequantize
dequantize_axes
dequantize_dynamic_offset
......@@ -64,7 +61,6 @@ shape_of_5d
shape_of_matrix
shape_of_scalar
shape_of_vector
softmax_axis_3d_double
topk_1d_max_all
topk_1d_max_one
topk_1d_max_partial
......@@ -88,26 +84,3 @@ topk_3d_min_partial
topk_3d_single_output
topk_5d_max_partial
topk_int64
zero_sized_abs
zero_sized_acos
zero_sized_add
zero_sized_asin
zero_sized_atan
zero_sized_ceiling
zero_sized_cos
zero_sized_cosh
zero_sized_divide
zero_sized_exp
zero_sized_log
zero_sized_maximum
zero_sized_minimum
zero_sized_multiply
zero_sized_negative
zero_sized_power
zero_sized_sign
zero_sized_sin
zero_sized_sinh
zero_sized_sqrt
zero_sized_subtract
zero_sized_tan
zero_sized_tanh