Commit 780ef08c authored by Sergey Shalnov, committed by Robert Kimball

IntelGPU backend: Eltwise and unary operation all types support (#2460)

parent afcc4ca8
@@ -170,30 +170,15 @@ static const element::Type& get_output_type(const shared_ptr<Node>& op, size_t n
 static void do_eltwise_operation(cldnn::topology& topology,
                                  const shared_ptr<Node>& op,
+                                 const string& custom_op,
+                                 bool function_operation,
                                  cldnn::eltwise_mode mode)
 {
     arguments_check(op, 2, 1);

-    // Leave it here for some time
-#if USE_INTELGPU_CUSTOM_KERNELS
-    if ((get_input_type(op) == element::i32 || get_input_type(op) == element::i64) &&
-        (mode == cldnn::eltwise_mode::min || mode == cldnn::eltwise_mode::max))
+    if (get_input_type(op) != element::f32 || get_input_type(op, 1) != element::f32 ||
+        get_output_type(op) != element::f32)
     {
-        string custom_op;
-
-        if (mode == cldnn::eltwise_mode::min)
-        {
-            custom_op = "min";
-        }
-        else if (mode == cldnn::eltwise_mode::max)
-        {
-            custom_op = "max";
-        }
-        else
-        {
-            custom_op = "not_implemented_operation";
-        }
-
         runtime::intelgpu::do_eltwise_kernel(topology,
                                              get_input_name(op, 0),
                                              get_input_shape(op, 0),
@@ -203,23 +188,18 @@ static void do_eltwise_operation(cldnn::topology& topology,
                                              get_output_name(op),
                                              get_output_shape(op),
                                              get_output_type(op),
-                                             custom_op);
+                                             custom_op,
+                                             function_operation);
     }
     else
     {
-        const cldnn::eltwise op_add(
-            get_output_name(op), {get_input_name(op, 0), get_input_name(op, 1)}, mode);
-        topology.add(op_add);
-    }
-#else
-    const cldnn::eltwise op_eltwise(
-        get_output_name(op), {get_input_name(op, 0), get_input_name(op, 1)}, mode);
-    topology.add(op_eltwise);
-#endif
+        const cldnn::eltwise op_eltwise(
+            get_output_name(op), {get_input_name(op, 0), get_input_name(op, 1)}, mode);
+        topology.add(op_eltwise);
+    }
 }

-static void do_unary_operation(cldnn::topology& topology,
+static void do_cldnn_unary(cldnn::topology& topology,
                            const shared_ptr<Node>& op,
                            cldnn_activation_func mode,
                            const cldnn_activation_additional_params& param = {0.f, 0.f})
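Note on the hunk above (not part of the diff): binary eltwise ops now use the clDNN eltwise primitive only when both inputs and the output are f32; every other type combination falls through to do_eltwise_kernel, which generates an OpenCL kernel. The custom_op string is either an infix operator ("+", "-", "*", "/") or a built-in function name ("min", "max", "pow"), distinguished by function_operation. As an illustrative sketch only, with a hypothetical output name "out" and a serial loop standing in for whatever gen_func_def/generate_loops actually emit, the generated kernel for an i64 Add of shape {4} would look roughly like:

    // Illustrative sketch, not produced verbatim by the diff.
    __kernel void eltwise_out(const __global long input0[4],
                              const __global long input1[4],
                              __global long output[4])
    {
        for (uint i0 = 0; i0 < 4; ++i0)
        {
            // infix form: function_operation == false, custom_op == "+"
            output[i0] = (input0[i0] + input1[i0]); // long long
        }
    }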
@@ -230,6 +210,39 @@ static void do_unary_operation(cldnn::topology& topology,
     topology.add(cldnn_unary);
 }

+static void
+    do_custom_unary(cldnn::topology& topology, const shared_ptr<Node>& op, const string& operation)
+{
+    arguments_check(op, 1, 1);
+
+    runtime::intelgpu::do_custom_unary_operation(topology,
+                                                 get_input_name(op),
+                                                 get_input_shape(op),
+                                                 get_input_type(op),
+                                                 get_output_name(op),
+                                                 get_output_shape(op),
+                                                 get_output_type(op),
+                                                 operation);
+}
+
+static void do_universal_unary(cldnn::topology& topology,
+                               const shared_ptr<Node>& op,
+                               const string& operation,
+                               cldnn_activation_func mode,
+                               const cldnn_activation_additional_params& param = {0.f, 0.f})
+{
+    arguments_check(op, 1, 1);
+
+    if (get_input_type(op) != element::f32)
+    {
+        do_custom_unary(topology, op, operation);
+    }
+    else
+    {
+        do_cldnn_unary(topology, op, mode, param);
+    }
+}
+
 static void do_pooling_operation(cldnn::topology& topology,
                                  const shared_ptr<Node>& op,
                                  const Shape& pool_shape,
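Taken together, the three helpers split unary handling by element type: do_cldnn_unary wraps the clDNN activation primitive (which the diff treats as f32-only), do_custom_unary always generates an OpenCL kernel, and do_universal_unary routes between them by input type. An illustrative trace, not part of the diff:

    // f32 Tanh: keep the clDNN activation primitive
    do_universal_unary(topology, op, "tanh", activation_hyperbolic_tan);
    //   -> do_cldnn_unary(topology, op, activation_hyperbolic_tan, param)
    // i32/i64/f64 Tanh: generated OpenCL kernel calling the "tanh" built-in
    //   -> do_custom_unary(topology, op, "tanh")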
@@ -608,7 +621,8 @@ shared_ptr<runtime::Executable>

             // clDNN has limited support for Softmax operation
             // following are the checks to go with custom kernel
-            if ((shape_dim_count > 3) || ((shape_dim_count == 3) && (axes_size == 2)))
+            if ((shape_dim_count > 3) || ((shape_dim_count == 3) && (axes_size == 2)) ||
+                (get_input_type(op) != element::f32))
             {
                 do_softmax_operation(topology,
                                      get_input_name(op),
@@ -644,27 +658,37 @@ shared_ptr<runtime::Executable>
         }
         case OP_TYPEID::Add:
         {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::sum);
+            do_eltwise_operation(topology, op, "+", false, cldnn::eltwise_mode::sum);
+            break;
+        }
+        case OP_TYPEID::Subtract:
+        {
+            do_eltwise_operation(topology, op, "-", false, cldnn::eltwise_mode::sub);
             break;
         }
         case OP_TYPEID::Multiply:
         {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::prod);
+            do_eltwise_operation(topology, op, "*", false, cldnn::eltwise_mode::prod);
             break;
         }
         case OP_TYPEID::Divide:
         {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::div);
+            do_eltwise_operation(topology, op, "/", false, cldnn::eltwise_mode::div);
             break;
         }
         case OP_TYPEID::Maximum:
         {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::max);
+            do_eltwise_operation(topology, op, "max", true, cldnn::eltwise_mode::max);
             break;
         }
         case OP_TYPEID::Minimum:
         {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::min);
+            do_eltwise_operation(topology, op, "min", true, cldnn::eltwise_mode::min);
+            break;
+        }
+        case OP_TYPEID::Power:
+        {
+            do_eltwise_operation(topology, op, "pow", true, cldnn::eltwise_mode::pow);
             break;
         }
         case OP_TYPEID::Constant:
@@ -945,29 +969,6 @@ shared_ptr<runtime::Executable>
             }
             break;
         }
-        case OP_TYPEID::Negative:
-        {
-            if (get_input_type(op) == ngraph::element::i32)
-            {
-                // This is workaround to enable GNMT in training mode.
-                // clDNN doesn't support i32 data type for activation primitive.
-                // Exception from clDNN: implementation_map for N5cldnn10activationE
-                // could not find any implementation to match key
-                do_negative_operation(topology,
-                                      get_input_name(op),
-                                      get_input_shape(op),
-                                      get_input_type(op),
-                                      get_output_name(op),
-                                      get_output_shape(op),
-                                      get_output_type(op));
-            }
-            else
-            {
-                const cldnn_activation_additional_params param = {-1.f, 0.f};
-                do_unary_operation(topology, op, activation_linear, param);
-            }
-            break;
-        }
         case OP_TYPEID::All:
         {
             arguments_check(op, 1, 1);
@@ -1014,7 +1015,12 @@ shared_ptr<runtime::Executable>
         }
         case OP_TYPEID::Relu:
         {
-            do_unary_operation(topology, op, activation_relu);
+            do_cldnn_unary(topology, op, activation_relu);
+            break;
+        }
+        case OP_TYPEID::Sigmoid:
+        {
+            do_cldnn_unary(topology, op, activation_logistic);
             break;
         }
         case OP_TYPEID::ReluBackprop:
@@ -1032,62 +1038,88 @@ shared_ptr<runtime::Executable>
         }
         case OP_TYPEID::Abs:
         {
-            do_unary_operation(topology, op, activation_abs);
+            do_universal_unary(topology, op, "fabs", activation_abs);
             break;
         }
         case OP_TYPEID::Sqrt:
         {
-            do_unary_operation(topology, op, activation_sqrt);
+            do_universal_unary(topology, op, "sqrt", activation_sqrt);
             break;
         }
         case OP_TYPEID::Tanh:
         {
-            do_unary_operation(topology, op, activation_hyperbolic_tan);
+            do_universal_unary(topology, op, "tanh", activation_hyperbolic_tan);
             break;
         }
         case OP_TYPEID::Sin:
         {
-            do_unary_operation(topology, op, activation_sin);
+            do_universal_unary(topology, op, "sin", activation_sin);
             break;
         }
         case OP_TYPEID::Asin:
         {
-            do_unary_operation(topology, op, activation_asin);
+            do_universal_unary(topology, op, "asin", activation_asin);
             break;
         }
         case OP_TYPEID::Sinh:
         {
-            do_unary_operation(topology, op, activation_sinh);
+            do_universal_unary(topology, op, "sinh", activation_sinh);
             break;
         }
         case OP_TYPEID::Cos:
         {
-            do_unary_operation(topology, op, activation_cos);
+            do_universal_unary(topology, op, "cos", activation_cos);
             break;
         }
        case OP_TYPEID::Acos:
         {
-            do_unary_operation(topology, op, activation_acos);
+            do_universal_unary(topology, op, "acos", activation_acos);
             break;
         }
         case OP_TYPEID::Cosh:
         {
-            do_unary_operation(topology, op, activation_cosh);
+            do_universal_unary(topology, op, "cosh", activation_cosh);
             break;
         }
         case OP_TYPEID::Log:
         {
-            do_unary_operation(topology, op, activation_log);
+            do_universal_unary(topology, op, "log", activation_log);
             break;
         }
         case OP_TYPEID::Exp:
         {
-            do_unary_operation(topology, op, activation_exp);
+            do_universal_unary(topology, op, "exp", activation_exp);
             break;
         }
-        case OP_TYPEID::Sigmoid:
+        case OP_TYPEID::Negative:
+        {
+            const cldnn_activation_additional_params param = {-1.f, 0.f};
+            do_universal_unary(topology, op, "-", activation_linear, param);
+            break;
+        }
+        case OP_TYPEID::Atan:
+        {
+            do_custom_unary(topology, op, "atan");
+            break;
+        }
+        case OP_TYPEID::Ceiling:
+        {
+            do_custom_unary(topology, op, "ceil");
+            break;
+        }
+        case OP_TYPEID::Floor:
+        {
+            do_custom_unary(topology, op, "floor");
+            break;
+        }
+        case OP_TYPEID::Sign:
+        {
+            do_custom_unary(topology, op, "sign");
+            break;
+        }
+        case OP_TYPEID::Tan:
         {
-            do_unary_operation(topology, op, activation_logistic);
+            do_custom_unary(topology, op, "tan");
             break;
         }
         case OP_TYPEID::SigmoidBackprop:
@@ -1156,81 +1188,6 @@ shared_ptr<runtime::Executable>
             do_logical_operation(topology, op, " || ");
             break;
         }
-        case OP_TYPEID::Subtract:
-        {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::sub);
-            break;
-        }
-        case OP_TYPEID::Power:
-        {
-            do_eltwise_operation(topology, op, cldnn::eltwise_mode::pow);
-            break;
-        }
-        case OP_TYPEID::Atan:
-        {
-            arguments_check(op, 1, 1);
-
-            do_custom_eltwise_operation(topology,
-                                        get_input_name(op),
-                                        get_input_shape(op),
-                                        get_input_type(op),
-                                        get_output_name(op),
-                                        get_output_shape(op),
-                                        get_output_type(op),
-                                        CUSTOM_ELTWISE::Atan);
-            break;
-        }
-        case OP_TYPEID::Ceiling:
-        {
-            arguments_check(op, 1, 1);
-
-            do_custom_eltwise_operation(topology,
-                                        get_input_name(op),
-                                        get_input_shape(op),
-                                        get_input_type(op),
-                                        get_output_name(op),
-                                        get_output_shape(op),
-                                        get_output_type(op),
-                                        CUSTOM_ELTWISE::Ceil);
-            break;
-        }
-        case OP_TYPEID::Floor:
-        {
-            arguments_check(op, 1, 1);
-
-            do_custom_eltwise_operation(topology,
-                                        get_input_name(op),
-                                        get_input_shape(op),
-                                        get_input_type(op),
-                                        get_output_name(op),
-                                        get_output_shape(op),
-                                        get_output_type(op),
-                                        CUSTOM_ELTWISE::Floor);
-            break;
-        }
-        case OP_TYPEID::Sign:
-        {
-            arguments_check(op, 1, 1);
-
-            do_custom_eltwise_operation(topology,
-                                        get_input_name(op),
-                                        get_input_shape(op),
-                                        get_input_type(op),
-                                        get_output_name(op),
-                                        get_output_shape(op),
-                                        get_output_type(op),
-                                        CUSTOM_ELTWISE::Sign);
-            break;
-        }
-        case OP_TYPEID::Tan:
-        {
-            arguments_check(op, 1, 1);
-
-            do_custom_eltwise_operation(topology,
-                                        get_input_name(op),
-                                        get_input_shape(op),
-                                        get_input_type(op),
-                                        get_output_name(op),
-                                        get_output_shape(op),
-                                        get_output_type(op),
-                                        CUSTOM_ELTWISE::Tan);
-            break;
-        }
         case OP_TYPEID::Pad:
         {
             arguments_check(op, 2, 1);
...
@@ -1012,7 +1012,8 @@ void runtime::intelgpu::do_eltwise_kernel(cldnn::topology& topology,
                                           const string& output_name,
                                           const Shape& output_shape,
                                           const element::Type& output_type,
-                                          const string& operation)
+                                          const string& operation,
+                                          bool function_operation)
 {
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
     const string entry_point_name = "eltwise_" + output_name;
@@ -1031,15 +1032,34 @@ void runtime::intelgpu::do_eltwise_kernel(cldnn::topology& topology,
         // Main loops
         gws = generate_loops(writer, output_shape, true);

-        writer << "output" << access_dims(output_shape) << " = " << operation << "(input0"
-               << access_dims(input0_shape) << ", input1" << access_dims(input1_shape) << ");\n";
+        writer << "output" << access_dims(output_shape) << " = ";
+
+        if (function_operation)
+        {
+            string explicit_conversion;
+            // TODO need better workaround for this built_in
+            if (operation == "pow")
+            {
+                explicit_conversion = "convert_double";
+            }
+
+            writer << operation << "(" << explicit_conversion << "(input0"
+                   << access_dims(input0_shape) << "), " << explicit_conversion << "(input1"
+                   << access_dims(input1_shape) << "));";
+        }
+        else
+        {
+            writer << "(input0" << access_dims(input0_shape) << " " << operation << " input1"
+                   << access_dims(input1_shape) << ");";
+        }
+
+        writer << " // " << get_opencl_type_name(input0_type) << " "
+               << get_opencl_type_name(output_type) << "\n";

         // Closing brackets for main loops
         generate_loops(writer, output_shape, false);
     }
     writer.block_end();

-    const cldnn::custom_gpu_primitive op_logical(output_name,
+    const cldnn::custom_gpu_primitive op_eltwise(output_name,
                                                  {input0_name, input1_name},
                                                  {writer.get_code()},
                                                  entry_point_name,
@@ -1047,7 +1067,7 @@ void runtime::intelgpu::do_eltwise_kernel(cldnn::topology& topology,
                                                  "",
                                                  layout,
                                                  gws);
-    topology.add(op_logical);
+    topology.add(op_eltwise);
 }

 void runtime::intelgpu::do_reverse_operation(cldnn::topology& topology,
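For reference, the statement emitted inside the generated loops now takes one of two forms, each followed by a type comment. The convert_double detour exists presumably because OpenCL's pow built-in is defined only for floating-point arguments, so integer operands must be converted before the call. Illustrative sketch (index names depend on the shapes):

    // function_operation == true, operation == "pow", i32 inputs:
    output[i0] = pow(convert_double(input0[i0]), convert_double(input1[i0])); // int int
    // function_operation == false, operation == "/":
    output[i0] = (input0[i0] / input1[i0]); // int int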
@@ -1268,16 +1288,17 @@ void runtime::intelgpu::do_sigmoid_backprop_operation(cldnn::topology& topology,
     topology.add(op_sigmoid_backprop);
 }

-void runtime::intelgpu::do_custom_eltwise_operation(cldnn::topology& topology,
-                                                    const string& input_name,
-                                                    const Shape& input_shape,
-                                                    const element::Type& input_type,
-                                                    const string& output_name,
-                                                    const Shape& output_shape,
-                                                    const element::Type& output_type,
-                                                    const CUSTOM_ELTWISE operation_name)
+void runtime::intelgpu::do_custom_unary_operation(cldnn::topology& topology,
+                                                  const string& input_name,
+                                                  const Shape& input_shape,
+                                                  const element::Type& input_type,
+                                                  const string& output_name,
+                                                  const Shape& output_shape,
+                                                  const element::Type& output_type,
+                                                  const string& operation_name)
 {
-    const string entry_point_name = "op_custom_eltwise_" + output_name;
+    const string entry_point_name = "op_custom_unary_" + output_name;
+    const string intermidiate_type = input_type.size() < 8 ? "float" : "double";
     codegen::CodeWriter writer;
     vector<size_t> gws;
@@ -1290,39 +1311,19 @@ void runtime::intelgpu::do_custom_eltwise_operation(cldnn::topology& topology,
     writer.block_begin();
     {
         gws = generate_loops(writer, output_shape, true);
-        writer << "output" << access_dims(output_shape) << " = ";

-        switch (operation_name)
-        {
-        case CUSTOM_ELTWISE::Atan:
-        {
-            writer << "atan";
-            break;
-        }
-        case CUSTOM_ELTWISE::Ceil:
-        {
-            writer << "ceil";
-            break;
-        }
-        case CUSTOM_ELTWISE::Floor:
-        {
-            if (input_type.is_real())
-            {
-                writer << "floor";
-            }
-            break;
-        }
-        case CUSTOM_ELTWISE::Sign:
-        {
-            writer << "sign";
-            break;
-        }
-        case CUSTOM_ELTWISE::Tan:
-        {
-            writer << "tan";
-            break;
-        }
-        }
-        writer << "(input0" << access_dims(input_shape) << ");\n";
+        // convert to intermediate floating point type
+        writer << intermidiate_type << " input_var = convert_" << intermidiate_type << "(input0"
+               << access_dims(input_shape) << ");\n";
+
+        // do the operation with the same type
+        writer << intermidiate_type << " output_var = " << operation_name
+               << "(input_var); //Type: " << get_opencl_type_name(input_type) << "\n";
+
+        // convert to destination type
+        writer << "output" << access_dims(output_shape) << " = convert_"
+               << get_opencl_type_name(output_type) << "(output_var);\n";

         generate_loops(writer, output_shape, false);
     }
     writer.block_end();
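The rewritten generator routes every input through a floating-point intermediate: element types narrower than 8 bytes use float, 8-byte types (i64, f64) use double, and the result is converted back to the output type. This also retires the old per-op switch, where Floor on a non-real input emitted no operation at all and silently degenerated to a copy. Roughly what the emitted loop body looks like for an i32 Sign (illustrative only):

    float input_var = convert_float(input0[i0]);
    float output_var = sign(input_var); //Type: int
    output[i0] = convert_int(output_var);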
@@ -1419,46 +1420,6 @@ void runtime::intelgpu::do_arg_max_min_operation(cldnn::topology& topology,
     topology.add(op_arg_max_min);
 }

-void runtime::intelgpu::do_negative_operation(cldnn::topology& topology,
-                                              const string& input_name,
-                                              const Shape& input_shape,
-                                              const element::Type& input_type,
-                                              const string& output_name,
-                                              const Shape& output_shape,
-                                              const element::Type& output_type)
-{
-    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
-    const string entry_point_name = "negative_" + output_name;
-    const string& input_type_name = get_opencl_type_name(input_type);
-    const string& output_type_name = get_opencl_type_name(output_type);
-    codegen::CodeWriter writer;
-    vector<size_t> gws;
-
-    gen_func_def(
-        writer, entry_point_name, {input_type_name}, {input_shape}, output_type_name, output_shape);
-
-    writer.block_begin();
-    {
-        gws = generate_loops(writer, output_shape, true);
-
-        writer << "output" << access_dims(output_shape) << " = - (input0"
-               << access_dims(input_shape) << ");\n";
-
-        generate_loops(writer, output_shape, false);
-    }
-    writer.block_end();
-
-    const cldnn::custom_gpu_primitive op_negative(output_name,
-                                                  {input_name},
-                                                  {writer.get_code()},
-                                                  entry_point_name,
-                                                  get_kernel_args(1, 1),
-                                                  "",
-                                                  layout,
-                                                  gws);
-    topology.add(op_negative);
-}
-
 void runtime::intelgpu::do_reshape_operation(cldnn::topology& topology,
                                              const string& input_name,
                                              const Shape& input_shape,
...
@@ -119,7 +119,8 @@ namespace ngraph
                                    const std::string& output_name,
                                    const Shape& output_shape,
                                    const element::Type& output_type,
-                                   const std::string& operation);
+                                   const std::string& operation,
+                                   bool function_operation);

             void do_reverse_operation(cldnn::topology& topology,
                                       const std::string& input_name,
@@ -162,23 +163,14 @@ namespace ngraph
                                       const Shape& output_shape,
                                       const element::Type& output_type);

-            enum class CUSTOM_ELTWISE
-            {
-                Atan,
-                Ceil,
-                Floor,
-                Sign,
-                Tan
-            };
-
-            void do_custom_eltwise_operation(cldnn::topology& topology,
+            void do_custom_unary_operation(cldnn::topology& topology,
                                            const std::string& input_name,
                                            const Shape& input_shape,
                                            const element::Type& input_type,
                                            const std::string& output_name,
                                            const Shape& output_shape,
                                            const element::Type& output_type,
-                                             const CUSTOM_ELTWISE operation_name);
+                                           const std::string& operation_name);

             void do_arg_max_min_operation(cldnn::topology& topology,
                                           const std::string& input_name,
@@ -190,14 +182,6 @@ namespace ngraph
                                           const size_t reduction_axis,
                                           const bool is_max);

-            void do_negative_operation(cldnn::topology& topology,
-                                       const std::string& input_name,
-                                       const Shape& input_shape,
-                                       const element::Type& input_type,
-                                       const std::string& output_name,
-                                       const Shape& output_shape,
-                                       const element::Type& output_type);
-
             void do_reshape_operation(cldnn::topology& topology,
                                       const std::string& input_name,
                                       const Shape& input_shape,
...
@@ -65,9 +65,10 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
     codegen::CodeWriter writer1;
     vector<size_t> gws;

-    writer0 << "__kernel void " << entry_point_middle_name << "(const __global float input"
-            << array_dims(input_shape) << ", __global float output" << array_dims(input_shape, axes)
-            << ")\n";
+    writer0 << "__kernel void " << entry_point_middle_name << "(const __global "
+            << get_opencl_type_name(input_type) << " input" << array_dims(input_shape)
+            << ", __global " << get_opencl_type_name(output_type) << " output"
+            << array_dims(input_shape, axes) << ")\n";

     writer0.block_begin();
     {
@@ -90,10 +91,11 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
                                                        gws);
     topology.add(op_softmax_middle);

-    writer1 << "__kernel void " << entry_point_name << "(const __global float input0"
-            << array_dims(input_shape) << ", const __global float input1"
-            << array_dims(input_shape, axes) << ", __global float output"
-            << array_dims(output_shape) << ")\n";
+    writer1 << "__kernel void " << entry_point_name << "(const __global "
+            << get_opencl_type_name(input_type) << " input0" << array_dims(input_shape)
+            << ", const __global " << get_opencl_type_name(input_type) << " input1"
+            << array_dims(input_shape, axes) << ", __global " << get_opencl_type_name(output_type)
+            << " output" << array_dims(output_shape) << ")\n";

     writer1.block_begin();
     {
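With the hardcoded float replaced by get_opencl_type_name(...), the generated softmax kernels now match the tensor's element type. Illustratively, for an f64 input of shape {2, 3, 4} reduced over axis 2, the first stage's signature would become something like this (the entry-point name here is hypothetical):

    __kernel void softmax_middle_out(const __global double input[2][3][4],
                                     __global double output[2][3])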
...
@@ -13,9 +13,6 @@ backwards_reverse_sequence_n3_c2_h3
 backwards_reverse_sequence_n4d2c3h2w2
 backwards_slice
 batch_norm_bprop_n4c3h2w2
-batch_norm_inference_0eps_f64
-batch_norm_inference_f64
-batch_norm_training_0eps_f64
 dequantize
 dequantize_axes
 dequantize_dynamic_offset
@@ -64,7 +61,6 @@ shape_of_5d
 shape_of_matrix
 shape_of_scalar
 shape_of_vector
-softmax_axis_3d_double
 topk_1d_max_all
 topk_1d_max_one
 topk_1d_max_partial
@@ -88,26 +84,3 @@ topk_3d_min_partial
 topk_3d_single_output
 topk_5d_max_partial
 topk_int64
-zero_sized_abs
-zero_sized_acos
-zero_sized_add
-zero_sized_asin
-zero_sized_atan
-zero_sized_ceiling
-zero_sized_cos
-zero_sized_cosh
-zero_sized_divide
-zero_sized_exp
-zero_sized_log
-zero_sized_maximum
-zero_sized_minimum
-zero_sized_multiply
-zero_sized_negative
-zero_sized_power
-zero_sized_sign
-zero_sized_sin
-zero_sized_sinh
-zero_sized_sqrt
-zero_sized_subtract
-zero_sized_tan
-zero_sized_tanh