Commit c449005a authored by dmyershov, committed by Scott Cyphers

IntelGPU backend: Softmax operation implementation (#1413)

parent 428f567d
......@@ -22,6 +22,7 @@ set(SRC
intelgpu_op_broadcast.cpp
intelgpu_op_custom_kernels.cpp
intelgpu_op_convolution.cpp
intelgpu_op_softmax.cpp
code_writer.cpp
)
......
......@@ -28,6 +28,7 @@
#include <CPP/reorder.hpp>
#include <CPP/reshape.hpp>
#include <CPP/scale.hpp>
#include <CPP/softmax.hpp>
#include <CPP/topology.hpp>
#include "ngraph/runtime/intelgpu/intelgpu_backend.hpp"
......@@ -36,6 +37,7 @@
#include "ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_convolution.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_tensor_view.hpp"
#include "ngraph/function.hpp"
......@@ -57,6 +59,7 @@
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/reverse.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/op/sum.hpp"
#include "ngraph/util.hpp"
......@@ -375,6 +378,50 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
const cldnn::concatenation cldnn_concat(get_output_name(op), inputs, cldnn_axis);
topology.add(cldnn_concat);
}
else if ("Softmax" == op->description())
{
arguments_check(op, 1, 1);
const shared_ptr<op::Softmax> softmax_op = static_pointer_cast<op::Softmax>(op);
const AxisSet& axes = softmax_op->get_axes();
const size_t axes_size = axes.size();
const size_t shape_dim_count = get_input_shape(op, 0).size();
// clDNN has limited support for the Softmax operation;
// the checks below decide when to fall back to the custom kernel
if ((shape_dim_count > 3) || ((shape_dim_count == 3) && (axes_size == 2)))
{
do_softmax_operation(topology,
get_input_name(op),
get_input_shape(op),
get_input_type(op),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
axes);
}
else
{
cldnn::softmax::dimension_t dimension = cldnn::softmax::normalize_fyx;
if (axes_size == 1)
{
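// nGraph counts axes from the outermost dimension, while clDNN names dimensions
// from the innermost (x), so reverse the index before mapping it to a clDNN dimension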
size_t axes_idx = shape_dim_count - *(axes.begin()) - 1;
switch (axes_idx)
{
case 0: dimension = cldnn::softmax::normalize_x; break;
case 1: dimension = cldnn::softmax::normalize_y; break;
case 2: dimension = cldnn::softmax::normalize_f; break;
default:
throw invalid_argument("Softmax operation: wrong axis " +
to_string(axes_idx));
}
}
const cldnn::softmax cldnn_softmax(
get_output_name(op), get_input_name(op), dimension);
topology.add(cldnn_softmax);
}
}
else if ("Add" == op->description())
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::sum);
......
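A note on the dispatch above (the shape and axes here are illustrative, not taken from the commit): clDNN's built-in softmax primitive normalizes over a fixed set of dimension combinations, so inputs of rank greater than 3, or rank-3 inputs reduced over two axes, are routed to the custom kernel in do_softmax_operation; everything else uses cldnn::softmax, defaulting to normalize_fyx when more than one axis is reduced. For a single reduction axis, the nGraph axis is reversed into clDNN's innermost-first numbering, for example a rank-3 input with axes = {2}:

axes_idx = shape_dim_count - axis - 1 = 3 - 2 - 1 = 0  ->  cldnn::softmax::normalize_x (the innermost dimension)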
......@@ -43,13 +43,18 @@ vector<cldnn_arg> runtime::intelgpu::get_kernel_args(size_t input, size_t output
return result;
}
string runtime::intelgpu::array_dims(const Shape& dimentions)
string runtime::intelgpu::array_dims(const Shape& dimentions, const AxisSet& axis)
{
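// dimensions listed in 'axis' are skipped so reduced axes do not appear in the emitted array bounds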
size_t var_idx = 0;
string buffer;
for (auto const& dim : dimentions)
{
buffer += "[" + to_string(dim) + "]";
if (axis.find(var_idx) == axis.end())
{
buffer += "[" + to_string(dim) + "]";
}
++var_idx;
}
if (buffer.empty())
......
......@@ -102,7 +102,7 @@ namespace ngraph
// Helper functions used in cldnn::custom_gpu_primitive kernels
std::vector<cldnn_arg> get_kernel_args(size_t input, size_t output);
std::string array_dims(const Shape& dimentions);
std::string array_dims(const Shape& dimentions, const AxisSet& axis = {});
std::string access_dims(const Shape& dimentions,
const AxisSet& axis = {},
bool is_reversed = false);
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <CPP/custom_gpu_primitive.hpp>
#include "ngraph/runtime/intelgpu/code_writer.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp"
using namespace std;
using namespace ngraph;
static Shape shape_dims(const Shape& dimentions, const AxisSet& axis = {})
{
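// drop the dimensions listed in 'axis'; if every dimension is reduced, return a scalar shape {1}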
size_t var_idx = 0;
Shape output_shape;
for (auto const& dim : dimentions)
{
if (axis.find(var_idx) == axis.end())
{
output_shape.push_back(dim);
}
++var_idx;
}
if (output_shape.size() == 0)
{ // an empty shape denotes a scalar
output_shape.push_back(1);
}
return output_shape;
}
static vector<size_t> generate_loops_w_axes(codegen::CodeWriter& writer,
const Shape& shape,
bool is_begin,
const AxisSet& axis,
const string& expression)
{
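// Opens (is_begin == true) or closes the loop nest of a reduction kernel: dimensions not in
// 'axis' become OpenCL work-items (up to three global dimensions) or outer for loops,
// 'expression' is emitted before the reduction loops, and the dimensions in 'axis' become
// the innermost for loops. Returns the global work size for the kernel.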
const size_t cldnn_gws_lim = 3;
vector<size_t> gws;
size_t var_idx = 0;
size_t dim_idx = 0;
for (auto const& i : shape)
{
if (axis.find(var_idx) == axis.end())
{
if (dim_idx < cldnn_gws_lim)
{
if (is_begin)
{
writer << "const unsigned i" << var_idx << " = get_global_id(" << dim_idx
<< ");\n";
gws.push_back(i);
}
++dim_idx;
}
else
{
if (is_begin)
{
writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i
<< "; ++i" << var_idx << ")\n";
writer.block_begin();
}
else
{
writer.block_end();
}
}
}
++var_idx;
}
if (is_begin)
{
writer << expression;
}
var_idx = 0;
for (auto const& i : shape)
{
if (axis.find(var_idx) != axis.end())
{
if (is_begin)
{
writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
<< var_idx << ")\n";
writer.block_begin();
}
else
{
writer.block_end();
}
}
++var_idx;
}
if (gws.empty())
{
gws.push_back(1);
}
return gws;
}
void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axes)
{
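// Softmax is built from two custom kernels: the first accumulates exp(input) over the
// reduced axes into an intermediate buffer, the second divides exp(input) by that sum
// for every output element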
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const string entry_point_name = "softmax_" + output_name;
const string middle_name = entry_point_name + "_middle";
const string entry_point_middle_name = "softmax_middle_" + output_name;
const string expression = "output" + access_dims(input_shape, axes) + " = 0.0f;\n";
const Shape new_shape = shape_dims(output_shape, axes);
const cldnn::layout layout_middle = IntelGPULayout::create_cldnn_layout(output_type, new_shape);
codegen::CodeWriter writer0;
codegen::CodeWriter writer1;
vector<size_t> gws;
writer0 << "__kernel void " << entry_point_middle_name << "(const __global float input"
<< array_dims(input_shape) << ", __global float output" << array_dims(input_shape, axes)
<< ")\n";
writer0.block_begin();
{
gws = generate_loops_w_axes(writer0, output_shape, true, axes, expression);
writer0 << "output" << access_dims(input_shape, axes) << " += exp(input"
<< access_dims(input_shape) << ");\n";
generate_loops_w_axes(writer0, output_shape, false, axes, "");
}
writer0.block_end();
const cldnn::custom_gpu_primitive op_softmax_middle(middle_name,
{input_name},
{writer0.get_code()},
entry_point_middle_name,
get_kernel_args(1, 1),
"",
layout_middle,
gws);
topology.add(op_softmax_middle);
writer1 << "__kernel void " << entry_point_name << "(const __global float input0"
<< array_dims(input_shape) << ", const __global float input1"
<< array_dims(input_shape, axes) << ", __global float output"
<< array_dims(output_shape) << ")\n";
writer1.block_begin();
{
gws = generate_loops(writer1, output_shape, true);
writer1 << "output" << access_dims(input_shape) << " = exp(input0"
<< access_dims(input_shape) << ")/input1" << access_dims(input_shape, axes)
<< ";\n";
generate_loops(writer1, output_shape, false);
}
writer1.block_end();
const cldnn::custom_gpu_primitive op_softmax(output_name,
{input_name, middle_name},
{writer1.get_code()},
entry_point_name,
get_kernel_args(2, 1),
"",
layout,
gws);
topology.add(op_softmax);
}
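To make the generated source concrete, below is a rough sketch of the two kernels this function would emit for an illustrative 2x3 input reduced over axis 1. The kernel and buffer names are placeholders following the entry-point naming scheme above, and the exact text of the second kernel depends on generate_loops (defined elsewhere in the backend), so treat this as an approximation rather than the literal output:

__kernel void softmax_middle_out(const __global float input[2][3], __global float output[2])
{
    // one work-item per non-reduced element: accumulate exp(input) over axis 1
    const unsigned i0 = get_global_id(0);
    output[i0] = 0.0f;
    for (uint i1 = 0; i1 < 3; ++i1)
    {
        output[i0] += exp(input[i0][i1]);
    }
}

__kernel void softmax_out(const __global float input0[2][3], const __global float input1[2], __global float output[2][3])
{
    // divide each exponential by the per-row sum produced by the first kernel
    const unsigned i0 = get_global_id(0);
    const unsigned i1 = get_global_id(1);
    output[i0][i1] = exp(input0[i0][i1]) / input1[i0];
}

The first kernel is launched with a global work size of {2} and writes the per-row sums into the intermediate buffer; the second is launched over {2, 3} and reads both the original input and that buffer.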
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
void do_softmax_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axes);
}
}
}
......@@ -31,10 +31,6 @@ backwards_select_nested
backwards_sigmoid
backwards_sign
backwards_slice
backwards_softmax_3d
backwards_softmax_all
backwards_softmax_axis
backwards_softmax_underflow
backwards_tan
batchnorm_bprop_n4c3h2w2
batch_norm_one_output
......@@ -97,12 +93,6 @@ select_and_scatter_without_overlap
select_and_scatter_with_overlap
sigmoid_bprop_n1c1h4
sign
softmax_all
softmax_axis
softmax_axis_2
softmax_axis_3d
softmax_axis_3d_trivial
softmax_underflow
tan
tensor_constant_int64
validate_call_input_type
......