Commit ab9fad24 authored by Sergey Shalnov's avatar Sergey Shalnov Committed by Robert Kimball

IntelGPU backend: Custom kernels refactoring 2 (#2770)

parent 6f0c8190
......@@ -53,10 +53,7 @@
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_tensor_view.hpp"
#include "ngraph/runtime/intelgpu/visualize_tree.hpp"
......@@ -671,14 +668,7 @@ shared_ptr<runtime::Executable>
if ((shape_dim_count > 3) || ((shape_dim_count == 3) && (axes_size == 2)) ||
(op->get_input_element_type(0) != element::f32))
{
do_softmax_operation(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_input_element_type(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axes);
kern.emit<op::Softmax>(softmax_op);
}
else
{
......@@ -979,15 +969,7 @@ shared_ptr<runtime::Executable>
}
else
{
do_bcast_sum_operation(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_input_element_type(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis,
true);
kern.emit<op::Broadcast>(broadcast);
}
break;
}
......@@ -1005,15 +987,7 @@ shared_ptr<runtime::Executable>
}
else
{
do_bcast_sum_operation(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_input_element_type(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis,
false);
kern.emit<op::Sum>(sum);
}
break;
}
......@@ -1031,13 +1005,7 @@ shared_ptr<runtime::Executable>
}
else
{
do_product_operation(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis);
kern.emit<op::Product>(prod);
}
break;
}
......@@ -1098,44 +1066,16 @@ shared_ptr<runtime::Executable>
{
arguments_check(op, 1, 1);
const shared_ptr<op::All> all_op = static_pointer_cast<op::All>(op);
const AxisSet& axis = all_op->get_reduction_axes();
const shared_ptr<Node> def_val = all_op->get_default_value();
const shared_ptr<op::Constant> def_const = static_pointer_cast<op::Constant>(def_val);
const vector<std::string>& values = def_const->get_value_strings();
// Empty axis is not a case for do_equal_propagation()
do_all_any_op(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis,
"lhs && rhs",
values.at(0));
kern.emit<op::All>(static_pointer_cast<op::All>(op));
break;
}
case OP_TYPEID::Any:
{
arguments_check(op, 1, 1);
const shared_ptr<op::Any> any_op = static_pointer_cast<op::Any>(op);
const AxisSet& axis = any_op->get_reduction_axes();
const shared_ptr<Node> def_val = any_op->get_default_value();
const shared_ptr<op::Constant> def_const = static_pointer_cast<op::Constant>(def_val);
const vector<std::string>& values = def_const->get_value_strings();
// Empty axis is not a case for do_equal_propagation()
do_all_any_op(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis,
"lhs || rhs",
values.at(0));
kern.emit<op::Any>(static_pointer_cast<op::Any>(op));
break;
}
case OP_TYPEID::ReluBackprop:
......@@ -1788,34 +1728,14 @@ shared_ptr<runtime::Executable>
{
arguments_check(op, 1, 1);
const shared_ptr<op::Min> min_op = static_pointer_cast<op::Min>(op);
const AxisSet& axis = min_op->get_reduction_axes();
do_max_min_operation(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis,
true);
kern.emit<op::Min>(static_pointer_cast<op::Min>(op));
break;
}
case OP_TYPEID::Max:
{
arguments_check(op, 1, 1);
const shared_ptr<op::Max> max_op = static_pointer_cast<op::Max>(op);
const AxisSet& axis = max_op->get_reduction_axes();
do_max_min_operation(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis,
false);
kern.emit<op::Max>(static_pointer_cast<op::Max>(op));
break;
}
case OP_TYPEID::OneHot:
......
......@@ -23,9 +23,17 @@
#include <CPP/topology.hpp>
#include "ngraph/node.hpp"
#include "ngraph/op/all.hpp"
#include "ngraph/op/any.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/product.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/op/sum.hpp"
namespace ngraph
{
......@@ -98,11 +106,19 @@ public:
private:
void queue_krnl(const krnl_info& krn_info, const std::shared_ptr<Node>& op);
krnl_info build_krnl(const std::shared_ptr<op::All>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Any>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Broadcast>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Convolution>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropData>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropFilters>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Max>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Min>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Product>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Select>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Slice>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Softmax>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Sum>& op) const;
cldnn::topology& stream;
size_t m_count_krnls;
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/axis_set.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
// Implements the Broadcast (is_bcast == true) and Sum (is_bcast == false)
// nGraph operations as a single custom OpenCL kernel added to `topology`.
// input_shape (Broadcast) or output_shape (Sum) may be empty; an empty
// shape denotes a scalar tensor and is not a case for equal-propagation.
// `axis` holds the broadcast axes (Broadcast) or reduction axes (Sum).
void do_bcast_sum_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis,
bool is_bcast);
// Implements the Min (is_min == true) or Max (is_min == false) reduction
// over the axes given in `axis`, emitting a custom kernel into `topology`.
void do_max_min_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis,
bool is_min);
// Implements the Product reduction over the axes given in `axis`.
void do_product_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis);
}
}
}
......@@ -14,26 +14,29 @@
// limitations under the License.
//*****************************************************************************
#include <CPP/custom_gpu_primitive.hpp>
#include "ngraph/code_writer.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/op/constant.hpp"
using namespace std;
using namespace ngraph;
using namespace ngraph::runtime::intelgpu;
void runtime::intelgpu::do_all_any_op(cldnn::topology& topology,
const string& input0_name,
const Shape& input0_shape,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis,
const std::string& operation,
const std::string& init_val)
static CustomKernels::krnl_info do_all_any_op(const shared_ptr<op::util::LogicalReduction>& op,
const string& operation)
{
const string& input0_name = op->get_input_tensor_name(0);
const Shape& input0_shape = op->get_input_shape(0);
const string& output_name = op->get_output_tensor_name(0);
const Shape& output_shape = op->get_output_shape(0);
const element::Type& output_type = op->get_output_element_type(0);
const AxisSet& axis = op->get_reduction_axes();
const shared_ptr<Node> def_val = op->get_default_value();
const shared_ptr<op::Constant> def_const = static_pointer_cast<op::Constant>(def_val);
const vector<string>& values = def_const->get_value_strings();
const string& init_val = values.at(0);
const string entry_point_name = "custom_op_all_any_" + output_name;
const string kernel_type_name = get_opencl_type_name(output_type);
const size_t input_size = shape_size<Shape>(input0_shape);
......@@ -94,14 +97,21 @@ void runtime::intelgpu::do_all_any_op(cldnn::topology& topology,
} // End of function bracket
writer.block_end();
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const cldnn::custom_gpu_primitive op_all_any(output_name,
{input0_name},
{writer.get_code()},
entry_point_name,
get_kernel_args(1, 1),
"",
layout,
{1});
topology.add(op_all_any);
const CustomKernelInfo krn_ret(output_name,
output_shape,
output_type,
{input0_name},
{writer.get_code()},
entry_point_name);
return {krn_ret};
}
// All is a logical-AND reduction over the op's reduction axes; it shares
// the kernel generator with Any, differing only in the combining expression.
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::All>& op) const
{
    const string combine_expr = "lhs && rhs";
    return do_all_any_op(op, combine_expr);
}
// Any is a logical-OR reduction over the op's reduction axes; it shares
// the kernel generator with All, differing only in the combining expression.
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Any>& op) const
{
    const string combine_expr = "lhs || rhs";
    return do_all_any_op(op, combine_expr);
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/axis_set.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
// Emits a custom OpenCL kernel implementing the All/Any logical reductions
// into `topology`, reducing over the axes in `axis`.
// `operation` is the OpenCL expression combining two elements, e.g.
// "lhs && rhs" for All or "lhs || rhs" for Any.
// `init_val` is the textual initial value of the accumulator (taken from
// the op's default value).
void do_all_any_op(cldnn::topology& topology,
const std::string& input0_name,
const Shape& input0_shape,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis,
const std::string& operation,
const std::string& init_val);
}
}
}
......@@ -14,15 +14,13 @@
// limitations under the License.
//*****************************************************************************
#include <CPP/custom_gpu_primitive.hpp>
#include "ngraph/code_writer.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp"
using namespace std;
using namespace ngraph;
using namespace ngraph::runtime::intelgpu;
static Shape shape_dims(const Shape& dimentions, const AxisSet& axis = {})
{
......@@ -45,22 +43,20 @@ static Shape shape_dims(const Shape& dimentions, const AxisSet& axis = {})
return output_shape;
}
void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axes)
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Softmax>& op) const
{
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const string& input_name = op->get_input_tensor_name(0);
const Shape& input_shape = op->get_input_shape(0);
const element::Type& input_type = op->get_input_element_type(0);
const string& output_name = op->get_output_tensor_name(0);
const Shape& output_shape = op->get_output_shape(0);
const element::Type& output_type = op->get_output_element_type(0);
const AxisSet& axes = op->get_axes();
const string entry_point_name = "softmax_" + output_name;
const string middle_name = entry_point_name + "_middle";
const string entry_point_middle_name = "softmax_middle_" + output_name;
const string expression = "output" + access_dims(input_shape, "i", axes) + " = 0.0f;\n";
const Shape new_shape = shape_dims(output_shape, axes);
const cldnn::layout layout_middle = IntelGPULayout::create_cldnn_layout(output_type, new_shape);
CodeWriter writer0;
CodeWriter writer1;
vector<size_t> gws;
......@@ -81,15 +77,13 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
}
writer0.block_end();
const cldnn::custom_gpu_primitive op_softmax_middle(middle_name,
{input_name},
{writer0.get_code()},
entry_point_middle_name,
get_kernel_args(1, 1),
"",
layout_middle,
gws);
topology.add(op_softmax_middle);
const CustomKernelInfo op_softmax_middle(middle_name,
new_shape,
output_type,
{input_name},
{writer0.get_code()},
entry_point_middle_name,
gws);
writer1 << "__kernel void " << entry_point_name << "(const __global "
<< get_opencl_type_name(input_type) << " input0" << array_dims(input_shape)
......@@ -107,13 +101,12 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
}
writer1.block_end();
const cldnn::custom_gpu_primitive op_softmax(output_name,
{input_name, middle_name},
{writer1.get_code()},
entry_point_name,
get_kernel_args(2, 1),
"",
layout,
gws);
topology.add(op_softmax);
const CustomKernelInfo op_softmax(output_name,
output_shape,
output_type,
{input_name, middle_name},
{writer1.get_code()},
entry_point_name,
gws);
return {op_softmax_middle, op_softmax};
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
// Implements the Softmax operation as custom OpenCL kernels added to
// `topology`, normalizing the input over the axes given in `axes`.
// NOTE(review): this declaration uses AxisSet but "ngraph/axis_set.hpp"
// is not included directly above — presumably it is pulled in via
// "ngraph/shape.hpp"; confirm before reusing this header elsewhere.
void do_softmax_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axes);
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment