Commit ab9fad24 authored by Sergey Shalnov's avatar Sergey Shalnov Committed by Robert Kimball

IntelGPU backend: Custom kernels refactoring 2 (#2770)

parent 6f0c8190
......@@ -53,10 +53,7 @@
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_tensor_view.hpp"
#include "ngraph/runtime/intelgpu/visualize_tree.hpp"
......@@ -671,14 +668,7 @@ shared_ptr<runtime::Executable>
if ((shape_dim_count > 3) || ((shape_dim_count == 3) && (axes_size == 2)) ||
(op->get_input_element_type(0) != element::f32))
{
do_softmax_operation(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_input_element_type(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axes);
kern.emit<op::Softmax>(softmax_op);
}
else
{
......@@ -979,15 +969,7 @@ shared_ptr<runtime::Executable>
}
else
{
do_bcast_sum_operation(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_input_element_type(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis,
true);
kern.emit<op::Broadcast>(broadcast);
}
break;
}
......@@ -1005,15 +987,7 @@ shared_ptr<runtime::Executable>
}
else
{
do_bcast_sum_operation(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_input_element_type(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis,
false);
kern.emit<op::Sum>(sum);
}
break;
}
......@@ -1031,13 +1005,7 @@ shared_ptr<runtime::Executable>
}
else
{
do_product_operation(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis);
kern.emit<op::Product>(prod);
}
break;
}
......@@ -1098,44 +1066,16 @@ shared_ptr<runtime::Executable>
{
arguments_check(op, 1, 1);
const shared_ptr<op::All> all_op = static_pointer_cast<op::All>(op);
const AxisSet& axis = all_op->get_reduction_axes();
const shared_ptr<Node> def_val = all_op->get_default_value();
const shared_ptr<op::Constant> def_const = static_pointer_cast<op::Constant>(def_val);
const vector<std::string>& values = def_const->get_value_strings();
// Empty axis is not a case for do_equal_propagation()
do_all_any_op(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis,
"lhs && rhs",
values.at(0));
kern.emit<op::All>(static_pointer_cast<op::All>(op));
break;
}
case OP_TYPEID::Any:
{
arguments_check(op, 1, 1);
const shared_ptr<op::Any> any_op = static_pointer_cast<op::Any>(op);
const AxisSet& axis = any_op->get_reduction_axes();
const shared_ptr<Node> def_val = any_op->get_default_value();
const shared_ptr<op::Constant> def_const = static_pointer_cast<op::Constant>(def_val);
const vector<std::string>& values = def_const->get_value_strings();
// Empty axis is not a case for do_equal_propagation()
do_all_any_op(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis,
"lhs || rhs",
values.at(0));
kern.emit<op::Any>(static_pointer_cast<op::Any>(op));
break;
}
case OP_TYPEID::ReluBackprop:
......@@ -1788,34 +1728,14 @@ shared_ptr<runtime::Executable>
{
arguments_check(op, 1, 1);
const shared_ptr<op::Min> min_op = static_pointer_cast<op::Min>(op);
const AxisSet& axis = min_op->get_reduction_axes();
do_max_min_operation(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis,
true);
kern.emit<op::Min>(static_pointer_cast<op::Min>(op));
break;
}
case OP_TYPEID::Max:
{
arguments_check(op, 1, 1);
const shared_ptr<op::Max> max_op = static_pointer_cast<op::Max>(op);
const AxisSet& axis = max_op->get_reduction_axes();
do_max_min_operation(topology,
op->get_input_tensor_name(0),
op->get_input_shape(0),
op->get_output_tensor_name(0),
op->get_output_shape(0),
op->get_output_element_type(0),
axis,
false);
kern.emit<op::Max>(static_pointer_cast<op::Max>(op));
break;
}
case OP_TYPEID::OneHot:
......
......@@ -23,9 +23,17 @@
#include <CPP/topology.hpp>
#include "ngraph/node.hpp"
#include "ngraph/op/all.hpp"
#include "ngraph/op/any.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/product.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/op/sum.hpp"
namespace ngraph
{
......@@ -98,11 +106,19 @@ public:
private:
void queue_krnl(const krnl_info& krn_info, const std::shared_ptr<Node>& op);
krnl_info build_krnl(const std::shared_ptr<op::All>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Any>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Broadcast>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Convolution>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropData>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropFilters>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Max>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Min>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Product>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Select>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Slice>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Softmax>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Sum>& op) const;
cldnn::topology& stream;
size_t m_count_krnls;
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/axis_set.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
// Implements the Broadcast (is_bcast == true) and Sum (is_bcast == false)
// nGraph operations as a single custom OpenCL kernel added to `topology`.
// input_shape (Broadcast) or output_shape (Sum) may be empty; an empty
// shape denotes a scalar tensor and is not a case for equal-propagation.
// `axis` holds the broadcast axes (Broadcast) or reduction axes (Sum).
void do_bcast_sum_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis,
bool is_bcast);
// Implements the Min (is_min == true) or Max (is_min == false) reduction
// over the axes given in `axis`, emitting a custom kernel into `topology`.
void do_max_min_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis,
bool is_min);
// Implements the Product reduction over the axes given in `axis`.
void do_product_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis);
}
}
}
......@@ -14,26 +14,29 @@
// limitations under the License.
//*****************************************************************************
#include <CPP/custom_gpu_primitive.hpp>
#include "ngraph/code_writer.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/op/constant.hpp"
using namespace std;
using namespace ngraph;
using namespace ngraph::runtime::intelgpu;
void runtime::intelgpu::do_all_any_op(cldnn::topology& topology,
const string& input0_name,
const Shape& input0_shape,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis,
const std::string& operation,
const std::string& init_val)
static CustomKernels::krnl_info do_all_any_op(const shared_ptr<op::util::LogicalReduction>& op,
const string& operation)
{
const string& input0_name = op->get_input_tensor_name(0);
const Shape& input0_shape = op->get_input_shape(0);
const string& output_name = op->get_output_tensor_name(0);
const Shape& output_shape = op->get_output_shape(0);
const element::Type& output_type = op->get_output_element_type(0);
const AxisSet& axis = op->get_reduction_axes();
const shared_ptr<Node> def_val = op->get_default_value();
const shared_ptr<op::Constant> def_const = static_pointer_cast<op::Constant>(def_val);
const vector<string>& values = def_const->get_value_strings();
const string& init_val = values.at(0);
const string entry_point_name = "custom_op_all_any_" + output_name;
const string kernel_type_name = get_opencl_type_name(output_type);
const size_t input_size = shape_size<Shape>(input0_shape);
......@@ -94,14 +97,21 @@ void runtime::intelgpu::do_all_any_op(cldnn::topology& topology,
} // End of function bracket
writer.block_end();
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const cldnn::custom_gpu_primitive op_all_any(output_name,
{input0_name},
{writer.get_code()},
entry_point_name,
get_kernel_args(1, 1),
"",
layout,
{1});
topology.add(op_all_any);
const CustomKernelInfo krn_ret(output_name,
output_shape,
output_type,
{input0_name},
{writer.get_code()},
entry_point_name);
return {krn_ret};
}
// All is a logical-AND reduction over the op's reduction axes; it shares
// the kernel generator with Any, differing only in the combining expression.
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::All>& op) const
{
    const string combine_expr = "lhs && rhs";
    return do_all_any_op(op, combine_expr);
}
// Any is a logical-OR reduction over the op's reduction axes; it shares
// the kernel generator with All, differing only in the combining expression.
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Any>& op) const
{
    const string combine_expr = "lhs || rhs";
    return do_all_any_op(op, combine_expr);
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/axis_set.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
// Emits a custom OpenCL kernel implementing the All/Any logical reductions
// into `topology`, reducing over the axes in `axis`.
// `operation` is the OpenCL expression combining two elements, e.g.
// "lhs && rhs" for All or "lhs || rhs" for Any.
// `init_val` is the textual initial value of the accumulator (taken from
// the op's default value).
void do_all_any_op(cldnn::topology& topology,
const std::string& input0_name,
const Shape& input0_shape,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis,
const std::string& operation,
const std::string& init_val);
}
}
}
......@@ -14,15 +14,13 @@
// limitations under the License.
//*****************************************************************************
#include <CPP/custom_gpu_primitive.hpp>
#include "ngraph/code_writer.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp"
using namespace std;
using namespace ngraph;
using namespace ngraph::runtime::intelgpu;
static Shape shape_dims(const Shape& dimentions, const AxisSet& axis = {})
{
......@@ -45,22 +43,20 @@ static Shape shape_dims(const Shape& dimentions, const AxisSet& axis = {})
return output_shape;
}
void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axes)
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Softmax>& op) const
{
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const string& input_name = op->get_input_tensor_name(0);
const Shape& input_shape = op->get_input_shape(0);
const element::Type& input_type = op->get_input_element_type(0);
const string& output_name = op->get_output_tensor_name(0);
const Shape& output_shape = op->get_output_shape(0);
const element::Type& output_type = op->get_output_element_type(0);
const AxisSet& axes = op->get_axes();
const string entry_point_name = "softmax_" + output_name;
const string middle_name = entry_point_name + "_middle";
const string entry_point_middle_name = "softmax_middle_" + output_name;
const string expression = "output" + access_dims(input_shape, "i", axes) + " = 0.0f;\n";
const Shape new_shape = shape_dims(output_shape, axes);
const cldnn::layout layout_middle = IntelGPULayout::create_cldnn_layout(output_type, new_shape);
CodeWriter writer0;
CodeWriter writer1;
vector<size_t> gws;
......@@ -81,15 +77,13 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
}
writer0.block_end();
const cldnn::custom_gpu_primitive op_softmax_middle(middle_name,
{input_name},
{writer0.get_code()},
entry_point_middle_name,
get_kernel_args(1, 1),
"",
layout_middle,
gws);
topology.add(op_softmax_middle);
const CustomKernelInfo op_softmax_middle(middle_name,
new_shape,
output_type,
{input_name},
{writer0.get_code()},
entry_point_middle_name,
gws);
writer1 << "__kernel void " << entry_point_name << "(const __global "
<< get_opencl_type_name(input_type) << " input0" << array_dims(input_shape)
......@@ -107,13 +101,12 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
}
writer1.block_end();
const cldnn::custom_gpu_primitive op_softmax(output_name,
{input_name, middle_name},
{writer1.get_code()},
entry_point_name,
get_kernel_args(2, 1),
"",
layout,
gws);
topology.add(op_softmax);
const CustomKernelInfo op_softmax(output_name,
output_shape,
output_type,
{input_name, middle_name},
{writer1.get_code()},
entry_point_name,
gws);
return {op_softmax_middle, op_softmax};
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
// Implements the Softmax operation as custom OpenCL kernels added to
// `topology`, normalizing the input over the axes given in `axes`.
// NOTE(review): this declaration uses AxisSet but "ngraph/axis_set.hpp"
// is not included directly above — presumably it is pulled in via
// "ngraph/shape.hpp"; confirm before reusing this header elsewhere.
void do_softmax_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const element::Type& input_type,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axes);
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment