Commit 67248fdb authored by Sergey Shalnov, committed by Robert Kimball

IntelGPU backend: Custom kernels refactoring 3 (#2787)

parent 2b13ae40
@@ -44,3 +44,15 @@ void runtime::intelgpu::CustomKernels::queue_krnl(const krnl_info& krnl_info,
stream.add(kernel_item);
}
}
void runtime::intelgpu::arguments_check(const shared_ptr<Node>& op, size_t input, size_t output)
{
if (op->get_input_size() != input || op->get_output_size() != output)
{
ostringstream os;
os << "Operation \"" << op->description() << "\" input and output sizes mismatch."
<< " Expected input size=" << input << ", provided=" << op->get_input_size()
<< ". Expected output size=" << output << ", provided=" << op->get_output_size();
throw invalid_argument(os.str());
}
}
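As a usage illustration, here is a minimal sketch (an assumption, not code from this commit) of how a kernel builder could validate a node's arity with arguments_check before emitting its kernel:

// Hypothetical helper for illustration only; the name is an assumption.
#include "ngraph/node.hpp"

static void check_binary_elementwise(const std::shared_ptr<ngraph::Node>& op)
{
    // A binary elementwise op is expected to carry exactly two inputs and one
    // output; arguments_check throws std::invalid_argument otherwise.
    ngraph::runtime::intelgpu::arguments_check(op, 2, 1);
}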
@@ -24,11 +24,20 @@
#include "ngraph/node.hpp"
#include "ngraph/op/all.hpp"
#include "ngraph/op/and.hpp"
#include "ngraph/op/any.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp"
#include "ngraph/op/less.hpp"
#include "ngraph/op/less_eq.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/not_equal.hpp"
#include "ngraph/op/or.hpp"
#include "ngraph/op/product.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/op/slice.hpp"
@@ -43,6 +52,8 @@ namespace ngraph
{
class CustomKernelInfo;
class CustomKernels;
void arguments_check(const std::shared_ptr<Node>& op, size_t input, size_t output);
}
}
}
@@ -107,13 +118,24 @@ private:
void queue_krnl(const krnl_info& krn_info, const std::shared_ptr<Node>& op);
krnl_info build_krnl(const std::shared_ptr<op::All>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::And>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Any>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::BatchNormInference>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::BatchNormTraining>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::BatchNormTrainingBackprop>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Broadcast>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Convolution>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropData>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropFilters>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Equal>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Greater>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::GreaterEq>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Less>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::LessEq>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Max>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Min>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::NotEqual>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Or>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Product>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Select>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Slice>& op) const;
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
// This implements the BatchNorm nGraph operation.
// nGraph uses per-channel data in this operation, while clDNN uses the full input data.
void do_batch_norm_operation(cldnn::topology& topology,
const std::string& output_name,
const element::Type& output_type,
double eps,
const std::string& input_name,
const Shape& input_shape,
const std::string& gamma_name,
const std::string& beta_name,
const std::string& mean_name,
const std::string& variance_name);
// This computes the mean of the input matrix along the channel axis
void do_create_mean(cldnn::topology& topology,
const std::string& output_name,
const element::Type& output_type,
const std::string& input_name,
const Shape& input_shape,
bool backward);
// This computes the variance of the input matrix along the channel axis
void do_create_variance(cldnn::topology& topology,
const std::string& output_name,
const element::Type& output_type,
const std::string& input_name,
const Shape& input_shape,
const std::string& mean_name);
// This computes the variance backprop of the input matrix along the channel axis
void do_create_variance_back(cldnn::topology& topology,
const std::string& output_name,
const element::Type& output_type,
double eps,
const std::string& input_name,
const Shape& input_shape,
const std::string& mean_name,
const std::string& variance_name,
const std::string& delta_name);
// This function uses the "shape" parameter as the input or output Shape
// The shapes of all other tensors are derived from its channel axis (the second axis from the left)
// Example: output[ 4, 3, 2, 8 ] means out_gamma[ 3 ]
void do_batch_norm_backprop_operation(cldnn::topology& topology,
const Shape& shape,
const element::Type& type,
const std::string& gamma_name,
const std::string& beta_name,
const std::string& input_name,
const std::string& mean_name,
const std::string& variance_name,
const std::string& delta_name,
double eps,
const std::string& output_name,
const std::string& output_gamma_name,
const std::string& output_beta_name);
}
}
}
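The declarations above describe per-channel BatchNorm handling. As a reference for that per-channel semantics only, a minimal host-side sketch of BatchNorm inference in NCHW layout follows (an illustration of the math; the clDNN kernels these functions generate are not shown in this diff):

#include <cmath>
#include <cstddef>
#include <vector>

// Reference-only sketch: gamma, beta, mean and variance are indexed by the
// channel axis, matching the per-channel comments above. The function name
// and signature are assumptions for illustration.
std::vector<float> batch_norm_inference_reference(const std::vector<float>& input,
                                                  const std::vector<float>& gamma,
                                                  const std::vector<float>& beta,
                                                  const std::vector<float>& mean,
                                                  const std::vector<float>& variance,
                                                  std::size_t N, std::size_t C,
                                                  std::size_t H, std::size_t W,
                                                  double eps)
{
    std::vector<float> output(input.size());
    for (std::size_t n = 0; n < N; ++n)
    {
        for (std::size_t c = 0; c < C; ++c)
        {
            // All parameters depend only on the channel index.
            const float scale = gamma[c] / std::sqrt(variance[c] + static_cast<float>(eps));
            for (std::size_t i = 0; i < H * W; ++i)
            {
                const std::size_t idx = (n * C + c) * H * W + i;
                output[idx] = scale * (input[idx] - mean[c]) + beta[c];
            }
        }
    }
    return output;
}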
@@ -1277,18 +1277,16 @@ CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Select>&
return {krn_ret};
}
void runtime::intelgpu::do_logic_kernel(cldnn::topology& topology,
const string& input0_name,
const Shape& input0_shape,
const element::Type& input0_type,
const string& input1_name,
const Shape& input1_shape,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const string& operation)
static CustomKernels::krnl_info do_logic_kernel(const shared_ptr<Node>& op, const string& operation)
{
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const string& input0_name = op->get_input_tensor_name(0);
const Shape& input0_shape = op->get_input_shape(0);
const element::Type& input0_type = op->get_input_element_type(0);
const string& input1_name = op->get_input_tensor_name(1);
const Shape& input1_shape = op->get_input_shape(1);
const string& output_name = op->get_output_tensor_name(0);
const Shape& output_shape = op->get_output_shape(0);
const element::Type& output_type = op->get_output_element_type(0);
const string entry_point_name = "logic_" + output_name;
CodeWriter writer;
vector<size_t> gws;
@@ -1313,15 +1311,14 @@ void runtime::intelgpu::do_logic_kernel(cldnn::topology& topology,
}
writer.block_end();
const cldnn::custom_gpu_primitive op_logical(output_name,
{input0_name, input1_name},
{writer.get_code()},
entry_point_name,
get_kernel_args(2, 1),
"",
layout,
gws);
topology.add(op_logical);
const CustomKernelInfo op_logical(output_name,
output_shape,
output_type,
{input0_name, input1_name},
{writer.get_code()},
entry_point_name,
gws);
return {op_logical};
}
void runtime::intelgpu::do_eltwise_kernel(cldnn::topology& topology,
@@ -2333,3 +2330,43 @@ size_t runtime::intelgpu::get_max_memory_rss()
return result;
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::And>& op) const
{
return do_logic_kernel(op, " && ");
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Equal>& op) const
{
return do_logic_kernel(op, " == ");
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Greater>& op) const
{
return do_logic_kernel(op, " > ");
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::GreaterEq>& op) const
{
return do_logic_kernel(op, " >= ");
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Less>& op) const
{
return do_logic_kernel(op, " < ");
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::LessEq>& op) const
{
return do_logic_kernel(op, " <= ");
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::NotEqual>& op) const
{
return do_logic_kernel(op, " != ");
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Or>& op) const
{
return do_logic_kernel(op, " || ");
}
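For context on how the operation parameter is used: the wrappers above pass a C-style operator string (" && ", " == ", " > ", ...) that do_logic_kernel splices between the two operands in the generated kernel. A host-side sketch of that splice follows (an assumption about the code-generation details elided in this diff; the exact emitted statement may differ):

#include <sstream>
#include <string>

// Illustration only: builds the per-element statement that a logic kernel
// might emit, e.g. emit_logic_statement(" > ") yields
// "output[i] = (input0[i] > input1[i]) ? 1 : 0;".
std::string emit_logic_statement(const std::string& operation)
{
    std::ostringstream os;
    os << "output[i] = (input0[i]" << operation << "input1[i]) ? 1 : 0;";
    return os.str();
}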
@@ -100,17 +100,6 @@ namespace ngraph
const element::Type& output_type,
size_t concat_axis);
void do_logic_kernel(cldnn::topology& topology,
const std::string& input0_name,
const Shape& input0_shape,
const element::Type& input0_type,
const std::string& input1_name,
const Shape& input1_shape,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const std::string& operation);
void do_eltwise_kernel(cldnn::topology& topology,
const std::string& input0_name,
const Shape& input0_shape,