Commit 9a3a0314 authored by shssf's avatar shssf Committed by Robert Kimball

IntelGPUBackend: BatchNorm 5x1 operation (#1244)

* IntelGPUBackend: BatchNorm 5x1 operation

* Update intelgpu_op_batchnorm.cpp

* PR1244 Comments are addressed
parent f3d7946b
...@@ -18,6 +18,7 @@ set(SRC ...@@ -18,6 +18,7 @@ set(SRC
intelgpu_backend.cpp intelgpu_backend.cpp
intelgpu_tensor_view.cpp intelgpu_tensor_view.cpp
intelgpu_layout.cpp intelgpu_layout.cpp
intelgpu_op_batchnorm.cpp
) )
if (NGRAPH_INTELGPU_ENABLE) if (NGRAPH_INTELGPU_ENABLE)
......
...@@ -35,10 +35,10 @@ void arguments_check(const shared_ptr<Node>& op, size_t input, size_t output) ...@@ -35,10 +35,10 @@ void arguments_check(const shared_ptr<Node>& op, size_t input, size_t output)
if (op->get_input_size() != input || op->get_output_size() != output) if (op->get_input_size() != input || op->get_output_size() != output)
{ {
ostringstream os; ostringstream os;
os << "Operation \"" << op->description() << "\" input and output sizes mismatch.\n" os << "Operation \"" << op->description() << "\" input and output sizes mismatch."
<< "Expected input size=" << op->get_input_size() << ", provided=" << input << "\n" << " Expected input size=" << op->get_input_size() << ", provided=" << input
<< "Expected output size=" << op->get_output_size() << ", provided=" << output; << ". Expected output size=" << op->get_output_size() << ", provided=" << output;
throw std::invalid_argument(os.str()); throw invalid_argument(os.str());
} }
} }
...@@ -140,6 +140,51 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -140,6 +140,51 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
do_eltwise_operation(topology, op, cldnn::eltwise_mode::prod); do_eltwise_operation(topology, op, cldnn::eltwise_mode::prod);
} }
else if ("BatchNorm" == op->description())
{
const shared_ptr<op::BatchNorm> batch_norm = static_pointer_cast<op::BatchNorm>(op);
const double eps = batch_norm->get_eps_value();
if (op->get_inputs().size() < 3 || op->get_outputs().empty())
{
arguments_check(op, 3, 1); // throw exception in this case
}
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const string& gamma_name = op->get_inputs().at(0).get_tensor().get_name();
const string& beta_name = op->get_inputs().at(1).get_tensor().get_name();
const string& input_name = op->get_inputs().at(2).get_tensor().get_name();
const Shape& input_shape = op->get_inputs().at(2).get_shape();
if (op->get_outputs().size() == 1)
{
arguments_check(op, 5, 1);
const string& mean_name = op->get_inputs().at(3).get_tensor().get_name();
const string& variance_name = op->get_inputs().at(4).get_tensor().get_name();
do_batch_norm_operation(topology,
output_name,
eps,
input_name,
input_shape,
gamma_name,
beta_name,
mean_name,
variance_name);
}
else if (op->get_outputs().size() == 3)
{
arguments_check(op, 3, 3);
do_batch_norm_operation(
topology, output_name, eps, input_name, input_shape, gamma_name, beta_name);
}
else
{
arguments_check(op, 5, 1); // throw exception in this case
}
}
else else
{ {
ostringstream os; ostringstream os;
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <CPP/batch_norm.hpp>
#include <CPP/concatenation.hpp>
#include <CPP/scale.hpp>
#include <CPP/split.hpp>
#include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
#include "ngraph/op/batch_norm.hpp"
using namespace std;
using namespace ngraph;
// Maps a 4D-normalized Shape dimension index onto the matching
// cldnn::concatenation axis constant (b, f, y, x in that order).
static cldnn::concatenation::concatenation_axis get_cldnn_axis(size_t tensor_channel)
{
    if (tensor_channel == 0)
    {
        return cldnn::concatenation::along_b;
    }
    if (tensor_channel == 1)
    {
        return cldnn::concatenation::along_f;
    }
    if (tensor_channel == 2)
    {
        return cldnn::concatenation::along_y;
    }
    if (tensor_channel == 3)
    {
        return cldnn::concatenation::along_x;
    }
    throw invalid_argument("intelgpu::get_cldnn_axis() wrong input tensor channel.");
}
// Adds a cldnn::split primitive that slices the tensor named `name` at the
// given offsets; returns the primitive id of the split result ("<name>_split").
static string do_matrix_split(cldnn::topology& topology,
                              const string& name,
                              const vector<pair<cldnn::primitive_id, cldnn::tensor>>& offsets)
{
    const string split_id = name + "_split";
    topology.add(cldnn::split(split_id, name, offsets));
    return split_id;
}
// Placeholder for deriving the per-channel mean from the input tensor.
// Always throws: mean calculation is not implemented yet.
static string get_batch_norm_mean(cldnn::topology& topology, const string& input_name)
{
    const string error_text =
        "intelgpu::get_batch_norm_mean() Calculation matrix mean is not yet supported.";
    throw invalid_argument(error_text);
}
// Placeholder for deriving the per-channel variance from the input tensor and
// its mean. Always throws: variance calculation is not implemented yet.
static string get_batch_norm_variance(cldnn::topology& topology,
                                      const string& input_name,
                                      const string& mean_name)
{
    const string error_text =
        "intelgpu::get_batch_norm_variance() Calculation matrix variance is not yet supported.";
    throw invalid_argument(error_text);
}
// Implements nGraph BatchNorm on clDNN.
//
// clDNN's batch_norm primitive operates on a whole tensor and does not apply
// gamma/beta, so the input (and the per-channel gamma/beta/mean/variance
// vectors) are split along the channel axis, batch-normalized and scaled
// per channel, then concatenated back into the output tensor.
//
// Parameters:
//   topology          - clDNN topology the primitives are appended to
//   output_name       - primitive id of the final concatenated result
//   eps               - epsilon added to the variance for numeric stability
//   input_name/shape  - input tensor id and its nGraph shape (must be 2D..4D)
//   gamma_name        - per-channel scale vector id
//   beta_name         - per-channel shift vector id
//   mean_name_inp     - per-channel mean id; empty => compute it (unsupported)
//   variance_name_inp - per-channel variance id; empty => compute it (unsupported)
//
// Throws invalid_argument on unsupported shapes or on empty mean/variance.
void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
                                                const string& output_name,
                                                double eps,
                                                const string& input_name,
                                                const Shape& input_shape,
                                                const string& gamma_name,
                                                const string& beta_name,
                                                const string& mean_name_inp,
                                                const string& variance_name_inp)
{
    vector<pair<cldnn::primitive_id, cldnn::tensor>> split_offsets;
    vector<pair<cldnn::primitive_id, cldnn::tensor>> vec_offsets;
    vector<cldnn::primitive_id> dim_set;

    if (input_shape.size() < 2 || input_shape.size() > 4)
    {
        throw invalid_argument("intelgpu::do_batch_norm_operation() wrong input shape.");
    }

    // According to the documentation, input data channel is always being axis 1
    // Assumed the second dimension from the left. Example {0, 1, 0, 0} or {0, 1}
    // Also, input data must be at least 2D array
    const size_t shape_channel = 1;
    const size_t cldnn_channel = 4 - input_shape.size() + shape_channel;

    const size_t split_arr_count = input_shape.at(shape_channel);
    for (size_t i = 0; i < split_arr_count; ++i)
    {
        const string str_i = to_string(i);
        // 1D per-channel vectors (gamma/beta/mean/variance) are split element-wise
        const cldnn::tensor vec_offset(0, 0, i, 0);
        vec_offsets.push_back(pair<cldnn::primitive_id, cldnn::tensor>(str_i, vec_offset));

        // The input tensor is split along its channel axis; all other offsets stay 0
        vector<cldnn::tensor::value_type> offset({0, 0, 0, 0}); // No action by default
        offset.at(cldnn_channel) = i;

        // cldnn::tensor constructor takes (b, f, x, y), hence the swapped last two
        const cldnn::tensor input_offset(offset.at(0), offset.at(1), offset.at(3), offset.at(2));
        split_offsets.push_back(pair<cldnn::primitive_id, cldnn::tensor>(str_i, input_offset));
    }

    string mean_name = mean_name_inp;
    if (mean_name_inp.empty())
    {
        mean_name = get_batch_norm_mean(topology, input_name);
    }

    string variance_name = variance_name_inp;
    if (variance_name_inp.empty())
    {
        variance_name = get_batch_norm_variance(topology, input_name, mean_name);
    }

    const string input_split_name = do_matrix_split(topology, input_name, split_offsets);
    const string mean_split_name = do_matrix_split(topology, mean_name, vec_offsets);
    const string variance_split_name = do_matrix_split(topology, variance_name, vec_offsets);
    const string gamma_split_name = do_matrix_split(topology, gamma_name, vec_offsets);
    const string beta_split_name = do_matrix_split(topology, beta_name, vec_offsets);

    for (size_t i = 0; i < split_arr_count; ++i)
    {
        // clDNN names each split output "<split_id>:<index>"
        const string suf = ':' + to_string(i);
        const string out_bn_name = output_name + "_out_bn";

        const cldnn::batch_norm cldd_batchnorm(out_bn_name + suf,
                                               input_split_name + suf,
                                               mean_split_name + suf,
                                               variance_split_name + suf,
                                               eps);
        topology.add(cldd_batchnorm);

        // Apply gamma (scale) and beta (shift), which cldnn::batch_norm omits
        const cldnn::scale op_scale(
            output_name + suf, out_bn_name + suf, gamma_split_name + suf, beta_split_name + suf);
        topology.add(op_scale);

        dim_set.push_back(output_name + suf);
    }

    // Reassemble per-channel results into the final output tensor
    const cldnn::concatenation op_concat(output_name, dim_set, get_cldnn_axis(cldnn_channel));
    topology.add(op_concat);
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
// This implements BatchNorm nGraph operation
// Since nGraph uses channels in this operation but clDNN uses full input data
// at one time we have to use following algorithm:
// 1. Split all input data arrays into several matrices by channel axis
// 2. Independently do cldnn::batch_norm on particular matrix
// 3. Every result of the cldnn::batch_norm must be scaled and
// shifted because cldnn::batch_norm doesn't use gamma and beta
// 4. Concatenate all results into output matrix by channel axis
//
// If mean_name or variance_name is left empty, the implementation attempts to
// compute the missing statistic from the input (currently throws: see the
// get_batch_norm_mean/get_batch_norm_variance stubs in the .cpp).
void do_batch_norm_operation(cldnn::topology& topology,
const std::string& output_name,
double eps,
const std::string& input_name,
const Shape& input_shape,
const std::string& gamma_name,
const std::string& beta_name,
const std::string& mean_name = std::string(),
const std::string& variance_name = std::string());
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment