Commit 82f23b38 authored by Sergey Shalnov, committed by Scott Cyphers

IntelGPU backend: Use clDNN BatchNorm in operations (#2086)

* IntelGPU backend: Use clDNN BatchNorm in operations

* IntelGPU backend: no duplications in cldnn output vector

* IntelGPU backend: PR2086. Fix clDNN batchNorm usage
parent e3a3ad2d
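
In outline, the change teaches both batch-norm cases to dispatch on input rank: rank-4 tensors go through clDNN's built-in batch_norm primitive, everything else keeps the backend's custom generated kernel. A condensed sketch of the new BatchNormInference path (a reading aid assembled from the hunks below, not verbatim code; get_input_shape, get_input_name, and do_batch_norm_operation are the backend's own helpers):

    // clDNN's batch_norm primitive only handles 4D layouts, so other ranks
    // stay on the custom OpenCL kernel.
    if (get_input_shape(op, 2).size() != 4)
    {
        do_batch_norm_operation(...); // custom kernel; argument list elided here
    }
    else
    {
        const cldnn::batch_norm batchnorm(get_output_name(op),
                                          get_input_name(op, 2), // input
                                          get_input_name(op, 3), // mean
                                          get_input_name(op, 4), // variance
                                          get_input_name(op, 0), // gamma
                                          get_input_name(op, 1), // beta
                                          eps);                  // epsilon
        topology.add(batchnorm);
    }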
@@ -30,6 +30,7 @@
 #include <CPP/input_layout.hpp>
 #include <CPP/layout.hpp>
 #include <CPP/lrn.hpp>
+#include <CPP/mutable_data.hpp>
 #include <CPP/permute.hpp>
 #include <CPP/pooling.hpp>
 #include <CPP/reorder.hpp>
@@ -384,7 +385,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
         return true;
     }

-    vector<cldnn::primitive_id> function_output_names;
+    set<cldnn::primitive_id> func_output_names;
     cldnn::topology topology;

     if (m_dump_graph_enable)
@@ -438,7 +439,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
        {
            arguments_check(op, 1, 1);

-           function_output_names.push_back(get_input_name(op));
+           func_output_names.insert(get_input_name(op));
            break;
        }
        case OP_TYPEID::GetOutputElement:
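
The container change above is the whole of the second bullet: output ids are now collected in a set so the same primitive id cannot land in clDNN's output list twice (the new BatchNormTraining case below inserts its own output name, which may coincide with one already recorded here). A self-contained illustration of the pattern; std::string stands in for cldnn::primitive_id, which clDNN defines as a string type:

    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    int main()
    {
        // A std::set silently drops repeated insertions, which is what the
        // backend now relies on when an id is recorded more than once.
        std::set<std::string> func_output_names;
        func_output_names.insert("batchnorm_7");
        func_output_names.insert("batchnorm_7"); // duplicate: ignored
        func_output_names.insert("result_0");

        // clDNN's build_option::outputs expects a vector, so convert once,
        // just before the network is built.
        const std::vector<std::string> names_vec(func_output_names.begin(),
                                                 func_output_names.end());
        for (const std::string& name : names_vec)
        {
            std::cout << name << '\n'; // each id appears exactly once
        }
        return 0;
    }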
@@ -1195,14 +1196,14 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
        }
        case OP_TYPEID::BatchNormInference:
        {
-           const shared_ptr<op::BatchNormInference> batch_norm =
+           const shared_ptr<op::BatchNormInference> bnorm =
                static_pointer_cast<op::BatchNormInference>(op);
-           const double eps = batch_norm->get_eps_value();
-           string mean_name;
-           string variance_name;
+           const double eps = bnorm->get_eps_value();

            arguments_check(op, 5, 1);

+           if (get_input_shape(op, 2).size() != 4)
+           {
                do_batch_norm_operation(topology,
                                        get_output_name(op),
                                        get_output_type(op),
@@ -1213,13 +1214,28 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
                                        get_input_name(op, 1),
                                        get_input_name(op, 3),
                                        get_input_name(op, 4));
+           }
+           else
+           {
+               const cldnn::batch_norm batchnorm(get_output_name(op),
+                                                 get_input_name(op, 2), // input
+                                                 get_input_name(op, 3), // mean
+                                                 get_input_name(op, 4), // variance
+                                                 get_input_name(op, 0), // gamma
+                                                 get_input_name(op, 1), // beta
+                                                 eps);                  // epsilon (float)
+               topology.add(batchnorm);
+           }
            break;
        }
        case OP_TYPEID::BatchNormTraining:
        {
-           const shared_ptr<op::BatchNormTraining> batch_norm =
+           const shared_ptr<op::BatchNormTraining> bnorm =
                static_pointer_cast<op::BatchNormTraining>(op);
-           const double eps = batch_norm->get_eps_value();
+           const double eps = bnorm->get_eps_value();

+           if (get_input_shape(op, 2).size() != 4)
+           {
                string mean_name;
                string variance_name;
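
Both cases now share the same guard: the rank of input 2 (the data tensor) decides between the clDNN primitive and the custom kernel, since clDNN's batch_norm only accepts 4D (NCHW-style) input. A minimal stand-alone version of the check, with ngraph's Shape modeled as a vector of dimensions:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    using Shape = std::vector<std::size_t>; // stand-in for ngraph's Shape

    // Rank-4 inputs can use cldnn::batch_norm; everything else falls back
    // to the backend's generated OpenCL kernel.
    bool use_cldnn_batch_norm(const Shape& input_shape)
    {
        return input_shape.size() == 4;
    }

    int main()
    {
        std::cout << use_cldnn_batch_norm({32, 64, 28, 28}) << '\n'; // 1 (NCHW)
        std::cout << use_cldnn_batch_norm({32, 64, 28}) << '\n';     // 0 (rank 3)
        return 0;
    }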
@@ -1275,6 +1291,60 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
                {
                    arguments_check(op, 5, 1); // throw exception in this case
                }
+           }
+           else
+           {
+               if (op->get_inputs().size() == 5 && op->get_outputs().size() == 1)
+               {
+                   const cldnn::batch_norm batchnorm(get_output_name(op),
+                                                     get_input_name(op, 2), // input
+                                                     get_input_name(op, 3), // mean
+                                                     get_input_name(op, 4), // variance
+                                                     get_input_name(op, 0), // gamma
+                                                     get_input_name(op, 1), // beta
+                                                     eps);                  // epsilon (float)
+                   topology.add(batchnorm);
+               }
+               else if (op->get_inputs().size() == 3 && op->get_outputs().size() == 3)
+               {
+                   const string mean_name = get_output_name(op, 1);
+                   const string variance_name = get_output_name(op, 2);

+                   // Allocate a buffer for mean as mutable_data so the
+                   // batch_norm primitive can write into it
+                   const cldnn::layout mean_layout = IntelGPULayout::create_cldnn_layout(
+                       get_output_type(op, 1), get_output_shape(op, 1));
+                   const cldnn::memory mean_mem(cldnn::memory::allocate(*ocl_engine, mean_layout));
+                   const cldnn::mutable_data mean_const(mean_name, mean_mem);
+                   topology.add(mean_const);

+                   // Allocate a buffer for variance as mutable_data so the
+                   // batch_norm primitive can write into it
+                   const cldnn::layout variance_layout = IntelGPULayout::create_cldnn_layout(
+                       get_output_type(op, 2), get_output_shape(op, 2));
+                   const cldnn::memory variance_mem(
+                       cldnn::memory::allocate(*ocl_engine, variance_layout));
+                   const cldnn::mutable_data variance_const(variance_name, variance_mem);
+                   topology.add(variance_const);

+                   const cldnn::batch_norm batchnorm(get_output_name(op),
+                                                     get_input_name(op, 2), // input
+                                                     eps,                   // epsilon (float)
+                                                     mean_name,
+                                                     variance_name,
+                                                     get_input_name(op, 0),  // gamma
+                                                     get_input_name(op, 1)); // beta
+                   topology.add(batchnorm);

+                   // Mark this operation as "output" so mean and variance are
+                   // kept in the cldnn::network
+                   func_output_names.insert(get_output_name(op));
+               }
+               else
+               {
+                   arguments_check(op, 5, 1); // throw exception in this case
+               }
+           }
            break;
        }
        case OP_TYPEID::Convolution:
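
The three-output training form above is the subtle part: in training mode clDNN's batch_norm computes mean and variance itself, so the backend pre-allocates two buffers, registers them as mutable_data primitives under the op's second and third output ids, and hands their names to the primitive to fill. A condensed sketch of that wiring, reusing only calls that appear in the diff (ocl_engine and IntelGPULayout::create_cldnn_layout are the backend's own):

    // Pre-allocate the mean buffer and publish it under the op's output id;
    // variance is handled identically with output index 2.
    const cldnn::layout mean_layout = IntelGPULayout::create_cldnn_layout(
        get_output_type(op, 1), get_output_shape(op, 1));
    const cldnn::memory mean_mem(cldnn::memory::allocate(*ocl_engine, mean_layout));
    topology.add(cldnn::mutable_data(get_output_name(op, 1), mean_mem));

    // Training-form constructor: epsilon follows the input, and the mean and
    // variance ids name the mutable_data buffers batch_norm will write.
    const cldnn::batch_norm batchnorm(get_output_name(op),
                                      get_input_name(op, 2),  // input
                                      eps,                    // epsilon
                                      get_output_name(op, 1), // mean (written)
                                      get_output_name(op, 2), // variance (written)
                                      get_input_name(op, 0),  // gamma
                                      get_input_name(op, 1)); // beta
    topology.add(batchnorm);

    // Marking the op as a network output keeps the mutable_data primitives
    // from being pruned by clDNN's graph optimizer.
    func_output_names.insert(get_output_name(op));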
@@ -1538,9 +1608,10 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
    network_build_options.set_option(cldnn::build_option::optimize_data(m_cldnn_graph_optimize));

-   if (!function_output_names.empty())
+   if (!func_output_names.empty())
    {
-       network_build_options.set_option(cldnn::build_option::outputs(function_output_names));
+       vector<cldnn::primitive_id> names_vec(func_output_names.begin(), func_output_names.end());
+       network_build_options.set_option(cldnn::build_option::outputs(names_vec));
    }

    if (m_cldnn_dump_enable)
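
One closing note on the interplay here: optimize_data(true) lets clDNN fuse and prune primitives aggressively, and build_option::outputs is the exemption list, which is exactly why the BatchNormTraining case inserted its output id above. A hedged sketch of the final build step under those assumptions (the cldnn::network constructor is the standard clDNN C++ API; ocl_engine is the backend's engine):

    cldnn::build_options network_build_options;
    network_build_options.set_option(cldnn::build_option::optimize_data(true));

    if (!func_output_names.empty())
    {
        // Materialize the dedup'ed set into the vector clDNN expects and pin
        // those primitives as outputs so optimization cannot remove them.
        const std::vector<cldnn::primitive_id> names_vec(func_output_names.begin(),
                                                         func_output_names.end());
        network_build_options.set_option(cldnn::build_option::outputs(names_vec));
    }

    cldnn::network network(*ocl_engine, topology, network_build_options);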