Commit 833a05b2 authored by varun-intel, committed by Scott Cyphers

A reference implementation of batchnorm fprop and tests. (#861)

* interpreter implementation and tests

* style

* correct

* tolerance

* skip

* type

* cast

* double

* types

* format

* add bn to the inference engine backend
parent 3327985d
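For reference, the fprop computed by the new kernels is, per channel c (channel axis 1) of an input x with N_c elements in channel c:

\[
\mu_c = \frac{1}{N_c}\sum_{i \in c} x_i, \qquad
\sigma_c^2 = \frac{1}{N_c}\sum_{i \in c} (x_i - \mu_c)^2, \qquad
y_i = \gamma_c \, \frac{x_i - \mu_c}{\sqrt{\sigma_c^2 + \epsilon}} + \beta_c
\]

The three-output variant returns y together with the per-channel mean and variance; the one-output variant takes the mean and variance as inputs instead of computing them.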
@@ -81,6 +81,7 @@
#include "ngraph/op/exp.hpp"
#include "ngraph/op/floor.hpp"
#include "ngraph/op/function_call.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp"
#include "ngraph/op/less.hpp"
......
@@ -170,7 +170,7 @@ bool runtime::interpreter::INTBackend::call(shared_ptr<Function> function,
}
else
{
- type = op->get_element_type();
+ type = op->get_outputs().at(0).get_element_type();
}
if (instance.m_performance_counters_enabled)
......
@@ -26,11 +26,13 @@
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/min.hpp"
@@ -55,6 +57,7 @@
#include "ngraph/runtime/reference/asin.hpp"
#include "ngraph/runtime/reference/atan.hpp"
#include "ngraph/runtime/reference/avg_pool.hpp"
#include "ngraph/runtime/reference/batch_norm.hpp"
#include "ngraph/runtime/reference/broadcast.hpp"
#include "ngraph/runtime/reference/ceiling.hpp"
#include "ngraph/runtime/reference/concat.hpp"
@@ -226,6 +229,41 @@ private:
avg_pool->get_padding_above(),
avg_pool->get_include_padding_in_avg_computation());
}
else if (node_op == "GetOutputElement")
{
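// GetOutputElement selects output n of its multi-output argument; each of that
// argument's outputs arrives here as a separate input tensor, so args[n] can be
// copied directly into out[0].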
const op::GetOutputElement* get_output_element =
static_cast<const op::GetOutputElement*>(&node);
size_t n = get_output_element->get_n();
size_t num_bytes = out[0]->get_element_count() * out[0]->get_element_type().size();
std::memcpy(out[0]->get_data_ptr(), args[n]->get_data_ptr(), num_bytes);
}
else if (node_op == "BatchNorm")
{
ngraph::op::BatchNorm* bn = dynamic_cast<ngraph::op::BatchNorm*>(&node);
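// With three outputs, the kernel computes the per-channel mean and variance from
// the input (training-style fprop); with one output, the mean and variance are
// supplied as the last two inputs (inference-style fprop).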
if (bn->get_output_size() == 3)
{
reference::batch_norm_three_outputs<T>(
bn->get_eps_value(),
reinterpret_cast<T*>(args[0]->get_data_ptr()),
reinterpret_cast<T*>(args[1]->get_data_ptr()),
reinterpret_cast<T*>(args[2]->get_data_ptr()),
reinterpret_cast<T*>(out[0]->get_data_ptr()),
reinterpret_cast<T*>(out[1]->get_data_ptr()),
reinterpret_cast<T*>(out[2]->get_data_ptr()),
args[2]->get_shape());
}
else
{
reference::batch_norm_one_output<T>(bn->get_eps_value(),
reinterpret_cast<T*>(args[0]->get_data_ptr()),
reinterpret_cast<T*>(args[1]->get_data_ptr()),
reinterpret_cast<T*>(args[2]->get_data_ptr()),
reinterpret_cast<T*>(args[3]->get_data_ptr()),
reinterpret_cast<T*>(args[4]->get_data_ptr()),
reinterpret_cast<T*>(out[0]->get_data_ptr()),
args[2]->get_shape());
}
}
else if (node_op == "AvgPoolBackprop")
{
op::AvgPoolBackprop* apb = dynamic_cast<op::AvgPoolBackprop*>(&node);
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <cmath>
#include <iostream>
#include "ngraph/axis_vector.hpp"
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/util.hpp"
namespace ngraph
{
namespace runtime
{
namespace reference
{
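// Batch norm fprop with computed statistics: arg0 = gamma, arg1 = beta,
// arg2 = input (channel axis 1). Writes the normalized result to out0 and the
// per-channel mean and variance to out1 and out2.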
template <typename T>
void batch_norm_three_outputs(double eps,
const T* arg0,
const T* arg1,
const T* arg2,
T* out0,
T* out1,
T* out2,
const Shape& arg2_shape)
{
auto eps_casted = static_cast<T>(eps);
auto channels = arg2_shape[1];
// We use these objects to iterate over the indices in a channel.
// The start and end points for the channel axis are modified in the loop.
Coordinate start_corner;
Coordinate end_corner;
for (size_t i = 0; i < arg2_shape.size(); i++)
{
start_corner.push_back(0);
end_corner.push_back(arg2_shape[i]);
}
for (size_t c = 0; c < channels; c++)
{
T channel_sum = 0;
start_corner[1] = c;
end_corner[1] = c + 1;
// Compute the mean
CoordinateTransform arg2_transform(arg2_shape, start_corner, end_corner);
for (Coordinate arg2_coord : arg2_transform)
{
channel_sum += arg2[arg2_transform.index(arg2_coord)];
}
T channel_mean = channel_sum / (shape_size(arg2_shape) / channels);
out1[c] = channel_mean;
// Compute the variance
T channel_diff_square_sum = 0;
for (Coordinate arg2_coord : arg2_transform)
{
auto mean_diff = arg2[arg2_transform.index(arg2_coord)] - channel_mean;
channel_diff_square_sum += mean_diff * mean_diff;
}
T channel_var = channel_diff_square_sum / (shape_size(arg2_shape) / channels);
out2[c] = channel_var;
// Compute the normalized output
for (Coordinate arg2_coord : arg2_transform)
{
auto channel_gamma = arg0[c];
auto channel_beta = arg1[c];
auto input_index = arg2_transform.index(arg2_coord);
auto normalized = (arg2[input_index] - channel_mean) /
(std::sqrt(channel_var + eps_casted));
out0[input_index] = normalized * channel_gamma + channel_beta;
}
}
}
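// Batch norm fprop with supplied statistics: arg0 = gamma, arg1 = beta,
// arg2 = input, arg3 = per-channel mean, arg4 = per-channel variance. Writes
// only the normalized result to out0.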
template <typename T>
void batch_norm_one_output(double eps,
const T* arg0,
const T* arg1,
const T* arg2,
const T* arg3,
const T* arg4,
T* out0,
const Shape& arg2_shape)
{
auto eps_casted = static_cast<T>(eps);
CoordinateTransform arg2_transform(arg2_shape);
for (Coordinate arg2_coord : arg2_transform)
{
auto channel_num = arg2_coord[1];
auto channel_gamma = arg0[channel_num];
auto channel_beta = arg1[channel_num];
auto channel_mean = arg3[channel_num];
auto channel_var = arg4[channel_num];
auto input_index = arg2_transform.index(arg2_coord);
auto normalized =
(arg2[input_index] - channel_mean) / (std::sqrt(channel_var + eps_casted));
out0[input_index] = normalized * channel_gamma + channel_beta;
}
}
}
}
}
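A minimal standalone sketch of calling the reference kernel above directly (the function name and the example values below are only illustrative; the argument order gamma, beta, input, mean, variance follows the kernels and tests in this commit):

#include <vector>
#include "ngraph/shape.hpp"
#include "ngraph/runtime/reference/batch_norm.hpp"

// Illustrative example: inference-style fprop on a 2x3 input with
// caller-supplied per-channel statistics.
void batch_norm_reference_example()
{
    ngraph::Shape shape_in{2, 3};                   // batch of 2, 3 channels (axis 1)
    std::vector<double> gamma{1.0, 1.0, 1.0};       // per-channel scale
    std::vector<double> beta{0.0, 0.0, 0.0};        // per-channel shift
    std::vector<double> mean{0.0, 0.0, 0.0};        // supplied channel means
    std::vector<double> variance{1.0, 1.0, 1.0};    // supplied channel variances
    std::vector<double> input{-1.9, -2.0, 0.5, 2.5, -0.2, 0.4};
    std::vector<double> output(input.size());
    ngraph::runtime::reference::batch_norm_one_output<double>(1e-3, // eps
                                                              gamma.data(),
                                                              beta.data(),
                                                              input.data(),
                                                              mean.data(),
                                                              variance.data(),
                                                              output.data(),
                                                              shape_in);
}

After the call, output holds gamma * (input - mean) / sqrt(variance + eps) + beta for each element.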
@@ -286,6 +286,94 @@ TEST(${BACKEND_NAME}, abs)
EXPECT_EQ((vector<float>{1, 2, 0, 4.75f}), read_vector<float>(result));
}
TEST(${BACKEND_NAME}, batch_norm_one_output)
{
SKIP_TEST_FOR("CPU", "${BACKEND_NAME}");
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto shape_in = Shape{2, 3};
auto shape_mean = Shape{3};
auto A = make_shared<op::Parameter>(element::f64, shape_in);
auto Mean =
op::Constant::create(element::f64, shape_mean, {0.00396654, -1.25294404, 1.16651872});
auto Variance =
op::Constant::create(element::f64, shape_mean, {2.40871689, 1.44969511, 0.23469392});
auto Beta =
op::Constant::create(element::f64, shape_mean, {2.14211921, -0.75733924, 0.42210531});
auto Gamma =
op::Constant::create(element::f64, shape_mean, {1.75437676, 0.37950502, 1.13727544});
auto BN = make_shared<op::BatchNorm>(1e-3, Gamma, Beta, A, Mean, Variance, false);
auto f = make_shared<Function>(BN, op::ParameterVector{A});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f64, shape_in);
copy_data(
a,
vector<double>{-1.97431703, -2.06521307, 0.54122217, 2.53375939, -0.22342691, 0.45340773});
auto result = backend->create_tensor(element::f64, shape_in);
vector<double> expected_result{
-0.09365749, -1.01327395, -1.04269195, 5.00118923, -0.43295258, -1.24840283};
backend->call(f, {result}, {a});
EXPECT_TRUE(test::all_close(vector<double>{expected_result}, read_vector<double>(result)));
}
TEST(${BACKEND_NAME}, batch_norm_three_outputs)
{
SKIP_TEST_FOR("CPU", "${BACKEND_NAME}");
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto shape_in = Shape{2, 3};
auto shape_mean = Shape{3};
auto A = make_shared<op::Parameter>(element::f64, shape_in);
auto Beta =
op::Constant::create(element::f64, shape_mean, {2.14211921, -0.75733924, 0.42210531});
auto Gamma =
op::Constant::create(element::f64, shape_mean, {1.75437676, 0.37950502, 1.13727544});
auto BN = make_shared<op::BatchNorm>(1e-3, Gamma, Beta, A);
auto f0 =
make_shared<Function>(make_shared<op::GetOutputElement>(BN, 0), op::ParameterVector{A});
auto f1 =
make_shared<Function>(make_shared<op::GetOutputElement>(BN, 1), op::ParameterVector{A});
auto f2 =
make_shared<Function>(make_shared<op::GetOutputElement>(BN, 2), op::ParameterVector{A});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f64, shape_in);
copy_data(
a,
vector<double>{-1.97431703, -2.06521307, 0.54122217, 2.53375939, -0.22342691, 0.45340773});
auto result0 = backend->create_tensor(element::f64, shape_in);
vector<double> expected_result0{
0.3879149, -1.13662076, 1.34494817, 3.89632344, -0.37805778, -0.50073695};
backend->call(f0, {result0}, {a});
EXPECT_TRUE(test::all_close(vector<double>{expected_result0}, read_vector<double>(result0)));
auto result1 = backend->create_tensor(element::f64, shape_mean);
vector<double> expected_result1{0.27972114, -1.14431989, 0.49731493};
backend->call(f1, {result1}, {a});
EXPECT_TRUE(test::all_close(vector<double>{expected_result1}, read_vector<double>(result1)));
auto result2 = backend->create_tensor(element::f64, shape_mean);
vector<double> expected_result2{5.08068895e+00, 8.48043919e-01, 1.92784308e-03};
backend->call(f2, {result2}, {a});
EXPECT_TRUE(test::all_close(vector<double>{expected_result2}, read_vector<double>(result2)));
}
TEST(${BACKEND_NAME}, ceiling)
{
Shape shape{2, 2};
......