Commit 105f03bc authored by Pruthvi's avatar Pruthvi Committed by Scott Cyphers

gauss error function (erf) implementation through eigen for DEX and CODEGEN (#2660)

* - Gauss error function (erf) implementation through Eigen

* - erf implementation for CPU codegen, plus a unit test comparing erf on the CPU backend against the interpreter

* fix style
parent 3611cc60
......@@ -46,6 +46,7 @@ set(SRC
builder/convolution.cpp
builder/dot.cpp
builder/embedding_lookup.cpp
builder/erf.cpp
builder/leaky_relu.cpp
builder/lstm.cpp
builder/lrn.cpp
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/kernel/erf.hpp"
#include "ngraph/op/erf.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // Builds the CPU executor functor(s) for the Erf (Gauss error
            // function) op. f32/f64 inputs use the Eigen-based kernel; any
            // other element type falls back to the scalar reference kernel.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::Erf)
            {
                auto element_type = args[0].get_element_type();
                auto element_count = out[0].get_size();
                // References into the external function's tensor-data slots.
                // The underlying pointers are assigned at runtime, so the
                // functors below must capture these slots by reference.
                auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
                auto& out0_tensor = external_function->get_tensor_data(out[0].get_name());
                auto& functors = external_function->get_functors();

                if (element_type == element::f32 || element_type == element::f64)
                {
                    std::function<decltype(runtime::cpu::kernel::erf<float>)> kernel;
                    if (element_type == element::f32)
                    {
                        kernel = runtime::cpu::kernel::erf<float>;
                    }
                    else
                    {
                        // Only f64 can reach here given the enclosing check.
                        kernel = runtime::cpu::kernel::erf<double>;
                    }
                    auto functor = [&, kernel, element_count](CPURuntimeContext* ctx,
                                                              CPUExecutionContext* ectx) {
                        kernel(arg0_tensor, out0_tensor, element_count, ectx->arena);
                    };
                    functors.emplace_back(functor);
                }
                else
                {
                    std::function<decltype(runtime::cpu::kernel::reference_erf<float>)> kernel;
                    SELECT_KERNEL(
                        kernel, args[0].get_element_type(), runtime::cpu::kernel::reference_erf);
                    // BUGFIX: capture element_count by value (the original
                    // default-by-reference capture left it dangling once the
                    // builder returned) and the tensor-data slots by
                    // reference (the original by-value capture snapshotted
                    // pointers that are only assigned later at runtime).
                    // This now matches the f32/f64 branch above.
                    auto functor = [&, kernel, element_count](CPURuntimeContext* ctx,
                                                              CPUExecutionContext* ectx) {
                        kernel(arg0_tensor, out0_tensor, element_count);
                    };
                    functors.emplace_back(functor);
                }
            }
            REGISTER_OP_BUILDER(Erf);
        }
    }
}
......@@ -50,6 +50,7 @@
#include "ngraph/op/dot.hpp"
#include "ngraph/op/embedding_lookup.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/erf.hpp"
#include "ngraph/op/exp.hpp"
#include "ngraph/op/experimental/generate_mask.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
......@@ -3237,6 +3238,28 @@ namespace ngraph
writer.block_end();
}
// Emits generated C++ for the Erf (Gauss error function) op: f32/f64 call
// the Eigen-based kernel (arena 0); other element types call the scalar
// reference kernel.
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Erf)
{
    writer.block_begin();
    auto element_count = out[0].get_size();
    if (args[0].get_element_type() == element::f32 ||
        args[0].get_element_type() == element::f64)
    {
        writer << "cpu::kernel::erf<" << args[0].get_element_type().c_type_string()
               << ">(" << args[0].get_name() << ", " << out[0].get_name() << ", "
               << element_count << ", 0);\n";
    }
    else
    {
        // BUGFIX: the original emitted an extra ", " before element_count,
        // producing `reference_erf<T>(arg, out, , N);` — a syntax error in
        // the generated source.
        writer << "cpu::kernel::reference_erf<"
               << args[0].get_element_type().c_type_string() << ">("
               << args[0].get_name() << ", " << out[0].get_name() << ", "
               << element_count << ");\n";
    }
    writer.block_end();
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Min)
{
......
......@@ -65,6 +65,7 @@
#include "ngraph/op/dot.hpp"
#include "ngraph/op/embedding_lookup.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/erf.hpp"
#include "ngraph/op/exp.hpp"
#include "ngraph/op/experimental/generate_mask.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
......@@ -305,6 +306,7 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::Concat), &runtime::cpu::CPU_Emitter::emit<op::Concat>},
{TI(ngraph::op::Divide), &runtime::cpu::CPU_Emitter::emit<op::Divide>},
{TI(ngraph::op::Equal), &runtime::cpu::CPU_Emitter::emit<op::Equal>},
{TI(ngraph::op::Erf), &runtime::cpu::CPU_Emitter::emit<op::Erf>},
{TI(ngraph::op::GetOutputElement), &runtime::cpu::CPU_Emitter::emit<op::GetOutputElement>},
{TI(ngraph::op::Greater), &runtime::cpu::CPU_Emitter::emit<op::Greater>},
{TI(ngraph::op::GreaterEq), &runtime::cpu::CPU_Emitter::emit<op::GreaterEq>},
......
......@@ -218,6 +218,12 @@ namespace ngraph
const Coordinate& upper_bounds,
const Strides& slice_strides,
int arena);
// Elementwise Gauss error function backed by Eigen (see erf.hpp); `arena`
// selects the Eigen execution device.
template <typename ElementType>
void erf(void* input0, void* output, size_t count, int arena);
// Scalar fallback that delegates to the interpreter's reference erf, for
// element types the Eigen kernel does not handle.
template <typename ElementType>
void reference_erf(void* arg, void* out, size_t count);
}
}
}
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include <unsupported/Eigen/SpecialFunctions>
#include "ngraph/runtime/cpu/cpu_executor.hpp"
#include "ngraph/runtime/reference/erf.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void erf(void* input0, void* output, size_t count, int arena)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
out.device(ngraph::runtime::cpu::executor::GetCPUExecutor().get_device(arena)) =
in0.unaryExpr(Eigen::internal::scalar_erf_op<ElementType>());
}
template <typename ElementType>
void reference_erf(void* arg, void* out, size_t count)
{
reference::erf<ElementType>(static_cast<const ElementType*>(arg),
static_cast<ElementType*>(out),
count);
}
}
}
}
}
......@@ -28,6 +28,7 @@
#include "ngraph/log.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/erf.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/parameter.hpp"
#include "ngraph/pass/manager.hpp"
......@@ -1009,3 +1010,32 @@ TEST(cpu_test, conv_negative_padding)
};
compare_backends(make_f(), make_f(), "CPU", "INTERPRETER");
}
// NOTE(review): "guass" is a typo for "gauss"; the name is kept unchanged so
// existing test filters keep matching.
TEST(cpu_test, guass_error_function_erf)
{
    // Two structurally identical erf graphs: one executed on the CPU
    // backend, one on the interpreter, then compared elementwise.
    auto build_graph = []() -> std::shared_ptr<Function> {
        auto input = make_shared<op::Parameter>(element::f32, Shape{1, 4, 10, 6, 10});
        return make_shared<Function>(make_shared<op::Erf>(input), ParameterVector{input});
    };

    auto backend = runtime::Backend::create("CPU");
    auto f_cpu = build_graph();
    auto f_int = build_graph();

    // Fill every parameter with uniform random values in [-100, 100).
    test::Uniform<float> rng(-100.0f, 100.0f);
    vector<vector<float>> args;
    for (const shared_ptr<op::Parameter>& param : f_cpu->get_parameters())
    {
        vector<float> values(shape_size(param->get_shape()));
        rng.initialize(values);
        args.push_back(values);
    }

    auto expected = execute(f_int, args, "INTERPRETER");
    auto actual = execute(f_cpu, args, "CPU");
    for (size_t i = 0; i < actual.size(); i++)
    {
        EXPECT_TRUE(test::all_close(actual.at(i), expected.at(i)));
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment