Commit abb68627 authored by Jaikrishnan Menon, committed by Scott Cyphers

Jmenon/dexec (#1092)

* CPU: Direct Execution
Part 1, with the bare-minimum infrastructure

* Refactor: Move build-related functionality to a separate TU
and an external-function method

* Add TU back after merge

* Remove an assert

* Remove commented-out code
parent 79dd92d3
@@ -16,6 +16,7 @@
set(SRC
cpu_backend.cpp
cpu_builder.cpp
cpu_call_frame.cpp
cpu_emitter.cpp
cpu_external_function.cpp
......
This diff is collapsed.
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <functional>
#include <string>
#include <typeindex>
#include <unordered_map>
#include <vector>
#include "ngraph/node.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#define BUILDER_DECL(op_name) \
build<op_name>(CPU_ExternalFunction * external_function, \
const ngraph::Node* node, \
const std::vector<TensorViewWrapper>& args, \
const std::vector<TensorViewWrapper>& out)
namespace ngraph
{
namespace runtime
{
namespace cpu
{
using BuildOpFunction =
std::function<void(CPU_ExternalFunction* external_function,
const ngraph::Node*,
const std::vector<TensorViewWrapper>& inputs,
const std::vector<TensorViewWrapper>& outputs)>;
using BuildOpMap = std::unordered_map<std::type_index, BuildOpFunction>;
extern const BuildOpMap build_dispatcher;
class Builder
{
public:
// Fallback: any op without a specialized builder fails at build time.
template <typename OP>
static void build(CPU_ExternalFunction* external_function,
const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
const std::vector<TensorViewWrapper>& out)
{
throw std::runtime_error("Unimplemented op in CPU builder");
}
static void nop(CPU_ExternalFunction* external_function,
const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
const std::vector<TensorViewWrapper>& out)
{
}
static void buildBatchNorm(CPU_ExternalFunction* external_function,
const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
const std::vector<TensorViewWrapper>& out,
bool append_relu = false);
};
}
}
}
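To make the dispatch concrete, here is a hypothetical builder specialization for ngraph::op::Add, sketched under a few assumptions: the real registrations live in the collapsed cpu_builder.cpp diff above, the kernel header path is guessed, and element-type dispatch is elided (float is hard-coded). The builder resolves tensor slots by name once at build time and appends a functor that runs at call time:

#include "ngraph/op/add.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/kernel/add.hpp" // assumed path for the add kernel shown below

namespace ngraph { namespace runtime { namespace cpu {

// Hypothetical sketch, not the actual cpu_builder.cpp contents.
template <>
void Builder::BUILDER_DECL(ngraph::op::Add)
{
    auto& functors = external_function->get_functors();
    auto& tensor_data = external_function->get_tensor_data();

    // Bind to the name -> pointer slots now; the executor patches the
    // slots with real buffer addresses before each call.
    void*& arg0 = tensor_data[args[0].get_name()];
    void*& arg1 = tensor_data[args[1].get_name()];
    void*& out0 = tensor_data[out[0].get_name()];
    size_t count = out[0].get_size();

    // float is hard-coded for brevity; a real builder would dispatch
    // on args[0].get_element_type().
    functors.emplace_back([&arg0, &arg1, &out0, count](CPURuntimeContext* ctx) {
        kernel::add<float>(arg0, arg1, out0, count);
    });
}

}}}

build_dispatcher would then map std::type_index(typeid(ngraph::op::Add)) to this function, which is exactly the shape BuildOpMap declares above.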
@@ -63,7 +63,14 @@ void runtime::cpu::CPU_CallFrame::call(
}
// Invoke the compiled computation or the direct-execution functor pipeline
if (!m_external_function->is_direct_execution())
{
m_compiled_function(inputs.data(), outputs.data(), ctx);
}
else
{
m_external_function->get_executor()(ctx, inputs, outputs);
}
if (runtime::cpu::IsTracingEnabled())
{
......
@@ -89,7 +89,6 @@
#include "ngraph/op/sum.hpp"
#include "ngraph/op/tan.hpp"
#include "ngraph/op/tanh.hpp"
#include "ngraph/runtime/cpu/cpu_emitter.hpp"
#include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
......
@@ -17,6 +17,7 @@
#pragma once
#include <functional>
#include <list>
#include <map>
#include <memory>
#include <string>
@@ -94,7 +95,19 @@ namespace ngraph
// Temporary Memory Pool alignment
static const size_t s_memory_pool_alignment;
std::list<std::function<void(CPURuntimeContext*)>>& get_functors()
{
return functors;
}
std::unordered_map<std::string, void*>& get_tensor_data() { return tensor_data; }
std::function<void(CPURuntimeContext*, std::vector<void*>&, std::vector<void*>&)>&
get_executor()
{
return executor;
}
bool is_direct_execution() const { return m_direct_execution; }
protected:
void build();
void compile();
private:
@@ -126,6 +139,7 @@
std::unique_ptr<codegen::ExecutionEngine> m_execution_engine;
bool m_emit_timing;
bool m_use_tbb;
std::unordered_map<std::string, std::string> m_variable_name_map;
std::map<std::string, size_t> m_name_index_map;
@@ -142,6 +156,15 @@ namespace ngraph
std::unique_ptr<MKLDNNEmitter> m_mkldnn_emitter;
std::string m_function_name;
std::list<std::function<void(CPURuntimeContext*)>> functors;
std::function<void(CPURuntimeContext*, std::vector<void*>&, std::vector<void*>&)>
executor;
std::unordered_map<std::string, void*> tensor_data;
std::unordered_map<std::string, size_t> intermediates_offsets;
std::unordered_map<std::string, size_t> function_input_index, function_output_index;
bool m_is_built;
bool m_direct_execution;
};
}
}
......
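Putting the new members together: build() presumably constructs the functor list and installs an executor that patches each call's input and output buffers into tensor_data before running the functors in order. A minimal sketch of that wiring, assuming function_input_index and function_output_index map tensor names to argument positions (the actual implementation is in cpu_external_function.cpp, not shown here):

// Assumed wiring inside CPU_ExternalFunction::build(), for illustration only.
executor = [this](CPURuntimeContext* ctx,
                  std::vector<void*>& inputs,
                  std::vector<void*>& outputs) {
    // Patch the name -> pointer table with this call's buffers.
    for (const auto& p : function_input_index)
    {
        tensor_data[p.first] = inputs[p.second];
    }
    for (const auto& p : function_output_index)
    {
        tensor_data[p.first] = outputs[p.second];
    }
    // Run the pre-built op functors in graph order.
    for (const auto& functor : functors)
    {
        functor(ctx);
    }
};

This is what lets CPU_CallFrame::call dispatch through get_executor() without any generated code.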
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void abs(void* input0, void* output, size_t count)
{
// View the raw buffers as 1-D Eigen tensors of length `count`.
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
// Evaluate elementwise |x| on the shared Eigen thread-pool device.
out.device(eigen::global_thread_pool_device) = in0.abs();
}
}
}
}
}
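The kernels take type-erased pointers plus an element count, so calling one directly is straightforward. A standalone usage sketch (the header path is assumed; the add and multiply kernels below follow the same pattern):

#include <vector>
#include "ngraph/runtime/cpu/kernel/abs.hpp" // assumed path for the header above

int main()
{
    std::vector<float> in{-1.5f, 2.0f, -3.25f};
    std::vector<float> out(in.size());

    // Instantiate for float and pass raw, type-erased buffers.
    ngraph::runtime::cpu::kernel::abs<float>(in.data(), out.data(), in.size());
    // out now holds {1.5, 2.0, 3.25}
    return 0;
}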
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void add(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) = in0 + in1;
}
}
}
}
}
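These kernels are templates over ElementType, but a builder only sees the element type at runtime, so it needs a bridge between the two. A hedged sketch of such a helper (select_add_kernel is a hypothetical name, covering f32/f64/i32 only; nGraph's builders do this selection themselves):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <stdexcept>
#include "ngraph/type/element_type.hpp"
#include "ngraph/runtime/cpu/kernel/add.hpp" // assumed path for the kernel above

// Map a runtime element::Type to the matching template instantiation.
inline std::function<void(void*, void*, void*, size_t)>
    select_add_kernel(const ngraph::element::Type& et)
{
    if (et == ngraph::element::f32)
    {
        return ngraph::runtime::cpu::kernel::add<float>;
    }
    if (et == ngraph::element::f64)
    {
        return ngraph::runtime::cpu::kernel::add<double>;
    }
    if (et == ngraph::element::i32)
    {
        return ngraph::runtime::cpu::kernel::add<int32_t>;
    }
    throw std::runtime_error("select_add_kernel: unsupported element type");
}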
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void multiply(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) = in0 * in1;
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <cstddef>
#include <cstring>
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void result(const void* arg, void* out, size_t count)
{
// A Result op is a plain copy of the computed value into the output buffer.
memcpy(out, arg, sizeof(ElementType) * count);
}
}
}
}
}