Commit 18034315 authored by Nick Korovaiko, committed by Scott Cyphers

Removing GOEE (#2212)

* make GOE extend from util::Op

* fix build breaks

* refactor GOEE

* redundant after jbobba's fix

* fix clang warnings

* add an assert
parent 71f13654
@@ -20,6 +20,7 @@
 #include "ngraph/log.hpp"
 #include "ngraph/log.hpp"
 #include "ngraph/op/concat.hpp"
+#include "ngraph/op/get_output_element.hpp"
 #include "ngraph/op/slice.hpp"
 #include "ngraph/pass/liveness.hpp"
 #include "ngraph/pass/manager.hpp"
@@ -66,6 +67,7 @@ bool pass::MemoryLayout::run_on_function(shared_ptr<ngraph::Function> function)
                 // For destructive kernel, this should be the last use
                 // Non-destructive kernels can pass through if memory sharing is disabled
                 if ((node->liveness_free_list.count(input) != 0 ||
+                     std::dynamic_pointer_cast<op::GetOutputElement>(node) ||
                      (m_disable_memory_sharing && !oi_pair.destructive)) &&
                     node->liveness_new_list.count(output) != 0)
                 {
......
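Note: the memory-layout change above lets a `GetOutputElement` (GOE) node reuse its input's buffer unconditionally, not only when the input's liveness ends at this node, because GOE is a pure view onto one output of a multi-output op. A minimal sketch of the decision the pass makes, with hypothetical parameter names standing in for the pass's state:

```cpp
// Sketch only: restates the diff's condition with named booleans.
bool can_reuse_input_buffer(bool input_freed_here,       // liveness_free_list hit
                            bool is_get_output_element,  // the new clause
                            bool sharing_disabled,       // m_disable_memory_sharing
                            bool destructive,            // oi_pair.destructive
                            bool output_defined_here)    // liveness_new_list hit
{
    return (input_freed_here || is_get_output_element ||
            (sharing_disabled && !destructive)) &&
           output_defined_here;
}
```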
@@ -74,6 +74,7 @@ set(SRC
     builder/slice.cpp
     builder/state.cpp
     builder/softmax.cpp
+    builder/get_output_element.cpp
     builder/sum.cpp
     builder/topk.cpp
     builder/update_slice.cpp
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <cstring>

#include "ngraph/op/get_output_element.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"

using namespace std;
using namespace ngraph;

namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            template <>
            void Builder::BUILDER_DECL(ngraph::op::GetOutputElement)
            {
                auto& functors = external_function->get_functors();
                auto goe = static_cast<const ngraph::op::GetOutputElement*>(node);
                size_t n = goe->get_n();
                auto& arg_tensor = external_function->get_tensor_data(args[n].get_name());
                auto& out_tensor = external_function->get_tensor_data(out[0].get_name());

                auto functor = [&, n](CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
                    if (arg_tensor != out_tensor)
                    {
                        throw ngraph_error("GOE's input and output must be equal");
                    }
                };
                functors.emplace_back(functor);
            }

            REGISTER_OP_BUILDER(GetOutputElement);
        }
    }
}
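The new builder makes GOE a zero-copy op: assignment and memory layout guarantee that output 0 aliases input `n`, so the emitted functor does no work beyond asserting that invariant (the "add an assert" item in the commit message). A standalone sketch of that runtime pattern, with `std::function` standing in for `CPUKernelFunctor` and raw pointers for the external function's tensor-data slots:

```cpp
#include <functional>
#include <stdexcept>
#include <vector>

int main()
{
    void* arg_tensor = nullptr;
    void* out_tensor = nullptr;
    std::vector<std::function<void()>> functors;

    // The GOE "kernel": no copy, only a consistency check, as in the diff.
    functors.emplace_back([&] {
        if (arg_tensor != out_tensor)
            throw std::runtime_error("GOE's input and output must be equal");
    });

    float buffer[4];
    arg_tensor = out_tensor = buffer; // memory layout aliased the two slots
    for (auto& f : functors)
        f(); // runs cleanly because the tensors alias
}
```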
@@ -95,11 +95,10 @@ namespace ngraph
             void Builder::BUILDER_DECL(ngraph::op::SigmoidMultiply)
             {
                 auto& functors = external_function->get_functors();
-                auto& tensor_data = external_function->get_tensor_data();
-                auto& arg0_tensor = tensor_data[args[0].get_name()];
-                auto& arg1_tensor = tensor_data[args[1].get_name()];
-                auto& out_tensor = tensor_data[out[0].get_name()];
+                auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
+                auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
+                auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
                 auto tensor_size = shape_size(args[0].get_shape());
                 auto sigmoid_mul = static_cast<const ngraph::op::SigmoidMultiply*>(node);
@@ -122,13 +121,11 @@ namespace ngraph
             void Builder::BUILDER_DECL(ngraph::op::SigmoidMultiplyBackprop)
             {
                 auto& functors = external_function->get_functors();
-                auto& tensor_data = external_function->get_tensor_data();
-                auto& arg0_tensor = tensor_data[args[0].get_name()];
-                auto& arg1_tensor = tensor_data[args[1].get_name()];
-                auto& arg2_tensor = tensor_data[args[2].get_name()];
-                auto& out0_tensor = tensor_data[out[0].get_name()];
-                auto& out1_tensor = tensor_data[out[1].get_name()];
+                auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
+                auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
+                auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name());
+                auto& out0_tensor = external_function->get_tensor_data(out[0].get_name());
+                auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
                 auto tensor_size = shape_size(args[0].get_shape());
                 auto sigmoid_mul = static_cast<const ngraph::op::SigmoidMultiplyBackprop*>(node);
......
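The sigmoid hunks above (and the TopK hunk below) swap direct indexing into the tensor-data map for a `get_tensor_data(name)` accessor on the external function. A sketch of why that accessor shape is preferable, assuming (not confirmed by this diff) that it validates the key; with a raw map, `operator[]` would silently default-insert a slot for a mistyped tensor name:

```cpp
#include <stdexcept>
#include <string>
#include <unordered_map>

// Illustrative stand-in for CPU_ExternalFunction's tensor-data storage;
// the real class and its behavior on a missing key may differ.
class ExternalFunctionSketch
{
public:
    void*& get_tensor_data(const std::string& name)
    {
        auto it = m_tensor_data.find(name);
        if (it == m_tensor_data.end())
            throw std::runtime_error("unknown tensor: " + name);
        return it->second; // stable reference the functors can capture
    }

private:
    std::unordered_map<std::string, void*> m_tensor_data;
};
```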
@@ -33,14 +33,12 @@ namespace ngraph
             void Builder::BUILDER_DECL(ngraph::op::TopK)
             {
                 auto& functors = external_function->get_functors();
-                auto& tensor_data = external_function->get_tensor_data();
                 const ngraph::op::TopK* topk = static_cast<const ngraph::op::TopK*>(node);
                 CPUKernelFunctor functor;
-                auto& arg_tensor = tensor_data[args[0].get_name()];
-                auto& out_indices_tensor = tensor_data[out[0].get_name()];
-                auto& out_values_tensor = tensor_data[out[1].get_name()];
+                auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
+                auto& out_indices_tensor = external_function->get_tensor_data(out[0].get_name());
+                auto& out_values_tensor = external_function->get_tensor_data(out[1].get_name());
                 if (out[0].get_element_type() != element::i64 &&
                     out[0].get_element_type() != element::i32)
                 {
......
@@ -1111,7 +1111,7 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma
         CommonSubexpressionElimination, true, ngraph::pass, runtime::cpu::get_cse_handlers_map());
     REGISTER_KNOBBED_PASS(CPUPostLayoutOptimizations, true, runtime::cpu::pass);
     REGISTER_KNOBBED_PASS(CPUMemoryOptimization, true, runtime::cpu::pass);
-    REGISTER_KNOBBED_PASS(GetOutputElementElimination, true, ngraph::pass);
+    REGISTER_KNOBBED_PASS(GetOutputElementElimination, false, ngraph::pass);
     pass_manager.get_state().set_visualize_tree_ops_map(runtime::cpu::get_visualize_tree_ops_map());
 }
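This is the core of the commit: with GOE handled as a real in-place CPU op, the GetOutputElementElimination pass is switched from on to off by default rather than deleted, so it presumably remains available behind its knob. A speculative sketch of the knobbed-registration idea (the real REGISTER_KNOBBED_PASS macro and any environment-variable naming are not shown in this diff):

```cpp
#include <cstdlib>
#include <string>

// Hypothetical illustration only: a per-pass default that an environment
// knob can override, which is the behavior a "knobbed" pass suggests.
bool pass_enabled(const std::string& name, bool enabled_by_default)
{
    std::string knob = "NGRAPH_PASS_" + name; // assumed knob naming
    if (const char* value = std::getenv(knob.c_str()))
        return std::string(value) == "1";
    return enabled_by_default; // now false for GetOutputElementElimination
}
```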
@@ -1864,7 +1864,6 @@ void runtime::cpu::CPU_ExternalFunction::build()
     }
     CPUExecutionContext ectx{0};
     executor::GetCPUExecutor().execute(functors.at(ctx->pc), ctx, &ectx);
     if (ctx->breakpoints.count(ctx->pc + 1))
     {
         ctx->pc++;
......
@@ -37,6 +37,7 @@
 #include "ngraph/op/experimental/quantized_conv_bias.hpp"
 #include "ngraph/op/experimental/quantized_conv_relu.hpp"
 #include "ngraph/op/experimental/quantized_max_pool.hpp"
+#include "ngraph/op/get_output_element.hpp"
 #include "ngraph/op/lrn.hpp"
 #include "ngraph/op/max_pool.hpp"
 #include "ngraph/op/quantize.hpp"
@@ -171,6 +172,16 @@ namespace ngraph
                 }
             }

+            template <>
+            void CPUAssignment::ASSIGN_DECL(ngraph::op::GetOutputElement)
+            {
+                auto goe = static_cast<op::GetOutputElement*>(node);
+                auto op_annotations =
+                    std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
+                op_annotations->add_in_place_oi_pair({0, goe->get_n(), false});
+                goe->set_op_annotations(op_annotations);
+            }
+
             template <>
             void CPUAssignment::ASSIGN_DECL(ngraph::op::ConvolutionAdd)
             {
@@ -871,6 +882,8 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
     {TI(ngraph::op::Quantize), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Quantize>},
     {TI(ngraph::op::Dequantize),
      &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Dequantize>},
+    {TI(ngraph::op::GetOutputElement),
+     &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::GetOutputElement>},
 };

 bool runtime::cpu::pass::CPUAssignment::run_on_call_graph(
......
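The assignment hunk above annotates every GOE with the in-place pair `{0, goe->get_n(), false}`: output 0 aliases input tensor `n`, and the op never writes through it. The memory-layout pass then assigns both the same buffer, which is exactly the invariant the builder's functor asserts at run time. A small sketch of that annotation shape (field names follow the initializer order in the diff; the real struct may differ):

```cpp
#include <cstddef>
#include <vector>

// Illustrative oi-pair record: {output index, input index, destructive}.
struct OiPair
{
    std::size_t output;
    std::size_t input;
    bool destructive; // false: the output is a read-only view of the input
};

int main()
{
    std::size_t n = 1; // a GOE selecting output 1 of a multi-output op
    std::vector<OiPair> annotations;
    annotations.push_back({0, n, false}); // mirrors add_in_place_oi_pair
    return 0;
}
```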
@@ -7,7 +7,6 @@ one_hot_vector_1_far_oob
one_hot_vector_1_fp_nonint
backwards_maxpool_n2_c1_hw5_3x3_str2_max_pad1x2_2x3
backwards_batch_norm_training
shape_of_scalar
shape_of_vector
shape_of_matrix
......