Commit 013c2381 authored by Sergey Shalnov, committed by Robert Kimball

IntelGPU backend: Separate backend and executable classes (#2447)

parent 65141c5f
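The split follows the nGraph runtime API in which Backend::compile() returns a runtime::Executable: the compiled cldnn::network and all per-run state move out of intelgpu_backend.hpp into a new IntelGPUExecutable class with its own header and source file. A minimal caller-side sketch of the resulting pattern, assuming the standard runtime::Backend interface of this nGraph version (the helper below is illustrative, not part of the commit):

#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor.hpp"

using namespace ngraph;

// Compile once, then run the returned executable any number of times.
void run_on_intelgpu(const std::shared_ptr<Function>& f,
                     const std::shared_ptr<runtime::Tensor>& result,
                     const std::shared_ptr<runtime::Tensor>& input)
{
    auto backend = runtime::Backend::create("INTELGPU");
    auto exec = backend->compile(f); // yields the new IntelGPUExecutable
    exec->call({result}, {input});
}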
@@ -16,6 +16,7 @@
 set(SRC
     intelgpu_backend.cpp
+    intelgpu_executable.cpp
     intelgpu_tensor_view.cpp
     intelgpu_layout.cpp
     intelgpu_op_batchnorm.cpp
...
@@ -20,7 +20,6 @@
 #include <memory>

 #include <CPP/engine.hpp>
-#include <CPP/network.hpp>

 #include "ngraph/runtime/backend.hpp"
@@ -31,7 +30,6 @@ namespace ngraph
         namespace intelgpu
         {
             class IntelGPUBackend;
-            class IntelGPUExecutable;
         }
     }
 }
@@ -67,39 +65,3 @@ private:
     bool m_disable_backend_optimizations = false;
     std::string m_cldnn_dump_dir = std::string("intelgpu_codegen");
 };
-class ngraph::runtime::intelgpu::IntelGPUExecutable : public runtime::Executable
-{
-public:
-    IntelGPUExecutable(std::shared_ptr<Function> func,
-                       std::shared_ptr<cldnn::network> network,
-                       bool enable_timing,
-                       bool enable_profile,
-                       double compilation_time,
-                       double consumed_memory,
-                       size_t profile_lines_limit_count);
-
-    bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
-              const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) override;
-
-    std::vector<PerformanceCounter> get_performance_data() const override;
-
-private:
-    std::shared_ptr<Function> m_function;
-    std::shared_ptr<cldnn::network> m_cldnn_network = nullptr;
-    bool m_performance_counters_enabled = false;
-    bool m_profile_enable = false;
-    double m_compilation_time = 0.0;
-    double m_consumed_memory = 0.0;
-    long m_profile_lines_limit_count = 10;
-    std::string delim = std::string(":");
-
-    // Statistic related things
-    void print_call_performance(const std::shared_ptr<cldnn::network> network,
-                                const std::shared_ptr<Function> func,
-                                double time_compile,
-                                double time_call,
-                                double mem_compilation_consumed,
-                                double mem_call_consumed,
-                                double mem_current) const;
-};
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include <CPP/network.hpp>

#include "ngraph/runtime/backend.hpp" // definition of the runtime::Executable base class
#include "ngraph/runtime/tensor.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace intelgpu
        {
            class IntelGPUExecutable;
        }
    }
}

class ngraph::runtime::intelgpu::IntelGPUExecutable : public runtime::Executable
{
public:
    IntelGPUExecutable(std::shared_ptr<Function> func,
                       std::shared_ptr<cldnn::network> network,
                       bool enable_timing,
                       bool enable_profile,
                       double compilation_time,
                       double consumed_memory,
                       size_t profile_lines_limit_count);

    bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
              const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) override;

    std::vector<PerformanceCounter> get_performance_data() const override;

private:
    std::shared_ptr<Function> m_function;
    std::shared_ptr<cldnn::network> m_cldnn_network = nullptr;
    bool m_performance_counters_enabled = false;
    bool m_profile_enable = false;
    double m_compilation_time = 0.0;
    double m_consumed_memory = 0.0;
    long m_profile_lines_limit_count = 10;
    std::string delim = std::string(":");

    // Statistic related things
    void print_call_performance(const std::shared_ptr<cldnn::network> network,
                                const std::shared_ptr<Function> func,
                                double time_compile,
                                double time_call,
                                double mem_compilation_consumed,
                                double mem_call_consumed,
                                double mem_current) const;
};
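As context (not part of the diff): when the executable is built with enable_timing set, get_performance_data() exposes per-primitive counters in the usual nGraph way. A hedged sketch; the PerformanceCounter accessors used here are assumed from the runtime API of this era, and dump_performance is an illustrative helper:

#include <iostream>
#include "ngraph/runtime/backend.hpp"

using namespace ngraph;

// Print whatever per-op timing the executable collected.
void dump_performance(const std::shared_ptr<runtime::Executable>& exec)
{
    for (const runtime::PerformanceCounter& pc : exec->get_performance_data())
    {
        std::cout << pc.name() << ": " << pc.total_microseconds()
                  << " us over " << pc.call_count() << " call(s)" << std::endl;
    }
}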
@@ -14,6 +14,9 @@
 // limitations under the License.
 //*****************************************************************************
+#include <sys/resource.h>
+#include <sys/time.h>
+
 #include <CPP/concatenation.hpp>
 #include <CPP/custom_gpu_primitive.hpp>
 #include <CPP/reshape.hpp>
...
@@ -1515,3 +1518,19 @@ void runtime::intelgpu::do_reshape_operation(cldnn::topology& topology,
                           {1});
     topology.add(op_reshape);
 }
+
+size_t runtime::intelgpu::get_max_memory_rss()
+{
+    size_t result = 0;
+    struct rusage usage;
+
+    if (getrusage(RUSAGE_SELF, &usage) == 0)
+    {
+        // ru_maxrss reports the peak RSS in kilobytes (1024-byte units
+        // on Linux); scale the result to bytes
+        result = usage.ru_maxrss * 1024;
+    }
+
+    return result;
+}
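Note that ru_maxrss is a high-water mark, not current usage, so differencing two get_max_memory_rss() readings only captures growth of the process peak RSS; that is the sense in which the mem_*_consumed values passed to print_call_performance() can be computed. A small sketch under that assumption (measure_peak_growth is illustrative, not part of the commit):

#include <functional>

#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"

// Run one phase (network build, a call, ...) and report how many bytes
// the process peak RSS grew while it ran.
double measure_peak_growth(const std::function<void()>& phase)
{
    const size_t before = ngraph::runtime::intelgpu::get_max_memory_rss();
    phase();
    const size_t after = ngraph::runtime::intelgpu::get_max_memory_rss();
    return static_cast<double>(after - before);
}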
@@ -33,6 +33,8 @@ namespace ngraph
     {
         namespace intelgpu
         {
+            size_t get_max_memory_rss();
+
             void do_pad_operation(cldnn::topology& topology,
                                   const std::string& input_name,
                                   const Shape& input_shape,
...