Commit 013c2381 authored by Sergey Shalnov, committed by Robert Kimball

IntelGPU backend: Separate backend and executable classes (#2447)

parent 65141c5f
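The split follows the nGraph runtime API in which Backend::compile() returns a runtime::Executable: the compiled cldnn::network and all per-run state move out of intelgpu_backend.hpp into a new IntelGPUExecutable class with its own header and source file. A minimal caller-side sketch of the resulting pattern, assuming the standard runtime::Backend interface of this nGraph version (the helper below is illustrative, not part of the commit):

#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/tensor.hpp"

using namespace ngraph;

// Compile once, then run the returned executable any number of times.
void run_on_intelgpu(const std::shared_ptr<Function>& f,
                     const std::shared_ptr<runtime::Tensor>& result,
                     const std::shared_ptr<runtime::Tensor>& input)
{
    auto backend = runtime::Backend::create("INTELGPU");
    auto exec = backend->compile(f); // yields the new IntelGPUExecutable
    exec->call({result}, {input});
}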
@@ -16,6 +16,7 @@
 set(SRC
     intelgpu_backend.cpp
+    intelgpu_executable.cpp
     intelgpu_tensor_view.cpp
     intelgpu_layout.cpp
     intelgpu_op_batchnorm.cpp
...
@@ -20,7 +20,6 @@
 #include <memory>

 #include <CPP/engine.hpp>
-#include <CPP/network.hpp>

 #include "ngraph/runtime/backend.hpp"
@@ -31,7 +30,6 @@ namespace ngraph
         namespace intelgpu
         {
             class IntelGPUBackend;
-            class IntelGPUExecutable;
         }
     }
 }
@@ -67,39 +65,3 @@ private:
     bool m_disable_backend_optimizations = false;
     std::string m_cldnn_dump_dir = std::string("intelgpu_codegen");
 };
-class ngraph::runtime::intelgpu::IntelGPUExecutable : public runtime::Executable
-{
-public:
-    IntelGPUExecutable(std::shared_ptr<Function> func,
-                       std::shared_ptr<cldnn::network> network,
-                       bool enable_timing,
-                       bool enable_profile,
-                       double compilation_time,
-                       double consumed_memory,
-                       size_t profile_lines_limit_count);
-
-    bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
-              const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) override;
-
-    std::vector<PerformanceCounter> get_performance_data() const override;
-
-private:
-    std::shared_ptr<Function> m_function;
-    std::shared_ptr<cldnn::network> m_cldnn_network = nullptr;
-    bool m_performance_counters_enabled = false;
-    bool m_profile_enable = false;
-    double m_compilation_time = 0.0;
-    double m_consumed_memory = 0.0;
-    long m_profile_lines_limit_count = 10;
-    std::string delim = std::string(":");
-
-    // Statistic related things
-    void print_call_performance(const std::shared_ptr<cldnn::network> network,
-                                const std::shared_ptr<Function> func,
-                                double time_compile,
-                                double time_call,
-                                double mem_compilation_consumed,
-                                double mem_call_consumed,
-                                double mem_current) const;
-};
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include <CPP/network.hpp>

#include "ngraph/runtime/backend.hpp" // definition of the runtime::Executable base class
#include "ngraph/runtime/tensor.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace intelgpu
        {
            class IntelGPUExecutable;
        }
    }
}

class ngraph::runtime::intelgpu::IntelGPUExecutable : public runtime::Executable
{
public:
    IntelGPUExecutable(std::shared_ptr<Function> func,
                       std::shared_ptr<cldnn::network> network,
                       bool enable_timing,
                       bool enable_profile,
                       double compilation_time,
                       double consumed_memory,
                       size_t profile_lines_limit_count);

    bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
              const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) override;

    std::vector<PerformanceCounter> get_performance_data() const override;

private:
    std::shared_ptr<Function> m_function;
    std::shared_ptr<cldnn::network> m_cldnn_network = nullptr;
    bool m_performance_counters_enabled = false;
    bool m_profile_enable = false;
    double m_compilation_time = 0.0;
    double m_consumed_memory = 0.0;
    long m_profile_lines_limit_count = 10;
    std::string delim = std::string(":");

    // Statistic related things
    void print_call_performance(const std::shared_ptr<cldnn::network> network,
                                const std::shared_ptr<Function> func,
                                double time_compile,
                                double time_call,
                                double mem_compilation_consumed,
                                double mem_call_consumed,
                                double mem_current) const;
};
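As context (not part of the diff): when the executable is built with enable_timing set, get_performance_data() exposes per-primitive counters in the usual nGraph way. A hedged sketch; the PerformanceCounter accessors used here are assumed from the runtime API of this era, and dump_performance is an illustrative helper:

#include <iostream>
#include "ngraph/runtime/backend.hpp"

using namespace ngraph;

// Print whatever per-op timing the executable collected.
void dump_performance(const std::shared_ptr<runtime::Executable>& exec)
{
    for (const runtime::PerformanceCounter& pc : exec->get_performance_data())
    {
        std::cout << pc.name() << ": " << pc.total_microseconds()
                  << " us over " << pc.call_count() << " call(s)" << std::endl;
    }
}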
@@ -14,6 +14,9 @@
 // limitations under the License.
 //*****************************************************************************
+#include <sys/resource.h>
+#include <sys/time.h>
+
 #include <CPP/concatenation.hpp>
 #include <CPP/custom_gpu_primitive.hpp>
 #include <CPP/reshape.hpp>
...
@@ -1515,3 +1518,19 @@ void runtime::intelgpu::do_reshape_operation(cldnn::topology& topology,
                           {1});
     topology.add(op_reshape);
 }
+
+size_t runtime::intelgpu::get_max_memory_rss()
+{
+    size_t result = 0;
+    struct rusage usage;
+
+    if (getrusage(RUSAGE_SELF, &usage) == 0)
+    {
+        // ru_maxrss reports the peak RSS in kilobytes (1024-byte units
+        // on Linux); scale the result to bytes
+        result = usage.ru_maxrss * 1024;
+    }
+
+    return result;
+}
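Note that ru_maxrss is a high-water mark, not current usage, so differencing two get_max_memory_rss() readings only captures growth of the process peak RSS; that is the sense in which the mem_*_consumed values passed to print_call_performance() can be computed. A small sketch under that assumption (measure_peak_growth is illustrative, not part of the commit):

#include <functional>

#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"

// Run one phase (network build, a call, ...) and report how many bytes
// the process peak RSS grew while it ran.
double measure_peak_growth(const std::function<void()>& phase)
{
    const size_t before = ngraph::runtime::intelgpu::get_max_memory_rss();
    phase();
    const size_t after = ngraph::runtime::intelgpu::get_max_memory_rss();
    return static_cast<double>(after - before);
}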
@@ -33,6 +33,8 @@ namespace ngraph
     {
         namespace intelgpu
         {
+            size_t get_max_memory_rss();
+
             void do_pad_operation(cldnn::topology& topology,
                                   const std::string& input_name,
                                   const Shape& input_shape,
...