Commit 4680678d authored by Sergey Shalnov, committed by Robert Kimball

IntelGPU backend: minor fixes in statistic (#2300)

parent f79b40a7
...@@ -390,7 +390,14 @@ runtime::intelgpu::IntelGPUBackend::IntelGPUBackend() ...@@ -390,7 +390,14 @@ runtime::intelgpu::IntelGPUBackend::IntelGPUBackend()
m_function_cache_disabled = true; m_function_cache_disabled = true;
} }
cldnn::engine_configuration cldnn_configuration(profiling); cldnn::engine_configuration cldnn_configuration(profiling,
false,
m_cldnn_dump_enable,
string(),
string(),
true,
string(),
m_cldnn_dump_dir);
ocl_engine = make_shared<cldnn::engine>(cldnn_configuration); ocl_engine = make_shared<cldnn::engine>(cldnn_configuration);
} }
...@@ -419,6 +426,14 @@ runtime::Handle runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> ...@@ -419,6 +426,14 @@ runtime::Handle runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function>
set<cldnn::primitive_id> func_output_names; set<cldnn::primitive_id> func_output_names;
cldnn::topology topology; cldnn::topology topology;
stopwatch timer_compile;
double mem_before_compile = 0.0;
if (m_profile_enable)
{
mem_before_compile = get_max_memory_rss();
timer_compile.start();
}
if (m_dump_graph_enable) if (m_dump_graph_enable)
{ {
...@@ -1808,6 +1823,13 @@ runtime::Handle runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> ...@@ -1808,6 +1823,13 @@ runtime::Handle runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function>
instance.ocl_network = instance.ocl_network =
make_shared<cldnn::network>(*ocl_engine, topology, network_build_options); make_shared<cldnn::network>(*ocl_engine, topology, network_build_options);
if (m_profile_enable)
{
timer_compile.stop();
instance.m_compilation_time = timer_compile.get_milliseconds();
instance.m_consumed_memory = get_max_memory_rss() - mem_before_compile;
}
return func; return func;
} }
...@@ -1815,17 +1837,8 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func, ...@@ -1815,17 +1837,8 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func,
const vector<shared_ptr<runtime::Tensor>>& outputs, const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs) const vector<shared_ptr<runtime::Tensor>>& inputs)
{ {
double mem_before_call = 0.0f; double mem_call_consumed = 0.0f;
double mem_after_compilation = 0.0f;
double mem_after_call = 0.0f;
stopwatch timer_call; stopwatch timer_call;
stopwatch timer_compile;
if (m_profile_enable)
{
mem_before_call = get_max_memory_rss();
timer_compile.start();
}
FunctionInstance& instance = ocl_networks[func]; FunctionInstance& instance = ocl_networks[func];
if (instance.ocl_network == nullptr) if (instance.ocl_network == nullptr)
...@@ -1835,8 +1848,7 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func, ...@@ -1835,8 +1848,7 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func,
if (m_profile_enable) if (m_profile_enable)
{ {
timer_compile.stop(); mem_call_consumed = get_max_memory_rss();
mem_after_compilation = get_max_memory_rss();
timer_call.start(); timer_call.start();
} }
...@@ -1884,15 +1896,18 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func, ...@@ -1884,15 +1896,18 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func,
if (m_profile_enable) if (m_profile_enable)
{ {
timer_call.stop(); timer_call.stop();
mem_after_call = get_max_memory_rss(); mem_call_consumed = get_max_memory_rss() - mem_call_consumed;
print_call_performance(network, print_call_performance(network,
func, func,
timer_compile.get_milliseconds(), instance.m_compilation_time,
timer_call.get_milliseconds(), timer_call.get_milliseconds(),
mem_before_call, instance.m_consumed_memory,
mem_after_compilation, mem_call_consumed,
mem_after_call); get_max_memory_rss());
// Output compile time only once
instance.m_compilation_time = 0.0;
} }
if (m_function_cache_disabled) if (m_function_cache_disabled)
...@@ -1995,11 +2010,11 @@ static Node* get_node_by_name(const shared_ptr<Function> func, const string& nam ...@@ -1995,11 +2010,11 @@ static Node* get_node_by_name(const shared_ptr<Function> func, const string& nam
void runtime::intelgpu::IntelGPUBackend::print_call_performance( void runtime::intelgpu::IntelGPUBackend::print_call_performance(
const shared_ptr<cldnn::network> network, const shared_ptr<cldnn::network> network,
const shared_ptr<Function> func, const shared_ptr<Function> func,
size_t time_compile, double time_compile,
size_t time_call, double time_call,
double mem_before_call, double mem_compilation_consumed,
double mem_after_compilation, double mem_call_consumed,
double mem_after_call) const double mem_current) const
{ {
struct data_item struct data_item
{ {
...@@ -2110,10 +2125,10 @@ void runtime::intelgpu::IntelGPUBackend::print_call_performance( ...@@ -2110,10 +2125,10 @@ void runtime::intelgpu::IntelGPUBackend::print_call_performance(
} }
// Print time and memory consumed in ::call function // Print time and memory consumed in ::call function
cout << func_name << delim << " Backend compilation(ms)" << delim << time_compile << " call(ms)" cout << func_name << delim << " Backend compilation(ms)" << delim << time_compile << delim
<< delim << time_call << delim << "memory before call(B)" << delim << mem_before_call << "call(ms)" << delim << time_call << delim << "memory consumption compile(B)" << delim
<< delim << "after compilation(B)" << delim << mem_after_compilation << delim << mem_compilation_consumed << delim << "call(B)" << delim << mem_call_consumed << delim
<< "after call(B)" << delim << mem_after_call << endl; << "RSS(B)" << delim << mem_current << endl;
cout.flags(saved_stream_flags); // Restore stream configuration to leave it in original state cout.flags(saved_stream_flags); // Restore stream configuration to leave it in original state
} }
...@@ -64,6 +64,8 @@ private: ...@@ -64,6 +64,8 @@ private:
public: public:
std::shared_ptr<cldnn::network> ocl_network = nullptr; std::shared_ptr<cldnn::network> ocl_network = nullptr;
bool m_performance_counters_enabled = false; bool m_performance_counters_enabled = false;
double m_compilation_time = 0.0;
double m_consumed_memory = 0.0;
}; };
std::map<std::shared_ptr<Function>, FunctionInstance> ocl_networks; std::map<std::shared_ptr<Function>, FunctionInstance> ocl_networks;
...@@ -74,11 +76,11 @@ private: ...@@ -74,11 +76,11 @@ private:
// Statistic related things // Statistic related things
void print_call_performance(const std::shared_ptr<cldnn::network> network, void print_call_performance(const std::shared_ptr<cldnn::network> network,
const std::shared_ptr<Function> func, const std::shared_ptr<Function> func,
size_t time_compile, double time_compile,
size_t time_call, double time_call,
double mem_before_call, double mem_compilation_consumed,
double mem_after_compilation, double mem_call_consumed,
double mem_after_call) const; double mem_current) const;
bool m_profile_enable = false; bool m_profile_enable = false;
long m_profile_lines_limit_count = 10; long m_profile_lines_limit_count = 10;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment