Commit 4680678d authored by Sergey Shalnov's avatar Sergey Shalnov Committed by Robert Kimball

IntelGPU backend: minor fixes in statistics (#2300)

parent f79b40a7
......@@ -390,7 +390,14 @@ runtime::intelgpu::IntelGPUBackend::IntelGPUBackend()
m_function_cache_disabled = true;
}
cldnn::engine_configuration cldnn_configuration(profiling);
cldnn::engine_configuration cldnn_configuration(profiling,
false,
m_cldnn_dump_enable,
string(),
string(),
true,
string(),
m_cldnn_dump_dir);
ocl_engine = make_shared<cldnn::engine>(cldnn_configuration);
}
......@@ -419,6 +426,14 @@ runtime::Handle runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function>
set<cldnn::primitive_id> func_output_names;
cldnn::topology topology;
stopwatch timer_compile;
double mem_before_compile = 0.0;
if (m_profile_enable)
{
mem_before_compile = get_max_memory_rss();
timer_compile.start();
}
if (m_dump_graph_enable)
{
......@@ -1808,6 +1823,13 @@ runtime::Handle runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function>
instance.ocl_network =
make_shared<cldnn::network>(*ocl_engine, topology, network_build_options);
if (m_profile_enable)
{
timer_compile.stop();
instance.m_compilation_time = timer_compile.get_milliseconds();
instance.m_consumed_memory = get_max_memory_rss() - mem_before_compile;
}
return func;
}
......@@ -1815,17 +1837,8 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func,
const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
{
double mem_before_call = 0.0f;
double mem_after_compilation = 0.0f;
double mem_after_call = 0.0f;
double mem_call_consumed = 0.0f;
stopwatch timer_call;
stopwatch timer_compile;
if (m_profile_enable)
{
mem_before_call = get_max_memory_rss();
timer_compile.start();
}
FunctionInstance& instance = ocl_networks[func];
if (instance.ocl_network == nullptr)
......@@ -1835,8 +1848,7 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func,
if (m_profile_enable)
{
timer_compile.stop();
mem_after_compilation = get_max_memory_rss();
mem_call_consumed = get_max_memory_rss();
timer_call.start();
}
......@@ -1884,15 +1896,18 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func,
if (m_profile_enable)
{
timer_call.stop();
mem_after_call = get_max_memory_rss();
mem_call_consumed = get_max_memory_rss() - mem_call_consumed;
print_call_performance(network,
func,
timer_compile.get_milliseconds(),
instance.m_compilation_time,
timer_call.get_milliseconds(),
mem_before_call,
mem_after_compilation,
mem_after_call);
instance.m_consumed_memory,
mem_call_consumed,
get_max_memory_rss());
// Output compile time only once
instance.m_compilation_time = 0.0;
}
if (m_function_cache_disabled)
......@@ -1995,11 +2010,11 @@ static Node* get_node_by_name(const shared_ptr<Function> func, const string& nam
void runtime::intelgpu::IntelGPUBackend::print_call_performance(
const shared_ptr<cldnn::network> network,
const shared_ptr<Function> func,
size_t time_compile,
size_t time_call,
double mem_before_call,
double mem_after_compilation,
double mem_after_call) const
double time_compile,
double time_call,
double mem_compilation_consumed,
double mem_call_consumed,
double mem_current) const
{
struct data_item
{
......@@ -2110,10 +2125,10 @@ void runtime::intelgpu::IntelGPUBackend::print_call_performance(
}
// Print time and memory consumed in ::call function
cout << func_name << delim << " Backend compilation(ms)" << delim << time_compile << " call(ms)"
<< delim << time_call << delim << "memory before call(B)" << delim << mem_before_call
<< delim << "after compilation(B)" << delim << mem_after_compilation << delim
<< "after call(B)" << delim << mem_after_call << endl;
cout << func_name << delim << " Backend compilation(ms)" << delim << time_compile << delim
<< "call(ms)" << delim << time_call << delim << "memory consumption compile(B)" << delim
<< mem_compilation_consumed << delim << "call(B)" << delim << mem_call_consumed << delim
<< "RSS(B)" << delim << mem_current << endl;
cout.flags(saved_stream_flags); // Restore stream configuration to leave it in original state
}
......@@ -64,6 +64,8 @@ private:
public:
std::shared_ptr<cldnn::network> ocl_network = nullptr;
bool m_performance_counters_enabled = false;
double m_compilation_time = 0.0;
double m_consumed_memory = 0.0;
};
std::map<std::shared_ptr<Function>, FunctionInstance> ocl_networks;
......@@ -74,11 +76,11 @@ private:
// Statistic related things
void print_call_performance(const std::shared_ptr<cldnn::network> network,
const std::shared_ptr<Function> func,
size_t time_compile,
size_t time_call,
double mem_before_call,
double mem_after_compilation,
double mem_after_call) const;
double time_compile,
double time_call,
double mem_compilation_consumed,
double mem_call_consumed,
double mem_current) const;
bool m_profile_enable = false;
long m_profile_lines_limit_count = 10;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment