Commit 683822ef authored by Pruthvi's avatar Pruthvi Committed by Robert Kimball

- fixed performance_counter for CPU DEX (#1609)

* - fixed performance_counter for CPU DEX

* made changes to start the clock at the first functor and collect the perf_counter at the execution of the last functor for a given kernel execution

* - made changes so that the max_pool kernel has exactly one functor
- assert if the number of functors != the number of ops

* fixed profiler_counter for TBB execution

* avoid unnecessary calculation to enables<map> value

* Addressed PR comments
-   updated "enables" -> list<functors>
parent 23442974
...@@ -144,7 +144,6 @@ namespace ngraph ...@@ -144,7 +144,6 @@ namespace ngraph
ctx, fdeps[2], ctx->mkldnn_workspaces[fdeps[3]]); ctx, fdeps[2], ctx->mkldnn_workspaces[fdeps[3]]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index - 1); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index - 1);
}; };
functors.emplace_back(functor_fprop);
auto& bdeps = mkldnn_emitter->get_primitive_deps(max_pool_index); auto& bdeps = mkldnn_emitter->get_primitive_deps(max_pool_index);
auto functor_bprop = [&, max_pool_index](CPURuntimeContext* ctx) { auto functor_bprop = [&, max_pool_index](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, bdeps[0], delta_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, bdeps[0], delta_tensor);
...@@ -153,7 +152,11 @@ namespace ngraph ...@@ -153,7 +152,11 @@ namespace ngraph
cpu::mkldnn_utils::set_memory_ptr(ctx, bdeps[2], out_tensor); cpu::mkldnn_utils::set_memory_ptr(ctx, bdeps[2], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index); cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index);
}; };
functors.emplace_back(functor_bprop); auto functor = [&, functor_fprop, functor_bprop](CPURuntimeContext* ctx) {
functor_fprop(ctx);
functor_bprop(ctx);
};
functors.emplace_back(functor);
} }
else else
{ {
......
...@@ -1282,7 +1282,6 @@ void runtime::cpu::CPU_ExternalFunction::build() ...@@ -1282,7 +1282,6 @@ void runtime::cpu::CPU_ExternalFunction::build()
m_op_attrs.emplace_back(node->description(), out_names, in_names); m_op_attrs.emplace_back(node->description(), out_names, in_names);
size_t functor_count = functors.size();
handler->second(this, node.get(), in, out); handler->second(this, node.get(), in, out);
bool disable_caching = computes_result(node.get()) || possibly_overwritten(node.get()); bool disable_caching = computes_result(node.get()) || possibly_overwritten(node.get());
...@@ -1335,10 +1334,13 @@ void runtime::cpu::CPU_ExternalFunction::build() ...@@ -1335,10 +1334,13 @@ void runtime::cpu::CPU_ExternalFunction::build()
}; };
} }
enables.emplace_back(make_pair(enable, functors.size() - functor_count)); enables.emplace_back(enable);
enable_nodename_list.emplace_back(make_pair(enable, node->get_name())); enable_nodename_list.emplace_back(make_pair(enable, node->get_name()));
} }
//This check ensures we have exactly one functor for Op.
assert(m_op_attrs.size() == functors.size());
executor = [&](CPURuntimeContext* ctx, vector<void*>& inputs, vector<void*>& outputs) { executor = [&](CPURuntimeContext* ctx, vector<void*>& inputs, vector<void*>& outputs) {
cpu::Timestamp start_ts; cpu::Timestamp start_ts;
int profiler_count = 0; int profiler_count = 0;
...@@ -1379,45 +1381,33 @@ void runtime::cpu::CPU_ExternalFunction::build() ...@@ -1379,45 +1381,33 @@ void runtime::cpu::CPU_ExternalFunction::build()
auto it = enable_nodename_list.begin(); auto it = enable_nodename_list.begin();
for (const auto& p : enables) for (const auto& p : enables)
{ {
std::vector<std::function<void(CPURuntimeContext*)>> ftrs;
for (size_t j = 0; j < p.second; j++)
{
ftrs.push_back(*functor);
std::advance(functor, 1);
}
tbb::flow::continue_node<tbb::flow::continue_msg, tbb::flow::lightweight>* tbb::flow::continue_node<tbb::flow::continue_msg, tbb::flow::lightweight>*
flowgraph_node = new tbb::flow::continue_node<tbb::flow::continue_msg, flowgraph_node = new tbb::flow::continue_node<tbb::flow::continue_msg,
tbb::flow::lightweight>( tbb::flow::lightweight>(
*(ctx->G), [&, ftrs](const tbb::flow::continue_msg& msg) { *(ctx->G), [&](const tbb::flow::continue_msg& msg) {
if (p.first(ctx) || ctx->first_iteration) if (p(ctx) || ctx->first_iteration)
{ {
for (size_t j = 0; j < p.second; j++) if (runtime::cpu::IsTracingEnabled())
{ {
if (runtime::cpu::IsTracingEnabled()) start_ts = cpu::Clock::now();
{ }
start_ts = cpu::Clock::now(); (*functor)(ctx);
} if (runtime::cpu::IsTracingEnabled())
ftrs[j](ctx); {
if (runtime::cpu::IsTracingEnabled()) ctx->op_durations[profiler_count++] =
{ (std::chrono::duration_cast<cpu::Timescale>(
ctx->op_durations[profiler_count++] = cpu::Clock::now() - start_ts))
(std::chrono::duration_cast<cpu::Timescale>( .count();
cpu::Clock::now() - start_ts))
.count();
}
} }
} }
else else
{ {
if (runtime::cpu::IsTracingEnabled()) if (runtime::cpu::IsTracingEnabled())
{ {
for (size_t j = 0; j < p.second; j++) ctx->op_durations[profiler_count++] = 0;
{
ctx->op_durations[profiler_count++] = 0;
}
} }
} }
std::advance(functor, 1);
}); });
nodename_tbbnode_map.insert({it->second, flowgraph_node}); nodename_tbbnode_map.insert({it->second, flowgraph_node});
it++; it++;
...@@ -1468,37 +1458,31 @@ void runtime::cpu::CPU_ExternalFunction::build() ...@@ -1468,37 +1458,31 @@ void runtime::cpu::CPU_ExternalFunction::build()
{ {
for (const auto& p : enables) for (const auto& p : enables)
{ {
if (p.first(ctx) || ctx->first_iteration) if (p(ctx) || ctx->first_iteration)
{ {
for (size_t j = 0; j < p.second; j++) // Each Op will have exactly one functor; start the clock before the execution of the functor
// and collect the profiler_count once the execution completes
if (runtime::cpu::IsTracingEnabled())
{ {
if (runtime::cpu::IsTracingEnabled()) start_ts = cpu::Clock::now();
{ }
start_ts = cpu::Clock::now(); (*functor)(ctx);
} if (runtime::cpu::IsTracingEnabled())
(*functor)(ctx); {
if (runtime::cpu::IsTracingEnabled()) ctx->op_durations[profiler_count++] =
{ (std::chrono::duration_cast<cpu::Timescale>(cpu::Clock::now() -
ctx->op_durations[profiler_count++] = start_ts))
(std::chrono::duration_cast<cpu::Timescale>(cpu::Clock::now() - .count();
start_ts))
.count();
}
std::advance(functor, 1);
} }
} }
else else
{ {
if (runtime::cpu::IsTracingEnabled()) if (runtime::cpu::IsTracingEnabled())
{ {
for (size_t j = 0; j < p.second; j++) ctx->op_durations[profiler_count++] = 0;
{
ctx->op_durations[profiler_count++] = 0;
}
} }
std::advance(functor, p.second);
} }
std::advance(functor, 1);
} }
} }
ctx->first_iteration = false; ctx->first_iteration = false;
......
...@@ -213,7 +213,7 @@ namespace ngraph ...@@ -213,7 +213,7 @@ namespace ngraph
std::string m_function_name; std::string m_function_name;
std::list<std::function<void(CPURuntimeContext*)>> functors; std::list<std::function<void(CPURuntimeContext*)>> functors;
std::list<std::pair<std::function<bool(CPURuntimeContext*)>, size_t>> enables; std::list<std::function<bool(CPURuntimeContext*)>> enables;
std::list<std::pair<std::function<bool(CPURuntimeContext*)>, std::string>> std::list<std::pair<std::function<bool(CPURuntimeContext*)>, std::string>>
enable_nodename_list; enable_nodename_list;
std::function<void(CPURuntimeContext*, std::vector<void*>&, std::vector<void*>&)> std::function<void(CPURuntimeContext*, std::vector<void*>&, std::vector<void*>&)>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment