Commit 683822ef authored by Pruthvi, committed by Robert Kimball

- fixed performance_counter for CPU DEX (#1609)

* - fixed performance_counter for CPU DEX

* made changes to start the clock at the first functor and collect the perf_counter at the execution of the last functor for a given kernel execution

* - made changes so that the max_pool kernel has exactly one functor
- assert if the number of functors != the number of ops

* fixed profiler_counter for TBB execution

* avoid unnecessary calculation of the enables<map> value

* Addressed PR comments
-   updated "enables" -> list<functors>
parent 23442974
......@@ -144,7 +144,6 @@ namespace ngraph
ctx, fdeps[2], ctx->mkldnn_workspaces[fdeps[3]]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index - 1);
};
functors.emplace_back(functor_fprop);
auto& bdeps = mkldnn_emitter->get_primitive_deps(max_pool_index);
auto functor_bprop = [&, max_pool_index](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, bdeps[0], delta_tensor);
......@@ -153,7 +152,11 @@ namespace ngraph
cpu::mkldnn_utils::set_memory_ptr(ctx, bdeps[2], out_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index);
};
functors.emplace_back(functor_bprop);
auto functor = [&, functor_fprop, functor_bprop](CPURuntimeContext* ctx) {
functor_fprop(ctx);
functor_bprop(ctx);
};
functors.emplace_back(functor);
}
else
{
......
......@@ -1282,7 +1282,6 @@ void runtime::cpu::CPU_ExternalFunction::build()
m_op_attrs.emplace_back(node->description(), out_names, in_names);
size_t functor_count = functors.size();
handler->second(this, node.get(), in, out);
bool disable_caching = computes_result(node.get()) || possibly_overwritten(node.get());
......@@ -1335,10 +1334,13 @@ void runtime::cpu::CPU_ExternalFunction::build()
};
}
enables.emplace_back(make_pair(enable, functors.size() - functor_count));
enables.emplace_back(enable);
enable_nodename_list.emplace_back(make_pair(enable, node->get_name()));
}
//This check ensures we have exactly one functor for Op.
assert(m_op_attrs.size() == functors.size());
executor = [&](CPURuntimeContext* ctx, vector<void*>& inputs, vector<void*>& outputs) {
cpu::Timestamp start_ts;
int profiler_count = 0;
......@@ -1379,45 +1381,33 @@ void runtime::cpu::CPU_ExternalFunction::build()
auto it = enable_nodename_list.begin();
for (const auto& p : enables)
{
std::vector<std::function<void(CPURuntimeContext*)>> ftrs;
for (size_t j = 0; j < p.second; j++)
{
ftrs.push_back(*functor);
std::advance(functor, 1);
}
tbb::flow::continue_node<tbb::flow::continue_msg, tbb::flow::lightweight>*
flowgraph_node = new tbb::flow::continue_node<tbb::flow::continue_msg,
tbb::flow::lightweight>(
*(ctx->G), [&, ftrs](const tbb::flow::continue_msg& msg) {
if (p.first(ctx) || ctx->first_iteration)
*(ctx->G), [&](const tbb::flow::continue_msg& msg) {
if (p(ctx) || ctx->first_iteration)
{
for (size_t j = 0; j < p.second; j++)
if (runtime::cpu::IsTracingEnabled())
{
if (runtime::cpu::IsTracingEnabled())
{
start_ts = cpu::Clock::now();
}
ftrs[j](ctx);
if (runtime::cpu::IsTracingEnabled())
{
ctx->op_durations[profiler_count++] =
(std::chrono::duration_cast<cpu::Timescale>(
cpu::Clock::now() - start_ts))
.count();
}
start_ts = cpu::Clock::now();
}
(*functor)(ctx);
if (runtime::cpu::IsTracingEnabled())
{
ctx->op_durations[profiler_count++] =
(std::chrono::duration_cast<cpu::Timescale>(
cpu::Clock::now() - start_ts))
.count();
}
}
else
{
if (runtime::cpu::IsTracingEnabled())
{
for (size_t j = 0; j < p.second; j++)
{
ctx->op_durations[profiler_count++] = 0;
}
ctx->op_durations[profiler_count++] = 0;
}
}
std::advance(functor, 1);
});
nodename_tbbnode_map.insert({it->second, flowgraph_node});
it++;
......@@ -1468,37 +1458,31 @@ void runtime::cpu::CPU_ExternalFunction::build()
{
for (const auto& p : enables)
{
if (p.first(ctx) || ctx->first_iteration)
if (p(ctx) || ctx->first_iteration)
{
for (size_t j = 0; j < p.second; j++)
// Each Op will have exactly one functor, so start the clock before the execution of the functor
// and collect the profiler_count once the execution completes
if (runtime::cpu::IsTracingEnabled())
{
if (runtime::cpu::IsTracingEnabled())
{
start_ts = cpu::Clock::now();
}
(*functor)(ctx);
if (runtime::cpu::IsTracingEnabled())
{
ctx->op_durations[profiler_count++] =
(std::chrono::duration_cast<cpu::Timescale>(cpu::Clock::now() -
start_ts))
.count();
}
std::advance(functor, 1);
start_ts = cpu::Clock::now();
}
(*functor)(ctx);
if (runtime::cpu::IsTracingEnabled())
{
ctx->op_durations[profiler_count++] =
(std::chrono::duration_cast<cpu::Timescale>(cpu::Clock::now() -
start_ts))
.count();
}
}
else
{
if (runtime::cpu::IsTracingEnabled())
{
for (size_t j = 0; j < p.second; j++)
{
ctx->op_durations[profiler_count++] = 0;
}
ctx->op_durations[profiler_count++] = 0;
}
std::advance(functor, p.second);
}
std::advance(functor, 1);
}
}
ctx->first_iteration = false;
......
......@@ -213,7 +213,7 @@ namespace ngraph
std::string m_function_name;
std::list<std::function<void(CPURuntimeContext*)>> functors;
std::list<std::pair<std::function<bool(CPURuntimeContext*)>, size_t>> enables;
std::list<std::function<bool(CPURuntimeContext*)>> enables;
std::list<std::pair<std::function<bool(CPURuntimeContext*)>, std::string>>
enable_nodename_list;
std::function<void(CPURuntimeContext*, std::vector<void*>&, std::vector<void*>&)>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment