Commit f277e1c2 authored by Jaikrishnan Menon, committed by Scott Cyphers

DEX Micro-optimizations: Use the mapped-value reference capture idiom everywhere (#1352)

parent 4efcb76e
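The idiom named in the title: rather than keying hot-path lookups by tensor name, the build step resolves each name once and stores a std::reference_wrapper to the map's mapped value. std::unordered_map guarantees that references to existing elements survive later insertions, so the bindings stay valid as the map grows. A minimal, self-contained sketch of the pattern (illustrative names only, not the ngraph sources):

```cpp
#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

int main()
{
    // The name-keyed map stays, but it is only consulted at build time.
    std::unordered_map<std::string, void*> tensor_data;

    // Build phase: one lookup per tensor; operator[] creates the slot and we
    // keep a reference to it. unordered_map never invalidates references to
    // elements on insertion, so the bindings remain valid.
    std::vector<std::pair<std::reference_wrapper<void*>, std::size_t>> bindings;
    bindings.emplace_back(tensor_data["a"], 0);
    bindings.emplace_back(tensor_data["b"], 1);

    // Execution phase: write through the captured slots; no hashing and no
    // string comparisons on the hot path.
    int x = 0, y = 0;
    void* inputs[2] = {&x, &y};
    for (const auto& p : bindings)
    {
        p.first.get() = inputs[p.second];
    }

    std::cout << (tensor_data["a"] == &x) << (tensor_data["b"] == &y) << "\n"; // prints 11
}
```

At execution time the captured slots are written directly, so per-call string hashing and comparisons disappear from the dispatch loop. The hunks below apply this pattern to the input, output, and intermediate tensor tables.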
@@ -1129,7 +1129,9 @@ void runtime::cpu::CPU_ExternalFunction::build()
         for (size_t i = 0; i < param->get_output_size(); ++i)
         {
             shared_ptr<descriptor::TensorView> tv = param->get_output_tensor_view(i);
-            function_input_index[tv->get_tensor().get_name()] = arg_index;
+            function_input_index.emplace_back(tensor_data[tv->get_tensor().get_name()],
+                                              arg_index,
+                                              tensor_stale[tv->get_tensor().get_name()]);
             arg_index++;
         }
     }
@@ -1139,14 +1141,14 @@ void runtime::cpu::CPU_ExternalFunction::build()
     {
         shared_ptr<Node> op = m_function->get_output_op(i);
         shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view();
-        function_output_index[tv->get_tensor().get_name()] = i;
+        function_output_index.emplace_back(tensor_data[tv->get_tensor().get_name()], i);
         auto res = std::dynamic_pointer_cast<ngraph::op::Result>(op);
         if (!res->needs_copy())
         {
             shared_ptr<descriptor::TensorView> itv =
                 res->get_inputs().at(0).get_output().get_tensor_view();
-            function_output_index[itv->get_tensor().get_name()] = i;
+            function_output_index.emplace_back(tensor_data[itv->get_tensor().get_name()], i);
         }
     }
@@ -1159,7 +1161,8 @@ void runtime::cpu::CPU_ExternalFunction::build()
        {
            for (auto tensor : node->liveness_new_list)
            {
-               intermediates_offsets[tensor->get_name()] = tensor->get_pool_offset();
+               intermediates_offsets.emplace_back(tensor_data[tensor->get_name()],
+                                                  tensor->get_pool_offset());
            }
        }
    }
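The three hunks above all make the same change: function_input_index, function_output_index, and intermediates_offsets stop being name-keyed maps and become lists of prebuilt bindings into tensor_data and tensor_stale. A standalone sketch of the input-side triple, using made-up tensor names rather than the actual build code:

```cpp
#include <cstddef>
#include <functional>
#include <list>
#include <string>
#include <tuple>
#include <unordered_map>

int main()
{
    std::unordered_map<std::string, void*> tensor_data;
    std::unordered_map<std::string, bool> tensor_stale;

    // One entry per graph input: (where to write the data pointer,
    //                             which argument slot to read,
    //                             where to write the stale flag).
    std::list<std::tuple<std::reference_wrapper<void*>, std::size_t, std::reference_wrapper<bool>>>
        function_input_index;

    // Build phase: a single hash lookup per tensor name ("Parameter_0" is made up).
    function_input_index.emplace_back(
        tensor_data["Parameter_0"], 0, tensor_stale["Parameter_0"]);

    // Call phase: roughly what the executor does per invocation.
    int payload = 42;
    void* inputs[1] = {&payload};
    bool p_en[1] = {true};
    for (const auto& p : function_input_index)
    {
        std::get<0>(p).get() = inputs[std::get<1>(p)];
        std::get<2>(p).get() = p_en[std::get<1>(p)];
    }
    return tensor_stale["Parameter_0"] ? 0 : 1;
}
```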
@@ -1212,21 +1215,47 @@ void runtime::cpu::CPU_ExternalFunction::build()
             handler->second(this, node.get(), in, out);
             bool disable_caching = computes_result(node.get()) || possibly_overwritten(node.get());
-            auto enable = [&, in_names, out_names, disable_caching](CPURuntimeContext* ctx) -> bool {
-                bool en = false;
-                for (const auto& name : in_names)
-                {
-                    if (tensor_stale[name] || disable_caching)
-                    {
-                        en = true;
-                    }
-                }
-                for (const auto& name : out_names)
-                {
-                    tensor_stale[name] = en;
-                }
-                return en;
-            };
+            vector<reference_wrapper<bool>> in_stale, out_stale;
+            for (const auto& name : in_names)
+            {
+                in_stale.emplace_back(tensor_stale[name]);
+            }
+            for (const auto& name : out_names)
+            {
+                out_stale.emplace_back(tensor_stale[name]);
+            }
+
+            function<bool(CPURuntimeContext*)> enable;
+            if (disable_caching)
+            {
+                enable = [in_stale, out_stale](CPURuntimeContext* ctx) -> bool {
+                    for (auto& stale : out_stale)
+                    {
+                        stale.get() = true;
+                    }
+                    return true;
+                };
+            }
+            else
+            {
+                enable = [in_stale, out_stale](CPURuntimeContext* ctx) -> bool {
+                    bool en = false;
+                    for (const auto& stale : in_stale)
+                    {
+                        if (stale)
+                        {
+                            en = true;
+                            break;
+                        }
+                    }
+                    for (auto& stale : out_stale)
+                    {
+                        stale.get() = en;
+                    }
+                    return en;
+                };
+            }
+
             enables.emplace_back(make_pair(enable, functors.size() - functor_count));
             enable_nodename_list.emplace_back(make_pair(enable, node->get_name()));
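Two things change in the enable functor: the per-name tensor_stale lookups move out of the lambda into prebuilt reference_wrapper<bool> vectors, and the disable_caching test moves to build time, so each node gets one of two specialized lambdas instead of re-checking the flag on every call. One subtle point worth illustrating separately: the vectors are captured by value in a non-mutable lambda, yet writes through them still reach the original flags, because copying a reference_wrapper copies the binding and its const get() returns a mutable bool&. A standalone sketch, not taken from the sources:

```cpp
#include <functional>
#include <iostream>
#include <vector>

int main()
{
    bool a = false, b = false;
    std::vector<std::reference_wrapper<bool>> out_stale{a, b};

    // Non-mutable lambda, vector captured by value: writes still land on a and b.
    auto enable = [out_stale]() {
        for (auto& stale : out_stale)
        {
            stale.get() = true;
        }
        return true;
    };

    enable();
    std::cout << std::boolalpha << a << " " << b << "\n"; // true true
}
```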
@@ -1240,20 +1269,19 @@ void runtime::cpu::CPU_ExternalFunction::build()
        {
            for (auto& p : intermediates_offsets)
            {
-               tensor_data[p.first] =
-                   static_cast<uint8_t*>(ctx->memory_buffers[0]->get_ptr()) + p.second;
+               p.first.get() = static_cast<uint8_t*>(ctx->memory_buffers[0]->get_ptr()) + p.second;
            }
        }
        for (const auto& p : function_input_index)
        {
-           tensor_data[p.first] = inputs[p.second];
-           tensor_stale[p.first] = ctx->p_en[p.second];
+           get<0>(p).get() = inputs[get<1>(p)];
+           get<2>(p).get() = ctx->p_en[get<1>(p)];
        }
        for (const auto& p : function_output_index)
        {
-           tensor_data[p.first] = outputs[p.second];
+           p.first.get() = outputs[p.second];
        }
        auto functor = functors.begin();
...
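The executor-side loop above patches intermediate tensor pointers on every call: each binding pairs a slot to write with a byte offset into the shared memory pool, so the per-call work is pointer arithmetic plus a store. A compilable sketch of just that pattern (names are illustrative, not ngraph's):

```cpp
#include <cstddef>
#include <cstdint>
#include <functional>
#include <list>
#include <utility>

int main()
{
    void* slot_a = nullptr;
    void* slot_b = nullptr;

    // Each binding: (pointer slot to patch, byte offset into the pooled buffer).
    std::list<std::pair<std::reference_wrapper<void*>, std::size_t>> intermediates_offsets;
    intermediates_offsets.emplace_back(slot_a, 0);
    intermediates_offsets.emplace_back(slot_b, 256);

    // Stand-in for ctx->memory_buffers[0]->get_ptr(): one shared allocation.
    static std::uint8_t pool[1024];

    // Per-call patch-up: no name lookups, just base + offset.
    for (const auto& p : intermediates_offsets)
    {
        p.first.get() = pool + p.second;
    }
    return (slot_b == pool + 256) ? 0 : 1;
}
```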
@@ -24,6 +24,7 @@
 #include <typeindex>
 #include <typeinfo>
 #include <unordered_map>
+#include <utility>
 #include <vector>
 #include "ngraph/codegen/code_writer.hpp"
@@ -178,8 +179,11 @@ namespace ngraph
                     executor;
                 std::unordered_map<std::string, void*> tensor_data;
                 std::unordered_map<std::string, bool> tensor_stale;
-                std::unordered_map<std::string, size_t> intermediates_offsets;
-                std::unordered_map<std::string, size_t> function_input_index, function_output_index;
+                std::list<std::pair<std::reference_wrapper<void*>, size_t>> intermediates_offsets;
+                std::list<
+                    std::tuple<std::reference_wrapper<void*>, size_t, std::reference_wrapper<bool>>>
+                    function_input_index;
+                std::list<std::pair<std::reference_wrapper<void*>, size_t>> function_output_index;
                 std::unordered_map<std::string, std::shared_ptr<CPU_ExternalFunction>> callees;
                 bool m_is_built;
                 bool m_direct_execution;
...