Commit 567bc822 authored by Robert Kimball

nbench async option

parent 4b84262c
@@ -196,23 +196,30 @@ void runtime::Backend::async_thread_stop()
     }
 }
 
+static void local_thread_entry(shared_ptr<runtime::Backend::AsyncEvent> event)
+{
+    event->get_executable()->call(event->get_outputs(), event->get_inputs());
+    event->signal_result();
+};
+
 void runtime::Backend::async_thread_process(const shared_ptr<AsyncEvent>& event)
 {
     switch (event->get_type())
     {
     case AsyncEvent::Type::READ:
-        event->get_tensor()->read(event->get_data(), 0, event->get_size_in_bytes());
+        event->get_tensor()->read(event->get_data(), event->get_size_in_bytes());
         event->signal_result();
         break;
     case AsyncEvent::Type::WRITE:
-        event->get_tensor()->write(event->get_data(), 0, event->get_size_in_bytes());
+        event->get_tensor()->write(event->get_data(), event->get_size_in_bytes());
         event->signal_result();
         break;
     case AsyncEvent::Type::EXECUTE:
-        event->get_executable()->call(event->get_outputs(), event->get_inputs());
-        event->signal_result();
+    {
+        std::thread(local_thread_entry, event).detach();
         break;
+    }
     }
 }
 
 void runtime::Backend::async_thread_entry()
...
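Note on the EXECUTE change above: the executable call is now handed to a detached worker thread via local_thread_entry, so the async dispatch thread can keep servicing READ/WRITE events while a graph runs, and the event signals completion when the call returns. The snippet below is a minimal standalone sketch of that detach-and-signal pattern, assuming signal_result() behaves like fulfilling a promise; FakeEvent and its members are hypothetical stand-ins, not ngraph types.

    #include <future>
    #include <iostream>
    #include <memory>
    #include <thread>

    // Hypothetical stand-in for runtime::Backend::AsyncEvent (not an ngraph type).
    struct FakeEvent
    {
        std::promise<void> done; // plays the role of signal_result()
        void run()               // plays the role of get_executable()->call(...)
        {
            std::cout << "executing graph\n";
            done.set_value(); // signal completion to whoever holds the future
        }
    };

    int main()
    {
        auto event = std::make_shared<FakeEvent>();
        // Launch the long-running call on its own thread and detach it, as the
        // EXECUTE case now does; the shared_ptr capture keeps the event alive.
        std::thread([event] { event->run(); }).detach();
        // The waiting side blocks on the future, mirroring a wait on the event.
        event->done.get_future().wait();
        return 0;
    }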
This diff is collapsed.
@@ -34,3 +34,11 @@ std::vector<ngraph::runtime::PerformanceCounter> run_benchmark(std::shared_ptr<n
                   bool timing_detail,
                   int warmup_iterations,
                   bool copy_data);
+
+std::vector<ngraph::runtime::PerformanceCounter>
+    run_benchmark_double_buffered(std::shared_ptr<ngraph::Function> f,
+                                  const std::string& backend_name,
+                                  size_t iterations,
+                                  bool timing_detail,
+                                  int warmup_iterations,
+                                  bool copy_data);
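The declaration above adds a double-buffered variant of the benchmark loop; its definition is not visible on this page (one of the diffs is collapsed). As a rough illustration of the idea only, not the actual run_benchmark_double_buffered code: double buffering alternates two input buffers so that the host copy for iteration i+1 overlaps the backend's work on iteration i. fill() and execute() below are hypothetical stand-ins for the tensor copy and the compiled-function call.

    #include <algorithm>
    #include <array>
    #include <chrono>
    #include <cstddef>
    #include <future>
    #include <iostream>
    #include <thread>
    #include <vector>

    // Hypothetical stand-in for copying input data for one iteration.
    static void fill(std::vector<float>& buf, size_t iteration)
    {
        std::fill(buf.begin(), buf.end(), static_cast<float>(iteration));
    }

    // Hypothetical stand-in for executing the compiled function on a backend.
    static void execute(const std::vector<float>& buf)
    {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
        std::cout << "ran iteration with input value " << buf.front() << "\n";
    }

    int main()
    {
        const size_t iterations = 4;
        // Two buffers alternate roles: one is consumed while the other is refilled.
        std::array<std::vector<float>, 2> buffers = {std::vector<float>(1024),
                                                     std::vector<float>(1024)};
        fill(buffers[0], 0); // prime the first buffer before the loop starts
        for (size_t i = 0; i < iterations; ++i)
        {
            // Start copying the next iteration's inputs on a helper thread...
            auto next = std::async(std::launch::async, fill,
                                   std::ref(buffers[(i + 1) % 2]), i + 1);
            // ...while the backend consumes the buffer filled previously.
            execute(buffers[i % 2]);
            next.wait(); // both sides must finish before the buffers swap roles
        }
        return 0;
    }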
@@ -181,6 +181,7 @@ int main(int argc, char** argv)
     int warmup_iterations = 1;
     bool copy_data = true;
     bool dot_file = false;
+    bool double_buffer = false;
 
     for (size_t i = 1; i < argc; i++)
     {
@@ -229,6 +230,10 @@ int main(int argc, char** argv)
         {
             directory = argv[++i];
         }
+        else if (arg == "--double_buffer")
+        {
+            double_buffer = true;
+        }
         else if (arg == "-w" || arg == "--warmup_iterations")
         {
             try
@@ -283,6 +288,7 @@ OPTIONS
        -w|--warmup_iterations    Number of warm-up iterations
        --no_copy_data            Disable copy of input/result data every iteration
        --dot                     Generate Graphviz dot file
+       --double_buffer           Double buffer inputs and outputs
 )###";
         return 1;
     }
@@ -420,8 +426,17 @@ OPTIONS
         {
             cout << "\n---- Benchmark ----\n";
             shared_ptr<Function> f = deserialize(model);
-            auto perf_data = run_benchmark(
-                f, backend, iterations, timing_detail, warmup_iterations, copy_data);
+            vector<runtime::PerformanceCounter> perf_data;
+            if (double_buffer)
+            {
+                perf_data = run_benchmark_double_buffered(
+                    f, backend, iterations, timing_detail, warmup_iterations, copy_data);
+            }
+            else
+            {
+                perf_data = run_benchmark(
+                    f, backend, iterations, timing_detail, warmup_iterations, copy_data);
+            }
             auto perf_shape = to_perf_shape(f, perf_data);
             aggregate_perf_data.insert(
                 aggregate_perf_data.end(), perf_shape.begin(), perf_shape.end());
...
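Usage note: with this change, passing --double_buffer on the nbench command line routes the benchmark through run_benchmark_double_buffered instead of run_benchmark; the remaining arguments (iterations, timing detail, warm-up iterations, data copies) are forwarded unchanged, so an existing invocation only needs the extra flag appended.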