Unverified Commit c631b50b authored by Robert Kimball's avatar Robert Kimball Committed by GitHub

nbench buffer copy each iteration (#1578)

* add option to copy input/output data for each iteration

* add support for stale buffers
parent 20c2325c
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "benchmark.hpp" #include "benchmark.hpp"
#include "ngraph/file_util.hpp" #include "ngraph/file_util.hpp"
#include "ngraph/runtime/backend.hpp" #include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/host_tensor_view.hpp"
#include "ngraph/runtime/tensor_view.hpp" #include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/runtime/tensor_view.hpp" #include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/serializer.hpp" #include "ngraph/serializer.hpp"
...@@ -151,7 +152,8 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f, ...@@ -151,7 +152,8 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
const string& backend_name, const string& backend_name,
size_t iterations, size_t iterations,
bool timing_detail, bool timing_detail,
int warmup_iterations) int warmup_iterations,
bool copy_data)
{ {
stopwatch timer; stopwatch timer;
timer.start(); timer.start();
...@@ -162,20 +164,28 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f, ...@@ -162,20 +164,28 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
cout.imbue(locale("")); cout.imbue(locale(""));
cout << "compile time: " << timer.get_milliseconds() << "ms" << endl; cout << "compile time: " << timer.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::HostTensorView>> arg_data;
vector<shared_ptr<runtime::TensorView>> args; vector<shared_ptr<runtime::TensorView>> args;
vector<bool> args_cacheable; vector<bool> args_cacheable;
for (shared_ptr<op::Parameter> param : f->get_parameters()) for (shared_ptr<op::Parameter> param : f->get_parameters())
{ {
auto tensor = backend->create_tensor(param->get_element_type(), param->get_shape()); auto tensor = backend->create_tensor(param->get_element_type(), param->get_shape());
auto tensor_data =
make_shared<runtime::HostTensorView>(param->get_element_type(), param->get_shape());
random_init(tensor); random_init(tensor);
args.push_back(tensor); args.push_back(tensor);
arg_data.push_back(tensor_data);
args_cacheable.push_back(param->get_cacheable()); args_cacheable.push_back(param->get_cacheable());
} }
vector<shared_ptr<runtime::HostTensorView>> result_data;
vector<shared_ptr<runtime::TensorView>> results; vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results()) for (shared_ptr<Node> out : f->get_results())
{ {
auto result = backend->create_tensor(out->get_element_type(), out->get_shape()); auto result = backend->create_tensor(out->get_element_type(), out->get_shape());
auto tensor_data =
make_shared<runtime::HostTensorView>(out->get_element_type(), out->get_shape());
results.push_back(result); results.push_back(result);
result_data.push_back(tensor_data);
} }
for (size_t i = 0; i < args.size(); i++) for (size_t i = 0; i < args.size(); i++)
...@@ -196,9 +206,33 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f, ...@@ -196,9 +206,33 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
stopwatch t1; stopwatch t1;
t1.start(); t1.start();
for (size_t i = 0; i < static_cast<size_t>(iterations); i++) for (size_t i = 0; i < iterations; i++)
{ {
if (copy_data)
{
for (size_t arg_index = 0; arg_index < args.size(); arg_index++)
{
const shared_ptr<runtime::TensorView>& arg = args[arg_index];
if (arg->get_stale())
{
const shared_ptr<runtime::HostTensorView>& data = arg_data[arg_index];
arg->write(data->get_data_ptr(),
0,
data->get_size() * data->get_element_type().size());
}
}
}
backend->call(f, results, args); backend->call(f, results, args);
if (copy_data)
{
for (size_t result_index = 0; result_index < results.size(); result_index++)
{
const shared_ptr<runtime::HostTensorView>& data = result_data[result_index];
const shared_ptr<runtime::TensorView>& result = results[result_index];
result->read(
data->get_data_ptr(), 0, data->get_size() * data->get_element_type().size());
}
}
} }
t1.stop(); t1.stop();
float time = t1.get_milliseconds(); float time = t1.get_milliseconds();
......
...@@ -32,4 +32,5 @@ std::vector<ngraph::runtime::PerformanceCounter> run_benchmark(std::shared_ptr<n ...@@ -32,4 +32,5 @@ std::vector<ngraph::runtime::PerformanceCounter> run_benchmark(std::shared_ptr<n
const std::string& backend_name, const std::string& backend_name,
size_t iterations, size_t iterations,
bool timing_detail, bool timing_detail,
int warmup_iterations); int warmup_iterations,
bool copy_data);
...@@ -153,7 +153,7 @@ void print_results(vector<PerfShape> perf_data, bool timing_detail) ...@@ -153,7 +153,7 @@ void print_results(vector<PerfShape> perf_data, bool timing_detail)
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
string model; string model;
string backend = "CPU"; string backend;
string directory; string directory;
int iterations = 10; int iterations = 10;
bool failed = false; bool failed = false;
...@@ -161,6 +161,7 @@ int main(int argc, char** argv) ...@@ -161,6 +161,7 @@ int main(int argc, char** argv)
bool timing_detail = false; bool timing_detail = false;
bool visualize = false; bool visualize = false;
int warmup_iterations = 1; int warmup_iterations = 1;
bool copy_data = true;
for (size_t i = 1; i < argc; i++) for (size_t i = 1; i < argc; i++)
{ {
...@@ -193,6 +194,10 @@ int main(int argc, char** argv) ...@@ -193,6 +194,10 @@ int main(int argc, char** argv)
{ {
timing_detail = true; timing_detail = true;
} }
else if (arg == "--no_copy_data")
{
copy_data = false;
}
else if (arg == "-v" || arg == "--visualize") else if (arg == "-v" || arg == "--visualize")
{ {
visualize = true; visualize = true;
...@@ -234,6 +239,11 @@ int main(int argc, char** argv) ...@@ -234,6 +239,11 @@ int main(int argc, char** argv)
cout << "Either file or directory must be specified\n"; cout << "Either file or directory must be specified\n";
failed = true; failed = true;
} }
else if (backend.empty())
{
cout << "Backend missing\n";
failed = true;
}
if (failed) if (failed)
{ {
...@@ -251,8 +261,9 @@ OPTIONS ...@@ -251,8 +261,9 @@ OPTIONS
-i|--iterations Iterations (default: 10) -i|--iterations Iterations (default: 10)
-s|--statistics Display op stastics -s|--statistics Display op stastics
-v|--visualize Visualize a model (WARNING: requires GraphViz installed) -v|--visualize Visualize a model (WARNING: requires GraphViz installed)
--timing-detail Gather detailed timing --timing_detail Gather detailed timing
-w|--warmup_iterations Number of warm-up iterations -w|--warmup_iterations Number of warm-up iterations
--no_copy_data Disable copy of input/result data every iteration
)###"; )###";
return 1; return 1;
} }
...@@ -322,10 +333,8 @@ OPTIONS ...@@ -322,10 +333,8 @@ OPTIONS
try try
{ {
shared_ptr<Function> f = deserialize(m); shared_ptr<Function> f = deserialize(m);
// cout << "Benchmarking " << m << ", " << backend << " backend, " << iterations auto perf_data = run_benchmark(
// << " iterations.\n"; f, backend, iterations, timing_detail, warmup_iterations, copy_data);
auto perf_data =
run_benchmark(f, backend, iterations, timing_detail, warmup_iterations);
auto perf_shape = to_perf_shape(f, perf_data); auto perf_shape = to_perf_shape(f, perf_data);
aggregate_perf_data.insert( aggregate_perf_data.insert(
aggregate_perf_data.end(), perf_shape.begin(), perf_shape.end()); aggregate_perf_data.end(), perf_shape.begin(), perf_shape.end());
...@@ -340,9 +349,13 @@ OPTIONS ...@@ -340,9 +349,13 @@ OPTIONS
else if (iterations > 0) else if (iterations > 0)
{ {
shared_ptr<Function> f = deserialize(model); shared_ptr<Function> f = deserialize(model);
cout << "Benchmarking " << model << ", " << backend << " backend, " << iterations cout << "Benchmarking " << model << endl;
<< " iterations.\n"; cout << " Backend: " << backend << endl;
auto perf_data = run_benchmark(f, backend, iterations, timing_detail, warmup_iterations); cout << " Iterations: " << iterations << endl;
cout << " Warmup: " << warmup_iterations << endl;
cout << " Copy Data: " << (copy_data ? "true" : "false") << endl;
auto perf_data =
run_benchmark(f, backend, iterations, timing_detail, warmup_iterations, copy_data);
auto perf_shape = to_perf_shape(f, perf_data); auto perf_shape = to_perf_shape(f, perf_data);
print_results(perf_shape, timing_detail); print_results(perf_shape, timing_detail);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment