Unverified Commit c631b50b authored by Robert Kimball's avatar Robert Kimball Committed by GitHub

nbench buffer copy each iteration (#1578)

* add option to copy input/output data for each iteration

* add support for stale buffers
parent 20c2325c
......@@ -19,6 +19,7 @@
#include "benchmark.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/host_tensor_view.hpp"
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/serializer.hpp"
......@@ -151,7 +152,8 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
const string& backend_name,
size_t iterations,
bool timing_detail,
int warmup_iterations)
int warmup_iterations,
bool copy_data)
{
stopwatch timer;
timer.start();
......@@ -162,20 +164,28 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
cout.imbue(locale(""));
cout << "compile time: " << timer.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::HostTensorView>> arg_data;
vector<shared_ptr<runtime::TensorView>> args;
vector<bool> args_cacheable;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor = backend->create_tensor(param->get_element_type(), param->get_shape());
auto tensor_data =
make_shared<runtime::HostTensorView>(param->get_element_type(), param->get_shape());
random_init(tensor);
args.push_back(tensor);
arg_data.push_back(tensor_data);
args_cacheable.push_back(param->get_cacheable());
}
vector<shared_ptr<runtime::HostTensorView>> result_data;
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->create_tensor(out->get_element_type(), out->get_shape());
auto tensor_data =
make_shared<runtime::HostTensorView>(out->get_element_type(), out->get_shape());
results.push_back(result);
result_data.push_back(tensor_data);
}
for (size_t i = 0; i < args.size(); i++)
......@@ -196,9 +206,33 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
stopwatch t1;
t1.start();
for (size_t i = 0; i < static_cast<size_t>(iterations); i++)
for (size_t i = 0; i < iterations; i++)
{
if (copy_data)
{
for (size_t arg_index = 0; arg_index < args.size(); arg_index++)
{
const shared_ptr<runtime::TensorView>& arg = args[arg_index];
if (arg->get_stale())
{
const shared_ptr<runtime::HostTensorView>& data = arg_data[arg_index];
arg->write(data->get_data_ptr(),
0,
data->get_size() * data->get_element_type().size());
}
}
}
backend->call(f, results, args);
if (copy_data)
{
for (size_t result_index = 0; result_index < results.size(); result_index++)
{
const shared_ptr<runtime::HostTensorView>& data = result_data[result_index];
const shared_ptr<runtime::TensorView>& result = results[result_index];
result->read(
data->get_data_ptr(), 0, data->get_size() * data->get_element_type().size());
}
}
}
t1.stop();
float time = t1.get_milliseconds();
......
......@@ -32,4 +32,5 @@ std::vector<ngraph::runtime::PerformanceCounter> run_benchmark(std::shared_ptr<n
const std::string& backend_name,
size_t iterations,
bool timing_detail,
int warmup_iterations);
int warmup_iterations,
bool copy_data);
......@@ -153,7 +153,7 @@ void print_results(vector<PerfShape> perf_data, bool timing_detail)
int main(int argc, char** argv)
{
string model;
string backend = "CPU";
string backend;
string directory;
int iterations = 10;
bool failed = false;
......@@ -161,6 +161,7 @@ int main(int argc, char** argv)
bool timing_detail = false;
bool visualize = false;
int warmup_iterations = 1;
bool copy_data = true;
for (size_t i = 1; i < argc; i++)
{
......@@ -193,6 +194,10 @@ int main(int argc, char** argv)
{
timing_detail = true;
}
else if (arg == "--no_copy_data")
{
copy_data = false;
}
else if (arg == "-v" || arg == "--visualize")
{
visualize = true;
......@@ -234,6 +239,11 @@ int main(int argc, char** argv)
cout << "Either file or directory must be specified\n";
failed = true;
}
else if (backend.empty())
{
cout << "Backend missing\n";
failed = true;
}
if (failed)
{
......@@ -251,8 +261,9 @@ OPTIONS
-i|--iterations Iterations (default: 10)
-s|--statistics Display op statistics
-v|--visualize Visualize a model (WARNING: requires GraphViz installed)
--timing-detail Gather detailed timing
--timing_detail Gather detailed timing
-w|--warmup_iterations Number of warm-up iterations
--no_copy_data Disable copy of input/result data every iteration
)###";
return 1;
}
......@@ -322,10 +333,8 @@ OPTIONS
try
{
shared_ptr<Function> f = deserialize(m);
// cout << "Benchmarking " << m << ", " << backend << " backend, " << iterations
// << " iterations.\n";
auto perf_data =
run_benchmark(f, backend, iterations, timing_detail, warmup_iterations);
auto perf_data = run_benchmark(
f, backend, iterations, timing_detail, warmup_iterations, copy_data);
auto perf_shape = to_perf_shape(f, perf_data);
aggregate_perf_data.insert(
aggregate_perf_data.end(), perf_shape.begin(), perf_shape.end());
......@@ -340,9 +349,13 @@ OPTIONS
else if (iterations > 0)
{
shared_ptr<Function> f = deserialize(model);
cout << "Benchmarking " << model << ", " << backend << " backend, " << iterations
<< " iterations.\n";
auto perf_data = run_benchmark(f, backend, iterations, timing_detail, warmup_iterations);
cout << "Benchmarking " << model << endl;
cout << " Backend: " << backend << endl;
cout << " Iterations: " << iterations << endl;
cout << " Warmup: " << warmup_iterations << endl;
cout << " Copy Data: " << (copy_data ? "true" : "false") << endl;
auto perf_data =
run_benchmark(f, backend, iterations, timing_detail, warmup_iterations, copy_data);
auto perf_shape = to_perf_shape(f, perf_data);
print_results(perf_shape, timing_detail);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment