Unverified Commit 2d2fc8c2 authored by Robert Kimball's avatar Robert Kimball Committed by GitHub

Add bigger models to performance benchmarks (#342)

* add larger test models
parent 19a10d79
......@@ -27,6 +27,7 @@
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/manager.hpp"
#include "ngraph/runtime/utils.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
#include "util/random.hpp"
......@@ -59,51 +60,32 @@ static multimap<size_t, string>
return rc;
}
// Starting point CPU: 1.2ms/iteration
// Allocate a backend tensor view whose element type and shape mirror the
// supplied value-type descriptor. The returned tensor is uninitialized;
// callers fill it (e.g. via test::Uniform) before use.
shared_ptr<runtime::TensorView> make_tensor(runtime::Backend& backend, const ValueType& value)
{
    return backend.make_primary_tensor_view(value.get_element_type(), value.get_shape());
}
TEST(benchmark, mxnet_mnist_mlp_forward)
shared_ptr<runtime::Value> make_value(runtime::Backend& backend, const ValueType& value)
{
test::Uniform<float> rng{-1, 1, 0};
const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json");
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
vector<shared_ptr<runtime::Value>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
shared_ptr<runtime::Value> rc;
const TupleType* tuple = dynamic_cast<const TupleType*>(&value);
const TensorViewType* tensor = dynamic_cast<const TensorViewType*>(&value);
if (tuple)
{
auto arg = make_tensor(*backend, *(param->get_value_type()));
rng.initialize(arg);
args.push_back(arg);
vector<shared_ptr<runtime::Value>> elements;
for (auto value_type : tuple->get_element_types())
{
elements.push_back(make_value(backend, *value_type));
}
shared_ptr<const ValueType> result_type = f->get_result_type();
auto result = make_tensor(*backend, *result_type);
stopwatch t1;
t1.start();
float count = 1000;
for (size_t i = 0; i < static_cast<size_t>(count); i++)
rc = backend.make_tuple(elements);
}
else if (tensor)
{
cf->call(args, {result});
rc = backend.make_primary_tensor_view(value.get_element_type(), value.get_shape());
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / count << "ms per iteration\n";
else
{
NGRAPH_INFO;
}
return rc;
}
TEST(benchmark, mxnet_10_bucket_lstm)
void run_benchmark(const std::string& json_path, size_t iterations)
{
bool emit_timing = (std::getenv("NGRAPH_CPU_EMIT_TIMING") != nullptr);
if (!emit_timing)
......@@ -112,37 +94,52 @@ TEST(benchmark, mxnet_10_bucket_lstm)
}
test::Uniform<float> rng{-1, 1, 0};
const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/10_bucket_LSTM.json");
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
runtime::cpu::CPU_CallFrame* cpu_cf = static_cast<runtime::cpu::CPU_CallFrame*>(cf.get());
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::Value>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto arg = make_tensor(*backend, *(param->get_value_type()));
rng.initialize(arg);
auto arg = make_value(*backend, *(param->get_value_type()));
auto tuple = dynamic_pointer_cast<runtime::Tuple>(arg);
auto tensor = dynamic_pointer_cast<runtime::TensorView>(arg);
if (tuple)
{
NGRAPH_INFO;
// Need to initialize this...maybe later
}
else if (tensor)
{
rng.initialize(tensor);
}
else
{
}
args.push_back(arg);
}
shared_ptr<const ValueType> result_type = f->get_result_type();
auto result = make_tensor(*backend, *result_type);
auto result = make_value(*backend, *result_type);
stopwatch t1;
t1.start();
float count = 30;
for (size_t i = 0; i < static_cast<size_t>(count); i++)
for (size_t i = 0; i < static_cast<size_t>(iterations); i++)
{
cf->call(args, {result});
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / count << "ms per iteration\n";
cout << time / iterations << "ms per iteration" << endl;
if (emit_timing)
{
......@@ -160,20 +157,33 @@ TEST(benchmark, mxnet_10_bucket_lstm)
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first
<< "us\n";
}
// size_t count = 30;
// for (const runtime::cpu::PerformanceCounter& p : perf_data)
// {
// if (--count == 0)
// {
// break;
// }
// cout.imbue(locale(""));
// cout << setw(15) << left << p.name() << " " << setw(10) << right
// << p.microseconds() << "us\n";
// }
}
}
// Benchmark: deserialize the MNIST MLP forward-pass model from the
// serialized zoo and time 1000 call-frame iterations.
TEST(benchmark, mxnet_mnist_mlp_forward)
{
    run_benchmark(file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json"), 1000);
}
// Benchmark: 10-bucket LSTM model, 10 timed iterations (larger model, so
// fewer iterations than the MLP benchmark).
TEST(benchmark, mxnet_10_bucket_lstm)
{
    run_benchmark(file_util::path_join(SERIALIZED_ZOO, "mxnet/10_bucket_LSTM.json"), 10);
}
// Benchmark: LSTM backward (training) graph, 10 timed iterations.
TEST(benchmark, mxnet_lstm_backward)
{
    run_benchmark(file_util::path_join(SERIALIZED_ZOO, "mxnet/LSTM_backward.json"), 10);
}
// Benchmark: LSTM forward (inference) graph, 10 timed iterations.
TEST(benchmark, mxnet_lstm_forward)
{
    run_benchmark(file_util::path_join(SERIALIZED_ZOO, "mxnet/LSTM_forward.json"), 10);
}
//
// Benchmarks a graph that concatenates six 32x1x200 arrays along the middle axis.
//
......
......@@ -31,20 +31,3 @@ TEST(eigen, simple)
m(1, 1) = m(1, 0) + m(0, 1);
EXPECT_FLOAT_EQ(m(1, 1), 1.5);
}
// Scratch test: coefficient-wise product of two 8-element float arrays
// through Eigen::Map over raw (unaligned) storage, written into a vector.
// NOTE(review): no EXPECT/ASSERT — the result is only printed, so this
// test cannot fail on a wrong product; consider asserting {1,4,9,...,64}.
TEST(eigen, test)
{
float arg0[] = {1, 2, 3, 4, 5, 6, 7, 8};
float arg1[] = {1, 2, 3, 4, 5, 6, 7, 8};
vector<float> out(8);
// Map out/arg0/arg1 as fixed-size 8x1 Eigen arrays; Unaligned because the
// raw C arrays and vector data carry no alignment guarantee for Eigen.
Eigen::Map<Eigen::Array<float, 8, 1>, Eigen::Unaligned>(out.data()) =
Eigen::Map<Eigen::Array<float, 8, 1>, Eigen::Unaligned>(arg0) *
Eigen::Map<Eigen::Array<float, 8, 1>, Eigen::Unaligned>(arg1);
cout << "result " << join(out) << endl;
// Kept for reference: the emitted-kernel form this maps onto.
// EigenArray1d<float>(Multiply_163_0, fmt::V{12}) =
// EigenArray1d<float>(Maximum_162_0, fmt::V{12}) *
// EigenArray1d<float>(Parameter_158_0, fmt::V{12});
}
This diff is collapsed.
This diff is collapsed.
......@@ -126,7 +126,10 @@ TEST(serialize, main)
TEST(serialize, existing_models)
{
vector<string> models = {"mxnet/mnist_mlp_forward.json", "mxnet/10_bucket_LSTM.json"};
vector<string> models = {"mxnet/mnist_mlp_forward.json",
"mxnet/10_bucket_LSTM.json",
"mxnet/LSTM_backward.json",
"mxnet/LSTM_forward.json"};
for (const string& model : models)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment