Unverified Commit 2d2fc8c2 authored by Robert Kimball's avatar Robert Kimball Committed by GitHub

Add bigger models to performance benchmarks (#342)

* add larger test models
parent 19a10d79
......@@ -27,6 +27,7 @@
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/manager.hpp"
#include "ngraph/runtime/utils.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
#include "util/random.hpp"
......@@ -59,51 +60,32 @@ static multimap<size_t, string>
return rc;
}
// Starting point CPU: 1.2ms/iteration
shared_ptr<runtime::TensorView> make_tensor(runtime::Backend& backend, const ValueType& value)
shared_ptr<runtime::Value> make_value(runtime::Backend& backend, const ValueType& value)
{
shared_ptr<runtime::TensorView> arg =
backend.make_primary_tensor_view(value.get_element_type(), value.get_shape());
return arg;
}
TEST(benchmark, mxnet_mnist_mlp_forward)
{
test::Uniform<float> rng{-1, 1, 0};
const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json");
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
vector<shared_ptr<runtime::Value>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
shared_ptr<runtime::Value> rc;
const TupleType* tuple = dynamic_cast<const TupleType*>(&value);
const TensorViewType* tensor = dynamic_cast<const TensorViewType*>(&value);
if (tuple)
{
auto arg = make_tensor(*backend, *(param->get_value_type()));
rng.initialize(arg);
args.push_back(arg);
vector<shared_ptr<runtime::Value>> elements;
for (auto value_type : tuple->get_element_types())
{
elements.push_back(make_value(backend, *value_type));
}
rc = backend.make_tuple(elements);
}
shared_ptr<const ValueType> result_type = f->get_result_type();
auto result = make_tensor(*backend, *result_type);
stopwatch t1;
t1.start();
float count = 1000;
for (size_t i = 0; i < static_cast<size_t>(count); i++)
else if (tensor)
{
cf->call(args, {result});
rc = backend.make_primary_tensor_view(value.get_element_type(), value.get_shape());
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / count << "ms per iteration\n";
else
{
NGRAPH_INFO;
}
return rc;
}
TEST(benchmark, mxnet_10_bucket_lstm)
void run_benchmark(const std::string& json_path, size_t iterations)
{
bool emit_timing = (std::getenv("NGRAPH_CPU_EMIT_TIMING") != nullptr);
if (!emit_timing)
......@@ -112,37 +94,52 @@ TEST(benchmark, mxnet_10_bucket_lstm)
}
test::Uniform<float> rng{-1, 1, 0};
const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/10_bucket_LSTM.json");
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
runtime::cpu::CPU_CallFrame* cpu_cf = static_cast<runtime::cpu::CPU_CallFrame*>(cf.get());
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::Value>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto arg = make_tensor(*backend, *(param->get_value_type()));
rng.initialize(arg);
auto arg = make_value(*backend, *(param->get_value_type()));
auto tuple = dynamic_pointer_cast<runtime::Tuple>(arg);
auto tensor = dynamic_pointer_cast<runtime::TensorView>(arg);
if (tuple)
{
NGRAPH_INFO;
// Need to initialize this...maybe later
}
else if (tensor)
{
rng.initialize(tensor);
}
else
{
}
args.push_back(arg);
}
shared_ptr<const ValueType> result_type = f->get_result_type();
auto result = make_tensor(*backend, *result_type);
auto result = make_value(*backend, *result_type);
stopwatch t1;
t1.start();
float count = 30;
for (size_t i = 0; i < static_cast<size_t>(count); i++)
for (size_t i = 0; i < static_cast<size_t>(iterations); i++)
{
cf->call(args, {result});
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / count << "ms per iteration\n";
cout << time / iterations << "ms per iteration" << endl;
if (emit_timing)
{
......@@ -160,20 +157,33 @@ TEST(benchmark, mxnet_10_bucket_lstm)
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first
<< "us\n";
}
// size_t count = 30;
// for (const runtime::cpu::PerformanceCounter& p : perf_data)
// {
// if (--count == 0)
// {
// break;
// }
// cout.imbue(locale(""));
// cout << setw(15) << left << p.name() << " " << setw(10) << right
// << p.microseconds() << "us\n";
// }
}
}
// Benchmark the forward pass of the serialized MNIST MLP model.
// Runs 1000 iterations since each pass is cheap (~1ms), keeping timing stable.
TEST(benchmark, mxnet_mnist_mlp_forward)
{
    run_benchmark(file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json"), 1000);
}
// Benchmark the serialized 10-bucket LSTM model. Only 10 iterations because
// each pass is much heavier than the MLP forward benchmark.
TEST(benchmark, mxnet_10_bucket_lstm)
{
    run_benchmark(file_util::path_join(SERIALIZED_ZOO, "mxnet/10_bucket_LSTM.json"), 10);
}
// Benchmark the serialized LSTM backward-pass model (10 iterations).
TEST(benchmark, mxnet_lstm_backward)
{
    run_benchmark(file_util::path_join(SERIALIZED_ZOO, "mxnet/LSTM_backward.json"), 10);
}
// Benchmark the serialized LSTM forward-pass model (10 iterations).
TEST(benchmark, mxnet_lstm_forward)
{
    run_benchmark(file_util::path_join(SERIALIZED_ZOO, "mxnet/LSTM_forward.json"), 10);
}
//
// Benchmarks a graph that concatenates six 32x1x200 arrays along the middle axis.
//
......
......@@ -31,20 +31,3 @@ TEST(eigen, simple)
m(1, 1) = m(1, 0) + m(0, 1);
EXPECT_FLOAT_EQ(m(1, 1), 1.5);
}
// Smoke test: elementwise product of two fixed 8-element arrays computed
// through unaligned Eigen array maps, mirroring the codegen emitter's pattern.
TEST(eigen, test)
{
    using ArrayMap = Eigen::Map<Eigen::Array<float, 8, 1>, Eigen::Unaligned>;
    float lhs[] = {1, 2, 3, 4, 5, 6, 7, 8};
    float rhs[] = {1, 2, 3, 4, 5, 6, 7, 8};
    vector<float> out(8);
    // Write lhs[i] * rhs[i] directly into the vector's storage via the map.
    ArrayMap(out.data()) = ArrayMap(lhs) * ArrayMap(rhs);
    cout << "result " << join(out) << endl;
}
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -126,7 +126,10 @@ TEST(serialize, main)
TEST(serialize, existing_models)
{
vector<string> models = {"mxnet/mnist_mlp_forward.json", "mxnet/10_bucket_LSTM.json"};
vector<string> models = {"mxnet/mnist_mlp_forward.json",
"mxnet/10_bucket_LSTM.json",
"mxnet/LSTM_backward.json",
"mxnet/LSTM_forward.json"};
for (const string& model : models)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment