Commit a1962e76 authored by Ashok Emani's avatar Ashok Emani

add cmake for nbench, address review comments

parent 57ab9e06
......@@ -18,3 +18,4 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIN_NGRAPH_LIBRARY")
add_subdirectory(resource)
add_subdirectory(ngraph)
add_subdirectory(tools)
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <ngraph/file_util.hpp>
#include <ngraph/ngraph.hpp>
......@@ -21,6 +23,7 @@ namespace ngraph
using TViews = std::vector<std::shared_ptr<runtime::TensorView>>;
using CallFrameIO = std::tuple<CFrame, CFrame, TViews, TViews>;
/// Create forward/backward call frame(s) and input/output TensorViews for the given function.
CallFrameIO
get_cfio(std::string backend_type, std::shared_ptr<Function> f, bool backward = false)
{
......@@ -58,76 +61,4 @@ namespace ngraph
auto bf_cf = backend->make_call_frame(backward_external);
return CallFrameIO{cf, bf_cf, viv, vrv};
}
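// Hypothetical usage sketch for get_cfio (the names cf, bf_cf, inputs, and
// outputs are illustrative only): unpack the returned tuple and run a
// forward pass.
//
//     CFrame cf, bf_cf;
//     TViews inputs, outputs;
//     std::tie(cf, bf_cf, inputs, outputs) = get_cfio("CPU", f, true);
//     cf->tensor_call(inputs, outputs);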
template <typename T>
static std::vector<T> read_vector(std::shared_ptr<ngraph::runtime::TensorView> tv)
{
if (ngraph::element::from<T>() != tv->get_tensor_view_layout()->get_element_type())
{
throw std::invalid_argument("read_vector type must match TensorView type");
}
size_t element_count = ngraph::shape_size(tv->get_shape());
size_t size = element_count * sizeof(T);
std::vector<T> rc(element_count);
tv->read(rc.data(), 0, size);
return rc;
}
template <typename T>
inline void write_vector(std::shared_ptr<ngraph::runtime::TensorView> tv,
const std::vector<T>& values)
{
tv->write(values.data(), 0, values.size() * sizeof(T));
}
template <typename T>
inline void copy_data(std::shared_ptr<ngraph::runtime::TensorView> tv,
const std::vector<T>& data)
{
size_t data_size = data.size() * sizeof(T);
tv->write(data.data(), 0, data_size);
}
inline std::multimap<size_t, std::string>
agregate_timing(const std::vector<runtime::PerformanceCounter>& perf_data)
{
std::unordered_map<std::string, size_t> timing;
for (const runtime::PerformanceCounter& p : perf_data)
{
std::string op = p.name().substr(0, p.name().find('_'));
timing[op] += p.microseconds();
}
std::multimap<size_t, std::string> rc;
for (const std::pair<std::string, size_t>& t : timing)
{
rc.insert({t.second, t.first});
}
return rc;
}
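// Worked example (hypothetical counters): "Add_0" at 5us and "Add_3" at 7us
// both aggregate under the key "Add" for a total of 12us. Because the result
// is keyed by total time, iterating it in reverse yields op types from most
// to least expensive.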
template <typename T>
class Uniform
{
public:
Uniform(T min, T max, T seed = 0)
: m_engine(seed)
, m_distribution(min, max)
, m_r(std::bind(m_distribution, m_engine))
{
}
const std::shared_ptr<runtime::TensorView>
initialize(const std::shared_ptr<runtime::TensorView>& ptv)
{
std::vector<T> vec = read_vector<T>(ptv);
for (T& elt : vec)
{
elt = m_r();
}
write_vector(ptv, vec);
return ptv;
}
protected:
std::default_random_engine m_engine;
std::uniform_real_distribution<T> m_distribution;
std::function<T()> m_r;
};
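// Hypothetical usage sketch, mirroring how nbench seeds its inputs: draw
// values uniformly from [-1, 1) with a fixed seed so runs are reproducible.
//
//     Uniform<float> rng{-1, 1, 0};
//     auto tensor = backend->make_primary_tensor_view(param->get_element_type(),
//                                                     param->get_shape());
//     rng.initialize(tensor);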
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
// Standalone Google Test example for ngraph.
// Compile and run as follows:
// g++ -std=c++11 simple_gtest.cc -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -lpthread -lgtest -o /tmp/test
// g++ -std=c++11 simple_gtest.cpp -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -pthread -lngraph -lgtest -o /tmp/test
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib /tmp/test
#include <gtest/gtest.h>
#include "nutils.h"
#include <ngraph/ngraph.hpp>
#include "../../test/util/test_tools.hpp"
#include "nutils.hpp"
using namespace std;
using namespace ngraph;
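// A minimal test sketch (hypothetical; the real tests are elided below). It
// uses get_cfio and Uniform from nutils.hpp; build_model stands in for
// whatever Function the file actually constructs.
//
//     TEST(simple, forward_pass)
//     {
//         std::shared_ptr<Function> f = build_model(); // hypothetical helper
//         CFrame cf, bf_cf;
//         TViews inputs, outputs;
//         std::tie(cf, bf_cf, inputs, outputs) = get_cfio("CPU", f);
//         Uniform<float> rng{-1, 1, 0};
//         for (auto& tv : inputs)
//             rng.initialize(tv);
//         cf->tensor_call(inputs, outputs); // results land in outputs
//     }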
......
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
if (NGRAPH_CPU_ENABLE AND NOT APPLE)
set (SRC
nbench.cpp
)
add_executable(nbench ${SRC})
add_dependencies(nbench ngraph)
set(HEADER_SEARCH_DEFINES
"NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\""
)
target_link_libraries(nbench ngraph)
set_source_files_properties(nbench.cpp PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
endif()
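# A build sketch (assuming the standard out-of-tree ngraph build; paths and
# options may differ in your checkout):
#   mkdir build && cd build
#   cmake .. -DNGRAPH_CPU_ENABLE=TRUE
#   make nbench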
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
// Tool to benchmark any ngraph JSON model with a given backend.
// Compile and run with:
// g++ ./nbench.cc -std=c++11 -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -o nbench
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib NGRAPH_CPU_EMIT_TIMING=1 ./nbench
#include <fstream>
#include <gtest/gtest.h>
#include "clipp.h"
#include "nutils.h"
using namespace std;
using namespace ngraph;
void run_benchmark(const string& json_path, const string& backend_name, size_t iterations)
{
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
}
Uniform<float> rng{-1, 1, 0};
const string json_string = ngraph::file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor =
backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < iterations; i++)
{
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = agregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
}
int main(int argc, char** argv)
{
string model = "model.json";
string backend = "CPU";
int iter = 10;
auto cli =
("model json file to use (default: model.json)" % clipp::option("-f") &
clipp::value("filename", model),
"Backed to use (default: CPU)" % clipp::option("-b") & clipp::value("backend", backend),
"Iterations (default: 10)" % clipp::option("-i") & clipp::value("iterations", iter));
if (!clipp::parse(argc, argv, cli) || !static_cast<bool>(ifstream(model)))
{
cout << clipp::make_man_page(cli, argv[0])
.prepend_section("DESCRIPTION",
" Benchmark ngraph json model with given backend.");
return 1;
}
cout << "Benchmarking " << model << ", " << backend << " backend, " << iter << " iterations.\n";
run_benchmark(model, backend, iter);
}
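// Example invocation (hypothetical model file; flags as parsed above):
//   env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib NGRAPH_CPU_EMIT_TIMING=1 \
//       ./nbench -f model.json -b CPU -i 100
// This prints the build time, the per-iteration latency, and, when the timing
// environment variable is set, a per-op timing table.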
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
// Tool to benchmark any ngraph JSON model with a given backend.
// Compile and run with:
// g++ ./nbench.cpp -std=c++11 -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -o nbench
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib NGRAPH_INTERPRETER_EMIT_TIMING=1 ./nbench
// Sample models are under ../../test/models.
#include <fstream>
#include <ngraph/runtime/call_frame.hpp>
#include <ngraph/runtime/manager.hpp>
#include <ngraph/runtime/backend.hpp>
#include "../../test/util/test_tools.hpp"
using namespace std;
int main(int argc, char** argv)
{
string model = "model.json";
string backend = "INTERPRETER";
int iter = 10;
bool failed = false;
for (int i = 1; i < argc; i++)
{
if (string(argv[i]) == "-f")
{
model = argv[++i];
}
else if (string(argv[i]) == "-b")
{
backend = argv[++i];
}
else if (string(argv[i]) == "-i")
{
try
{
iter = stoi(argv[++i]);
}
catch (...)
{
cout << "Invalid Argument\n";
failed = true;
}
}
}
if (!static_cast<bool>(ifstream(model)))
{
cout << "File " << model << " not found\n";
failed = true;
}
if (failed)
{
cout << R"###(
DESCRIPTION
Benchmark ngraph json model with given backend.
SYNOPSIS
/tmp/nbench [-f <filename>] [-b <backend>] [-i <iterations>]
OPTIONS
-f Model JSON file to use (default: model.json)
-b Backend to use (default: INTERPRETER)
-i Iterations (default: 10)
)###";
return 1;
}
cout << "Benchmarking " << model << ", " << backend << " backend, " << iter << " iterations.\n";
run_benchmark(model, backend, iter);
}
......@@ -37,86 +37,6 @@
using namespace std;
using namespace ngraph;
static multimap<size_t, string>
agregate_timing(const vector<runtime::PerformanceCounter>& perf_data)
{
unordered_map<string, size_t> timing;
for (const runtime::PerformanceCounter& p : perf_data)
{
string op = p.name().substr(0, p.name().find('_'));
timing[op] += p.microseconds();
}
multimap<size_t, string> rc;
for (const pair<string, size_t>& t : timing)
{
rc.insert({t.second, t.first});
}
return rc;
}
void run_benchmark(const string& json_path, const string& backend_name, size_t iterations)
{
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
}
test::Uniform<float> rng{-1, 1, 0};
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor =
backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < iterations; i++)
{
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = agregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
}
TEST(benchmark, mxnet_mnist_mlp_forward)
{
const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json");
......
......@@ -21,7 +21,9 @@
#include <memory>
#include "ngraph/descriptor/layout/tensor_view_layout.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/serializer.hpp"
namespace ngraph
{
......@@ -73,3 +75,115 @@ size_t count_ops_of_type(std::shared_ptr<ngraph::Function> f)
return count;
}
/// Performance test utilities.
inline std::multimap<size_t, std::string>
agregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data)
{
std::unordered_map<std::string, size_t> timing;
for (const ngraph::runtime::PerformanceCounter& p : perf_data)
{
std::string op = p.name().substr(0, p.name().find('_'));
timing[op] += p.microseconds();
}
std::multimap<size_t, std::string> rc;
for (const std::pair<std::string, size_t>& t : timing)
{
rc.insert({t.second, t.first});
}
return rc;
}
template <typename T>
class Uniform
{
public:
Uniform(T min, T max, T seed = 0)
: m_engine(seed)
, m_distribution(min, max)
, m_r(std::bind(m_distribution, m_engine))
{
}
const std::shared_ptr<ngraph::runtime::TensorView>
initialize(const std::shared_ptr<ngraph::runtime::TensorView>& ptv)
{
std::vector<T> vec = read_vector<T>(ptv);
for (T& elt : vec)
{
elt = m_r();
}
write_vector(ptv, vec);
return ptv;
}
protected:
std::default_random_engine m_engine;
std::uniform_real_distribution<T> m_distribution;
std::function<T()> m_r;
};
static void
run_benchmark(const std::string& json_path, const std::string& backend_name, size_t iterations)
{
using namespace std;
using namespace ngraph;
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
}
Uniform<float> rng{-1, 1, 0};
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor =
backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < iterations; i++)
{
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = agregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
}
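// Hypothetical usage from a test, mirroring the benchmark tests elsewhere in
// this commit:
//
//     TEST(benchmark, my_model)
//     {
//         const std::string json_path =
//             ngraph::file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json");
//         run_benchmark(json_path, "INTERPRETER", 10);
//     }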