Commit a1962e76 authored by Ashok Emani's avatar Ashok Emani

add cmake for nbench, address review comments

parent 57ab9e06
...@@ -18,3 +18,4 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIN_NGRAPH_LIBRARY") ...@@ -18,3 +18,4 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIN_NGRAPH_LIBRARY")
add_subdirectory(resource) add_subdirectory(resource)
add_subdirectory(ngraph) add_subdirectory(ngraph)
add_subdirectory(tools)
// ---------------------------------------------------------------------------- /*******************************************************************************
// Copyright 2017 Nervana Systems Inc. * Copyright 2017-2018 Intel Corporation
// Licensed under the Apache License, Version 2.0 (the "License"); *
// you may not use this file except in compliance with the License. * Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 *
// * http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software *
// distributed under the License is distributed on an "AS IS" BASIS, * Unless required by applicable law or agreed to in writing, software
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// ---------------------------------------------------------------------------- * See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <ngraph/file_util.hpp> #include <ngraph/file_util.hpp>
#include <ngraph/ngraph.hpp> #include <ngraph/ngraph.hpp>
...@@ -21,6 +23,7 @@ namespace ngraph ...@@ -21,6 +23,7 @@ namespace ngraph
using TViews = std::vector<std::shared_ptr<runtime::TensorView>>; using TViews = std::vector<std::shared_ptr<runtime::TensorView>>;
using CallFrameIO = std::tuple<CFrame, CFrame, TViews, TViews>; using CallFrameIO = std::tuple<CFrame, CFrame, TViews, TViews>;
/// Create forward/backward call frame(s) and input/output TensorViews for the given function.
CallFrameIO CallFrameIO
get_cfio(std::string backend_type, std::shared_ptr<Function> f, bool backward = false) get_cfio(std::string backend_type, std::shared_ptr<Function> f, bool backward = false)
{ {
...@@ -58,76 +61,4 @@ namespace ngraph ...@@ -58,76 +61,4 @@ namespace ngraph
auto bf_cf = backend->make_call_frame(backward_external); auto bf_cf = backend->make_call_frame(backward_external);
return CallFrameIO{cf, bf_cf, viv, vrv}; return CallFrameIO{cf, bf_cf, viv, vrv};
} }
/// Copy the whole contents of a TensorView into a newly allocated std::vector<T>.
///
/// @param tv  tensor view to read from.
/// @return    a vector holding shape_size(tv->get_shape()) elements read from byte offset 0.
/// @throws std::invalid_argument when T does not match the element type reported by
///         the view's layout.
template <typename T>
static std::vector<T> read_vector(std::shared_ptr<ngraph::runtime::TensorView> tv)
{
    const auto& view_type = tv->get_tensor_view_layout()->get_element_type();
    if (ngraph::element::from<T>() != view_type)
    {
        throw std::invalid_argument("read_vector type must match TensorView type");
    }

    const size_t count = ngraph::shape_size(tv->get_shape());
    std::vector<T> result(count);
    tv->read(result.data(), 0, count * sizeof(T));
    return result;
}
/// Write all elements of `values` into the TensorView, starting at byte offset 0.
///
/// @param tv      destination tensor view.
/// @param values  elements to write; values.size() * sizeof(T) bytes are written.
template <typename T>
inline void write_vector(std::shared_ptr<ngraph::runtime::TensorView> tv,
                         const std::vector<T>& values)
{
    const size_t byte_count = values.size() * sizeof(T);
    const T* source = values.data();
    tv->write(source, 0, byte_count);
}
/// Copy the contents of `data` into the TensorView, starting at byte offset 0.
///
/// @param tv    destination tensor view.
/// @param data  elements to copy; data.size() * sizeof(T) bytes are written.
template <typename T>
inline void copy_data(std::shared_ptr<ngraph::runtime::TensorView> tv,
                      const std::vector<T>& data)
{
    tv->write(data.data(), 0, data.size() * sizeof(T));
}
/// Sum per-node timings by op kind.
///
/// The op kind is the part of each counter's name before the first '_' (the whole
/// name when it contains no '_'). The microseconds() values of all counters that
/// share an op kind are added together.
///
/// @param perf_data  performance counters to aggregate.
/// @return a multimap from accumulated microseconds to op kind, ordered by ascending time.
inline std::multimap<size_t, std::string>
    agregate_timing(const std::vector<runtime::PerformanceCounter>& perf_data)
{
    std::unordered_map<std::string, size_t> timing;
    for (const runtime::PerformanceCounter& p : perf_data)
    {
        // Fetch the name once instead of calling name() twice per counter.
        const std::string& full_name = p.name();
        timing[full_name.substr(0, full_name.find('_'))] += p.microseconds();
    }

    std::multimap<size_t, std::string> rc;
    // `const auto&` binds directly to the map's pair<const string, size_t>; spelling
    // the type as pair<string, size_t> silently copied every entry.
    for (const auto& entry : timing)
    {
        rc.emplace(entry.second, entry.first);
    }
    return rc;
}
/// Initializer that overwrites a TensorView with values drawn from a
/// std::uniform_real_distribution<T>(min, max) driven by a std::default_random_engine
/// constructed from `seed`.
template <typename T>
class Uniform
{
public:
    Uniform(T min, T max, T seed = 0)
        : m_engine(seed)
        , m_distribution(min, max)
    {
        // The generator works on its own copies of the distribution and engine,
        // so drawing values never advances m_engine or m_distribution themselves.
        auto dist = m_distribution;
        auto engine = m_engine;
        m_r = [dist, engine]() mutable { return dist(engine); };
    }

    /// Replace every element of `ptv` with a freshly drawn value and return `ptv`.
    const std::shared_ptr<runtime::TensorView>
        initialize(const std::shared_ptr<runtime::TensorView>& ptv)
    {
        // read_vector supplies a vector of the right length; its contents are overwritten.
        std::vector<T> values = read_vector<T>(ptv);
        for (size_t i = 0; i < values.size(); ++i)
        {
            values[i] = m_r();
        }
        write_vector(ptv, values);
        return ptv;
    }

protected:
    std::default_random_engine m_engine;
    std::uniform_real_distribution<T> m_distribution;
    std::function<T()> m_r;
};
} }
// ---------------------------------------------------------------------------- /*******************************************************************************
// Copyright 2017 Nervana Systems Inc. * Copyright 2017-2018 Intel Corporation
// Licensed under the Apache License, Version 2.0 (the "License"); *
// you may not use this file except in compliance with the License. * Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 *
// * http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software *
// distributed under the License is distributed on an "AS IS" BASIS, * Unless required by applicable law or agreed to in writing, software
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// ---------------------------------------------------------------------------- * See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
// Standalone Google Test example for ngraph. // Standalone Google Test example for ngraph.
// compile and test as follows. // compile and test as follows.
// g++ -std=c++11 simple_gtest.cc -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -lpthread -lgtest -o /tmp/test // g++ -std=c++11 simple_gtest.cpp -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -pthread -lngraph -lgtest -o /tmp/test
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib /tmp/test // env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib /tmp/test
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "nutils.h" #include <ngraph/ngraph.hpp>
#include "../../test/util/test_tools.hpp"
#include "nutils.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
......
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
# Configure the nbench executable. The target is only defined when
# NGRAPH_CPU_ENABLE is set and the platform is not Apple.
if (NGRAPH_CPU_ENABLE AND NOT APPLE)
# Source files that make up the nbench executable.
set (SRC
nbench.cpp
)
add_executable(nbench ${SRC})
# Make sure the ngraph target is built before nbench.
add_dependencies(nbench ngraph)
# Preprocessor definition that passes NGRAPH_INCLUDE_PATH to the source as the
# quoted string macro NGRAPH_HEADERS_PATH.
set(HEADER_SEARCH_DEFINES
"NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\""
)
target_link_libraries(nbench ngraph)
# The definition is attached to nbench.cpp only, not to the whole target.
set_source_files_properties(nbench.cpp PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
endif()
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
// tool to benchmark any ngraph json model with given backend.
// compile and run with:
// g++ ./nbench.cc -std=c++11 -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -o nbench
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib env NGRAPH_CPU_EMIT_TIMING=1 ./nbench
#include <fstream>
#include <gtest/gtest.h>
#include "clipp.h"
#include "nutils.h"
using namespace std;
using namespace ngraph;
void run_benchmark(const string& json_path, const string& backend_name, size_t iterations)
{
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
}
Uniform<float> rng{-1, 1, 0};
const string json_string = ngraph::file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor =
backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < static_cast<size_t>(iterations); i++)
{
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = agregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
}
/// Entry point of the nbench tool (clipp-based command line).
///
/// Options: -f <filename> (default "model.json"), -b <backend> (default "CPU"),
/// -i <iterations> (default 10). Prints the man page and returns 1 when parsing
/// fails, the iteration count is not positive, or the model file cannot be opened.
int main(int argc, char** argv)
{
    string model = "model.json";
    string backend = "CPU";
    int iter = 10;

    auto cli =
        ("model json file to use (default: model.json)" % clipp::option("-f") &
             clipp::value("filename", model),
         "Backed to use (default: CPU)" % clipp::option("-b") & clipp::value("backend", backend),
         "Iterations (default: 10)" % clipp::option("-i") & clipp::value("iterations", iter));

    // `iter` is converted to size_t by run_benchmark, so a negative value would
    // wrap to an enormous iteration count; reject non-positive counts up front.
    if (!clipp::parse(argc, argv, cli) || iter <= 0 || !static_cast<bool>(ifstream(model)))
    {
        cout << clipp::make_man_page(cli, argv[0])
                    .prepend_section("DESCRIPTION",
                                     " Benchmark ngraph json model with given backend.");
        return 1;
    }

    cout << "Benchmarking " << model << ", " << backend << " backend, " << iter << " iterations.\n";
    run_benchmark(model, backend, iter);
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
// tool to benchmark any ngraph json model with given backend.
// compile and run with:
// g++ ./nbench.cpp -std=c++11 -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -o nbench
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib env NGRAPH_INTERPRETER_EMIT_TIMING=1 ./nbench
// sample models are under ../../test/models
#include <fstream>
#include <ngraph/runtime/call_frame.hpp>
#include <ngraph/runtime/manager.hpp>
#include <ngraph/runtime/backend.hpp>
#include "../../test/util/test_tools.hpp"
using namespace std;
int main(int argc, char** argv)
{
string model = "model.json";
string backend = "INTERPRETER";
int iter = 10;
bool failed = false;
for (size_t i = 1; i < argc; i++)
{
if (string(argv[i]) == "-f")
{
model = argv[++i];
}
else if (string(argv[i]) == "-b")
{
backend = argv[++i];
}
else if (string(argv[i]) == "-i")
{
try
{
iter = stoi(argv[++i]);
}
catch (...)
{
cout << "Invalid Argument\n";
failed = true;
}
}
}
if (!static_cast<bool>(ifstream(model)))
{
cout << "File " << model << " not found\n";
failed = true;
}
if (failed)
{
cout << R"###(
DESCRIPTION
Benchmark ngraph json model with given backend.
SYNOPSIS
/tmp/nbench [-f <filename>] [-b <backend>] [-i <iterations>]
OPTIONS
-f model json file to use (default: model.json)
-b Backed to use (default: INTERPRETER)
-i Iterations (default: 10)
)###";
return 1;
}
cout << "Benchmarking " << model << ", " << backend << " backend, " << iter << " iterations.\n";
run_benchmark(model, backend, iter);
}
...@@ -37,86 +37,6 @@ ...@@ -37,86 +37,6 @@
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
/// Sum per-node timings by op kind.
///
/// The op kind is the part of each counter's name before the first '_' (the whole
/// name when it contains no '_'). The microseconds() values of all counters that
/// share an op kind are added together.
///
/// @param perf_data  performance counters to aggregate.
/// @return a multimap from accumulated microseconds to op kind, ordered by ascending time.
static multimap<size_t, string>
    agregate_timing(const vector<runtime::PerformanceCounter>& perf_data)
{
    unordered_map<string, size_t> timing;
    for (const runtime::PerformanceCounter& p : perf_data)
    {
        // Fetch the name once instead of calling name() twice per counter.
        const string& full_name = p.name();
        timing[full_name.substr(0, full_name.find('_'))] += p.microseconds();
    }

    multimap<size_t, string> rc;
    // `const auto&` binds directly to the map's pair<const string, size_t>; spelling
    // the type as pair<string, size_t> silently copied every entry.
    for (const auto& entry : timing)
    {
        rc.emplace(entry.second, entry.first);
    }
    return rc;
}
void run_benchmark(const string& json_path, const string& backend_name, size_t iterations)
{
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
}
test::Uniform<float> rng{-1, 1, 0};
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor =
backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < static_cast<size_t>(iterations); i++)
{
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = agregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
}
TEST(benchmark, mxnet_mnist_mlp_forward) TEST(benchmark, mxnet_mnist_mlp_forward)
{ {
const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json"); const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json");
......
...@@ -21,7 +21,9 @@ ...@@ -21,7 +21,9 @@
#include <memory> #include <memory>
#include "ngraph/descriptor/layout/tensor_view_layout.hpp" #include "ngraph/descriptor/layout/tensor_view_layout.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/tensor_view.hpp" #include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/serializer.hpp"
namespace ngraph namespace ngraph
{ {
...@@ -73,3 +75,115 @@ size_t count_ops_of_type(std::shared_ptr<ngraph::Function> f) ...@@ -73,3 +75,115 @@ size_t count_ops_of_type(std::shared_ptr<ngraph::Function> f)
return count; return count;
} }
/// performance test utilities
/// Sum per-node timings by op kind.
///
/// The op kind is the part of each counter's name before the first '_' (the whole
/// name when it contains no '_'). The microseconds() values of all counters that
/// share an op kind are added together.
///
/// @param perf_data  performance counters to aggregate.
/// @return a multimap from accumulated microseconds to op kind, ordered by ascending time.
inline std::multimap<size_t, std::string>
    agregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data)
{
    std::unordered_map<std::string, size_t> timing;
    for (const ngraph::runtime::PerformanceCounter& p : perf_data)
    {
        // Fetch the name once instead of calling name() twice per counter.
        const std::string& full_name = p.name();
        timing[full_name.substr(0, full_name.find('_'))] += p.microseconds();
    }

    std::multimap<size_t, std::string> rc;
    // `const auto&` binds directly to the map's pair<const string, size_t>; spelling
    // the type as pair<string, size_t> silently copied every entry.
    for (const auto& entry : timing)
    {
        rc.emplace(entry.second, entry.first);
    }
    return rc;
}
/// Initializer that overwrites a TensorView with values drawn from a
/// std::uniform_real_distribution<T>(min, max) driven by a std::default_random_engine
/// constructed from `seed`.
template <typename T>
class Uniform
{
public:
    Uniform(T min, T max, T seed = 0)
        : m_engine(seed)
        , m_distribution(min, max)
    {
        // The generator works on its own copies of the distribution and engine,
        // so drawing values never advances m_engine or m_distribution themselves.
        auto dist = m_distribution;
        auto engine = m_engine;
        m_r = [dist, engine]() mutable { return dist(engine); };
    }

    /// Replace every element of `ptv` with a freshly drawn value and return `ptv`.
    const std::shared_ptr<ngraph::runtime::TensorView>
        initialize(const std::shared_ptr<ngraph::runtime::TensorView>& ptv)
    {
        // read_vector supplies a vector of the right length; its contents are overwritten.
        std::vector<T> values = read_vector<T>(ptv);
        for (size_t i = 0; i < values.size(); ++i)
        {
            values[i] = m_r();
        }
        write_vector(ptv, values);
        return ptv;
    }

protected:
    std::default_random_engine m_engine;
    std::uniform_real_distribution<T> m_distribution;
    std::function<T()> m_r;
};
/// Benchmark a serialized ngraph function on a given backend.
///
/// Deserializes the function stored in `json_path`, compiles it through
/// runtime::Manager for `backend_name`, fills every parameter tensor from a
/// Uniform<float> generator constructed with (-1, 1, seed 0), and then invokes the
/// call frame `iterations` times. Prints the build time, the mean time per
/// iteration, and the per-op timings returned by agregate_timing, largest first.
///
/// @param json_path     path of the serialized function (JSON).
/// @param backend_name  backend to run on; also used to form the name of the
///                      NGRAPH_<backend>_EMIT_TIMING environment variable.
/// @param iterations    number of timed tensor_call invocations.
static void
    run_benchmark(const std::string& json_path, const std::string& backend_name, size_t iterations)
{
    using namespace std;
    using namespace ngraph;

    const string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
    const bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
    if (!emit_timing)
    {
        cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
    }

    Uniform<float> rng{-1, 1, 0};

    const string json_string = file_util::read_file_to_string(json_path);
    stringstream ss(json_string);
    shared_ptr<Function> f = deserialize(ss);

    // Build time covers compilation, backend allocation and call-frame creation.
    stopwatch build_time;
    build_time.start();
    auto manager = runtime::Manager::get(backend_name);
    auto external = manager->compile(f);
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(external);
    build_time.stop();
    cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;

    // One randomly initialized input tensor per function parameter.
    vector<shared_ptr<runtime::TensorView>> args;
    for (const auto& param : f->get_parameters())
    {
        auto tensor =
            backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
        rng.initialize(tensor);
        args.push_back(tensor);
    }

    // One output tensor per function result.
    vector<shared_ptr<runtime::TensorView>> results;
    for (const auto& out : f->get_results())
    {
        auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
        results.push_back(result);
    }

    stopwatch t1;
    t1.start();
    for (size_t i = 0; i < iterations; i++)
    {
        cf->tensor_call(args, results);
    }
    t1.stop();
    float time = t1.get_milliseconds();
    cout << time / iterations << "ms per iteration" << endl;

    vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
    sort(perf_data.begin(),
         perf_data.end(),
         [](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
             return p1.total_microseconds() > p2.total_microseconds();
         });
    multimap<size_t, string> timing = agregate_timing(perf_data);

    // Install the user's locale once, and only when there are rows to print;
    // re-imbuing cout for every row was loop-invariant work.
    if (!timing.empty())
    {
        cout.imbue(locale(""));
    }
    // Reverse iteration prints the most expensive op first.
    for (auto it = timing.rbegin(); it != timing.rend(); ++it)
    {
        cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment