Commit 8a1b07a9 authored by Ashok Emani's avatar Ashok Emani

refactor tools

parent bf8cea00
......@@ -55,6 +55,7 @@ nervana_aeon.egg-info/
# vim
*.swp
*.swo
tags
build/
......
This source diff could not be displayed because it is too large. You can view the blob instead.
// compile and run with
// g++ ./nbench.cc -std=c++11 -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -o nbench
// env NGRAPH_CPU_EMIT_TIMING=1 ./nbench
#include <bits/stdc++.h>
#include <ngraph/file_util.hpp>
#include <ngraph/ngraph.hpp>
#include <ngraph/serializer.hpp>
#include "clipp.h"
using namespace std;
using namespace ngraph;
// Reads the contents of a tensor view into a newly allocated vector<T>.
// The vector holds shape_size(tv->get_shape()) elements, read from offset 0.
// Throws invalid_argument when T is not the element type of the view.
template <typename T>
vector<T> read_vector(shared_ptr<ngraph::runtime::TensorView> tv) {
  const bool type_mismatch =
      ngraph::element::from<T>() !=
      tv->get_tensor_view_layout()->get_element_type();
  if (type_mismatch) {
    throw invalid_argument("read_vector type must match TensorView type");
  }
  vector<T> contents(ngraph::shape_size(tv->get_shape()));
  tv->read(contents.data(), 0, contents.size() * sizeof(T));
  return contents;
}
// Writes all elements of `values` into the tensor view at offset 0.
template <typename T>
void write_vector(std::shared_ptr<ngraph::runtime::TensorView> tv,
                  const std::vector<T>& values) {
  const size_t byte_count = sizeof(T) * values.size();
  tv->write(values.data(), 0, byte_count);
}
// Copies the elements of `data` into the tensor view, starting at offset 0.
template <typename T>
void copy_data(shared_ptr<ngraph::runtime::TensorView> tv,
               const vector<T>& data) {
  tv->write(data.data(), 0, sizeof(T) * data.size());
}
// Groups performance counters by op kind and returns them keyed by time.
//
// The op kind is the counter name up to (not including) its first '_'; a name
// without '_' is used whole. The microseconds() values of all counters sharing
// a kind are summed. The result maps summed time -> op kind, so iterating it
// in reverse visits the most expensive kinds first.
static multimap<size_t, string> agregate_timing(
    const vector<runtime::PerformanceCounter>& perf_data) {
  unordered_map<string, size_t> timing;
  for (const runtime::PerformanceCounter& p : perf_data) {
    const string& name = p.name();
    timing[name.substr(0, name.find('_'))] += p.microseconds();
  }
  multimap<size_t, string> rc;
  // `const auto&` binds directly to the map's pair<const string, size_t>;
  // spelling the type as pair<string, size_t> would copy every entry.
  for (const auto& t : timing) {
    rc.emplace(t.second, t.first);
  }
  return rc;
}
// Fills tensor views with pseudo-random values produced by a
// std::uniform_real_distribution<T>(min, max) driven by a
// std::default_random_engine.
template <typename T>
class Uniform {
 public:
  // `seed` seeds the engine. The generator kept in m_r is created with
  // std::bind, which stores its own copies of the distribution and engine, so
  // all later draws advance the state held inside m_r.
  Uniform(T min, T max, T seed = 0)
      : m_engine(seed),
        m_distribution(min, max),
        m_r(std::bind(m_distribution, m_engine)) {}

  // Replaces every element of `ptv` with a freshly drawn value and returns the
  // same pointer. read_vector<T> provides a buffer of the right length and
  // throws if T is not the element type of the view.
  const std::shared_ptr<runtime::TensorView> initialize(
      const std::shared_ptr<runtime::TensorView>& ptv) {
    std::vector<T> values = read_vector<T>(ptv);
    for (auto it = values.begin(); it != values.end(); ++it) {
      *it = m_r();
    }
    write_vector(ptv, values);
    return ptv;
  }

 protected:
  std::default_random_engine m_engine;
  std::uniform_real_distribution<T> m_distribution;
  std::function<T()> m_r;
};
void run_benchmark(const string& json_path, const string& backend_name,
size_t iterations) {
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing) {
cout << "To get per-op timing set the environment variable " << env_var_name
<< "\n";
}
Uniform<float> rng{-1, 1, 0};
const string json_string = ngraph::file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters()) {
auto tensor = backend->make_primary_tensor_view(param->get_element_type(),
param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results()) {
auto result = backend->make_primary_tensor_view(out->get_element_type(),
out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < static_cast<size_t>(iterations); i++) {
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(), perf_data.end(),
[](const runtime::PerformanceCounter& p1,
const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = agregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++) {
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right
<< it->first << "us\n";
}
}
// Command-line entry point of the benchmark tool.
//
// Options: -f <filename> (default model.json), -b <backend> (default CPU),
// -i <iterations> (default 10). Prints the usage page and returns 1 when the
// arguments cannot be parsed, the iteration count is not positive, or the
// model file cannot be opened; otherwise runs the benchmark and returns 0.
int main(int argc, char** argv) {
  string model = "model.json";
  string backend = "CPU";
  int iter = 10;
  auto cli =
      ("model json file to use (default: model.json)" % clipp::option("-f") &
           clipp::value("filename", model),
       "Backend to use (default: CPU)" % clipp::option("-b") &
           clipp::value("backend", backend),
       "Iterations (default: 10)" % clipp::option("-i") &
           clipp::value("iterations", iter));

  // A non-positive count must be rejected here: run_benchmark takes a size_t,
  // so a negative value would silently become a huge iteration count, and 0
  // would make the per-iteration average a division of 0 by 0.
  const bool args_ok = clipp::parse(argc, argv, cli) && iter > 0 &&
                       static_cast<bool>(ifstream(model));
  if (!args_ok) {
    cout << clipp::make_man_page(cli, argv[0])
                .prepend_section(
                    "DESCRIPTION",
                    "    Benchmark ngraph json model with given backend.");
    return 1;
  }

  cout << "Benchmarking " << model << ", " << backend << " backend, " << iter
       << " iterations.\n";
  run_benchmark(model, backend, static_cast<size_t>(iter));
  return 0;
}
// compile and test as follows.
// g++ -std=c++11 simple_test.cc -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -lpthread -lgtest -o /tmp/test
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib /tmp/test
#include <bits/stdc++.h>
#include <ngraph/ngraph.hpp>
#include "gtest/gtest.h"
using namespace std;
using namespace ngraph;
// Reads the contents of a tensor view into a newly allocated vector<T>.
// The vector holds shape_size(tv->get_shape()) elements, read from offset 0.
// Throws invalid_argument when T is not the element type of the view.
template <typename T>
vector<T> read_vector(shared_ptr<ngraph::runtime::TensorView> tv) {
  const bool type_mismatch =
      ngraph::element::from<T>() !=
      tv->get_tensor_view_layout()->get_element_type();
  if (type_mismatch) {
    throw invalid_argument("read_vector type must match TensorView type");
  }
  vector<T> contents(ngraph::shape_size(tv->get_shape()));
  tv->read(contents.data(), 0, contents.size() * sizeof(T));
  return contents;
}
// Copies the elements of `data` into the tensor view, starting at offset 0.
template <typename T>
void copy_data(shared_ptr<ngraph::runtime::TensorView> tv,
               const vector<T>& data) {
  tv->write(data.data(), 0, sizeof(T) * data.size());
}
// Builds X / Y on 2x2 f32 tensors, runs it on the INTERPRETER backend and
// checks the forward result as well as the gradients with respect to X and Y
// for an all-ones adjoint.
TEST(simple, test) {
  auto manager = runtime::Manager::get("INTERPRETER");
  auto backend = manager->allocate_backend();
  Shape shape{2, 2};

  // Forward function: quotient = X / Y.
  auto X = make_shared<op::Parameter>(element::f32, shape);
  auto Y = make_shared<op::Parameter>(element::f32, shape);
  auto quotient = make_shared<op::Divide>(X, Y);
  auto forward_fn =
      make_shared<Function>(quotient, vector<shared_ptr<op::Parameter>>{X, Y});

  // Backward function: takes the adjoint C plus X and Y, yields d/dX and d/dY.
  auto C = make_shared<op::Parameter>(element::f32, shape);
  vector<shared_ptr<Node>> gradients;
  gradients.push_back(quotient->backprop_node(X, C));
  gradients.push_back(quotient->backprop_node(Y, C));
  auto backward_fn = make_shared<Function>(
      gradients, vector<shared_ptr<op::Parameter>>{C, X, Y});

  auto forward_external = manager->compile(forward_fn);
  auto forward_frame = backend->make_call_frame(forward_external);
  auto backward_external = manager->compile(backward_fn);
  auto backward_frame = backend->make_call_frame(backward_external);

  // Forward pass.
  auto a = backend->make_primary_tensor_view(element::f32, shape);
  copy_data(a, vector<float>{2, 4, 8, 16});
  auto b = backend->make_primary_tensor_view(element::f32, shape);
  copy_data(b, vector<float>{1, 2, 4, 8});
  auto result = backend->make_primary_tensor_view(element::f32, shape);
  forward_frame->call({a, b}, {result});
  EXPECT_EQ((vector<float>{2, 2, 2, 2}), read_vector<float>(result));

  // Backward pass with an all-ones adjoint.
  auto c = backend->make_primary_tensor_view(element::f32, shape);
  copy_data(c, vector<float>{1, 1, 1, 1});
  auto grad_a = backend->make_primary_tensor_view(element::f32, shape);
  auto grad_b = backend->make_primary_tensor_view(element::f32, shape);
  backward_frame->call({c, a, b}, {grad_a, grad_b});
  EXPECT_EQ((vector<float>{1, 0.5, 0.25, 0.125}), read_vector<float>(grad_a));
  EXPECT_EQ((vector<float>{-2, -1, -0.5, -0.25}), read_vector<float>(grad_b));
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
This source diff could not be displayed because it is too large. You can view the blob instead.
// Generated by the NGraph CPU backend
#include <cmath>
#include <tbb/flow_graph.h>
#include <Eigen/Dense>
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/kernel/avg_pool.hpp"
#include "ngraph/runtime/kernel/broadcast.hpp"
#include "ngraph/runtime/kernel/concat.hpp"
#include "ngraph/runtime/kernel/convolution.hpp"
#include "ngraph/runtime/kernel/dot.hpp"
#include "ngraph/runtime/kernel/max_pool.hpp"
#include "ngraph/runtime/kernel/not.hpp"
#include "ngraph/runtime/kernel/one_hot.hpp"
#include "ngraph/runtime/kernel/pad.hpp"
#include "ngraph/runtime/kernel/reduce.hpp"
#include "ngraph/runtime/kernel/reduce_window.hpp"
#include "ngraph/runtime/kernel/replace_slice.hpp"
#include "ngraph/runtime/kernel/reverse.hpp"
#include "ngraph/runtime/kernel/select_and_scatter.hpp"
#include "ngraph/runtime/kernel/slice.hpp"
#include "ngraph/runtime/kernel/sum.hpp"
#include "ngraph/util.hpp"
using namespace ngraph::runtime::cpu::eigen;
using namespace ngraph::runtime;
void *__dso_handle = 0;
// Declare all constants
// Declare all functions
extern "C" void Function_0(void** inputs, void** outputs);
// Element-wise product of two 4-element float buffers:
// outputs[0][i] = inputs[0][i] * inputs[1][i] for i in [0, 4).
extern "C" void Function_0(void** inputs, void** outputs)
{
    // Multiply_2
    const float* lhs = static_cast<const float*>(inputs[0]);
    const float* rhs = static_cast<const float*>(inputs[1]);
    float* product = static_cast<float*>(outputs[0]);
#pragma omp parallel for
    for (size_t i = 0; i < 4; i++)
    {
        product[i] = lhs[i] * rhs[i];
    }
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
// tool to benchmark any ngraph json model with given backend.
// compile and run with:
// g++ ./nbench.cc -std=c++11 -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -o nbench
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib env NGRAPH_CPU_EMIT_TIMING=1 ./nbench
#include <fstream>
#include <gtest/gtest.h>
#include "clipp.h"
#include "nutils.h"
using namespace std;
using namespace ngraph;
void run_benchmark(const string& json_path, const string& backend_name, size_t iterations)
{
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
}
Uniform<float> rng{-1, 1, 0};
const string json_string = ngraph::file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor =
backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < static_cast<size_t>(iterations); i++)
{
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = agregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
}
// Command-line entry point of the benchmark tool.
//
// Options: -f <filename> (default model.json), -b <backend> (default CPU),
// -i <iterations> (default 10). Prints the usage page and returns 1 when the
// arguments cannot be parsed, the iteration count is not positive, or the
// model file cannot be opened; otherwise runs the benchmark and returns 0.
int main(int argc, char** argv)
{
    string model = "model.json";
    string backend = "CPU";
    int iter = 10;
    auto cli =
        ("model json file to use (default: model.json)" % clipp::option("-f") &
             clipp::value("filename", model),
         "Backend to use (default: CPU)" % clipp::option("-b") & clipp::value("backend", backend),
         "Iterations (default: 10)" % clipp::option("-i") & clipp::value("iterations", iter));

    // A non-positive count must be rejected here: run_benchmark takes a size_t,
    // so a negative value would silently become a huge iteration count, and 0
    // would make the per-iteration average a division of 0 by 0.
    const bool args_ok =
        clipp::parse(argc, argv, cli) && iter > 0 && static_cast<bool>(ifstream(model));
    if (!args_ok)
    {
        cout << clipp::make_man_page(cli, argv[0])
                    .prepend_section("DESCRIPTION",
                                     "    Benchmark ngraph json model with given backend.");
        return 1;
    }

    cout << "Benchmarking " << model << ", " << backend << " backend, " << iter << " iterations.\n";
    run_benchmark(model, backend, static_cast<size_t>(iter));
    return 0;
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include <ngraph/file_util.hpp>
#include <ngraph/ngraph.hpp>
#include <ngraph/serializer.hpp>
namespace ngraph
{
using CFrame = std::shared_ptr<runtime::CallFrame>;
using TViews = std::vector<std::shared_ptr<runtime::TensorView>>;
using CallFrameIO = std::tuple<CFrame, CFrame, TViews, TViews>;
/// Compiles `f` for a backend and allocates the tensor views needed to call it.
///
/// \param backend_type Name handed to runtime::Manager::get.
/// \param f            Function to compile.
/// \param backward     When true, also builds and compiles a function that
///                     computes the gradient of f's result with respect to
///                     each parameter of f.
/// \return Tuple (forward frame, backward frame, inputs, outputs).
///         - backward == false: the backward frame is null, inputs hold one
///           tensor per parameter and outputs one tensor per output of f.
///         - backward == true: inputs additionally end with the adjoint tensor
///           C, and outputs additionally end with one gradient tensor per
///           parameter. The backward function's parameter order is C followed
///           by the parameters of f.
///
/// Declared inline because it is defined in a header; otherwise every
/// translation unit including the header would emit its own definition.
inline CallFrameIO
    get_cfio(std::string backend_type, std::shared_ptr<Function> f, bool backward = false)
{
    auto manager = runtime::Manager::get(backend_type);
    auto external = manager->compile(f);
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(external);

    // Forward inputs: one tensor per parameter.
    TViews viv;
    for (const auto& param : f->get_parameters())
    {
        viv.push_back(
            backend->make_primary_tensor_view(param->get_element_type(), param->get_shape()));
    }

    // Forward outputs: one tensor per function output.
    TViews vrv;
    const size_t output_count = f->get_output_size();
    for (size_t i = 0; i < output_count; ++i)
    {
        vrv.push_back(backend->make_primary_tensor_view(f->get_output_element_type(i),
                                                        f->get_output_shape(i)));
    }

    if (!backward)
    {
        return CallFrameIO{cf, nullptr, viv, vrv};
    }

    // The adjoint C has the type and shape of output 0 of f.
    auto C =
        std::make_shared<op::Parameter>(f->get_output_element_type(0), f->get_output_shape(0));
    std::vector<std::shared_ptr<op::Parameter>> backparam;
    backparam.push_back(C);
    viv.push_back(backend->make_primary_tensor_view(C->get_element_type(), C->get_shape()));

    // One gradient tensor per parameter, appended after the forward outputs.
    for (const auto& param : f->get_parameters())
    {
        vrv.push_back(
            backend->make_primary_tensor_view(param->get_element_type(), param->get_shape()));
    }

    std::vector<std::shared_ptr<Node>> dYdXs;
    auto op = f->get_result();
    for (const auto& param : f->get_parameters())
    {
        dYdXs.push_back(op->backprop_node(param, C));
        backparam.push_back(param);
    }

    auto bf = std::make_shared<Function>(dYdXs, backparam);
    auto backward_external = manager->compile(bf);
    auto bf_cf = backend->make_call_frame(backward_external);
    return CallFrameIO{cf, bf_cf, viv, vrv};
}
/// Reads the contents of a tensor view into a newly allocated std::vector<T>.
/// The vector holds shape_size(tv->get_shape()) elements, read from offset 0.
/// Throws std::invalid_argument when T is not the element type of the view.
template <typename T>
static std::vector<T> read_vector(std::shared_ptr<ngraph::runtime::TensorView> tv)
{
    const bool type_mismatch =
        ngraph::element::from<T>() != tv->get_tensor_view_layout()->get_element_type();
    if (type_mismatch)
    {
        throw std::invalid_argument("read_vector type must match TensorView type");
    }
    std::vector<T> contents(ngraph::shape_size(tv->get_shape()));
    tv->read(contents.data(), 0, contents.size() * sizeof(T));
    return contents;
}
/// Writes all elements of `values` into the tensor view at offset 0.
template <typename T>
inline void write_vector(std::shared_ptr<ngraph::runtime::TensorView> tv,
                         const std::vector<T>& values)
{
    const size_t byte_count = sizeof(T) * values.size();
    tv->write(values.data(), 0, byte_count);
}
/// Copies the elements of `data` into the tensor view, starting at offset 0.
template <typename T>
inline void copy_data(std::shared_ptr<ngraph::runtime::TensorView> tv,
                      const std::vector<T>& data)
{
    tv->write(data.data(), 0, sizeof(T) * data.size());
}
/// Groups performance counters by op kind and returns them keyed by time.
///
/// The op kind is the counter name up to (not including) its first '_'; a name
/// without '_' is used whole. The microseconds() values of all counters
/// sharing a kind are summed. The result maps summed time -> op kind, so
/// iterating it in reverse visits the most expensive kinds first.
inline std::multimap<size_t, std::string>
    agregate_timing(const std::vector<runtime::PerformanceCounter>& perf_data)
{
    std::unordered_map<std::string, size_t> timing;
    for (const runtime::PerformanceCounter& p : perf_data)
    {
        const std::string& name = p.name();
        timing[name.substr(0, name.find('_'))] += p.microseconds();
    }
    std::multimap<size_t, std::string> rc;
    // `const auto&` binds directly to the map's pair<const std::string, size_t>;
    // spelling the type as pair<std::string, size_t> would copy every entry.
    for (const auto& t : timing)
    {
        rc.emplace(t.second, t.first);
    }
    return rc;
}
/// Fills tensor views with pseudo-random values produced by a
/// std::uniform_real_distribution<T>(min, max) driven by a
/// std::default_random_engine.
template <typename T>
class Uniform
{
public:
    /// `seed` seeds the engine. The generator kept in m_r is created with
    /// std::bind, which stores its own copies of the distribution and engine,
    /// so all later draws advance the state held inside m_r.
    Uniform(T min, T max, T seed = 0)
        : m_engine(seed)
        , m_distribution(min, max)
        , m_r(std::bind(m_distribution, m_engine))
    {
    }

    /// Replaces every element of `ptv` with a freshly drawn value and returns
    /// the same pointer. read_vector<T> provides a buffer of the right length
    /// and throws if T is not the element type of the view.
    const std::shared_ptr<runtime::TensorView>
        initialize(const std::shared_ptr<runtime::TensorView>& ptv)
    {
        std::vector<T> values = read_vector<T>(ptv);
        for (auto it = values.begin(); it != values.end(); ++it)
        {
            *it = m_r();
        }
        write_vector(ptv, values);
        return ptv;
    }

protected:
    std::default_random_engine m_engine;
    std::uniform_real_distribution<T> m_distribution;
    std::function<T()> m_r;
};
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
// Standalone Google Test example for ngraph.
// compile and test as follows.
// g++ -std=c++11 simple_gtest.cc -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -lpthread -lgtest -o /tmp/test
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib /tmp/test
#include <gtest/gtest.h>
#include "nutils.h"
using namespace std;
using namespace ngraph;
// Forward-only check: multiplies two 2x2 f32 tensors element-wise on the CPU
// backend and compares against the expected squares.
TEST(simple, mul_forward)
{
    Shape shape{2, 2};
    auto lhs = make_shared<op::Parameter>(element::f32, shape);
    auto rhs = make_shared<op::Parameter>(element::f32, shape);
    auto product = ngraph::builder::make_with_numpy_broadcast<op::Multiply>(lhs, rhs);
    auto f = make_shared<Function>(product, op::Parameters{lhs, rhs});

    // No backward frame is requested, so that tuple slot is ignored.
    CFrame forward;
    TViews inputs, outputs;
    tie(forward, ignore, inputs, outputs) = get_cfio("CPU", f);

    const vector<float> operand{1, 2, 3, 4};
    copy_data(inputs[0], operand);
    copy_data(inputs[1], operand);
    forward->call(inputs, outputs);

    const vector<float> expected{1, 4, 9, 16};
    EXPECT_EQ(expected, read_vector<float>(outputs[0]));
}
// Forward and backward check of A / B on 2x2 f32 tensors using the
// INTERPRETER backend.
//
// With backward == true, get_cfio returns
//   inp = {A, B, C}          (C is the adjoint fed to the backward pass)
//   out = {A / B, dA, dB}
TEST(simple, div_forward_backward)
{
    auto shape = Shape{2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::Parameter>(element::f32, shape);
    auto o = ngraph::builder::make_with_numpy_broadcast<op::Divide>(A, B);
    auto f = make_shared<Function>(o, op::Parameters{A, B});
    CFrame cf, cb;
    TViews inp, out;
    tie(cf, cb, inp, out) = get_cfio("INTERPRETER", f, true);

    copy_data(inp[0], vector<float>{2, 4, 8, 16});
    copy_data(inp[1], vector<float>{1, 2, 4, 8});
    // The forward function has exactly two parameters, so pass only A and B;
    // inp also carries the adjoint tensor, which belongs to the backward call.
    cf->call({inp[0], inp[1]}, {out[0]});
    EXPECT_EQ((vector<float>{2, 2, 2, 2}), read_vector<float>(out[0]));

    // Backward pass with an all-ones adjoint; argument order is C, A, B.
    copy_data(inp[2], vector<float>{1, 1, 1, 1});
    cb->call({inp[2], inp[0], inp[1]}, {out[1], out[2]});
    EXPECT_EQ((vector<float>{1, 0.5, 0.25, 0.125}), read_vector<float>(out[1]));
    EXPECT_EQ((vector<float>{-2, -1, -0.5, -0.25}), read_vector<float>(out[2]));
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment