Commit 348dd27f authored by Ashok Emani's avatar Ashok Emani

refactor benchmark util

parent 40d404f4
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
if (NGRAPH_CPU_ENABLE) if (NGRAPH_CPU_ENABLE)
set (SRC set (SRC
nbench.cpp nbench.cpp
${PROJECT_SOURCE_DIR}/test/util/benchmark.cpp
) )
add_executable(nbench ${SRC}) add_executable(nbench ${SRC})
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <ngraph/runtime/backend.hpp> #include <ngraph/runtime/backend.hpp>
#include <ngraph/runtime/call_frame.hpp> #include <ngraph/runtime/call_frame.hpp>
#include <ngraph/runtime/manager.hpp> #include <ngraph/runtime/manager.hpp>
#include "../../test/util/benchmark.hpp"
#include "../../test/util/test_tools.hpp" #include "../../test/util/test_tools.hpp"
using namespace std; using namespace std;
......
...@@ -50,6 +50,7 @@ set (SRC ...@@ -50,6 +50,7 @@ set (SRC
type_prop.cpp type_prop.cpp
util/autodiff/backprop_function.cpp util/autodiff/backprop_function.cpp
util/test_tools.cpp util/test_tools.cpp
util/benchmark.cpp
util.cpp util.cpp
uuid.cpp uuid.cpp
) )
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "ngraph/runtime/manager.hpp" #include "ngraph/runtime/manager.hpp"
#include "ngraph/serializer.hpp" #include "ngraph/serializer.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
#include "util/benchmark.hpp"
#include "util/random.hpp" #include "util/random.hpp"
#include "util/test_tools.hpp" #include "util/test_tools.hpp"
......
...@@ -76,114 +76,3 @@ size_t count_ops_of_type(std::shared_ptr<ngraph::Function> f) ...@@ -76,114 +76,3 @@ size_t count_ops_of_type(std::shared_ptr<ngraph::Function> f)
return count; return count;
} }
/// performance test utilities
inline std::multimap<size_t, std::string>
aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data)
{
std::unordered_map<std::string, size_t> timing;
for (const ngraph::runtime::PerformanceCounter& p : perf_data)
{
std::string op = p.name().substr(0, p.name().find('_'));
timing[op] += p.microseconds();
}
std::multimap<size_t, std::string> rc;
for (const std::pair<std::string, size_t>& t : timing)
{
rc.insert({t.second, t.first});
}
return rc;
}
template <typename T>
class Uniform
{
public:
Uniform(T min, T max, T seed = 0)
: m_engine(seed)
, m_distribution(min, max)
, m_r(std::bind(m_distribution, m_engine))
{
}
const std::shared_ptr<ngraph::runtime::TensorView>
initialize(const std::shared_ptr<ngraph::runtime::TensorView>& ptv)
{
std::vector<T> vec = read_vector<T>(ptv);
for (T& elt : vec)
{
elt = m_r();
}
write_vector(ptv, vec);
return ptv;
}
protected:
std::default_random_engine m_engine;
std::uniform_real_distribution<T> m_distribution;
std::function<T()> m_r;
};
static void
run_benchmark(const std::string& json_path, const std::string& backend_name, size_t iterations)
{
using namespace std;
using namespace ngraph;
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
}
Uniform<float> rng{-1, 1, 0};
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor =
backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < static_cast<size_t>(iterations); i++)
{
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = aggregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment