Refactor benchmark utilities out of test/util/test_tools.hpp into test/util/benchmark.hpp/.cpp

348dd27f · Ashok Emani · 40d404f4 · 348dd27f · 348dd27f · 348dd27f
Commit 348dd27f authored Feb 20, 2018 by Ashok Emani
5 changed files
--- a/src/tools/CMakeLists.txt
+++ b/src/tools/CMakeLists.txt
@@ -17,6 +17,7 @@
 if (NGRAPH_CPU_ENABLE)
    set (SRC
        nbench.cpp
+        ${PROJECT_SOURCE_DIR}/test/util/benchmark.cpp
    )

    add_executable(nbench ${SRC})

--- a/src/tools/nbench.cpp
+++ b/src/tools/nbench.cpp
@@ -24,6 +24,7 @@
 #include <ngraph/runtime/backend.hpp>
 #include <ngraph/runtime/call_frame.hpp>
 #include <ngraph/runtime/manager.hpp>
+#include "../../test/util/benchmark.hpp"
 #include "../../test/util/test_tools.hpp"
 using namespace std;


--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -50,6 +50,7 @@ set (SRC
    type_prop.cpp
    util/autodiff/backprop_function.cpp
    util/test_tools.cpp
+    util/benchmark.cpp
    util.cpp
    uuid.cpp
 )

--- a/test/backend_performance.cpp
+++ b/test/backend_performance.cpp
@@ -31,6 +31,7 @@
 #include "ngraph/runtime/manager.hpp"
 #include "ngraph/serializer.hpp"
 #include "ngraph/util.hpp"
+#include "util/benchmark.hpp"
 #include "util/random.hpp"
 #include "util/test_tools.hpp"


--- a/test/util/test_tools.hpp
+++ b/test/util/test_tools.hpp
@@ -76,114 +76,3 @@ size_t count_ops_of_type(std::shared_ptr<ngraph::Function> f)
    return count;
 }

-/// performance test utilities
-inline std::multimap<size_t, std::string>
-    aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data)
-{
-    std::unordered_map<std::string, size_t> timing;
-    for (const ngraph::runtime::PerformanceCounter& p : perf_data)
-    {
-        std::string op = p.name().substr(0, p.name().find('_'));
-        timing[op] += p.microseconds();
-    }
-
-    std::multimap<size_t, std::string> rc;
-    for (const std::pair<std::string, size_t>& t : timing)
-    {
-        rc.insert({t.second, t.first});
-    }
-    return rc;
-}
-template <typename T>
-class Uniform
-{
-public:
-    Uniform(T min, T max, T seed = 0)
-        : m_engine(seed)
-        , m_distribution(min, max)
-        , m_r(std::bind(m_distribution, m_engine))
-    {
-    }
-
-    const std::shared_ptr<ngraph::runtime::TensorView>
-        initialize(const std::shared_ptr<ngraph::runtime::TensorView>& ptv)
-    {
-        std::vector<T> vec = read_vector<T>(ptv);
-        for (T& elt : vec)
-        {
-            elt = m_r();
-        }
-        write_vector(ptv, vec);
-        return ptv;
-    }
-
-protected:
-    std::default_random_engine m_engine;
-    std::uniform_real_distribution<T> m_distribution;
-    std::function<T()> m_r;
-};
-
-static void
-    run_benchmark(const std::string& json_path, const std::string& backend_name, size_t iterations)
-{
-    using namespace std;
-    using namespace ngraph;
-    string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
-    bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
-    if (!emit_timing)
-    {
-        cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
-    }
-
-    Uniform<float> rng{-1, 1, 0};
-    const string json_string = file_util::read_file_to_string(json_path);
-    stringstream ss(json_string);
-    shared_ptr<Function> f = deserialize(ss);
-
-    stopwatch build_time;
-    build_time.start();
-    auto manager = runtime::Manager::get(backend_name);
-    auto external = manager->compile(f);
-    auto backend = manager->allocate_backend();
-    auto cf = backend->make_call_frame(external);
-    build_time.stop();
-    cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
-
-    vector<shared_ptr<runtime::TensorView>> args;
-    for (shared_ptr<op::Parameter> param : f->get_parameters())
-    {
-        auto tensor =
-            backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
-        rng.initialize(tensor);
-        args.push_back(tensor);
-    }
-    vector<shared_ptr<runtime::TensorView>> results;
-    for (shared_ptr<Node> out : f->get_results())
-    {
-        auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
-        results.push_back(result);
-    }
-
-    stopwatch t1;
-    t1.start();
-    for (size_t i = 0; i < static_cast<size_t>(iterations); i++)
-    {
-        cf->tensor_call(args, results);
-    }
-    t1.stop();
-    float time = t1.get_milliseconds();
-    cout << time / iterations << "ms per iteration" << endl;
-
-    vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
-    sort(perf_data.begin(),
-         perf_data.end(),
-         [](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
-             return p1.total_microseconds() > p2.total_microseconds();
-         });
-    multimap<size_t, string> timing = aggregate_timing(perf_data);
-    for (auto it = timing.rbegin(); it != timing.rend(); it++)
-    {
-        cout.imbue(locale(""));
-        cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
-    }
-}