add standalone benchmark tool

Merge pull request #493

add standalone benchmark tool
Merge pull request #493
3380928c · Ashok Emani · GitHub · 233e4b1b · 6f977a62 · 3380928c
Unverified Commit 3380928c authored Feb 21, 2018 by Ashok Emani Committed by GitHub Feb 21, 2018
9 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -58,6 +58,7 @@ nervana_aeon.egg-info/
 # vim
 *.swp
 *.swo
+tags

 build/


--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -18,3 +18,4 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIN_NGRAPH_LIBRARY")

 add_subdirectory(resource)
 add_subdirectory(ngraph)
+add_subdirectory(tools)
--- a/src/tools/CMakeLists.txt
+++ b/src/tools/CMakeLists.txt
+# ******************************************************************************
+# Copyright 2017-2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ******************************************************************************
+
+# When an MKL-DNN include directory has been configured, add the MKL-DNN
+# library directory to the linker search path for the targets defined below.
+if(MKLDNN_INCLUDE_DIR)
+    link_directories(${MKLDNN_LIB_DIR})
+endif()
+
+# The nbench benchmark executable is only built when NGRAPH_CPU_ENABLE is set.
+if (NGRAPH_CPU_ENABLE)
+    # nbench is built from its own main() plus the benchmark helper that lives
+    # with the test utilities.
+    set (SRC
+        nbench.cpp
+        ${PROJECT_SOURCE_DIR}/test/util/benchmark.cpp
+    )
+
+    add_executable(nbench ${SRC})
+    # Make sure the ngraph target is built before nbench.
+    add_dependencies(nbench ngraph)
+    
+    # Compile definition exposing NGRAPH_INCLUDE_PATH as the quoted string
+    # macro NGRAPH_HEADERS_PATH.
+    set(HEADER_SEARCH_DEFINES
+        "NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\""
+    )
+    
+    target_link_libraries(nbench ngraph)
+    
+    # The definition is applied to nbench.cpp only, not to benchmark.cpp.
+    set_source_files_properties(nbench.cpp PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
+
+endif()
--- a/src/tools/nbench.cpp
+++ b/src/tools/nbench.cpp
+/*******************************************************************************
+* Copyright 2017-2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+// Tool to benchmark any ngraph json model with a given backend.
+// run_benchmark() is defined in test/util/benchmark.cpp, so that file has to be
+// compiled together with this one. Compile and run with:
+// g++ ./nbench.cpp ../../test/util/benchmark.cpp -std=c++11 -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -o nbench
+// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib NGRAPH_INTERPRETER_EMIT_TIMING=1 ./nbench
+// Sample models are under ../../test/models
+
+#include <fstream>
+#include <ngraph/runtime/backend.hpp>
+#include <ngraph/runtime/call_frame.hpp>
+#include <ngraph/runtime/manager.hpp>
+#include "../../test/util/benchmark.hpp"
+#include "../../test/util/test_tools.hpp"
+using namespace std;
+
+/// Entry point of the nbench command line tool.
+///
+/// Recognised options (each one takes a value):
+///   -f <filename>    model json file to benchmark (default: model.json)
+///   -b <backend>     backend name handed to run_benchmark (default: INTERPRETER)
+///   -i <iterations>  number of iterations, must be a positive integer (default: 10)
+///
+/// Arguments that are not one of the options above are ignored.
+///
+/// \param argc number of entries in argv
+/// \param argv command line arguments, argv[0] being the program name
+/// \return 1 after printing the usage text when an option has no value, the
+///         iteration count is not a positive integer, or the model file cannot
+///         be opened; 0 once run_benchmark() has returned.
+int main(int argc, char** argv)
+{
+    string model = "model.json";
+    string backend = "INTERPRETER";
+    int iter = 10;
+    bool failed = false;
+    // argc is an int, so index with an int to avoid a signed/unsigned comparison.
+    for (int i = 1; i < argc; i++)
+    {
+        const string option = argv[i];
+        if (option != "-f" && option != "-b" && option != "-i")
+        {
+            // Unrecognised arguments are ignored.
+            continue;
+        }
+        // Every option consumes the following argument; never read past the
+        // end of argv (argv[argc] is a null pointer).
+        if (i + 1 >= argc)
+        {
+            cout << "Missing value for " << option << "\n";
+            failed = true;
+            break;
+        }
+        const string value = argv[++i];
+        if (option == "-f")
+        {
+            model = value;
+        }
+        else if (option == "-b")
+        {
+            backend = value;
+        }
+        else
+        {
+            try
+            {
+                const int parsed = stoi(value);
+                // run_benchmark takes a size_t; a negative count would wrap
+                // around to a huge number of iterations.
+                if (parsed < 1)
+                {
+                    cout << "Invalid Argument\n";
+                    failed = true;
+                }
+                else
+                {
+                    iter = parsed;
+                }
+            }
+            catch (...)
+            {
+                // stoi throws when the value is not a number or is out of range.
+                cout << "Invalid Argument\n";
+                failed = true;
+            }
+        }
+    }
+    // The model must be readable before any backend work is started.
+    if (!static_cast<bool>(ifstream(model)))
+    {
+        cout << "File " << model << " not found\n";
+        failed = true;
+    }
+
+    if (failed)
+    {
+        cout << R"###(
+DESCRIPTION                                                         
+    Benchmark ngraph json model with given backend.                 
+                                                                    
+SYNOPSIS                                                            
+        nbench [-f <filename>] [-b <backend>] [-i <iterations>]
+                                                                    
+OPTIONS                                                             
+        -f          model json file to use (default: model.json)    
+        -b          Backend to use (default: INTERPRETER)           
+        -i          Iterations (default: 10)                        
+)###";
+        return 1;
+    }
+    cout << "Benchmarking " << model << ", " << backend << " backend, " << iter << " iterations.\n";
+    run_benchmark(model, backend, iter);
+    return 0;
+}
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -50,6 +50,7 @@ set (SRC
    type_prop.cpp
    util/autodiff/backprop_function.cpp
    util/test_tools.cpp
+    util/benchmark.cpp
    util.cpp
    uuid.cpp
 )

--- a/test/backend_performance.cpp
+++ b/test/backend_performance.cpp
@@ -31,92 +31,13 @@
 #include "ngraph/runtime/manager.hpp"
 #include "ngraph/serializer.hpp"
 #include "ngraph/util.hpp"
+#include "util/benchmark.hpp"
 #include "util/random.hpp"
 #include "util/test_tools.hpp"

 using namespace std;
 using namespace ngraph;

-static multimap<size_t, string>
-    agregate_timing(const vector<runtime::PerformanceCounter>& perf_data)
-{
-    unordered_map<string, size_t> timing;
-    for (const runtime::PerformanceCounter& p : perf_data)
-    {
-        string op = p.name().substr(0, p.name().find('_'));
-        timing[op] += p.microseconds();
-    }
-
-    multimap<size_t, string> rc;
-    for (const pair<string, size_t>& t : timing)
-    {
-        rc.insert({t.second, t.first});
-    }
-    return rc;
-}
-
-void run_benchmark(const string& json_path, const string& backend_name, size_t iterations)
-{
-    string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
-    bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
-    if (!emit_timing)
-    {
-        cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
-    }
-
-    test::Uniform<float> rng{-1, 1, 0};
-    const string json_string = file_util::read_file_to_string(json_path);
-    stringstream ss(json_string);
-    shared_ptr<Function> f = ngraph::deserialize(ss);
-
-    stopwatch build_time;
-    build_time.start();
-    auto manager = runtime::Manager::get(backend_name);
-    auto external = manager->compile(f);
-    auto backend = manager->allocate_backend();
-    auto cf = backend->make_call_frame(external);
-    build_time.stop();
-    cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
-
-    vector<shared_ptr<runtime::TensorView>> args;
-    for (shared_ptr<op::Parameter> param : f->get_parameters())
-    {
-        auto tensor =
-            backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
-        rng.initialize(tensor);
-        args.push_back(tensor);
-    }
-    vector<shared_ptr<runtime::TensorView>> results;
-    for (shared_ptr<Node> out : f->get_results())
-    {
-        auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
-        results.push_back(result);
-    }
-
-    stopwatch t1;
-    t1.start();
-    for (size_t i = 0; i < static_cast<size_t>(iterations); i++)
-    {
-        cf->tensor_call(args, results);
-    }
-    t1.stop();
-    float time = t1.get_milliseconds();
-    cout << time / iterations << "ms per iteration" << endl;
-
-    vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
-    sort(perf_data.begin(),
-         perf_data.end(),
-         [](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
-             return p1.total_microseconds() > p2.total_microseconds();
-         });
-    multimap<size_t, string> timing = agregate_timing(perf_data);
-    for (auto it = timing.rbegin(); it != timing.rend(); it++)
-    {
-        cout.imbue(locale(""));
-        cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
-    }
-}
-
 TEST(benchmark, mxnet_mnist_mlp_forward)
 {
    const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json");

--- a/test/util/benchmark.cpp
+++ b/test/util/benchmark.cpp
+/*******************************************************************************
+* Copyright 2017-2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+#include "benchmark.hpp"
+#include "ngraph/runtime/backend.hpp"
+#include "ngraph/runtime/call_frame.hpp"
+#include "ngraph/runtime/manager.hpp"
+#include "ngraph/runtime/tensor_view.hpp"
+#include "ngraph/serializer.hpp"
+#include "random.hpp"
+
+/// Sum the time of all performance counters that belong to the same op type.
+///
+/// The op type of a counter is the part of its name before the first '_'; a
+/// name without '_' is used unchanged.
+///
+/// \param perf_data performance counters to aggregate
+/// \return a multimap from accumulated microseconds() to op type, ordered by
+///         ascending time; op types with equal totals appear as separate entries.
+std::multimap<size_t, std::string>
+    aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data)
+{
+    std::unordered_map<std::string, size_t> timing;
+    for (const ngraph::runtime::PerformanceCounter& p : perf_data)
+    {
+        // Fetch the name once; binding to const auto& works whether name()
+        // returns a reference or a value.
+        const auto& name = p.name();
+        timing[name.substr(0, name.find('_'))] += p.microseconds();
+    }
+
+    std::multimap<size_t, std::string> rc;
+    // The map's element type is pair<const string, size_t>; const auto& binds
+    // to it directly instead of materialising a converted copy per element.
+    for (const auto& t : timing)
+    {
+        rc.insert({t.second, t.first});
+    }
+    return rc;
+}
+
+/// Benchmark a serialized function on one backend and print the timings.
+///
+/// The function is deserialized from the json file, compiled through the
+/// runtime manager registered under backend_name, and called `iterations`
+/// times on uniformly random inputs. The build time, the average time per
+/// iteration and the per-op timing reported by the call frame are written to
+/// standard output.
+///
+/// \param json_path path of the serialized function
+/// \param backend_name name of the backend; also used to build the name of the
+///        NGRAPH_<backend>_EMIT_TIMING environment variable hint
+/// \param iterations number of times the call frame is invoked
+void run_benchmark(const std::string& json_path, const std::string& backend_name, size_t iterations)
+{
+    using namespace std;
+    using namespace ngraph;
+
+    // Tell the user how to enable per-op timing when it is not switched on.
+    const string timing_env = "NGRAPH_" + backend_name + "_EMIT_TIMING";
+    if (std::getenv(timing_env.c_str()) == nullptr)
+    {
+        cout << "To get per-op timing set the environment variable " << timing_env << "\n";
+    }
+
+    ngraph::test::Uniform<float> rng{-1, 1, 0};
+    stringstream model_stream(file_util::read_file_to_string(json_path));
+    shared_ptr<Function> f = deserialize(model_stream);
+
+    // Time everything needed to obtain a callable frame for the function.
+    stopwatch build_timer;
+    build_timer.start();
+    auto manager = runtime::Manager::get(backend_name);
+    auto external = manager->compile(f);
+    auto backend = manager->allocate_backend();
+    auto call_frame = backend->make_call_frame(external);
+    build_timer.stop();
+    cout << "build_time " << build_timer.get_milliseconds() << "ms" << endl;
+
+    // One randomly initialised input tensor per function parameter.
+    vector<shared_ptr<runtime::TensorView>> inputs;
+    for (shared_ptr<op::Parameter> parameter : f->get_parameters())
+    {
+        auto input = backend->make_primary_tensor_view(parameter->get_element_type(),
+                                                       parameter->get_shape());
+        rng.initialize(input);
+        inputs.push_back(input);
+    }
+
+    // One output tensor per function result.
+    vector<shared_ptr<runtime::TensorView>> outputs;
+    for (shared_ptr<Node> result_node : f->get_results())
+    {
+        outputs.push_back(backend->make_primary_tensor_view(result_node->get_element_type(),
+                                                            result_node->get_shape()));
+    }
+
+    stopwatch run_timer;
+    run_timer.start();
+    for (size_t remaining = iterations; remaining > 0; --remaining)
+    {
+        call_frame->tensor_call(inputs, outputs);
+    }
+    run_timer.stop();
+    const float elapsed_ms = run_timer.get_milliseconds();
+    cout << elapsed_ms / iterations << "ms per iteration" << endl;
+
+    vector<runtime::PerformanceCounter> counters = call_frame->get_performance_data();
+    sort(counters.begin(),
+         counters.end(),
+         [](const runtime::PerformanceCounter& lhs, const runtime::PerformanceCounter& rhs) {
+             return lhs.total_microseconds() > rhs.total_microseconds();
+         });
+
+    // Walk the aggregated timings backwards so the slowest op type comes first.
+    const multimap<size_t, string> per_op = aggregate_timing(counters);
+    for (auto entry = per_op.rbegin(); entry != per_op.rend(); ++entry)
+    {
+        cout.imbue(locale(""));
+        cout << setw(15) << left << entry->second << " " << setw(10) << right << entry->first
+             << "us\n";
+    }
+}
--- a/test/util/benchmark.hpp
+++ b/test/util/benchmark.hpp
+/*******************************************************************************
+* Copyright 2017-2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <ngraph/runtime/call_frame.hpp>
+#include "test_tools.hpp"
+
+/// Performance test utilities.
+
+/// Sum the microseconds() of all counters sharing the same op type, the op
+/// type being the part of the counter name before the first '_'.
+/// \param perf_data performance counters to aggregate
+/// \return accumulated time in microseconds mapped to op type
+std::multimap<size_t, std::string> aggregate_timing(
+    const std::vector<ngraph::runtime::PerformanceCounter>& perf_data);
+
+/// Compile the serialized function for a backend, call it repeatedly and print
+/// the build time, the time per iteration and the per-op timing.
+/// \param json_path path of the serialized function
+/// \param backend_name name of the backend to benchmark
+/// \param iterations number of calls to time
+void run_benchmark(const std::string& json_path, const std::string& backend_name, size_t iterations);
--- a/test/util/test_tools.hpp
+++ b/test/util/test_tools.hpp
@@ -21,7 +21,9 @@
 #include <memory>

 #include "ngraph/descriptor/layout/tensor_view_layout.hpp"
+#include "ngraph/file_util.hpp"
 #include "ngraph/runtime/tensor_view.hpp"
+#include "ngraph/serializer.hpp"

 namespace ngraph
 {
@@ -73,3 +75,4 @@ size_t count_ops_of_type(std::shared_ptr<ngraph::Function> f)

    return count;
 }
+