Commit a1962e76 authored by Ashok Emani's avatar Ashok Emani

add cmake for nbench, address review comments

parent 57ab9e06
......@@ -18,3 +18,4 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIN_NGRAPH_LIBRARY")
add_subdirectory(resource)
add_subdirectory(ngraph)
add_subdirectory(tools)
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <ngraph/file_util.hpp>
#include <ngraph/ngraph.hpp>
......@@ -21,6 +23,7 @@ namespace ngraph
using TViews = std::vector<std::shared_ptr<runtime::TensorView>>;
using CallFrameIO = std::tuple<CFrame, CFrame, TViews, TViews>;
/// Create forward/backward call frame(s) and input/output TensorViews for the given function.
CallFrameIO
get_cfio(std::string backend_type, std::shared_ptr<Function> f, bool backward = false)
{
......@@ -58,76 +61,4 @@ namespace ngraph
auto bf_cf = backend->make_call_frame(backward_external);
return CallFrameIO{cf, bf_cf, viv, vrv};
}
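// Hypothetical usage sketch for get_cfio (the names cf, bf_cf, inputs, and
// outputs are illustrative only): unpack the returned tuple and run a
// forward pass.
//
//     CFrame cf, bf_cf;
//     TViews inputs, outputs;
//     std::tie(cf, bf_cf, inputs, outputs) = get_cfio("CPU", f, true);
//     cf->tensor_call(inputs, outputs);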
template <typename T>
static std::vector<T> read_vector(std::shared_ptr<ngraph::runtime::TensorView> tv)
{
if (ngraph::element::from<T>() != tv->get_tensor_view_layout()->get_element_type())
{
throw std::invalid_argument("read_vector type must match TensorView type");
}
size_t element_count = ngraph::shape_size(tv->get_shape());
size_t size = element_count * sizeof(T);
std::vector<T> rc(element_count);
tv->read(rc.data(), 0, size);
return rc;
}
template <typename T>
inline void write_vector(std::shared_ptr<ngraph::runtime::TensorView> tv,
const std::vector<T>& values)
{
tv->write(values.data(), 0, values.size() * sizeof(T));
}
template <typename T>
inline void copy_data(std::shared_ptr<ngraph::runtime::TensorView> tv,
const std::vector<T>& data)
{
size_t data_size = data.size() * sizeof(T);
tv->write(data.data(), 0, data_size);
}
inline std::multimap<size_t, std::string>
agregate_timing(const std::vector<runtime::PerformanceCounter>& perf_data)
{
std::unordered_map<std::string, size_t> timing;
for (const runtime::PerformanceCounter& p : perf_data)
{
std::string op = p.name().substr(0, p.name().find('_'));
timing[op] += p.microseconds();
}
std::multimap<size_t, std::string> rc;
for (const std::pair<std::string, size_t>& t : timing)
{
rc.insert({t.second, t.first});
}
return rc;
}
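// Worked example (hypothetical counters): "Add_0" at 5us and "Add_3" at 7us
// both aggregate under the key "Add" for a total of 12us. Because the result
// is keyed by total time, iterating it in reverse yields op types from most
// to least expensive.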
template <typename T>
class Uniform
{
public:
Uniform(T min, T max, T seed = 0)
: m_engine(seed)
, m_distribution(min, max)
, m_r(std::bind(m_distribution, m_engine))
{
}
const std::shared_ptr<runtime::TensorView>
initialize(const std::shared_ptr<runtime::TensorView>& ptv)
{
std::vector<T> vec = read_vector<T>(ptv);
for (T& elt : vec)
{
elt = m_r();
}
write_vector(ptv, vec);
return ptv;
}
protected:
std::default_random_engine m_engine;
std::uniform_real_distribution<T> m_distribution;
std::function<T()> m_r;
};
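// Hypothetical usage sketch, mirroring how nbench seeds its inputs: draw
// values uniformly from [-1, 1) with a fixed seed so runs are reproducible.
//
//     Uniform<float> rng{-1, 1, 0};
//     auto tensor = backend->make_primary_tensor_view(param->get_element_type(),
//                                                     param->get_shape());
//     rng.initialize(tensor);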
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
// Standalone Google Test example for ngraph.
// Compile and run as follows:
// g++ -std=c++11 simple_gtest.cc -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -lpthread -lgtest -o /tmp/test
// g++ -std=c++11 simple_gtest.cpp -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -pthread -lngraph -lgtest -o /tmp/test
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib /tmp/test
#include <gtest/gtest.h>
#include "nutils.h"
#include <ngraph/ngraph.hpp>
#include "../../test/util/test_tools.hpp"
#include "nutils.hpp"
using namespace std;
using namespace ngraph;
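// A minimal test sketch (hypothetical; the real tests are elided below). It
// uses get_cfio and Uniform from nutils.hpp; build_model stands in for
// whatever Function the file actually constructs.
//
//     TEST(simple, forward_pass)
//     {
//         std::shared_ptr<Function> f = build_model(); // hypothetical helper
//         CFrame cf, bf_cf;
//         TViews inputs, outputs;
//         std::tie(cf, bf_cf, inputs, outputs) = get_cfio("CPU", f);
//         Uniform<float> rng{-1, 1, 0};
//         for (auto& tv : inputs)
//             rng.initialize(tv);
//         cf->tensor_call(inputs, outputs); // results land in outputs
//     }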
......
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
if (NGRAPH_CPU_ENABLE AND NOT APPLE)
set (SRC
nbench.cpp
)
add_executable(nbench ${SRC})
add_dependencies(nbench ngraph)
set(HEADER_SEARCH_DEFINES
"NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\""
)
target_link_libraries(nbench ngraph)
set_source_files_properties(nbench.cpp PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
endif()
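# A build sketch (assuming the standard out-of-tree ngraph build; paths and
# options may differ in your checkout):
#   mkdir build && cd build
#   cmake .. -DNGRAPH_CPU_ENABLE=TRUE
#   make nbench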
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
// Tool to benchmark any ngraph JSON model with a given backend.
// Compile and run with:
// g++ ./nbench.cc -std=c++11 -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -o nbench
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib NGRAPH_CPU_EMIT_TIMING=1 ./nbench
#include <fstream>
#include <gtest/gtest.h>
#include "clipp.h"
#include "nutils.h"
using namespace std;
using namespace ngraph;
void run_benchmark(const string& json_path, const string& backend_name, size_t iterations)
{
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
}
Uniform<float> rng{-1, 1, 0};
const string json_string = ngraph::file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor =
backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < iterations; i++)
{
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = agregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
}
int main(int argc, char** argv)
{
string model = "model.json";
string backend = "CPU";
int iter = 10;
auto cli =
("model json file to use (default: model.json)" % clipp::option("-f") &
clipp::value("filename", model),
"Backed to use (default: CPU)" % clipp::option("-b") & clipp::value("backend", backend),
"Iterations (default: 10)" % clipp::option("-i") & clipp::value("iterations", iter));
if (!clipp::parse(argc, argv, cli) || !static_cast<bool>(ifstream(model)))
{
cout << clipp::make_man_page(cli, argv[0])
.prepend_section("DESCRIPTION",
" Benchmark ngraph json model with given backend.");
return 1;
}
cout << "Benchmarking " << model << ", " << backend << " backend, " << iter << " iterations.\n";
run_benchmark(model, backend, iter);
}
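// Example invocation (hypothetical model file; flags as parsed above):
//   env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib NGRAPH_CPU_EMIT_TIMING=1 \
//       ./nbench -f model.json -b CPU -i 100
// This prints the build time, the per-iteration latency, and, when the timing
// environment variable is set, a per-op timing table.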
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
// Tool to benchmark any ngraph JSON model with a given backend.
// Compile and run with:
// g++ ./nbench.cpp -std=c++11 -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -o nbench
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib NGRAPH_INTERPRETER_EMIT_TIMING=1 ./nbench
// Sample models are under ../../test/models.
#include <fstream>
#include <ngraph/runtime/call_frame.hpp>
#include <ngraph/runtime/manager.hpp>
#include <ngraph/runtime/backend.hpp>
#include "../../test/util/test_tools.hpp"
using namespace std;
int main(int argc, char** argv)
{
string model = "model.json";
string backend = "INTERPRETER";
int iter = 10;
bool failed = false;
for (int i = 1; i < argc; i++)
{
if (string(argv[i]) == "-f")
{
model = argv[++i];
}
else if (string(argv[i]) == "-b")
{
backend = argv[++i];
}
else if (string(argv[i]) == "-i")
{
try
{
iter = stoi(argv[++i]);
}
catch (...)
{
cout << "Invalid Argument\n";
failed = true;
}
}
}
if (!static_cast<bool>(ifstream(model)))
{
cout << "File " << model << " not found\n";
failed = true;
}
if (failed)
{
cout << R"###(
DESCRIPTION
Benchmark ngraph json model with given backend.
SYNOPSIS
/tmp/nbench [-f <filename>] [-b <backend>] [-i <iterations>]
OPTIONS
-f Model JSON file to use (default: model.json)
-b Backend to use (default: INTERPRETER)
-i Iterations (default: 10)
)###";
return 1;
}
cout << "Benchmarking " << model << ", " << backend << " backend, " << iter << " iterations.\n";
run_benchmark(model, backend, iter);
}
......@@ -37,86 +37,6 @@
using namespace std;
using namespace ngraph;
static multimap<size_t, string>
agregate_timing(const vector<runtime::PerformanceCounter>& perf_data)
{
unordered_map<string, size_t> timing;
for (const runtime::PerformanceCounter& p : perf_data)
{
string op = p.name().substr(0, p.name().find('_'));
timing[op] += p.microseconds();
}
multimap<size_t, string> rc;
for (const pair<string, size_t>& t : timing)
{
rc.insert({t.second, t.first});
}
return rc;
}
void run_benchmark(const string& json_path, const string& backend_name, size_t iterations)
{
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
}
test::Uniform<float> rng{-1, 1, 0};
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor =
backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < iterations; i++)
{
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = agregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
}
TEST(benchmark, mxnet_mnist_mlp_forward)
{
const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json");
......
......@@ -21,7 +21,9 @@
#include <memory>
#include "ngraph/descriptor/layout/tensor_view_layout.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/serializer.hpp"
namespace ngraph
{
......@@ -73,3 +75,115 @@ size_t count_ops_of_type(std::shared_ptr<ngraph::Function> f)
return count;
}
/// Performance test utilities.
inline std::multimap<size_t, std::string>
agregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data)
{
std::unordered_map<std::string, size_t> timing;
for (const ngraph::runtime::PerformanceCounter& p : perf_data)
{
std::string op = p.name().substr(0, p.name().find('_'));
timing[op] += p.microseconds();
}
std::multimap<size_t, std::string> rc;
for (const std::pair<std::string, size_t>& t : timing)
{
rc.insert({t.second, t.first});
}
return rc;
}
template <typename T>
class Uniform
{
public:
Uniform(T min, T max, T seed = 0)
: m_engine(seed)
, m_distribution(min, max)
, m_r(std::bind(m_distribution, m_engine))
{
}
const std::shared_ptr<ngraph::runtime::TensorView>
initialize(const std::shared_ptr<ngraph::runtime::TensorView>& ptv)
{
std::vector<T> vec = read_vector<T>(ptv);
for (T& elt : vec)
{
elt = m_r();
}
write_vector(ptv, vec);
return ptv;
}
protected:
std::default_random_engine m_engine;
std::uniform_real_distribution<T> m_distribution;
std::function<T()> m_r;
};
static void
run_benchmark(const std::string& json_path, const std::string& backend_name, size_t iterations)
{
using namespace std;
using namespace ngraph;
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
}
Uniform<float> rng{-1, 1, 0};
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor =
backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < iterations; i++)
{
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = agregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
}
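// Hypothetical usage from a test, mirroring the benchmark tests elsewhere in
// this commit:
//
//     TEST(benchmark, my_model)
//     {
//         const std::string json_path =
//             ngraph::file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json");
//         run_benchmark(json_path, "INTERPRETER", 10);
//     }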