//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

// Tool to benchmark any nGraph JSON model with a given backend.
// Compile and run with:
// $ g++ ./nbench.cpp
//             -std=c++11
//             -I$HOME/ngraph_dist/include
//             -L$HOME/ngraph_dist/lib
//             -lngraph
//             -o nbench
// $ env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib env NGRAPH_INTERPRETER_EMIT_TIMING=1 ./nbench
//
// Sample models are under ../../test/models
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "benchmark.hpp"
#include "benchmark_pipelined.hpp"
#include "ngraph/distributed.hpp"
#include "ngraph/except.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/pass/visualize_tree.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/backend_manager.hpp"
#include "ngraph/runtime/interpreter/int_backend.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
46

47
using namespace std;
48
using namespace ngraph;
49

Robert Kimball's avatar
Robert Kimball committed
50 51 52 53 54 55 56 57
static void configure_static_backends()
{
#ifdef NGRAPH_INTERPRETER_STATIC_LIB_ENABLE
    ngraph::runtime::BackendManager::register_backend(
        "INTERPRETER", ngraph::runtime::interpreter::get_backend_constructor_pointer());
#endif
}

58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
class PerfShape : public ngraph::runtime::PerformanceCounter
{
public:
    PerfShape(const runtime::PerformanceCounter& p, Shape s)
        : PerformanceCounter(p)
        , shape(s)
    {
    }
    Shape shape;
};

// Annotate each PerformanceCounter with the output shape of the node it
// refers to.
//
// Throws runtime_error if a counter references a node that cannot be found
// in the Function.
vector<PerfShape> to_perf_shape(shared_ptr<Function> f,
                                const vector<runtime::PerformanceCounter>& perf_data)
{
    vector<PerfShape> result;
    result.reserve(perf_data.size());
    for (const runtime::PerformanceCounter& p : perf_data)
    {
        auto node = p.get_node();
        if (node == nullptr)
        {
            // BUG FIX: the original called node->get_name() here, but this
            // branch is only reached when node IS null — that was a
            // guaranteed null-pointer dereference. Report without touching it.
            ostringstream os;
            os << "Performance counter references a node not found in Function \""
               << f->get_name() << "\".";
            throw runtime_error(os.str());
        }
        Shape shape = node->output(0).get_shape();
        result.push_back(PerfShape(p, shape));
    }
    return result;
}

// Sum microseconds per (op type, output shape) pair. The returned multimap is
// keyed by total time so callers can walk it in time order; the mapped string
// is the "<op> {shape} " label with the occurrence count appended.
multimap<size_t, string> aggregate_timing_details(const vector<PerfShape>& perf_data)
{
    unordered_map<string, size_t> total_us;
    unordered_map<string, size_t> occurrences;
    for (const PerfShape& entry : perf_data)
    {
        auto node = entry.get_node();
        string label = node->description() + " {" + join(entry.shape) + "} ";
        total_us[label] += entry.microseconds();
        occurrences[label] += 1;
    }

    multimap<size_t, string> by_time;
    for (auto& item : total_us)
    {
        by_time.insert({item.second, item.first + to_string(occurrences[item.first])});
    }
    return by_time;
}

// Sum microseconds per op type. The returned multimap is keyed by total time
// so callers can walk entries in ascending-time order.
multimap<size_t, string> aggregate_timing(const vector<PerfShape>& perf_data)
{
    unordered_map<string, size_t> total_us;
    for (const PerfShape& entry : perf_data)
    {
        total_us[entry.get_node()->description()] += entry.microseconds();
    }

    multimap<size_t, string> by_time;
    for (auto& item : total_us)
    {
        by_time.insert({item.second, item.first});
    }
    return by_time;
}

void print_times(const multimap<size_t, string>& timing)
{
    // set the column widths
    int name_width = 0;
    int time_width = 0;
134
    for (auto& p : timing)
135 136
    {
        name_width = max(name_width, static_cast<int>(p.second.size()));
137
        time_width = max(time_width, static_cast<int>(locale_string(p.first).size()));
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
    }
    for (auto it = timing.rbegin(); it != timing.rend(); it++)
    {
        cout << setw(name_width + 2) << left << it->second << " " << setw(time_width + 2) << right
             << it->first << "us\n";
    }
}

// Report benchmark results: when timing_detail is set, print per-op-type and
// per-op-type/shape aggregate tables (slowest first); otherwise print nothing.
void print_results(vector<PerfShape> perf_data, bool timing_detail)
{
    // Order individual counters slowest-first before aggregating.
    sort(perf_data.begin(), perf_data.end(), [](const PerfShape& a, const PerfShape& b) {
        return a.total_microseconds() > b.total_microseconds();
    });

    if (timing_detail)
    {
        cout << "\n---- Aggregate times per op type ----\n";
        print_times(aggregate_timing(perf_data));

        cout << "\n---- Aggregate times per op type/shape/count ----\n";
        print_times(aggregate_timing_details(perf_data));
    }
}
// Determine the element type an op "operates on" for statistics purposes.
// Most ops report their output type; Convert reports its input type, and
// binary comparisons report an input type (their output is always boolean,
// which is not the type of interest).
element::Type get_op_element_type(const Node& op)
{
    // description() is called up to six times in the original; fetch it once.
    const string desc = op.description();
    element::Type type;
    if (desc == "Convert")
    {
        type = op.input(0).get_element_type();
    }
    else if (desc == "Equal" || desc == "Greater" || desc == "GreaterEq" || desc == "Less" ||
             desc == "LessEq" || desc == "NotEqual")
    {
        // All binary elementwise comparison ops have the same type for both
        // inputs, so input(1) matches input(0); the output would be boolean.
        type = op.input(1).get_element_type();
    }
    else
    {
        type = op.output(0).get_element_type();
    }
    return type;
}
int main(int argc, char** argv)
{
189
    string model_arg;
190
    string backend;
191
    string directory;
192
    int iterations = 10;
193
    bool failed = false;
194 195
    bool statistics = false;
    bool timing_detail = false;
196
    bool visualize = false;
197
    int warmup_iterations = 1;
198
    bool copy_data = true;
199
    bool dot_file = false;
200
    bool double_buffer = false;
201

Robert Kimball's avatar
Robert Kimball committed
202
    configure_static_backends();
203
    for (int i = 1; i < argc; i++)
204
    {
205 206
        string arg = argv[i];
        if (arg == "-f" || arg == "--file")
207
        {
208
            model_arg = argv[++i];
209
        }
210
        else if (arg == "-b" || arg == "--backend")
211 212 213
        {
            backend = argv[++i];
        }
214
        else if (arg == "-i" || arg == "--iterations")
215 216 217
        {
            try
            {
218
                iterations = stoi(argv[++i]);
219 220 221 222 223 224 225
            }
            catch (...)
            {
                cout << "Invalid Argument\n";
                failed = true;
            }
        }
226 227 228 229
        else if (arg == "-s" || arg == "--statistics")
        {
            statistics = true;
        }
230
        else if (arg == "--timing_detail" || arg == "--timing-detail")
231 232 233
        {
            timing_detail = true;
        }
234 235 236 237
        else if (arg == "--no_copy_data")
        {
            copy_data = false;
        }
238 239 240 241
        else if (arg == "-v" || arg == "--visualize")
        {
            visualize = true;
        }
242 243 244 245
        else if (arg == "--dot")
        {
            dot_file = true;
        }
246 247 248 249
        else if (arg == "-d" || arg == "--directory")
        {
            directory = argv[++i];
        }
250 251 252 253
        else if (arg == "--double_buffer")
        {
            double_buffer = true;
        }
254 255 256 257 258 259 260 261 262 263 264 265
        else if (arg == "-w" || arg == "--warmup_iterations")
        {
            try
            {
                warmup_iterations = stoi(argv[++i]);
            }
            catch (...)
            {
                cout << "Invalid Argument\n";
                failed = true;
            }
        }
266 267 268 269 270
        else
        {
            cout << "Unknown option: " << arg << endl;
            failed = true;
        }
271
    }
272
    if (!model_arg.empty() && !file_util::exists(model_arg))
273
    {
274
        cout << "File " << model_arg << " not found\n";
275 276
        failed = true;
    }
277 278
    else if (!directory.empty() && !file_util::exists(directory))
    {
279
        cout << "Directory " << directory << " not found\n";
280 281
        failed = true;
    }
282
    else if (directory.empty() && model_arg.empty())
283 284 285 286
    {
        cout << "Either file or directory must be specified\n";
        failed = true;
    }
287 288 289 290

    if (failed)
    {
        cout << R"###(
291
DESCRIPTION
292
    Benchmark nGraph JSON model with given backend.
293 294

SYNOPSIS
Ashok Emani's avatar
Ashok Emani committed
295
        nbench [-f <filename>] [-b <backend>] [-i <iterations>]
296 297

OPTIONS
298 299 300 301
        -f|--file                 Serialized model file
        -b|--backend              Backend to use (default: CPU)
        -d|--directory            Directory to scan for models. All models are benchmarked.
        -i|--iterations           Iterations (default: 10)
302 303
        -s|--statistics           Display op statistics
        -v|--visualize            Visualize a model (WARNING: requires Graphviz installed)
304
        --timing_detail           Gather detailed timing
305
        -w|--warmup_iterations    Number of warm-up iterations
306
        --no_copy_data            Disable copy of input/result data every iteration
307
        --dot                     Generate Graphviz dot file
308
        --double_buffer           Double buffer inputs and outputs
309 310 311
)###";
        return 1;
    }
312

313 314
    vector<string> models;
    if (!directory.empty())
315
    {
316 317 318 319 320 321 322 323 324
        vector<PerfShape> aggregate_perf_data;
        file_util::iterate_files(directory,
                                 [&](const string& file, bool is_dir) {
                                     if (!is_dir)
                                     {
                                         models.push_back(file);
                                     }
                                 },
                                 true);
325
    }
326
    else
327
    {
328 329 330
        // Error case where model is missing already checked above
        models.push_back(model_arg);
    }
331

332
    vector<PerfShape> aggregate_perf_data;
333
    int rc = 0;
334 335 336 337 338 339 340
    for (const string& model : models)
    {
        cout << "\n";
        cout << "============================================================================\n";
        cout << "---- Processing '" << model << "'\n";
        cout << "============================================================================\n";
        try
341
        {
342 343 344
            if (visualize)
            {
                shared_ptr<Function> f = deserialize(model);
345 346
                auto model_file_name = ngraph::file_util::get_file_name(model) +
                                       (dot_file ? ".dot" : ngraph::file_util::get_file_ext(model));
347 348

                pass::Manager pass_manager;
349
                pass_manager.register_pass<pass::VisualizeTree>(model_file_name, nullptr, true);
350 351
                pass_manager.run_passes(f);
            }
352

353
            if (statistics)
354
            {
355 356
                shared_ptr<Function> f = deserialize(model);

357 358 359 360 361
                pass::Manager pass_manager;
                pass_manager.register_pass<pass::Liveness>();
                pass_manager.register_pass<pass::MemoryLayout>();
                pass_manager.run_passes(f);

362
                cout << "\n---- Source Graph Statistics ----\n";
363
                cout << "Total nodes: " << locale_string(f->get_ops().size()) << endl;
364
                size_t total_constant_bytes = 0;
365 366 367 368 369 370 371
                size_t total_parameter_bytes = 0;
                size_t total_result_bytes = 0;
                size_t total_temporary_bytes = 0;
                size_t total_constant_count = 0;
                size_t total_parameter_count = 0;
                size_t total_result_count = 0;
                size_t total_temporary_count = 0;
372 373 374 375
                unordered_map<string, size_t> op_list;
                set<string> type_list;
                for (shared_ptr<Node> node : f->get_ordered_ops())
                {
376 377 378 379 380
                    for (descriptor::Tensor* tensor : node->liveness_new_list)
                    {
                        total_temporary_bytes += tensor->size();
                        total_temporary_count++;
                    }
381
                    string op_name = node->description();
382
                    string shape_name = "{" + join(node->output(0).get_shape()) + "}";
383 384 385 386 387 388 389
                    op_list[op_name + shape_name]++;
                    auto et = get_op_element_type(*node);
                    string type_string = et.c_type_string();
                    type_list.insert(type_string);

                    if (op_name == "Constant")
                    {
390
                        total_constant_count++;
391 392
                        const Shape& shape = node->output(0).get_shape();
                        size_t const_size = node->output(0).get_element_type().size();
393 394 395 396 397 398 399
                        if (shape.size() == 0)
                        {
                            total_constant_bytes += const_size;
                        }
                        else
                        {
                            total_constant_bytes +=
400
                                (const_size * shape_size(node->output(0).get_shape()));
401 402
                        }
                    }
403 404 405 406 407 408 409 410 411 412 413 414 415 416
                    else if (op_name == "Parameter")
                    {
                        total_parameter_count++;
                        const Shape& shape = node->output(0).get_shape();
                        size_t size = node->output(0).get_element_type().size() * shape_size(shape);
                        total_parameter_bytes += size;
                    }
                    else if (op_name == "Result")
                    {
                        total_result_count++;
                        const Shape& shape = node->input(0).get_shape();
                        size_t size = node->input(0).get_element_type().size() * shape_size(shape);
                        total_result_bytes += size;
                    }
417 418
                }
                cout << "--\n";
419 420 421 422 423 424 425 426 427 428
                cout << "Total Constant size: " << locale_string(total_constant_bytes)
                     << " bytes in " << total_constant_count << " constants\n";
                cout << "Total Parameter size: " << locale_string(total_parameter_bytes)
                     << " bytes in " << total_parameter_count << " parameters\n";
                cout << "Total Result size: " << locale_string(total_result_bytes) << " bytes in "
                     << total_result_count << " results\n";
                cout << "Total Temporary size: " << locale_string(total_temporary_bytes)
                     << " bytes in " << total_temporary_count << " temporaries\n";
                cout << "Temporary size with reuse : "
                     << locale_string(f->get_temporary_pool_size()) << " bytes\n";
429 430 431
                cout << "--\n";
                cout << "Types used:\n";
                for (const string& type : type_list)
432
                {
433
                    cout << "    " << type << "\n";
434
                }
435
                cout << "--\n";
436
                for (auto& op_info : op_list)
437
                {
438
                    cout << op_info.first << ": " << op_info.second << " ops" << endl;
439 440
                }
            }
441 442

            if (!backend.empty())
443
            {
444 445
                cout << "\n---- Benchmark ----\n";
                shared_ptr<Function> f = deserialize(model);
446 447 448
                vector<runtime::PerformanceCounter> perf_data;
                if (double_buffer)
                {
449
                    perf_data = run_benchmark_pipelined(
450 451 452 453 454 455 456
                        f, backend, iterations, timing_detail, warmup_iterations, copy_data);
                }
                else
                {
                    perf_data = run_benchmark(
                        f, backend, iterations, timing_detail, warmup_iterations, copy_data);
                }
457 458 459
                auto perf_shape = to_perf_shape(f, perf_data);
                aggregate_perf_data.insert(
                    aggregate_perf_data.end(), perf_shape.begin(), perf_shape.end());
460
                print_results(perf_shape, timing_detail);
461
            }
462
        }
463
        catch (ngraph::unsupported_op& ue)
464
        {
465
            cout << "Unsupported op '" << ue.what() << "' in model " << model << endl;
466
            rc += 1;
467
        }
468
        catch (exception& e)
469
        {
470
            cout << "Exception caught on '" << model << "'\n" << e.what() << endl;
471
            rc += 1;
472
        }
473 474
    }

475 476 477 478 479 480 481 482 483
    if (models.size() > 1)
    {
        cout << "\n";
        cout << "============================================================================\n";
        cout << "---- Aggregate over all models\n";
        cout << "============================================================================\n";
        print_results(aggregate_perf_data, timing_detail);
    }

484
    return rc;
485
}