//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

// Tool to benchmark any nGraph JSON model with a given backend.
// Compile and run with:
// g++ ./nbench.cpp -std=c++11 -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -o nbench
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib env NGRAPH_INTERPRETER_EMIT_TIMING=1 ./nbench
// Sample models are under ../../test/models.

#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

#include "benchmark.hpp"
#include "benchmark_pipelined.hpp"
#include "ngraph/distributed.hpp"
#include "ngraph/except.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/pass/visualize_tree.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/backend_manager.hpp"
#include "ngraph/runtime/interpreter/int_backend.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"

using namespace std;
using namespace ngraph;

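// Explicitly register the INTERPRETER backend when it is linked in statically.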
static void configure_static_backends()
{
#ifdef NGRAPH_INTERPRETER_STATIC_LIB_ENABLE
    ngraph::runtime::BackendManager::register_backend(
        "INTERPRETER", ngraph::runtime::interpreter::get_backend_constructor_pointer());
#endif
}

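// A PerformanceCounter paired with the output shape of the node it measures, so
// results can be aggregated per op type and shape.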
class PerfShape : public ngraph::runtime::PerformanceCounter
{
public:
    PerfShape(const runtime::PerformanceCounter& p, Shape s)
        : PerformanceCounter(p)
        , shape(s)
    {
    }
    Shape shape;
};

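// Attach the output shape of each measured node to its performance counter.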
vector<PerfShape> to_perf_shape(shared_ptr<Function> f,
                                const vector<runtime::PerformanceCounter>& perf_data)
{
    vector<PerfShape> result;
    for (const runtime::PerformanceCounter& p : perf_data)
    {
        auto node = p.get_node();
        if (node == nullptr)
        {
            ostringstream os;
            os << "Performance counter refers to a node that is not part of Function \""
               << f->get_name() << "\".";
            throw runtime_error(os.str());
        }

        Shape shape = node->output(0).get_shape();
        result.push_back(PerfShape(p, shape));
    }
    return result;
}

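// Sum the time spent per op type and output shape; the multimap is keyed by total
// time so entries come out sorted.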
multimap<size_t, string> aggregate_timing_details(const vector<PerfShape>& perf_data)
{
    unordered_map<string, size_t> timing;
    unordered_map<string, size_t> count;
    for (const PerfShape& p : perf_data)
    {
        auto node = p.get_node();
        string op = node->description();
        string shape_name = " {" + join(p.shape) + "} ";
        timing[op + shape_name] += p.microseconds();
        count[op + shape_name] += 1;
    }

    multimap<size_t, string> rc;
    for (auto& t : timing)
    {
        rc.insert({t.second, t.first + to_string(count[t.first])});
    }
    return rc;
}

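// Sum the time spent per op type, keyed by total time.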
multimap<size_t, string> aggregate_timing(const vector<PerfShape>& perf_data)
{
    unordered_map<string, size_t> timing;
    for (const PerfShape& p : perf_data)
    {
        auto node = p.get_node();
        string op = node->description();
        timing[op] += p.microseconds();
    }

    multimap<size_t, string> rc;
    for (auto& t : timing)
    {
        rc.insert({t.second, t.first});
    }
    return rc;
}

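// Print one aligned row per aggregate entry, slowest first.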
void print_times(const multimap<size_t, string>& timing)
{
    // set the column widths
    int name_width = 0;
    int time_width = 0;
    for (auto& p : timing)
    {
        name_width = max(name_width, static_cast<int>(p.second.size()));
        time_width = max(time_width, static_cast<int>(locale_string(p.first).size()));
    }
    for (auto it = timing.rbegin(); it != timing.rend(); it++)
    {
        cout << setw(name_width + 2) << left << it->second << " " << setw(time_width + 2) << right
             << it->first << "us\n";
    }
}

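// Aggregate per-op and per-op/shape timings and print them when timing detail is enabled.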
void print_results(vector<PerfShape> perf_data, bool timing_detail)
{
    sort(perf_data.begin(), perf_data.end(), [](const PerfShape& p1, const PerfShape& p2) {
        return p1.total_microseconds() > p2.total_microseconds();
    });
    multimap<size_t, string> timing = aggregate_timing(perf_data);
    multimap<size_t, string> timing_details = aggregate_timing_details(perf_data);

    if (timing_detail)
    {
        cout << "\n---- Aggregate times per op type ----\n";
        print_times(timing);

        cout << "\n---- Aggregate times per op type/shape/count ----\n";
        print_times(timing_details);
    }
}

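// Pick a representative element type for an op: the input element type for Convert
// and the comparison ops (whose outputs are boolean), otherwise the output type.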
element::Type get_op_element_type(const Node& op)
{
    element::Type type;
    if (op.description() == "Convert")
    {
        type = op.input(0).get_element_type();
    }
    else if (op.description() == "Equal" || op.description() == "Greater" ||
             op.description() == "GreaterEq" || op.description() == "Less" ||
             op.description() == "LessEq" || op.description() == "NotEqual")
    {
        // Get the type of the second input, not the first.
        // All BinaryElementwiseComparison ops have the same element type on both inputs;
        // Select has bool as its first input and the type we are interested in as its second.
        type = op.input(1).get_element_type();
    }
    else
    {
        type = op.output(0).get_element_type();
    }
    return type;
}

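// Parse command-line options, then visualize, collect statistics, and/or benchmark
// each requested model.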
int main(int argc, char** argv)
{
    string model_arg;
    string backend;
    string directory;
    int iterations = 10;
    bool failed = false;
    bool statistics = false;
    bool timing_detail = false;
    bool visualize = false;
    int warmup_iterations = 1;
    bool copy_data = true;
    bool dot_file = false;
    bool double_buffer = false;

    configure_static_backends();
    for (size_t i = 1; i < argc; i++)
    {
        string arg = argv[i];
        if (arg == "-f" || arg == "--file")
        {
            model_arg = argv[++i];
        }
        else if (arg == "-b" || arg == "--backend")
        {
            backend = argv[++i];
        }
        else if (arg == "-i" || arg == "--iterations")
        {
            try
            {
                iterations = stoi(argv[++i]);
            }
            catch (...)
            {
                cout << "Invalid Argument\n";
                failed = true;
            }
        }
        else if (arg == "-s" || arg == "--statistics")
        {
            statistics = true;
        }
        else if (arg == "--timing_detail" || arg == "--timing-detail")
        {
            timing_detail = true;
        }
        else if (arg == "--no_copy_data")
        {
            copy_data = false;
        }
        else if (arg == "-v" || arg == "--visualize")
        {
            visualize = true;
        }
        else if (arg == "--dot")
        {
            dot_file = true;
        }
        else if (arg == "-d" || arg == "--directory")
        {
            directory = argv[++i];
        }
        else if (arg == "--double_buffer")
        {
            double_buffer = true;
        }
        else if (arg == "-w" || arg == "--warmup_iterations")
        {
            try
            {
                warmup_iterations = stoi(argv[++i]);
            }
            catch (...)
            {
                cout << "Invalid Argument\n";
                failed = true;
            }
        }
        else
        {
            cout << "Unknown option: " << arg << endl;
            failed = true;
        }
    }
    if (!model_arg.empty() && !file_util::exists(model_arg))
    {
        cout << "File " << model_arg << " not found\n";
        failed = true;
    }
    else if (!directory.empty() && !file_util::exists(directory))
    {
        cout << "Directory " << directory << " not found\n";
        failed = true;
    }
    else if (directory.empty() && model_arg.empty())
    {
        cout << "Either file or directory must be specified\n";
        failed = true;
    }

    if (failed)
    {
        cout << R"###(
DESCRIPTION
    Benchmark an nGraph JSON model with a given backend.

SYNOPSIS
        nbench [-f <filename>] [-b <backend>] [-i <iterations>]

OPTIONS
        -f|--file                 Serialized model file
        -b|--backend              Backend to use (default: CPU)
        -d|--directory            Directory to scan for models. All models are benchmarked.
        -i|--iterations           Iterations (default: 10)
        -s|--statistics           Display op statistics
        -v|--visualize            Visualize a model (WARNING: requires Graphviz installed)
        --timing_detail           Gather detailed timing
        -w|--warmup_iterations    Number of warm-up iterations
        --no_copy_data            Disable copy of input/result data every iteration
        --dot                     Generate Graphviz dot file
        --double_buffer           Double buffer inputs and outputs
)###";
        return 1;
    }

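    // Collect the models to benchmark: every file in the directory, or the single file given.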
    vector<string> models;
    if (!directory.empty())
    {
        vector<PerfShape> aggregate_perf_data;
        file_util::iterate_files(directory,
                                 [&](const string& file, bool is_dir) {
                                     if (!is_dir)
                                     {
                                         models.push_back(file);
                                     }
                                 },
                                 true);
    }
    else
    {
        // The case of a missing model file was already handled above.
        models.push_back(model_arg);
    }

    vector<PerfShape> aggregate_perf_data;
    int rc = 0;
    for (const string& model : models)
    {
        cout << "\n";
        cout << "============================================================================\n";
        cout << "---- Processing '" << model << "'\n";
        cout << "============================================================================\n";
        try
        {
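            // Optionally dump the graph with the VisualizeTree pass.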
            if (visualize)
            {
                shared_ptr<Function> f = deserialize(model);
                auto model_file_name = ngraph::file_util::get_file_name(model) +
                                       (dot_file ? ".dot" : ngraph::file_util::get_file_ext(model));

                pass::Manager pass_manager;
                pass_manager.register_pass<pass::VisualizeTree>(model_file_name, nullptr, true);
                pass_manager.run_passes(f);
            }

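            // Report node counts, tensor sizes, and element types for the source graph.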
            if (statistics)
            {
                shared_ptr<Function> f = deserialize(model);

                pass::Manager pass_manager;
                pass_manager.register_pass<pass::Liveness>();
                pass_manager.register_pass<pass::MemoryLayout>();
                pass_manager.run_passes(f);

                cout << "\n---- Source Graph Statistics ----\n";
                cout << "Total nodes: " << locale_string(f->get_ops().size()) << endl;
                size_t total_constant_bytes = 0;
                size_t total_parameter_bytes = 0;
                size_t total_result_bytes = 0;
                size_t total_temporary_bytes = 0;
                size_t total_constant_count = 0;
                size_t total_parameter_count = 0;
                size_t total_result_count = 0;
                size_t total_temporary_count = 0;
                unordered_map<string, size_t> op_list;
                set<string> type_list;
                for (shared_ptr<Node> node : f->get_ordered_ops())
                {
                    for (descriptor::Tensor* tensor : node->liveness_new_list)
                    {
                        total_temporary_bytes += tensor->size();
                        total_temporary_count++;
                    }
                    string op_name = node->description();
                    string shape_name = "{" + join(node->output(0).get_shape()) + "}";
                    op_list[op_name + shape_name]++;
                    auto et = get_op_element_type(*node);
                    string type_string = et.c_type_string();
                    type_list.insert(type_string);

                    if (op_name == "Constant")
                    {
                        total_constant_count++;
                        const Shape& shape = node->output(0).get_shape();
                        size_t const_size = node->output(0).get_element_type().size();
                        if (shape.size() == 0)
                        {
                            total_constant_bytes += const_size;
                        }
                        else
                        {
                            total_constant_bytes +=
                                (const_size * shape_size(node->output(0).get_shape()));
                        }
                    }
                    else if (op_name == "Parameter")
                    {
                        total_parameter_count++;
                        const Shape& shape = node->output(0).get_shape();
                        size_t size = node->output(0).get_element_type().size() * shape_size(shape);
                        total_parameter_bytes += size;
                    }
                    else if (op_name == "Result")
                    {
                        total_result_count++;
                        const Shape& shape = node->input(0).get_shape();
                        size_t size = node->input(0).get_element_type().size() * shape_size(shape);
                        total_result_bytes += size;
                    }
                }
                cout << "--\n";
                cout << "Total Constant size: " << locale_string(total_constant_bytes)
                     << " bytes in " << total_constant_count << " constants\n";
                cout << "Total Parameter size: " << locale_string(total_parameter_bytes)
                     << " bytes in " << total_parameter_count << " parameters\n";
                cout << "Total Result size: " << locale_string(total_result_bytes) << " bytes in "
                     << total_result_count << " results\n";
                cout << "Total Temporary size: " << locale_string(total_temporary_bytes)
                     << " bytes in " << total_temporary_count << " temporaries\n";
                cout << "Temporary size with reuse : "
                     << locale_string(f->get_temporary_pool_size()) << " bytes\n";
                cout << "--\n";
                cout << "Types used:\n";
                for (const string& type : type_list)
                {
                    cout << "    " << type << "\n";
                }
                cout << "--\n";
                for (auto& op_info : op_list)
                {
                    cout << op_info.first << ": " << op_info.second << " ops" << endl;
                }
            }

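            // Benchmark the model on the requested backend, using the pipelined
            // (double-buffered) path when --double_buffer was given.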
            if (!backend.empty())
            {
                cout << "\n---- Benchmark ----\n";
                shared_ptr<Function> f = deserialize(model);
                vector<runtime::PerformanceCounter> perf_data;
                if (double_buffer)
                {
                    perf_data = run_benchmark_pipelined(
                        f, backend, iterations, timing_detail, warmup_iterations, copy_data);
                }
                else
                {
                    perf_data = run_benchmark(
                        f, backend, iterations, timing_detail, warmup_iterations, copy_data);
                }
                auto perf_shape = to_perf_shape(f, perf_data);
                aggregate_perf_data.insert(
                    aggregate_perf_data.end(), perf_shape.begin(), perf_shape.end());
                print_results(perf_shape, timing_detail);
            }
        }
        catch (ngraph::unsupported_op& ue)
        {
            cout << "Unsupported op '" << ue.what() << "' in model " << model << endl;
            rc += 1;
        }
        catch (exception& e)
        {
            cout << "Exception caught on '" << model << "'\n" << e.what() << endl;
            rc += 1;
        }
    }

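    // With multiple models, also print a combined summary across all of them.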
    if (models.size() > 1)
    {
        cout << "\n";
        cout << "============================================================================\n";
        cout << "---- Aggregate over all models\n";
        cout << "============================================================================\n";
        print_results(aggregate_perf_data, timing_detail);
    }

    return rc;
}