//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#include <array>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <thread>

#include "benchmark.hpp"
#include "benchmark_utils.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"

using namespace std;
using namespace ngraph;

// Everything one pipeline stage needs for a single in-flight call: the host-side
// buffers that are copied to/from the backend, and the backend tensors handed to
// the executable. Entry k of parameter_data pairs with entry k of input_tensors,
// and entry k of result_data pairs with entry k of output_tensors.
class TensorCollection
{
public:
    // Host copies of the input values, written into input_tensors before a call.
    vector<shared_ptr<runtime::HostTensor>> parameter_data;
    // Host buffers that receive the contents of output_tensors after a call.
    vector<shared_ptr<runtime::HostTensor>> result_data;

    // Backend tensors passed to the executable as arguments.
    vector<shared_ptr<runtime::Tensor>> input_tensors;
    // Backend tensors passed to the executable as results.
    vector<shared_ptr<runtime::Tensor>> output_tensors;
};

static mutex s_mutex;
static condition_variable s_condition;
static size_t current_iteration = 0;
Robert Kimball's avatar
Robert Kimball committed
49 50
static size_t s_iterations;
static size_t s_warmup_iterations;
Robert Kimball's avatar
Robert Kimball committed
51
static stopwatch s_timer;
Robert Kimball's avatar
Robert Kimball committed
52

53 54
// Worker loop for one pipeline stage.
//
// Stages take turns calling the executable: the stage whose index equals the low
// bit of current_iteration runs next. Input staging and result read-back happen
// outside s_mutex so they can overlap with the other stage's call.
//
// exec            executable invoked once for every iteration owned by this stage
// tensors         host buffers and backend tensors used by this stage
// pipeline_stage  index of this stage; turn-taking uses (current_iteration & 1),
//                 so only stages 0 and 1 are ever scheduled
static void
    thread_entry(runtime::Executable* exec, const TensorCollection& tensors, size_t pipeline_stage)
{
    const vector<shared_ptr<runtime::Tensor>>& args = tensors.input_tensors;
    const vector<shared_ptr<runtime::Tensor>>& results = tensors.output_tensors;
    // Both counts are assigned before the stage threads are created and never
    // change while they run, so they can be read once without the lock.
    const size_t total_iterations = s_iterations + s_warmup_iterations;
    bool data_written = false;

    while (true)
    {
        if (!data_written)
        {
            // Stage the inputs for this stage's next call before taking the lock.
            for (size_t arg_index = 0; arg_index < args.size(); arg_index++)
            {
                const shared_ptr<runtime::Tensor>& arg = args[arg_index];
                if (arg->get_stale())
                {
                    const shared_ptr<runtime::HostTensor>& data = tensors.parameter_data[arg_index];
                    arg->write(data->get_data_ptr(),
                               data->get_element_count() * data->get_element_type().size());
                }
            }
            data_written = true;
        }

        unique_lock<mutex> lock(s_mutex);
        // current_iteration is only ever examined while holding s_mutex. The
        // predicate form also absorbs spurious wakeups and guarantees that no
        // stage starts an iteration beyond total_iterations.
        s_condition.wait(lock, [&] {
            return current_iteration >= total_iterations ||
                   (current_iteration & 1) == pipeline_stage;
        });
        if (current_iteration >= total_iterations)
        {
            break;
        }

        // Remember which iteration this is while the counter is still protected;
        // once the lock is released the other stage may advance it.
        const size_t iteration = current_iteration;
        if (iteration == s_warmup_iterations)
        {
            s_timer.start();
        }
        // our turn to run
        exec->call(results, args);
        current_iteration++;
        data_written = false;
        s_condition.notify_all();
        lock.unlock();

        for (size_t result_index = 0; result_index < results.size(); result_index++)
        {
            const shared_ptr<runtime::HostTensor>& data = tensors.result_data[result_index];
            const shared_ptr<runtime::Tensor>& result = results[result_index];
            result->read(data->get_data_ptr(),
                         data->get_element_count() * data->get_element_type().size());
        }
        // Stop timing once the final iteration's results have been read back.
        // With zero timed iterations the timer was never started, so leave it alone.
        if (s_iterations > 0 && iteration + 1 == total_iterations)
        {
            s_timer.stop();
        }
    }
}

// Compiles `f` for the named backend and runs it with a two-stage pipeline, one
// thread per stage, printing the compile time and the average time per iteration.
//
// f                  function to compile and benchmark
// backend_name       name passed to runtime::Backend::create
// iterations         number of timed iterations
// timing_detail      forwarded to Backend::compile
// warmup_iterations  untimed iterations run first; negative values are treated as 0
// (unnamed bool)     copy_data, unused by the pipelined benchmark
//
// Returns the performance data reported by the executable after the run.
vector<runtime::PerformanceCounter> run_benchmark_pipelined(shared_ptr<Function> f,
                                                            const string& backend_name,
                                                            size_t iterations,
                                                            bool timing_detail,
                                                            int warmup_iterations,
                                                            bool /* copy_data */)
{
    constexpr size_t pipeline_depth = 2;

    // Publish the run parameters before any stage thread exists. The iteration
    // counter is reset so that a second call in the same process starts from zero,
    // and a negative warmup count is clamped instead of wrapping to a huge size_t.
    s_iterations = iterations;
    s_warmup_iterations = warmup_iterations > 0 ? static_cast<size_t>(warmup_iterations) : 0;
    current_iteration = 0;

    array<TensorCollection, pipeline_depth> tensor_collections;
    stopwatch timer;
    timer.start();
    auto backend = runtime::Backend::create(backend_name);
    auto exec = backend->compile(f, timing_detail);
    timer.stop();
    stringstream ss;
    ss.imbue(locale(""));
    ss << "compile time: " << timer.get_milliseconds() << "ms" << endl;
    set_denormals_flush_to_zero();

    // Create random input data for all input tensors
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        for (const auto& param : f->get_parameters())
        {
            auto tensor_data =
                make_shared<runtime::HostTensor>(param->get_element_type(), param->get_shape());
            random_init(tensor_data);
            tensor_collections[i].parameter_data.push_back(tensor_data);
        }
    }

    // Create host buffers that receive the output of every Result
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        for (const auto& result : f->get_results())
        {
            auto tensor_data =
                make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape());
            tensor_collections[i].result_data.push_back(tensor_data);
        }
    }

    // Create input tensors for all Parameters, one per pipeline stage
    const size_t parameter_count = f->get_parameters().size();
    for (size_t input_index = 0; input_index < parameter_count; input_index++)
    {
        auto input_tensors = exec->create_input_tensor(input_index, pipeline_depth);
        for (size_t i = 0; i < pipeline_depth; i++)
        {
            tensor_collections[i].input_tensors.push_back(input_tensors[i]);
        }
    }

    // Create output tensors for all Results, one per pipeline stage
    const size_t result_count = f->get_results().size();
    for (size_t output_index = 0; output_index < result_count; output_index++)
    {
        auto output_tensors = exec->create_output_tensor(output_index, pipeline_depth);
        for (size_t i = 0; i < pipeline_depth; i++)
        {
            tensor_collections[i].output_tensors.push_back(output_tensors[i]);
        }
    }

    // One thread per stage. tensor_collections outlives the threads because they
    // are joined below, so each stage can borrow its collection instead of copying it.
    array<thread, pipeline_depth> threads;
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        threads[i] = thread(thread_entry, exec.get(), cref(tensor_collections[i]), i);
    }

    for (size_t i = 0; i < pipeline_depth; i++)
    {
        threads[i].join();
    }

    const float elapsed_ms = s_timer.get_milliseconds();
    ss << elapsed_ms / iterations << "ms per iteration" << endl;
    cout << ss.str();

    vector<runtime::PerformanceCounter> perf_data = exec->get_performance_data();
    return perf_data;
}