// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <list>
#include <set>
#include <unordered_set>
#include <sstream>
#include <api/cldnn.hpp>
#include <api/network.hpp>
#include <api/profiling.hpp>
#include <api/custom_gpu_primitive.hpp>
#include <chrono>
#include <cmath>
#include <algorithm>
#include "cldnn_graph.h"
#include "simple_math.h"
#include <description_buffer.hpp>
#include <cldnn/cldnn_config.hpp>
#include <graph_tools.hpp>
#include <ie_layers_internal.hpp>
#include <net_pass.h>
#include "cldnn_infer_request.h"
#include <threading/ie_executor_manager.hpp>
#include "details/caseless.hpp"
#include <fstream>
#include <utility>
#include <sys/types.h>
#include <sys/stat.h>
#include <exec_graph_info.hpp>

using namespace InferenceEngine;
using namespace InferenceEngine::details;

namespace CLDNNPlugin {

CLDNNGraph::CLDNNGraph(InferenceEngine::ICNNNetwork& network, gpu::ClContext::Ptr context, Config config, uint16_t stream_id)
    : m_context(context)
    , m_networkName(network.getName())
    , m_config(config)
    , m_stream_id(stream_id) {
    m_program = std::make_shared<Program>(network, GetEngine(), m_config);
    Build();
}

CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
        : m_context(graph->m_context)
        , m_program(graph->m_program)
        , m_networkName(graph->m_networkName)
        , m_config(graph->m_config)
        , m_stream_id(stream_id) {
    Build();
}

void CLDNNGraph::UpdateLayersMaps() {
    primitiveIDs = m_program->primitiveIDs;
    primitivesToIRLayersMap = m_program->primitivesToIRLayersMap;
    prevPrimitiveIDs = m_program->prevPrimitiveIDs;
    profilingIDs = m_program->profilingIDs;
    perfMap = m_program->perfMap;
    outputDims = m_program->outputDims;
}

void CLDNNGraph::Build() {
    UpdateLayersMaps();

    if (GetMaxDynamicBatchSize() > 1) {
        int m_bv_sz = m_program->GetMaxBatchSizeForSingleProgram();
        for (int b = m_bv_sz - 1; b >= 0; b--) {
            auto network = BuildNetwork(m_program->getCompiledProgram(b));
            m_networks.insert(m_networks.begin(), network);
            GetEngine()->release_pending_memory(network->get_id());
        }
    } else {
        auto network = BuildNetwork(m_program->getCompiledProgram());
        m_networks.emplace_back(network);
        GetEngine()->release_pending_memory(network->get_id());
    }

    UpdateImplementationsMap();
}

std::shared_ptr<cldnn::network> CLDNNGraph::BuildNetwork(std::shared_ptr<cldnn::program> program) {
    auto network = std::make_shared<cldnn::network>(*program, m_stream_id);

    if (!m_config.graph_dumps_dir.empty() && m_stream_id == 0) {
        static int net_id = 0;
        auto steps_info = network->get_optimization_steps_info();
        size_t step_idx = 0;
        for (auto& step : steps_info) {
            CNNNetwork net(GetExecGraphInfoByPrimitivesInfo(step.second, true));
            net.serialize(m_config.graph_dumps_dir + std::to_string(net_id) + "_" +
                          std::to_string(step_idx) + "_" + step.first + "_graph.xml");
            step_idx++;
        }
        net_id++;
    }

    return network;
}

InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& primitives_info,
                                                                               bool filter_const_primitives) {
    auto net = std::make_shared<details::CNNNetworkImpl>();
    net->setPrecision(Precision::FP32);
    net->setName("runtime_gpu_graph");
    if (m_config.useProfiling) {
        try {
            // Update may throw an exception for step-by-step runtime graph dump,
            // since network->get_executed_primitives() method can't be called before network execution
            UpdatePerfStatistics();
        } catch (std::exception&) {
        }
    }

    std::vector<std::pair<cldnn::primitive_info, CNNLayerPtr>> node2layer;

    auto data_type_to_precision = [](cldnn::data_types dt) {
        switch (dt) {
            case cldnn::data_types::bin: return Precision::BIN;
            case cldnn::data_types::f32: return Precision::FP32;
            case cldnn::data_types::f16: return Precision::FP16;
            case cldnn::data_types::i32: return Precision::I32;
            case cldnn::data_types::u8: return Precision::U8;
            case cldnn::data_types::i8: return Precision::I8;
            default: return Precision::UNSPECIFIED;
        }
    };

    auto to_IE_type_name = [](const std::string& cldnn_name) -> std::string{
        static std::map<std::string, std::string> type_n2l {
                { "activation", "Activation" },
                { "arg_max_min", "ArgMax" },
                { "average_unpooling", "AverageUnpooling" },
                { "batch_norm", "BatchNormalization" },
                { "binary_convolution", "BinaryConvolution" },
                { "border", "Pad" },
                { "concatenation", "Concat" },
                { "convolution", "Convolution" },
                { "deformable_convolution", "DeformableConvolution" },
                { "crop", "Crop" },
                { "custom_gpu_primitive", "CustomGPUPrimitive" },
                { "data", "Const" },
                { "deconvolution", "Deconvolution" },
                { "depth_to_space", "DepthToSpace" },
                { "detection_output", "DetectionOutput" },
                { "eltwise", "Eltwise" },
                { "fully_connected", "FullyConnected" },
                { "gather", "Gather" },
                { "gemm", "Gemm" },
                { "input_layout", "Input" },
                { "lrn", "LRN" },
                { "lstm", "LSTM" },
                { "lstm_elt", "LSTM_Eltwise" },
                { "lstm_gemm", "LSTM_Gemm" },
                { "mvn", "MVN" },
                { "normalize", "Normalize" },
                { "permute", "Permute" },
                { "pooling", "Pooling" },
                { "prior_box", "PriorBox" },
                { "proposal", "Proposal" },
                { "quantize", "Quantize" },
                { "region_yolo", "RegionYolo" },
                { "reorder", "Reorder" },
                { "reorg_yolo", "ReorgYolo" },
                { "reshape", "Reshape" },
                { "reverse_sequence", "ReverseSequence" },
                { "roi_pooling", "ROIPooling" },
                { "scale", "ScaleShift" },
                { "shuffle_channels", "ShuffleChannels" },
                { "softmax", "SoftMax" },
                { "split", "Split" },
                { "strided_slice", "StridedSlice" },
                { "tile", "Tile" },
                { "resample", "Resample" },
                { "interp", "Interp" },
                { "reduce_max", "ReduceMax" },
                { "reduce_min", "ReduceMin" },
                { "reduce_mean", "ReduceMean" },
                { "reduce_prod", "ReduceProd" },
                { "reduce_sum", "ReduceSum" },
                { "reduce_and", "ReduceAnd" },
                { "reduce_or", "ReduceOr" },
                { "reduce_sum_square", "ReduceSumSquare" },
                { "reduce_l1", "ReduceL1" },
                { "reduce_l2", "ReduceL2" },
                { "reduce_log_sum", "ReduceLogSum" },
                { "reduce_log_sum_exp", "ReduceLogSumExp" }
        };

        if (type_n2l.find(cldnn_name) != type_n2l.end())
            return type_n2l.at(cldnn_name);

        return cldnn_name;
    };

    auto concat_strings = [](std::vector<std::string> strs, char sep) -> std::string {
        if (strs.empty())
            return "";

        std::string res = strs[0];
        for (size_t i = 1; i < strs.size(); i++) {
            res += sep + strs[i];
        }

        return res;
    };

    auto remove_type_from_name = [](const std::string& name) -> std::string {
        auto it = std::find(name.begin(), name.end(), ':');
        if (it == name.end() || (it + 1) == name.end())
            return name;

        return std::string((it+1), name.end());
    };

    auto find_origin_layers = [&](const std::string& name) -> std::vector<std::string> {
        if (primitivesToIRLayersMap.find(name) == primitivesToIRLayersMap.end())
            return {};

        return primitivesToIRLayersMap.at(name);
    };

    auto create_layer = [&](const cldnn::primitive_info& prim_info) -> CNNLayer::Ptr {
        CNNLayer::Ptr layer(new CNNLayer({"name", "type", Precision::UNSPECIFIED}));

        layer->name = remove_type_from_name(prim_info.original_id);
        layer->type = to_IE_type_name(prim_info.type_id);
        layer->precision = data_type_to_precision(prim_info.output_layout.data_type);
        std::vector<std::string> originalNames{find_origin_layers(prim_info.original_id)};
        for (auto& fused_id : prim_info.c_fused_ids) {
            for (auto& origin_id : find_origin_layers(fused_id)) {
                if (std::find(originalNames.begin(), originalNames.end(), origin_id) == originalNames.end())
                    originalNames.push_back(origin_id);
            }
        }

        layer->params[ExecGraphInfoSerialization::ORIGINAL_NAMES] = concat_strings(originalNames, ',');
        layer->params[ExecGraphInfoSerialization::IMPL_TYPE] = prim_info.kernel_id;
        layer->params[ExecGraphInfoSerialization::OUTPUT_PRECISIONS] = layer->precision.name();
        std::string exec_time = "not_executed";
        if (perfMap.find(prim_info.original_id) != perfMap.end()) {
            auto perfCounter = perfMap.at(prim_info.original_id).second;
            if (perfCounter.num > 0) {
                exec_time = std::to_string(perfCounter.realTime_avg());
            }
        }

        layer->params[ExecGraphInfoSerialization::PERF_COUNTER] = exec_time;
        layer->params[ExecGraphInfoSerialization::OUTPUT_LAYOUTS] = prim_info.layout_str;
        layer->params[ExecGraphInfoSerialization::EXECUTION_ORDER] = std::to_string(prim_info.exec_id);

        node2layer.emplace_back(prim_info, layer);

        size_t in_size = prim_info.c_dependencies.size();

        if (filter_const_primitives) {
            // Decrease expected dependencies count if there is a const input without original id in the IR
            for (auto& dep : prim_info.c_dependencies) {
                auto it = std::find_if(primitives_info.begin(), primitives_info.end(), [&](cldnn::primitive_info& entry) {
                    return entry.original_id == dep;
                });

                if (it == primitives_info.end())
                    --in_size;

                if (it->type_id == "data") {
                    std::vector<std::string> childOriginalNames{find_origin_layers(prim_info.original_id)};
                    --in_size;
                }
            }
        }
        layer->insData.resize(in_size);
        layer->outData.resize(prim_info.c_users.size());

        return layer;
    };

    if (filter_const_primitives) {
        for (auto& pi : primitives_info) {
            // extract mutable_data primitives and connect it's dependencies and users directly
            if (pi.type_id == "mutable_data") {
                if (pi.c_dependencies.size() == 1 && !pi.c_users.empty()) {
                    auto dep = pi.c_dependencies[0];
                    auto users = pi.c_users;
                    auto it = std::find_if(primitives_info.begin(), primitives_info.end(), [&](cldnn::primitive_info& entry) {
                        return entry.original_id == dep;
                    });
                    if (it == primitives_info.end())
                        continue;

                    auto& dep_users = it->c_users;
                    // Remove mutable data from users list
                    dep_users.erase(std::find_if(dep_users.begin(), dep_users.end(), [&](std::string user_id) {
                        return user_id == pi.original_id;
                    }));

                    // Add mutable data users to it's dependency users
                    dep_users.insert(dep_users.end(), users.begin(), users.end());

                    for (auto& user : users) {
                        it = std::find_if(primitives_info.begin(), primitives_info.end(), [&](cldnn::primitive_info& entry) {
                            return entry.original_id == user;
                        });
                        if (it == primitives_info.end())
                            continue;

                        for (auto& d : it->c_dependencies) {
                            if (d == pi.original_id)
                                d = dep;
                        }
                    }
                }
            }
        }
    }

    for (auto& pi : primitives_info) {
        if (filter_const_primitives) {
            // Skip const inputs
            if (pi.type_id == "data") {
                continue;
            }

            // Skip mutable_data
            if (pi.type_id == "mutable_data" &&
                pi.c_dependencies.size() == 1 &&
                !pi.c_users.empty()) {
                continue;
            }
        }
        auto layer = create_layer(pi);
        net->addLayer(layer);
    }

    auto desc_from_layout = [&](cldnn::layout layout) -> TensorDesc {
        Precision precision = data_type_to_precision(layout.data_type);
        SizeVector dims;
        Layout l = Layout::NCHW;
        auto size = layout.size;
        if (layout.format.dimension() == 4) {
            dims = {static_cast<size_t>(size.batch[0]),
                    static_cast<size_t>(size.feature[0]),
                    static_cast<size_t>(size.spatial[1]),
                    static_cast<size_t>(size.spatial[0])};
        } else if (layout.format.dimension() == 5) {
            dims = {static_cast<size_t>(size.batch[0]),
                    static_cast<size_t>(size.feature[0]),
                    static_cast<size_t>(size.spatial[2]),
                    static_cast<size_t>(size.spatial[1]),
                    static_cast<size_t>(size.spatial[0])};
            l = Layout::NCDHW;
        } else if (layout.format.dimension() == 6) {
            dims = {static_cast<size_t>(size.batch[0]),
                    static_cast<size_t>(size.feature[0]),
                    static_cast<size_t>(size.spatial[3]),
                    static_cast<size_t>(size.spatial[2]),
                    static_cast<size_t>(size.spatial[1]),
                    static_cast<size_t>(size.spatial[0])};
            // Should be NC?DHW but there is no such layout yet
            l = Layout::BLOCKED;
        }
        TensorDesc dst{precision, dims, l};
        return dst;
    };

    for (auto& pair : node2layer) {
        auto pi = pair.first;
        auto layer = pair.second;
        auto user_ids = pi.c_users;
        for (int i = 0; i < user_ids.size(); i++) {
            auto it = std::find_if(node2layer.begin(), node2layer.end(), [&](std::pair<cldnn::primitive_info, CNNLayerPtr>& entry) {
                return entry.first.original_id == user_ids[i];
            });

            if (it == node2layer.end())
                continue;

            auto& child_layer = it->second;

            DataPtr data;
            if (i < layer->outData.size()) {
                std::string data_name = pi.original_id + "_out" + std::to_string(i);
                layer->outData[i] = std::make_shared<Data>(data_name, desc_from_layout(pi.output_layout));
                data = layer->outData[i];
                data->getCreatorLayer() = layer;
            } else {
                data = layer->outData[0];
            }

            int in_port_id = 0;
            for (auto& dep : it->first.c_dependencies) {
                if (filter_const_primitives) {
                    auto it = std::find_if(node2layer.begin(), node2layer.end(), [&](std::pair<cldnn::primitive_info, CNNLayerPtr>& entry) {
                        return entry.first.original_id == dep;
                    });

                    if (it == node2layer.end())
                        continue;
                }

                if (dep == pi.original_id && child_layer->insData[in_port_id].lock() == nullptr) {
                    data->getInputTo()[child_layer->name] = child_layer;
                    child_layer->insData[in_port_id] = data;
                    break;
                }
                in_port_id++;
            }
        }
    }
    // Specify inputs data
    for (auto& pair : node2layer) {
        auto pi = pair.first;
        auto layer = pair.second;
        if (pi.c_dependencies.size() != 0)
            continue;

        auto in_info = std::make_shared<InputInfo>();
        if (layer->outData.empty())
            continue;

        auto dt = layer->outData[0];
        auto tensor_desc = desc_from_layout(pi.output_layout);

        dt->setDims(tensor_desc.getDims());
        dt->setPrecision(tensor_desc.getPrecision());
        dt->setLayout(tensor_desc.getLayout());

        in_info->setInputData(dt);
        net->setInputInfo(in_info);
    }

    return net;
}

void CLDNNGraph::GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) {
    auto primitives_info = GetNetwork()->get_primitives_info();
    graphPtr = GetExecGraphInfoByPrimitivesInfo(primitives_info, true);
}


void CLDNNGraph::UpdatePerfStatistics() {
    if (GetNetworksCount() == 0) {
        return;
    }

    // Collect timings
    auto collectTimings = [](cldnn::instrumentation::profiling_info& cldnnInfo, PerfCounter& pc) {
        for (auto &interval : cldnnInfo.intervals) {
            using duration_t = std::chrono::duration<long long, std::chrono::microseconds::period>;
            auto count = std::chrono::duration_cast<duration_t>(interval.value->value()).count();

            if (interval.name == "submission") {
                pc.cpu_uSec += count;
            } else if (interval.name == "executing") {
                pc.realTime_uSec += count;
            } else if (interval.name == "duration") {  // "duration" is used for CPU layers
                pc.cpu_uSec += count;

                if (pc.num == 0)
                    pc.isCPU = true;
            }
        }
    };

    std::map<cldnn::primitive_id, cldnn::event> executedPrimitives = GetNetwork()->get_executed_primitives();
    auto allPrimitives = GetNetwork()->get_all_primitives();

    // Get profiling info for all layers
    for (auto &profiledID : profilingIDs) {
        auto pcIter = perfMap.find(profiledID);

        if (pcIter == perfMap.end())  continue;

        auto execIter = executedPrimitives.find(profiledID);
        auto& perfCount = pcIter->second.second;
        // Change status if layer wasn't executed by cldnn engine
        if (execIter == executedPrimitives.end()) {
            if (perfCount.num == 0) {
                perfCount.status = InferenceEngineProfileInfo::OPTIMIZED_OUT;
            }
            continue;
        }

        auto event = execIter->second;
        executedPrimitives.erase(execIter);

        cldnn::instrumentation::profiling_info cldnnInfo{profiledID, event.get_profiling_info()};

        collectTimings(cldnnInfo, perfCount);
        perfCount.num++;
    }

    for (auto &executedID : executedPrimitives) {
        auto pcIter = perfMap.find(executedID.first);
        if (pcIter == perfMap.end()) {
            perfMap[executedID.first].first = executedID.first;
            pcIter = perfMap.find(executedID.first);
            auto& perfCount = pcIter->second.second;

            cldnn::instrumentation::profiling_info cldnnInfo{executedID.first, executedID.second.get_profiling_info()};

            collectTimings(cldnnInfo, perfCount);
            perfCount.num++;
        }
    }
}

bool CLDNNGraph::IsLoaded() const {
    return GetNetwork() != nullptr;
}

void CLDNNGraph::UpdateImplementationsMap() {
    if (m_config.useProfiling) {
        auto extractImplementationFromInfo = [](const std::string& info) -> std::string {
            std::string def_implementation = "undef";
            std::string impl_section = "implementation :";
            std::string::size_type pos = info.find(impl_section);
            if (pos == std::string::npos) {
                return def_implementation;
            }

            std::string::size_type end_pos = info.find(',', pos);
            if (end_pos == std::string::npos) {
                return def_implementation;
            }

            std::string::size_type length = end_pos - pos - impl_section.size();

            auto trim = [](const std::string& str) {
                size_t first = str.find_first_not_of(' ');
                if (std::string::npos == first) {
                    return str;
                }
                size_t last = str.find_last_not_of(' ');
                return str.substr(first, (last - first + 1));
            };
            std::string tmp = trim(info.substr(pos + impl_section.size(), length));

            return tmp.length() > 1 ? tmp : def_implementation;
        };

        // Parse primitive info and extract implementation name.
        for (auto& id : profilingIDs) {
            std::string prim_info = "";
            try {
                prim_info = GetNetwork()->get_primitive_info(id);
            } catch (std::exception& /*e*/) { }

            implementationsMap.insert({id, extractImplementationFromInfo(prim_info)});
        }
    }
}

void CLDNNGraph::GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &result) const {
    bool combinePrimByIRLayers = false;
    unsigned i = 0;
    auto allIds = GetNetwork()->get_all_primitive_org_ids();
    auto executedPrimitives = GetNetwork()->get_executed_primitives();
    auto primitivesInfo = GetNetwork()->get_primitives_info();

    auto getUpperCaseName = [&](std::string name) {
        if (name.length() > 0)
            name[0] = toupper(name[0]);
        return name;
    };

    auto getFromProfiling = [&](std::string primId) -> bool {
        auto perfIter = perfMap.find(primId);

        if (perfIter == perfMap.end())  return false;

        const auto& layerName = perfIter->second.first;
        if (layerName.length() == 0)  // no layer directly associated
            return false;

        const auto& perfCounter = perfIter->second.second;

        if (!perfCounter.parentPrimitive.empty() && combinePrimByIRLayers)
            return false;

        auto& extPerfEntry = result[layerName];

        memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type));
        if (perfCounter.isCPU) {
            static const std::string cpuExecType("CPU");
            cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length());  // Override execType as CPU
        } else {
            std::string impl = implementationsMap.at(primId);
            impl.copy(extPerfEntry.exec_type, impl.length());
        }

        extPerfEntry.execution_index = i++;
        extPerfEntry.status = perfCounter.status;
        extPerfEntry.cpu_uSec = perfCounter.cpu_avg();
        extPerfEntry.realTime_uSec = perfCounter.realTime_avg();

        if (combinePrimByIRLayers) {
            std::string kernelId = "";
            long long kernelTime = 0;  // used for finding the most complex computation kernel in sub_graph for perf stat
            for (auto &id : profilingIDs) {
                auto iter = perfMap.find(id);
                if (iter == perfMap.end())  continue;

                const auto &pc = iter->second.second;
                if (id != primId && pc.parentPrimitive == primId) {
                    extPerfEntry.cpu_uSec += pc.cpu_avg();
                    extPerfEntry.realTime_uSec += pc.realTime_avg();
                    if (pc.realTime_avg() > kernelTime) {
                        kernelTime = pc.realTime_avg();
                        kernelId = id;
                    }
                    allIds.erase(std::find(allIds.begin(), allIds.end(), id));
                }
            }
            if (!kernelId.empty())
                implementationsMap.at(kernelId).copy(extPerfEntry.exec_type, implementationsMap.at(kernelId).length());
        }

        getUpperCaseName(perfCounter.layerType).copy(extPerfEntry.layer_type, perfCounter.layerType.length());
        return true;
    };

    // Step 1. Get all primitives in execution order which was added by clDNNPlugin
    for (auto& primId : profilingIDs) {
        getFromProfiling(primId);
    }

    // Step 2. Find all other primitives which was added while optimization process and executed after
    for (auto& primId : allIds) {
        auto perfIter = perfMap.find(primId);
        if (perfIter == perfMap.end())  continue;

        bool existInProfiling = std::find(profilingIDs.begin(), profilingIDs.end(), primId) != profilingIDs.end();
        if ((!existInProfiling || (existInProfiling && perfIter->second.first.length() == 0)) &&
            executedPrimitives.find(primId) != executedPrimitives.end()) {
            auto event = executedPrimitives.at(primId);

            cldnn::instrumentation::profiling_info cldnnInfo{primId, event.get_profiling_info()};

            // Collect timings
            long long cpuTime = 0;
            long long deviceTime = 0;

            for (auto &interval : cldnnInfo.intervals) {
                using duration_t = std::chrono::duration<long long, std::chrono::microseconds::period>;
                auto count = std::chrono::duration_cast<duration_t>(interval.value->value()).count();

                if (interval.name == "submission") {
                    cpuTime += count;
                } else if (interval.name == "executing") {
                    deviceTime += count;
                } else if (interval.name == "duration") {  // "duration" is used for CPU layers
                    cpuTime += count;
                }
            }

            std::string layerName = primId;
            if (primId.find(":") != std::string::npos) {
                layerName = primId.substr(primId.find(":") + 1, primId.length());
            }

            for (auto& pi : primitivesInfo) {
                if (pi.original_id == primId) {
                    if (pi.type_id == "mutable_data")
                        continue;

                    auto& extPerfEntry = result[layerName];

                    if (pi.is_cpu) {
                        static const std::string cpuExecType("CPU");
                        memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type));
                        cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length());  // Override execType as CPU
                    } else {
                        std::string impl = pi.kernel_id;
                        impl.copy(extPerfEntry.exec_type, impl.length());
                    }

                    getUpperCaseName(pi.type_id).copy(extPerfEntry.layer_type, pi.type_id.length());
                    extPerfEntry.execution_index = i++;
                    extPerfEntry.status = InferenceEngineProfileInfo::LayerStatus::EXECUTED;
                    extPerfEntry.cpu_uSec = cpuTime;
                    extPerfEntry.realTime_uSec = deviceTime;

                    if (pi.type_id == "input_layout") {
                        const std::string input_string = "Input";
                        const std::string undef_string = "undef";
                        input_string.copy(extPerfEntry.layer_type, 256);
                        undef_string.copy(extPerfEntry.exec_type, 256);
                    }
                }
            }
        }
    }

    // Step 3. Checking primitives which has been deleted from execution order but added by clDNNPlugin
    for (auto& primId : profilingIDs)
        if (std::find(allIds.begin(), allIds.end(), primId) == allIds.end()) {
            getFromProfiling(primId);
        }
}

std::shared_ptr<cldnn::network> CLDNNGraph::GetNetwork(size_t idx) const {
    if (idx >= GetNetworksCount())
        THROW_IE_EXCEPTION << "Unable to find network with id=" << idx << ". Stored networks count: " << GetNetworksCount();

    return m_networks[idx];
}


std::string CLDNNGraph::MapOutputName(std::string outName) const {
    auto networkOutputsIDs = GetNetwork()->get_output_ids();
    auto allPrimitiveIds = GetNetwork()->get_all_primitives();

    // Find correct output ID. Start with name stored in IR.
    std::string outputID = primitiveIDs.at(outName);
    while (std::find(networkOutputsIDs.begin(), networkOutputsIDs.end(), outputID) == networkOutputsIDs.end()) {
        // If current ID isn't found in cldnn network outputs, get previous primitive id and try again.
        auto prim = allPrimitiveIds.find(outputID);
        if (prim == allPrimitiveIds.end()) {
            THROW_IE_EXCEPTION << "Unknown primitive id " << outputID;
        }

        if (prevPrimitiveIDs.at(outputID).size() != 1 || prim->second != "_optimized_") {
            THROW_IE_EXCEPTION << "Unable to find parent for output primitive " << outputID;
        }
        outputID = prevPrimitiveIDs.at(outputID)[0];
    }

    return outputID;
}

InferenceEngine::SizeVector CLDNNGraph::GetOutputSize(std::string outName) const {
    auto res_output = outputDims.find(outName);

    InferenceEngine::SizeVector sz;
    if (res_output != outputDims.end())
        sz = res_output->second;
    else
        sz = outputDims.at(primitiveIDs.at(outName));

    return sz;
}

};  // namespace CLDNNPlugin