Commit 40c80392 authored by Alexey Suhov's avatar Alexey Suhov

cleanup

parent adc0d278
@@ -159,26 +159,6 @@ if (ENABLE_GNA)
debug_message(STATUS "gna=" ${GNA})
endif()
if (ENABLE_ROCKHOPER)
set(rh_decoder_version "Rockhopper_1.0.0.682")
set(INCLUDE_RH_DECODER "include(\"\$\{IE_ROOT_DIR\}/share/ie_rh_decoder.cmake\")")
RESOLVE_DEPENDENCY(RH_Decoder
ARCHIVE_UNIFIED "${rh_decoder_version}.zip"
TARGET_PATH "${TEMP}/${rh_decoder_version}"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*")
configure_file(
"${IE_MAIN_SOURCE_DIR}/cmake/InitRHDecoder.cmake.in"
"${CMAKE_BINARY_DIR}/share/ie_rh_decoder.cmake"
@ONLY)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_BINARY_DIR}/share)
# for inference engine in-tree build - let's include this finder
include(ie_rh_decoder)
endif()
configure_file(
"${IE_MAIN_SOURCE_DIR}/cmake/share/InferenceEngineConfig.cmake.in"
"${CMAKE_BINARY_DIR}/share/InferenceEngineConfig.cmake"
......
@@ -8,7 +8,6 @@ include (options)
#these options are aimed to optimize build time on development system
ie_option (ENABLE_GNA "GNA support for inference engine" ON)
ie_option (ENABLE_ROCKHOPER "use Rockhopper decoder for converting / output scores" ON)
ie_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ON)
......
# Copyright (C) 2018-2019 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
if(COMMAND init_rh_decoder)
init_rh_decoder()
ie_add_sample(NAME speech_recognition_offline_demo
SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp"
HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/speech_sample.hpp"
INCLUDE_DIRECTORIES "${libRH_Decoder_INCLUDE_DIRS}"
DEPENDENCIES "${libRH_Decoder_LIBRARIES}")
endif()
\ No newline at end of file
# Offline Automatic Speech Recognition C++ Demo
This topic shows how to run the offline speech recognition demo, which demonstrates acoustic model inference and Weighted Finite State Transducer (WFST) language model decoding based on Kaldi\* acoustic neural models, Intel® Rockhopper Trail language models, and speech feature vectors.
## How It Works
The workflow is as follows:
1. The application reads command-line parameters
and loads a Kaldi-trained neural network along with a Kaldi `.ark` speech feature vector file to the Inference Engine plugin.
2. The application performs inference and passes acoustic score vectors to the decoding stage, where the
Intel® Rockhopper Trail decoder translates them into a text transcription (see the sketch below).
3. The application prints the recognized text to the screen.
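The decoding stage (step 2) consumes one acoustic score vector per frame. The following fragment is a minimal sketch of that loop built on the `RhDecoderProcessFrame` and `RhDecoderGetResult` calls used by this demo; `DecodeUtterance` is a hypothetical helper for illustration, and decoder setup, batching, async inference, and error handling are omitted (see `main.cpp` for the full flow):
```cpp
#include <iostream>
#include <vector>
#include "rockhopper_decoder.h"

// Sketch: feed per-frame acoustic scores to an already-initialized decoder
// and print partial results as they stabilize, plus the final result.
void DecodeUtterance(RhDecoderInstanceHandle handle, const float* scores,
                     uint32_t numFrames, uint32_t numScoresPerFrame) {
    RhDecoderInfo info;
    std::vector<char> transcription(1024 * 1024);  // result buffer, as in main.cpp
    for (uint32_t frame = 0; frame < numFrames; ++frame) {
        // One WFST decoding step per frame of acoustic scores.
        RhDecoderProcessFrame(handle, scores, numScoresPerFrame, &info);
        if (info.is_result_stable || frame == numFrames - 1) {
            RhDecoderGetResult(handle, RhDecoderResultType::RH_DECODER_FINAL_RESULT,
                               transcription.data(), transcription.size());
            std::cout << transcription.data() << std::endl;
        }
        scores += numScoresPerFrame;  // advance to the next frame's score vector
    }
}
```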
### Acoustic and Language Model Setup
Pretrained models are available at [Intel® Open Source Technology Center](https://download.01.org/openvinotoolkit/models_contrib/speech/kaldi) and [Intel® OpenVINO™ Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader). This sample uses models from the `librispeech/s5_ext` folder.
To train models from scratch, refer to the shell-script Kaldi training recipe `lspeech_s5_ext_run.sh` and the corresponding documentation `lspeech_s5_ext.md`.
To convert a Kaldi acoustic model into the Intermediate Representation (IR) format accepted by this sample, use the following Model Optimizer command:
```sh
$ python3 mo.py --framework kaldi --input_model lspeech_s5_ext.nnet --counts lspeech_s5_ext.counts --remove_output_softmax
```
The command produces an IR network consisting of `lspeech_s5_ext.xml` and
`lspeech_s5_ext.bin`.
> **NOTE**: Model Optimizer (`mo.py`), Kaldi-trained neural network (`lspeech_s5_ext.nnet`)
and Kaldi class counts file (`lspeech_s5_ext.counts`) must be in your working directory.
### Speech Recognition
Once the IR is created or downloaded, you can use the following command to run
speech recognition on Intel® processors with a GNA coprocessor (or its
emulation library) and the Rockhopper Trail decoder library:
```sh
$ ./speech_recognition_offline_demo -d GNA_AUTO -bs 1 -i test_feat_1_10.ark -m lspeech_s5_ext.xml -hmm rht_language_model/rh.hmm -cl rht_language_model/cl.fst -g rht_language_model/g.fst -labels rht_language_model/labels.bin -amsf 0.08
```
## Sample Output
```
[ INFO ] InferenceEngine:
API version ............ 1.6
Build .................. R3
Description ....... API
[ INFO ] Parsing input parameters
[ INFO ] No extensions provided
[ INFO ] Loading Inference Engine
[ INFO ] Device info:
GNA
GNAPlugin version ......... 1.6
Build ........... GNAPlugin
[ INFO ] Loading network files
[ INFO ] Batch size is 1
[ INFO ] Using scale factor of 4079.14 calculated from first utterance.
[ INFO ] Loading model to the device
[ INFO ] Model loading time 301.864 ms
Utterance 0:
1272-128104-0012 ONLY UNFORTUNATELY HIS OWN WORK NEVER DOES GET GOOD
Total time in Infer (HW and SW): 1522.28 ms
Frames in utterance: 536 frames
Average Infer time per frame: 2.84008 ms
End of Utterance 0
Utterance 1:
174-84280-0011 BUT NOW IT DOESN'T SEEM TO MATTER VERY MUCH
Total time in Infer (HW and SW): 957.779 ms
Frames in utterance: 334 frames
Average Infer time per frame: 2.8676 ms
End of Utterance 1
Utterance 2:
1988-147956-0010 I REMEMBERED WHAT THE CONDUCTOR HAD SAID ABOUT HER EYES
Total time in Infer (HW and SW): 1082.91 ms
Frames in utterance: 384 frames
Average Infer time per frame: 2.82008 ms
End of Utterance 2
Utterance 3:
1988-147956-0026 WE WERE SO DEEP IN THE GRASS THAT WE COULD SEE NOTHING BUT THE BLUE SKY OVER US AND THE GOLD TREE IN FRONT OF US
Total time in Infer (HW and SW): 1963.4 ms
Frames in utterance: 690 frames
Average Infer time per frame: 2.84551 ms
End of Utterance 3
Utterance 4:
2086-149220-0045 FEWER WORDS THAN BEFORE BUT WITH THE SAME MYSTERIOUS MUSIC IN
Total time in Infer (HW and SW): 1283.32 ms
Frames in utterance: 453 frames
Average Infer time per frame: 2.83293 ms
End of Utterance 4
Utterance 5:
2277-149874-0011 HE SEEMED TO BE THINKING OF SOMETHING ELSE
Total time in Infer (HW and SW): 690.602 ms
Frames in utterance: 245 frames
Average Infer time per frame: 2.81878 ms
End of Utterance 5
Utterance 6:
2277-149896-0034 HE RANG AGAIN THIS TIME HARDER STILL NO ANSWER
Total time in Infer (HW and SW): 1128.91 ms
Frames in utterance: 399 frames
Average Infer time per frame: 2.82934 ms
End of Utterance 6
Utterance 7:
2277-149897-0015 IN ABOUT AN HOUR AND THREE QUARTERS THE BOY RETURNED
Total time in Infer (HW and SW): 857.916 ms
Frames in utterance: 302 frames
Average Infer time per frame: 2.84078 ms
End of Utterance 7
Utterance 8:
2412-153948-0005 I WAS DELIGHTED WITH THE COUNTRY AND THE MANNER OF LIFE
Total time in Infer (HW and SW): 897.309 ms
Frames in utterance: 312 frames
Average Infer time per frame: 2.87599 ms
End of Utterance 8
Utterance 9:
3081-166546-0044 HE WAS THE PLAIN FACE DETECTIVE WHO HAD SPOKEN TO GEORGE
Total time in Infer (HW and SW): 1280.3 ms
Frames in utterance: 448 frames
Average Infer time per frame: 2.8578 ms
End of Utterance 9
[ INFO ] Execution successful
```
## Input Preparation
The Speech Recognition Offline Demo application accepts Kaldi binary `.ark` files holding stacked feature frames.
To prepare such files, follow the steps described in `lspeech_s5_ext.md` in the `librispeech/s5_ext` folder of the Model Zoo. The exact byte layout the demo expects is sketched below.
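Each `.ark` entry read by the demo is a named binary float matrix. The following is a minimal sketch of a writer for that layout, mirroring `SaveKaldiArkArray` from `main.cpp`; `WriteArkMatrix` is a hypothetical helper, and error handling is omitted:
```cpp
#include <cstdint>
#include <fstream>
#include <string>
#include <vector>

// Layout: "<name>\0BFM \4<rows:uint32>\4<cols:uint32>" followed by
// rows*cols row-major float values.
void WriteArkMatrix(const std::string& fileName, const std::string& name,
                    uint32_t rows, uint32_t cols, const std::vector<float>& data) {
    std::ofstream out(fileName, std::ios::binary);
    out.write(name.c_str(), name.size());
    out.write("\0", 1);       // NUL terminator after the name
    out.write("BFM ", 4);     // binary float matrix marker
    out.write("\4", 1);
    out.write(reinterpret_cast<const char*>(&rows), sizeof(rows));
    out.write("\4", 1);
    out.write(reinterpret_cast<const char*>(&cols), sizeof(cols));
    out.write(reinterpret_cast<const char*>(data.data()),
              rows * cols * sizeof(float));
}
```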
## See Also
* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "speech_sample.hpp"
#include "rockhopper_decoder.h"
#include <gflags/gflags.h>
#include <functional>
#include <iostream>
#include <memory>
#include <map>
#include <fstream>
#include <random>
#include <string>
#include <vector>
#include <utility>
#include <time.h>
#include <thread>
#include <chrono>
#include <limits>
#include <iomanip>
#include <inference_engine.hpp>
#include <gna/gna_config.hpp>
#include <samples/common.hpp>
#include <samples/slog.hpp>
#include <samples/args_helper.hpp>
#ifndef ALIGN
#define ALIGN(memSize, pad) ((static_cast<int>((memSize) + (pad) - 1) / (pad)) * (pad))
#endif
#define MAX_SCORE_DIFFERENCE 0.0001f
#define MAX_VAL_2B_FEAT 16384
using namespace InferenceEngine;
typedef std::chrono::high_resolution_clock Time;
typedef std::chrono::duration<double, std::ratio<1, 1000>> ms;
typedef std::chrono::duration<float> fsec;
typedef struct {
uint32_t numScores;
uint32_t numErrors;
float threshold;
float maxError;
float rmsError;
float sumError;
float sumRmsError;
float sumSquaredError;
float maxRelError;
float sumRelError;
float sumSquaredRelError;
} score_error_t;
struct InferRequestStruct {
InferRequest inferRequest;
int frameIndex;
uint32_t numFramesThisBatch;
};
struct RhDecoderInstanceParams {
RhDecoderInstanceHandle handle;
uint8_t* hmm_data;
uint8_t* cl_data;
uint8_t* g_data;
uint8_t* label_data;
};
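// Scans a Kaldi binary .ark file: counts the matrices it contains and, for the
// matrix at index numArrayToFindSize, reports the size of its float data in bytes.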
void GetKaldiArkInfo(const char *fileName,
uint32_t numArrayToFindSize,
uint32_t *ptrNumArrays,
uint32_t *ptrNumMemoryBytes) {
uint32_t numArrays = 0;
uint32_t numMemoryBytes = 0;
std::ifstream in_file(fileName, std::ios::binary);
if (in_file.good()) {
while (!in_file.eof()) {
std::string line;
uint32_t numRows = 0u, numCols = 0u, num_bytes = 0u;
std::getline(in_file, line, '\0'); // read variable length name followed by space and NUL
std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D
if (line.compare("BFM ") != 0) {
break;
}
in_file.read(reinterpret_cast<char *>(&numRows), sizeof(uint32_t)); // read number of rows
std::getline(in_file, line, '\4'); // read control-D
in_file.read(reinterpret_cast<char *>(&numCols), sizeof(uint32_t)); // read number of columns
num_bytes = numRows * numCols * sizeof(float);
in_file.seekg(num_bytes, in_file.cur); // read data
if (numArrays == numArrayToFindSize) {
numMemoryBytes += num_bytes;
}
numArrays++;
}
in_file.close();
} else {
fprintf(stderr, "Failed to open %s for reading in GetKaldiArkInfo()!\n", fileName);
exit(-1);
}
if (ptrNumArrays != NULL) *ptrNumArrays = numArrays;
if (ptrNumMemoryBytes != NULL) *ptrNumMemoryBytes = numMemoryBytes;
}
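// Loads the float matrix at arrayIndex from a Kaldi binary .ark file into
// `memory` (which the caller must pre-size) and returns its name and dimensions.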
void LoadKaldiArkArray(const char *fileName, uint32_t arrayIndex, std::string &ptrName, std::vector<uint8_t> &memory,
uint32_t *ptrNumRows, uint32_t *ptrNumColumns, uint32_t *ptrNumBytesPerElement) {
std::ifstream in_file(fileName, std::ios::binary);
if (in_file.good()) {
uint32_t i = 0;
while (i < arrayIndex) {
std::string line;
uint32_t numRows = 0u, numCols = 0u;
std::getline(in_file, line, '\0'); // read variable length name followed by space and NUL
std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D
if (line.compare("BFM ") != 0) {
break;
}
in_file.read(reinterpret_cast<char *>(&numRows), sizeof(uint32_t)); // read number of rows
std::getline(in_file, line, '\4'); // read control-D
in_file.read(reinterpret_cast<char *>(&numCols), sizeof(uint32_t)); // read number of columns
in_file.seekg(numRows * numCols * sizeof(float), in_file.cur); // read data
i++;
}
if (!in_file.eof()) {
std::string line;
std::getline(in_file, ptrName, '\0'); // read variable length name followed by space and NUL
std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D
if (line.compare("BFM ") != 0) {
fprintf(stderr, "Cannot find array specifier in file %s in LoadKaldiArkArray()!\n", fileName);
exit(-1);
}
in_file.read(reinterpret_cast<char *>(ptrNumRows), sizeof(uint32_t)); // read number of rows
std::getline(in_file, line, '\4'); // read control-D
in_file.read(reinterpret_cast<char *>(ptrNumColumns), sizeof(uint32_t)); // read number of columns
in_file.read(reinterpret_cast<char *>(&memory.front()),
*ptrNumRows * *ptrNumColumns * sizeof(float)); // read array data
}
in_file.close();
} else {
fprintf(stderr, "Failed to open %s for reading in GetKaldiArkInfo()!\n", fileName);
exit(-1);
}
*ptrNumBytesPerElement = sizeof(float);
}
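// Writes (or appends) one float matrix to a Kaldi binary .ark file using the
// same "<name>\0BFM \4<rows>\4<cols><data>" layout the loaders above expect.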
void SaveKaldiArkArray(const char *fileName,
bool shouldAppend,
std::string name,
void *ptrMemory,
uint32_t numRows,
uint32_t numColumns) {
std::ios_base::openmode mode = std::ios::binary;
if (shouldAppend) {
mode |= std::ios::app;
}
std::ofstream out_file(fileName, mode);
if (out_file.good()) {
out_file.write(name.c_str(), name.length()); // write name
out_file.write("\0", 1);
out_file.write("BFM ", 4);
out_file.write("\4", 1);
out_file.write(reinterpret_cast<char *>(&numRows), sizeof(uint32_t));
out_file.write("\4", 1);
out_file.write(reinterpret_cast<char *>(&numColumns), sizeof(uint32_t));
out_file.write(reinterpret_cast<char *>(ptrMemory), numRows * numColumns * sizeof(float));
out_file.close();
} else {
throw std::runtime_error(std::string("Failed to open %s for writing in SaveKaldiArkArray()!\n") + fileName);
}
}
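// Returns targetMax / max(|x|) over the feature buffer, i.e. the multiplier
// that maps the largest absolute input value onto the quantization range.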
float ScaleFactorForQuantization(void *ptrFloatMemory, float targetMax, uint32_t numElements) {
float *ptrFloatFeat = reinterpret_cast<float *>(ptrFloatMemory);
float max = 0.0;
float scaleFactor;
for (uint32_t i = 0; i < numElements; i++) {
if (fabs(ptrFloatFeat[i]) > max) {
max = fabs(ptrFloatFeat[i]);
}
}
if (max == 0) {
scaleFactor = 1.0;
} else {
scaleFactor = targetMax / max;
}
return (scaleFactor);
}
void ClearScoreError(score_error_t *error) {
error->numScores = 0;
error->numErrors = 0;
error->maxError = 0.0;
error->rmsError = 0.0;
error->sumError = 0.0;
error->sumRmsError = 0.0;
error->sumSquaredError = 0.0;
error->maxRelError = 0.0;
error->sumRelError = 0.0;
error->sumSquaredRelError = 0.0;
}
void UpdateScoreError(score_error_t *error, score_error_t *totalError) {
totalError->numErrors += error->numErrors;
totalError->numScores += error->numScores;
totalError->sumRmsError += error->rmsError;
totalError->sumError += error->sumError;
totalError->sumSquaredError += error->sumSquaredError;
if (error->maxError > totalError->maxError) {
totalError->maxError = error->maxError;
}
totalError->sumRelError += error->sumRelError;
totalError->sumSquaredRelError += error->sumSquaredRelError;
if (error->maxRelError > totalError->maxRelError) {
totalError->maxRelError = error->maxRelError;
}
}
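// Compares inferred scores against a reference matrix element by element,
// accumulating absolute and relative error statistics and counting values
// whose absolute error exceeds scoreError->threshold.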
uint32_t CompareScores(float *ptrScoreArray,
void *ptrRefScoreArray,
score_error_t *scoreError,
uint32_t numRows,
uint32_t numColumns) {
uint32_t numErrors = 0;
ClearScoreError(scoreError);
float *A = ptrScoreArray;
float *B = reinterpret_cast<float *>(ptrRefScoreArray);
for (uint32_t i = 0; i < numRows; i++) {
for (uint32_t j = 0; j < numColumns; j++) {
float score = A[i * numColumns + j];
float refscore = B[i * numColumns + j];
float error = fabs(refscore - score);
float rel_error = error / (static_cast<float>(fabs(refscore)) + 1e-20f);
float squared_error = error * error;
float squared_rel_error = rel_error * rel_error;
scoreError->numScores++;
scoreError->sumError += error;
scoreError->sumSquaredError += squared_error;
if (error > scoreError->maxError) {
scoreError->maxError = error;
}
scoreError->sumRelError += rel_error;
scoreError->sumSquaredRelError += squared_rel_error;
if (rel_error > scoreError->maxRelError) {
scoreError->maxRelError = rel_error;
}
if (error > scoreError->threshold) {
numErrors++;
}
}
}
scoreError->rmsError = sqrt(scoreError->sumSquaredError / (numRows * numColumns));
scoreError->sumRmsError += scoreError->rmsError;
scoreError->numErrors = numErrors;
return (numErrors);
}
float StdDevError(score_error_t error) {
return (sqrt(error.sumSquaredError / error.numScores
- (error.sumError / error.numScores) * (error.sumError / error.numScores)));
}
float StdDevRelError(score_error_t error) {
return (sqrt(error.sumSquaredRelError / error.numScores
- (error.sumRelError / error.numScores) * (error.sumRelError / error.numScores)));
}
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
#if defined(_WIN32) || defined(WIN32)
#include <intrin.h>
#include <windows.h>
#else
#include <cpuid.h>
#endif
inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx) {
size_t level = *eax;
#if defined(_WIN32) || defined(WIN32)
int regs[4] = {static_cast<int>(*eax), static_cast<int>(*ebx), static_cast<int>(*ecx), static_cast<int>(*edx)};
__cpuid(regs, level);
*eax = static_cast<uint32_t>(regs[0]);
*ebx = static_cast<uint32_t>(regs[1]);
*ecx = static_cast<uint32_t>(regs[2]);
*edx = static_cast<uint32_t>(regs[3]);
#else
__get_cpuid(level, eax, ebx, ecx, edx);
#endif
}
// return GNA module frequency in MHz
float getGnaFrequencyMHz() {
uint32_t eax = 1;
uint32_t ebx = 0;
uint32_t ecx = 0;
uint32_t edx = 0;
uint32_t family = 0;
uint32_t model = 0;
const uint8_t sixth_family = 6;
const uint8_t cannon_lake_model = 102;
const uint8_t gemini_lake_model = 122;
native_cpuid(&eax, &ebx, &ecx, &edx);
family = (eax >> 8) & 0xF;
// model is the concatenation of two fields
// | extended model | model |
// copy extended model data
model = (eax >> 16) & 0xF;
// shift
model <<= 4;
// copy model data
model += (eax >> 4) & 0xF;
if (family == sixth_family && model == cannon_lake_model) {
return 400;
} else if (family == sixth_family &&
model == gemini_lake_model) {
return 200;
} else {
// counters are not supported, so return just the default value
return 1;
}
}
#endif // if not ARM
void printReferenceCompareResults(score_error_t const &totalError,
size_t framesNum,
std::ostream &stream) {
stream << " max error: " <<
totalError.maxError << std::endl;
stream << " avg error: " <<
totalError.sumError / totalError.numScores << std::endl;
stream << " avg rms error: " <<
totalError.sumRmsError / framesNum << std::endl;
stream << " stdev error: " <<
StdDevError(totalError) << std::endl << std::endl;
stream << std::endl;
}
void printPerformanceCounters(std::map<std::string,
InferenceEngine::InferenceEngineProfileInfo> const &utterancePerfMap,
size_t callsNum,
std::ostream &stream, std::string fullDeviceName) {
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
std::ios_base::fmtflags fmt_flags(stream.flags() );
stream << std::endl << "Performance counts:" << std::endl;
stream << std::setw(10) << std::right << "" << "Counter descriptions";
stream << std::setw(22) << "Utt scoring time";
stream << std::setw(18) << "Avg infer time";
stream << std::endl;
stream << std::setw(46) << "(ms)";
stream << std::setw(24) << "(us per call)";
stream << std::endl;
for (const auto &it : utterancePerfMap) {
std::string const &counter_name = it.first;
float current_units = static_cast<float>(it.second.realTime_uSec);
float call_units = current_units / callsNum;
// if GNA HW counters
// get frequency of GNA module
float freq = getGnaFrequencyMHz();
current_units /= freq * 1000;
call_units /= freq;
stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1);
stream << std::setw(16) << std::right << current_units;
stream << std::setw(21) << std::right << call_units;
stream << std::endl;
}
stream << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << fullDeviceName << std::endl;
std::cout << std::endl;
stream.flags(fmt_flags);
#endif
}
void getPerformanceCounters(InferenceEngine::InferRequest &request,
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfCounters) {
auto retPerfCounters = request.GetPerformanceCounts();
for (const auto &pair : retPerfCounters) {
perfCounters[pair.first] = pair.second;
}
}
void sumPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const &perfCounters,
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &totalPerfCounters) {
for (const auto &pair : perfCounters) {
totalPerfCounters[pair.first].realTime_uSec += pair.second.realTime_uSec;
}
}
bool ParseAndCheckCommandLine(int argc, char *argv[]) {
// ---------------------------Parsing and validation of input args--------------------------------------
slog::info << "Parsing input parameters" << slog::endl;
gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
if (FLAGS_h) {
showUsage();
showAvailableDevices();
return false;
}
bool isDumpMode = !FLAGS_wg.empty() || !FLAGS_we.empty();
// input not required only in dump mode and if external scale factor provided
if (FLAGS_i.empty() && (!isDumpMode || FLAGS_q.compare("user") != 0)) {
if (isDumpMode) {
throw std::logic_error("In model dump mode either static quantization is used (-i) or user scale"
" factor need to be provided. See -q user option");
}
throw std::logic_error("Input file not set. Please use -i.");
}
if (FLAGS_m.empty() && FLAGS_rg.empty()) {
throw std::logic_error("Either IR file (-m) or GNAModel file (-rg) need to be set.");
}
if ((!FLAGS_m.empty() && !FLAGS_rg.empty())) {
throw std::logic_error("Only one of -m and -rg is allowed.");
}
std::vector<std::string> supportedDevices = {
"CPU",
"GPU",
"GNA_AUTO",
"GNA_HW",
"GNA_SW_EXACT",
"GNA_SW",
"GNA_SW_FP32",
"HETERO:GNA,CPU",
"HETERO:GNA_HW,CPU",
"HETERO:GNA_SW_EXACT,CPU",
"HETERO:GNA_SW,CPU",
"HETERO:GNA_SW_FP32,CPU",
"MYRIAD"
};
if (std::find(supportedDevices.begin(), supportedDevices.end(), FLAGS_d) == supportedDevices.end()) {
throw std::logic_error("Specified device is not supported.");
}
float scaleFactorInput = static_cast<float>(FLAGS_sf);
if (scaleFactorInput <= 0.0f) {
throw std::logic_error("Scale factor out of range (must be non-negative).");
}
uint32_t batchSize = (uint32_t) FLAGS_bs;
if ((batchSize < 1) || (batchSize > 8)) {
throw std::logic_error("Batch size out of range (1..8).");
}
/** default is static quantization **/
if ((FLAGS_q.compare("static") != 0) && (FLAGS_q.compare("dynamic") != 0) && (FLAGS_q.compare("user") != 0)) {
throw std::logic_error("Quantization mode not supported (static, dynamic, user).");
}
if (FLAGS_q.compare("dynamic") == 0) {
throw std::logic_error("Dynamic quantization not yet supported.");
}
if (FLAGS_qb != 16 && FLAGS_qb != 8) {
throw std::logic_error("Only 8 or 16 bits supported.");
}
if (FLAGS_nthreads <= 0) {
throw std::logic_error("Invalid value for 'nthreads' argument. It must be greater that or equal to 0");
}
if (FLAGS_cw_r < 0) {
throw std::logic_error("Invalid value for 'cw_r' argument. It must be greater than or equal to 0");
}
if (FLAGS_cw_l < 0) {
throw std::logic_error("Invalid value for 'cw_l' argument. It must be greater than or equal to 0");
}
// RH decoder parameters
if (FLAGS_hmm.empty()) {
throw std::logic_error("RH HMM model file not set. Please use -hmm.");
}
if (FLAGS_labels.empty()) {
throw std::logic_error("RH labels file not set. Please use -labels.");
}
if (FLAGS_g.empty()) {
throw std::logic_error("RH LM: G.fst model file not set. Please use -g.");
}
if (FLAGS_cl.empty()) {
throw std::logic_error("RH LM: CL.fst model file not set. Please use -cl.");
}
return true;
}
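// Reads a whole binary file into a newly allocated buffer and stores its size
// in *size; the caller owns the returned buffer and must delete[] it.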
uint8_t* ReadBinaryFile(const char* filename, uint32_t* size) {
if (nullptr == size) {
throw std::logic_error("Size parameter is null");
}
FILE * f = fopen(filename, "rb");
if (!f) {
throw std::runtime_error("Failed to open binary file " + std::string(filename));
}
int32_t res = fseek(f, 0, SEEK_END);
if (res != 0) {
fclose(f);
throw std::runtime_error("Error occured while loading (fseek) file " + std::string(filename));
}
auto fileSize = ftell(f);
if (fileSize < 0) {
fclose(f);
throw std::runtime_error("Error occured while loading (ftell) file " + std::string(filename));
return nullptr;
}
res = fseek(f, 0, SEEK_SET);
uint8_t* data = new (std::nothrow) uint8_t[fileSize];
if (!data) {
fclose(f);
throw std::runtime_error("Not enough memory to load file " + std::string(filename));
}
*size = fread(data, 1, fileSize, f);
fclose(f);
if (*size != fileSize) {
delete[] data;
throw std::runtime_error("Could not read all the data from file " + std::string(filename));
}
return data;
}
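// Creates an RH decoder instance, applies the command-line decoder parameters,
// and loads the HMM, pronunciation (CL), language (G), and label resources.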
void InitializeRhDecoder(RhDecoderInstanceParams& instanceParams, int32_t scoreVectorSize) {
uint32_t hmm_size = 0;
uint32_t cl_size = 0;
uint32_t g_size = 0;
uint32_t label_size = 0;
instanceParams.hmm_data = ReadBinaryFile(FLAGS_hmm.c_str(), &hmm_size);
instanceParams.cl_data = ReadBinaryFile(FLAGS_cl.c_str(), &cl_size);
instanceParams.g_data = ReadBinaryFile(FLAGS_g.c_str(), &g_size);
instanceParams.label_data = ReadBinaryFile(FLAGS_labels.c_str(), &label_size);
if (instanceParams.hmm_data && instanceParams.cl_data &&
instanceParams.g_data && instanceParams.label_data) {
RhDecoderStatus status = RhDecoderCreateInstance(&instanceParams.handle);
do {
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to create decoder");
}
status = RhDecoderSetDefaultParameterValues(instanceParams.handle,
RhAcousticModelType::RH_ACOUSTIC_MODEL_TYPE_GENERIC_CHAIN);
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to set default decoder values");
}
// now overwrite some of the parameters
float acoustic_scale_factor = static_cast<float>(FLAGS_amsf);
status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_ACOUSTIC_SCALE_FACTOR,
&acoustic_scale_factor, sizeof(float));
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to set parameter acoustic_scale_factor value");
}
status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_ACOUSTIC_SCORE_VECTOR_SIZE,
&scoreVectorSize, sizeof(int));
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to set parameter score_vector_size value");
}
float beam_width = static_cast<float>(FLAGS_beam_width);
status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_BEAM_WIDTH,
&beam_width, sizeof(float));
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to set parameter beam_width value");
}
status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_NBEST,
&FLAGS_nbest, sizeof(int));
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to set parameter nbest value");
}
status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_G_CACHE_LOG_SIZE,
&FLAGS_gcls, sizeof(int));
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to set parameter g_cache_log_size value");
}
status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_TRACE_BACK_LOG_SIZE,
&FLAGS_tbls, sizeof(int));
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to set parameter trace_back_log_size value");
}
status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_MIN_STABLE_FRAMES,
&FLAGS_msf, sizeof(int));
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to set parameter min_stable_frames value");
}
status = RhDecoderSetParameterValue(instanceParams.handle, RH_DECODER_TOKEN_BUFFER_SIZE,
&FLAGS_tbs, sizeof(int));
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to set parameter token_buffer_size value");
}
status = RhDecoderSetupResource(instanceParams.handle,
RhResourceType::HMM, instanceParams.hmm_data, hmm_size);
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to load HMM model");
}
status = RhDecoderSetupResource(instanceParams.handle,
RhResourceType::PRONUNCIATION_MODEL, instanceParams.cl_data, cl_size);
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to load pronunciation model");
}
status = RhDecoderSetupResource(instanceParams.handle,
RhResourceType::LANGUAGE_MODEL, instanceParams.g_data, g_size);
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to load language model");
}
status = RhDecoderSetupResource(instanceParams.handle,
RhResourceType::LABELS, instanceParams.label_data, label_size);
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to load labels");
}
status = RhDecoderInitInstance(instanceParams.handle);
if (RhDecoderStatus::RH_DECODER_SUCCESS != status) {
throw std::logic_error("Failed to initialize decoder");
}
} while (0);
} else {
throw std::logic_error("Failed to read one of the resources");
}
}
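// Releases the decoder instance and the resource buffers allocated by
// InitializeRhDecoder().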
void FreeRhDecoder(RhDecoderInstanceParams& instanceParams) {
if (instanceParams.handle) {
RhDecoderStatus status = RhDecoderFreeInstance(instanceParams.handle);
if (status != RH_DECODER_SUCCESS) {
slog::err << "Failed to free decoder. Status: " << status << slog::endl;
throw std::logic_error("Failed to free decoder. Status: " + std::to_string(status));
}
}
if (instanceParams.hmm_data) {
delete[] instanceParams.hmm_data;
instanceParams.hmm_data = nullptr;
}
if (instanceParams.cl_data) {
delete[] instanceParams.cl_data;
instanceParams.cl_data = nullptr;
}
if (instanceParams.g_data) {
delete[] instanceParams.g_data;
instanceParams.g_data = nullptr;
}
if (instanceParams.label_data) {
delete[] instanceParams.label_data;
instanceParams.label_data = nullptr;
}
}
/**
* @brief The entry point for inference engine automatic speech recognition sample
* @file speech_sample/main.cpp
* @example speech_sample/main.cpp
*/
int main(int argc, char *argv[]) {
try {
slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl;
// ------------------------------ Parsing and validation of input args ---------------------------------
if (!ParseAndCheckCommandLine(argc, argv)) {
return 0;
}
if (FLAGS_l.empty()) {
slog::info << "No extensions provided" << slog::endl;
}
auto isFeature = [&](const std::string xFeature) { return FLAGS_d.find(xFeature) != std::string::npos; };
bool useGna = isFeature("GNA");
bool useHetero = isFeature("HETERO");
std::string deviceStr =
useHetero && useGna ? "HETERO:GNA,CPU" : FLAGS_d.substr(0, (FLAGS_d.find("_")));
float scaleFactorInput = static_cast<float>(FLAGS_sf);
uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : (uint32_t) FLAGS_bs;
std::vector<std::string> inputArkFiles;
std::vector<uint32_t> numBytesThisUtterance;
uint32_t numUtterances(0);
if (!FLAGS_i.empty()) {
std::string outStr;
std::istringstream stream(FLAGS_i);
uint32_t currentNumUtterances(0), currentNumBytesThisUtterance(0);
while (getline(stream, outStr, ',')) {
std::string filename(fileNameNoExt(outStr) + ".ark");
inputArkFiles.push_back(filename);
GetKaldiArkInfo(filename.c_str(), 0, &currentNumUtterances, &currentNumBytesThisUtterance);
if (numUtterances == 0) {
numUtterances = currentNumUtterances;
} else if (currentNumUtterances != numUtterances) {
throw std::logic_error("Incorrect input files. Number of utterance must be the same for all ark files");
}
numBytesThisUtterance.push_back(currentNumBytesThisUtterance);
}
}
size_t numInputArkFiles(inputArkFiles.size());
// -----------------------------------------------------------------------------------------------------
// --------------------------- 1. Load inference engine -------------------------------------
slog::info << "Loading Inference Engine" << slog::endl;
Core ie;
/** Printing device version **/
slog::info << "Device info: " << slog::endl;
std::cout << ie.GetVersions(deviceStr) << std::endl;
// -----------------------------------------------------------------------------------------------------
// --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
slog::info << "Loading network files" << slog::endl;
CNNNetReader netBuilder;
if (!FLAGS_m.empty()) {
/** Read network model **/
netBuilder.ReadNetwork(FLAGS_m);
/** Extract model name and load weights **/
std::string binFileName = fileNameNoExt(FLAGS_m) + ".bin";
netBuilder.ReadWeights(binFileName);
// -------------------------------------------------------------------------------------------------
// --------------------------- 3. Set batch size ---------------------------------------------------
/** Set batch size. Unlike in imaging, batching in time (rather than space) is done for speech recognition. **/
netBuilder.getNetwork().setBatchSize(batchSize);
slog::info << "Batch size is " << std::to_string(netBuilder.getNetwork().getBatchSize())
<< slog::endl;
}
/** Setting parameter for per layer metrics **/
std::map<std::string, std::string> gnaPluginConfig;
std::map<std::string, std::string> genericPluginConfig;
if (useGna) {
std::string gnaDevice =
useHetero ? FLAGS_d.substr(FLAGS_d.find("GNA"), FLAGS_d.find(",") - FLAGS_d.find("GNA")) : FLAGS_d;
gnaPluginConfig[GNAConfigParams::KEY_GNA_DEVICE_MODE] =
gnaDevice.find("_") == std::string::npos ? "GNA_AUTO" : gnaDevice;
}
if (FLAGS_pc) {
genericPluginConfig[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES;
}
if (FLAGS_q.compare("user") == 0) {
if (numInputArkFiles > 1) {
std::string errMessage("Incorrect use case for multiple input ark files. Please don't use -q 'user' for this case.");
throw std::logic_error(errMessage);
}
slog::info << "Using scale factor of " << FLAGS_sf << slog::endl;
gnaPluginConfig[GNA_CONFIG_KEY(SCALE_FACTOR)] = std::to_string(FLAGS_sf);
} else {
// "static" quantization with calculated scale factor
for (size_t i = 0; i < numInputArkFiles; i++) {
auto inputArkName = inputArkFiles[i].c_str();
std::string name;
std::vector<uint8_t> ptrFeatures;
uint32_t numArrays(0), numBytes(0), numFrames(0), numFrameElements(0), numBytesPerElement(0);
GetKaldiArkInfo(inputArkName, 0, &numArrays, &numBytes);
ptrFeatures.resize(numBytes);
LoadKaldiArkArray(inputArkName,
0,
name,
ptrFeatures,
&numFrames,
&numFrameElements,
&numBytesPerElement);
scaleFactorInput =
ScaleFactorForQuantization(ptrFeatures.data(), MAX_VAL_2B_FEAT, numFrames * numFrameElements);
slog::info << "Using scale factor of " << scaleFactorInput << " calculated from first utterance."
<< slog::endl;
std::string scaleFactorConfigKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_") + std::to_string(i);
gnaPluginConfig[scaleFactorConfigKey] = std::to_string(scaleFactorInput);
}
}
if (FLAGS_qb == 8) {
gnaPluginConfig[GNAConfigParams::KEY_GNA_PRECISION] = "I8";
} else {
gnaPluginConfig[GNAConfigParams::KEY_GNA_PRECISION] = "I16";
}
gnaPluginConfig[GNAConfigParams::KEY_GNA_LIB_N_THREADS] = std::to_string((FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : FLAGS_nthreads);
gnaPluginConfig[GNA_CONFIG_KEY(COMPACT_MODE)] = CONFIG_VALUE(NO);
// -----------------------------------------------------------------------------------------------------
// --------------------------- 4. Write model to file --------------------------------------------------
// Embedded GNA model dumping (for Intel(R) Speech Enabling Developer Kit)
if (!FLAGS_we.empty()) {
gnaPluginConfig[GNAConfigParams::KEY_GNA_FIRMWARE_MODEL_IMAGE] = FLAGS_we;
}
// -----------------------------------------------------------------------------------------------------
// --------------------------- 5. Loading model to the device ------------------------------------------
if (useGna) {
genericPluginConfig.insert(std::begin(gnaPluginConfig), std::end(gnaPluginConfig));
}
auto t0 = Time::now();
ExecutableNetwork executableNet;
if (!FLAGS_m.empty()) {
slog::info << "Loading model to the device" << slog::endl;
executableNet = ie.LoadNetwork(netBuilder.getNetwork(), deviceStr, genericPluginConfig);
} else {
slog::info << "Importing model to the device" << slog::endl;
executableNet = ie.ImportNetwork(FLAGS_rg.c_str(), deviceStr, genericPluginConfig);
}
ms loadTime = std::chrono::duration_cast<ms>(Time::now() - t0);
slog::info << "Model loading time " << loadTime.count() << " ms" << slog::endl;
// --------------------------- 6. Exporting gna model using InferenceEngine AOT API---------------------
if (!FLAGS_wg.empty()) {
slog::info << "Writing GNA Model to file " << FLAGS_wg << slog::endl;
t0 = Time::now();
executableNet.Export(FLAGS_wg);
ms exportTime = std::chrono::duration_cast<ms>(Time::now() - t0);
slog::info << "Exporting time " << exportTime.count() << " ms" << slog::endl;
return 0;
}
if (!FLAGS_we.empty()) {
slog::info << "Exported GNA embedded model to file " << FLAGS_we << slog::endl;
return 0;
}
std::vector<InferRequestStruct> inferRequests((FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : FLAGS_nthreads);
for (auto& inferRequest : inferRequests) {
inferRequest = {executableNet.CreateInferRequest(), -1, batchSize};
}
// -----------------------------------------------------------------------------------------------------
// --------------------------- 7. Prepare input blobs --------------------------------------------------
/** Taking information about all topology inputs **/
ConstInputsDataMap cInputInfo = executableNet.GetInputsInfo();
/** Stores all input blobs data **/
if (cInputInfo.size() != numInputArkFiles) {
throw std::logic_error("Number of network inputs("
+ std::to_string(cInputInfo.size()) + ") is not equal to number of ark files("
+ std::to_string(numInputArkFiles) + ")");
}
std::vector<Blob::Ptr> ptrInputBlobs;
for (auto& input : cInputInfo) {
ptrInputBlobs.push_back(inferRequests.begin()->inferRequest.GetBlob(input.first));
}
InputsDataMap inputInfo;
if (!FLAGS_m.empty()) {
inputInfo = netBuilder.getNetwork().getInputsInfo();
}
/** configure input precision if model loaded from IR **/
for (auto &item : inputInfo) {
Precision inputPrecision = Precision::FP32; // specify Precision::I16 to provide quantized inputs
item.second->setPrecision(inputPrecision);
item.second->getInputData()->setLayout(Layout::NC); // row major layout
}
// -----------------------------------------------------------------------------------------------------
// --------------------------- 8. Prepare output blobs -------------------------------------------------
ConstOutputsDataMap cOutputInfo(executableNet.GetOutputsInfo());
OutputsDataMap outputInfo;
if (!FLAGS_m.empty()) {
outputInfo = netBuilder.getNetwork().getOutputsInfo();
}
Blob::Ptr ptrOutputBlob = inferRequests.begin()->inferRequest.GetBlob(cOutputInfo.rbegin()->first);
for (auto &item : outputInfo) {
DataPtr outData = item.second;
if (!outData) {
throw std::logic_error("output data pointer is not valid");
}
Precision outputPrecision = Precision::FP32; // specify Precision::I32 to retrieve quantized outputs
outData->setPrecision(outputPrecision);
outData->setLayout(Layout::NC); // row major layout
}
// -----------------------------------------------------------------------------------------------------
// --------------------------- 9. Initialize RH decoder ------------------------------------------------
RhDecoderInstanceParams rhDecoderInstanceParams{ nullptr };
auto lastLayerOutputCount = outputInfo.begin()->second->getDims()[1];
InitializeRhDecoder(rhDecoderInstanceParams, lastLayerOutputCount);
// allocate 1MB for result
std::vector<char> rh_utterance_transcription(1024 * 1024);
// -----------------------------------------------------------------------------------------------------
// --------------------------- 10. Do inference --------------------------------------------------------
std::vector<std::vector<uint8_t>> ptrUtterances;
std::vector<uint8_t> ptrScores;
std::vector<uint8_t> ptrReferenceScores;
score_error_t frameError, totalError;
ptrUtterances.resize(inputArkFiles.size());
for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) {
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> utterancePerfMap;
std::string uttName;
uint32_t numFrames(0), n(0);
std::vector<uint32_t> numFrameElementsInput;
uint32_t numFramesReference(0), numFrameElementsReference(0), numBytesPerElementReference(0),
numBytesReferenceScoreThisUtterance(0);
const uint32_t numScoresPerFrame = ptrOutputBlob->size() / batchSize;
numFrameElementsInput.resize(numInputArkFiles);
for (size_t i = 0; i < inputArkFiles.size(); i++) {
std::vector<uint8_t> ptrUtterance;
auto inputArkFilename = inputArkFiles[i].c_str();
uint32_t currentNumFrames(0), currentNumFrameElementsInput(0), currentNumBytesPerElementInput(0);
GetKaldiArkInfo(inputArkFilename, utteranceIndex, &n, &numBytesThisUtterance[i]);
ptrUtterance.resize(numBytesThisUtterance[i]);
LoadKaldiArkArray(inputArkFilename,
utteranceIndex,
uttName,
ptrUtterance,
&currentNumFrames,
&currentNumFrameElementsInput,
&currentNumBytesPerElementInput);
if (numFrames == 0) {
numFrames = currentNumFrames;
} else if (numFrames != currentNumFrames) {
std::string errMessage("Number of frames in ark files is different: " + std::to_string(numFrames) +
" and " + std::to_string(currentNumFrames));
throw std::logic_error(errMessage);
}
ptrUtterances[i] = ptrUtterance;
numFrameElementsInput[i] = currentNumFrameElementsInput;
}
int i = 0;
for (auto& ptrInputBlob : ptrInputBlobs) {
if (ptrInputBlob->size() != numFrameElementsInput[i++] * batchSize) {
throw std::logic_error("network input size(" + std::to_string(ptrInputBlob->size()) +
") mismatch to ark file size (" +
std::to_string(numFrameElementsInput[i-1] * batchSize) + ")");
}
}
ptrScores.resize(numFrames * numScoresPerFrame * sizeof(float));
if (!FLAGS_r.empty()) {
std::string refUtteranceName;
GetKaldiArkInfo(FLAGS_r.c_str(), utteranceIndex, &n, &numBytesReferenceScoreThisUtterance);
ptrReferenceScores.resize(numBytesReferenceScoreThisUtterance);
LoadKaldiArkArray(FLAGS_r.c_str(),
utteranceIndex,
refUtteranceName,
ptrReferenceScores,
&numFramesReference,
&numFrameElementsReference,
&numBytesPerElementReference);
}
double totalTime = 0.0;
std::cout << "Utterance " << utteranceIndex << ": " << std::endl;
ClearScoreError(&totalError);
totalError.threshold = frameError.threshold = MAX_SCORE_DIFFERENCE;
auto outputFrame = &ptrScores.front();
std::vector<uint8_t*> inputFrame;
for (auto& ut : ptrUtterances) {
inputFrame.push_back(&ut.front());
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> callPerfMap;
size_t frameIndex = 0;
uint32_t numFramesArkFile = numFrames;
numFrames += FLAGS_cw_l + FLAGS_cw_r;
uint32_t numFramesThisBatch{batchSize};
auto t0 = Time::now();
auto t1 = t0;
while (frameIndex <= numFrames) {
if (frameIndex == numFrames) {
if (std::find_if(inferRequests.begin(),
inferRequests.end(),
[&](InferRequestStruct x) { return (x.frameIndex != -1); } ) == inferRequests.end()) {
break;
}
}
bool inferRequestFetched = false;
for (auto &inferRequest : inferRequests) {
if (frameIndex == numFrames) {
numFramesThisBatch = 1;
} else {
numFramesThisBatch = (numFrames - frameIndex < batchSize) ? (numFrames - frameIndex)
: batchSize;
}
if (inferRequest.frameIndex != -1) {
StatusCode code = inferRequest.inferRequest.Wait(
InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
if (code != StatusCode::OK) {
if (!useHetero) continue;
if (code != StatusCode::INFER_NOT_STARTED) continue;
}
if (inferRequest.frameIndex >= 0) {
Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.rbegin()->first);
RhDecoderInfo info;
const float* acoustic_score_vector_index = outputBlob->buffer();
for (uint32_t f = 0; f < inferRequest.numFramesThisBatch; ++f) {
RhDecoderStatus rh_status = RhDecoderProcessFrame(rhDecoderInstanceParams.handle,
acoustic_score_vector_index, numScoresPerFrame, &info);
if (RhDecoderStatus::RH_DECODER_SUCCESS != rh_status) {
throw std::logic_error(
"Decoder failed to process frame: " + std::to_string(inferRequest.frameIndex));
}
if (info.is_result_stable || inferRequest.frameIndex + f == numFrames - 1) {
rh_status = RhDecoderGetResult(rhDecoderInstanceParams.handle,
RhDecoderResultType::RH_DECODER_FINAL_RESULT,
rh_utterance_transcription.data(),
rh_utterance_transcription.size());
if (RhDecoderStatus::RH_DECODER_SUCCESS != rh_status) {
throw std::logic_error("Failed to retrieve speech recognition result");
}
std::cout << uttName << "\t" << rh_utterance_transcription.data() << std::endl;
}
acoustic_score_vector_index += lastLayerOutputCount;
}
if (!FLAGS_o.empty()) {
outputFrame =
&ptrScores.front() + numScoresPerFrame * sizeof(float) * (inferRequest.frameIndex);
Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.rbegin()->first);
auto byteSize = inferRequest.numFramesThisBatch * numScoresPerFrame * sizeof(float);
std::memcpy(outputFrame,
outputBlob->buffer(),
byteSize);
}
if (!FLAGS_r.empty()) {
Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.begin()->first);
CompareScores(outputBlob->buffer().as<float*>(),
&ptrReferenceScores[inferRequest.frameIndex *
numFrameElementsReference *
numBytesPerElementReference],
&frameError,
inferRequest.numFramesThisBatch,
numFrameElementsReference);
UpdateScoreError(&frameError, &totalError);
}
if (FLAGS_pc) {
// retrieve new counters
getPerformanceCounters(inferRequest.inferRequest, callPerfMap);
// summarize retrieved counters with all previous ones
sumPerformanceCounters(callPerfMap, utterancePerfMap);
}
}
}
if (frameIndex == numFrames) {
inferRequest.frameIndex = -1;
continue;
}
ptrInputBlobs.clear();
for (auto& input : cInputInfo) {
ptrInputBlobs.push_back(inferRequest.inferRequest.GetBlob(input.first));
}
for (size_t i = 0; i < numInputArkFiles; ++i) {
std::memcpy(ptrInputBlobs[i]->buffer(),
inputFrame[i],
ptrInputBlobs[i]->byteSize());
}
int index = static_cast<int>(frameIndex) - (FLAGS_cw_l + FLAGS_cw_r);
inferRequest.inferRequest.StartAsync();
inferRequest.frameIndex = index < 0 ? -2 : index;
inferRequest.numFramesThisBatch = numFramesThisBatch;
frameIndex += numFramesThisBatch;
for (size_t j = 0; j < inputArkFiles.size(); j++) {
if (FLAGS_cw_l > 0 || FLAGS_cw_r > 0) {
int i = frameIndex - FLAGS_cw_l;
if (i > 0 && i < static_cast<int>(numFramesArkFile)) {
inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
} else if (i >= static_cast<int>(numFramesArkFile)) {
inputFrame[j] = &ptrUtterances[0].front() +
(numFramesArkFile - 1) * sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
} else if (i < 0) {
inputFrame[j] = &ptrUtterances[0].front();
}
} else {
inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
}
}
inferRequestFetched |= true;
}
if (!inferRequestFetched) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue;
}
}
t1 = Time::now();
fsec fs = t1 - t0;
ms d = std::chrono::duration_cast<ms>(fs);
totalTime += d.count();
// resetting state between utterances
for (auto &&state : executableNet.QueryState()) {
state.Reset();
}
if (!FLAGS_o.empty()) {
bool shouldAppend = (utteranceIndex != 0);
SaveKaldiArkArray(FLAGS_o.c_str(), shouldAppend, uttName, &ptrScores.front(),
numFrames, numScoresPerFrame);
}
/** Show performance results **/
std::cout << "Total time in Infer (HW and SW):\t" << totalTime << " ms"
<< std::endl;
std::cout << "Frames in utterance:\t\t\t" << numFrames << " frames"
<< std::endl;
std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast<double>(numFrames) << " ms"
<< std::endl;
if (FLAGS_pc) {
// print
printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d));
}
if (!FLAGS_r.empty()) {
printReferenceCompareResults(totalError, numFrames, std::cout);
}
std::cout << "End of Utterance " << utteranceIndex << std::endl << std::endl;
}
FreeRhDecoder(rhDecoderInstanceParams);
// -----------------------------------------------------------------------------------------------------
}
catch (const std::exception &error) {
slog::err << error.what() << slog::endl;
return 1;
}
catch (...) {
slog::err << "Unknown/internal exception happened" << slog::endl;
return 1;
}
slog::info << "Execution successful" << slog::endl;
return 0;
}
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <vector>
#include <gflags/gflags.h>
#include <iostream>
/// @brief message for help argument
static const char help_message[] = "Print a usage message.";
/// @brief message for input files argument
static const char input_message[] = "Required. Paths to .ark files. Usage example: <file1.ark,file2.ark> or <file.ark>.";
/// @brief message for model argument
static const char model_message[] = "Required. Path to an .xml file with a trained model (required if -rg is missing).";
/// @brief message for plugin argument
static const char plugin_message[] = "Plugin name. For example, MKLDNNPlugin. If this parameter is set, " \
"the sample will look for this plugin only";
/// @brief message for assigning cnn calculation to device
static const char target_device_message[] = "Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_FP32, "
"GNA_SW_EXACT, and HETERO with a combination of GNA as the primary device and CPU"
" as a secondary one (e.g. HETERO:GNA,CPU) are supported. The sample will look "
"for a suitable plugin for the device specified.";
/// @brief message for performance counters
static const char performance_counter_message[] = "Enables per-layer performance report";
/// @brief message for user library argument
static const char custom_cpu_library_message[] = "Required for MKLDNN (CPU)-targeted custom layers. " \
"Absolute path to a shared library with the kernels implementation.";
/// @brief message for score output argument
static const char output_message[] = "Output file name (default name is scores.ark).";
/// @brief message for reference score file argument
static const char reference_score_message[] = "Read reference score .ark file and compare scores.";
/// @brief message for read GNA model argument
static const char read_gna_model_message[] = "Read GNA model from file using path/filename provided (required if -m is missing).";
/// @brief message for write GNA model argument
static const char write_gna_model_message[] = "Write GNA model to file using path/filename provided.";
/// @brief message for write GNA embedded model argument
static const char write_embedded_model_message[] = "Write GNA embedded model to file using path/filename provided.";
/// @brief message for quantization argument
static const char quantization_message[] = "Input quantization mode: static (default), dynamic, or user (use with -sf).";
/// @brief message for quantization bits argument
static const char quantization_bits_message[] = "Weight bits for quantization: 8 or 16 (default)";
/// @brief message for scale factor argument
static const char scale_factor_message[] = "Optional user-specified input scale factor for quantization (use with -q user).";
/// @brief message for batch size argument
static const char batch_size_message[] = "Batch size 1-8 (default 1)";
/// @brief message for #threads for CPU inference
static const char infer_num_threads_message[] = "Optional. Number of threads to use for concurrent async" \
" inference requests on the GNA.";
/// @brief message for context window argument
static const char context_window_message_l[] = "Optional. Number of frames for left context windows (default is 0). " \
"Works only with context window networks."
" If you use the cw_l or cw_r flag, then batch size and nthreads arguments are ignored.";
/// @brief message for right context window argument
static const char context_window_message_r[] = "Optional. Number of frames for right context windows (default is 0). " \
"Works only with context window networks."
" If you use the cw_r or cw_l flag, then batch size and nthreads arguments are ignored.";
/// @brief message for RH HMM model argument
static const char rh_hmm_model_message[] = "Required. Path to RH .hmm file.";
/// @brief message for RH model argument
static const char rh_labels_message[] = "Required. Path to RH labels file.";
/// @brief message for RH LM: G model argument
static const char rh_g_model_message[] = "Required. Path to RH LM: G .fst model file.";
/// @brief message for RH LM: CL model argument
static const char rh_cl_model_message[] = "Required. Path to RH LM: CL .fst model file.";
/// @brief message for RH acoustic model scale factor argument
static const char rh_am_scale_factor_message[] = "Optional. RH acoustic model scale factor.";
/// @brief message for RH beam width argument
static const char rh_beam_width_message[] = "Optional. RH beam width.";
/// @brief message for RH N-best result argument
static const char rh_nbest_message[] = "Optional. RH N-best results.";
/// @brief message for RH G-cache log size argument
static const char rh_g_cache_log_size_message[] = "Optional. RH G-cache log size.";
/// @brief message for RH trace back log size argument
static const char rh_trace_back_log_size_message[] = "Optional. RH trace back log size.";
/// @brief message for RH minimum number of stable frames to attribute result as final
static const char rh_min_stable_frames_message[] = "Optional. Minimum number of stable frames to attribute result as final.";
/// @brief message for RH token buffer size argument
static const char rh_token_buffer_size_message[] = "Optional. RH token buffer size.";
/// @brief Define flag for showing help message <br>
DEFINE_bool(h, false, help_message);
/// @brief Define parameter to set paths to input .ark files <br>
/// It is a required parameter
DEFINE_string(i, "", input_message);
/// @brief Define parameter to set the model file <br>
/// It is a required parameter
DEFINE_string(m, "", model_message);
/// @brief Define parameter to set the plugin name <br>
/// It is a required parameter
DEFINE_string(p, "", plugin_message);
/// @brief Define the target device to infer on <br>
DEFINE_string(d, "GNA_AUTO", target_device_message);
/// @brief Enable per-layer performance report
DEFINE_bool(pc, false, performance_counter_message);
/// @brief Absolute path to CPU library with user layers <br>
/// It is an optional parameter
DEFINE_string(l, "", custom_cpu_library_message);
/// @brief Write output scores to file
DEFINE_string(o, "", output_message);
/// @brief Read reference score file
DEFINE_string(r, "", reference_score_message);
/// @brief Read GNA model from file (model.bin)
DEFINE_string(rg, "", read_gna_model_message);
/// @brief Write GNA model to file (model.bin)
DEFINE_string(wg, "", write_gna_model_message);
/// @brief Write GNA embedded model to file (model.bin)
DEFINE_string(we, "", write_embedded_model_message);
/// @brief Input quantization mode (default static)
DEFINE_string(q, "static", quantization_message);
/// @brief Input quantization bits (default 16)
DEFINE_int32(qb, 16, quantization_bits_message);
/// @brief Scale factor for quantization (default 1.0)
DEFINE_double(sf, 1.0, scale_factor_message);
/// @brief Batch size (default 1)
DEFINE_int32(bs, 1, batch_size_message);
/// @brief Number of threads to use for inference on the CPU (also affects Hetero cases)
DEFINE_int32(nthreads, 1, infer_num_threads_message);
/// @brief Right context window size (default 0)
DEFINE_int32(cw_r, 0, context_window_message_r);
/// @brief Left context window size (default 0)
DEFINE_int32(cw_l, 0, context_window_message_l);
/// @brief Define parameter to set the RH HMM model file
/// It is a required parameter
DEFINE_string(hmm, "rh.hmm", rh_hmm_model_message);
/// @brief Define parameter to set the RH labels file
/// It is a required parameter
DEFINE_string(labels, "labels.bin", rh_labels_message);
/// @brief Define parameter to set the RH LM: G model file
/// It is a required parameter
DEFINE_string(g, "g.fst", rh_g_model_message);
/// @brief Define parameter to set the RH LM: CL model file
/// It is a required parameter
DEFINE_string(cl, "cl.fst", rh_cl_model_message);
/// @brief RH Acoustic model scale factor (default 1.0)
DEFINE_double(amsf, 1.0, rh_am_scale_factor_message);
/// @brief RH beam width (default 14.0)
DEFINE_double(beam_width, 14.0, rh_beam_width_message);
/// @brief RH N-best (default 1)
DEFINE_int32(nbest, 1, rh_nbest_message);
/// @brief RH G cache log size (default 19)
DEFINE_int32(gcls, 19, rh_g_cache_log_size_message);
/// @brief RH trace back log size (default 19)
DEFINE_int32(tbls, 19, rh_trace_back_log_size_message);
/// @brief RH minimum stable frames (default -1)
DEFINE_int32(msf, -1, rh_min_stable_frames_message);
/// @brief RH token buffer size (default 150000)
DEFINE_int32(tbs, 150000, rh_token_buffer_size_message);
/**
* @brief This function shows a help message
*/
static void showUsage() {
std::cout << std::endl;
std::cout << "speech_recognition_offline_demo [OPTION]" << std::endl;
std::cout << "Options:" << std::endl;
std::cout << std::endl;
std::cout << " -h " << help_message << std::endl;
std::cout << " -i \"<path>\" " << input_message << std::endl;
std::cout << " -m \"<path>\" " << model_message << std::endl;
std::cout << " -o \"<path>\" " << output_message << std::endl;
std::cout << " -l \"<absolute_path>\" " << custom_cpu_library_message << std::endl;
std::cout << " -d \"<device>\" " << target_device_message << std::endl;
std::cout << " -p " << plugin_message << std::endl;
std::cout << " -pc " << performance_counter_message << std::endl;
std::cout << " -q \"<mode>\" " << quantization_message << std::endl;
std::cout << " -qb \"<integer>\" " << quantization_bits_message << std::endl;
std::cout << " -sf \"<double>\" " << scale_factor_message << std::endl;
std::cout << " -bs \"<integer>\" " << batch_size_message << std::endl;
std::cout << " -r \"<path>\" " << reference_score_message << std::endl;
std::cout << " -rg \"<path>\" " << read_gna_model_message << std::endl;
std::cout << " -wg \"<path>\" " << write_gna_model_message << std::endl;
std::cout << " -we \"<path>\" " << write_embedded_model_message << std::endl;
std::cout << " -nthreads \"<integer>\" " << infer_num_threads_message << std::endl;
std::cout << " -cw_l \"<integer>\" " << context_window_message_l << std::endl;
std::cout << " -cw_r \"<integer>\" " << context_window_message_r << std::endl;
std::cout << " -hmm \"<path>\" " << rh_hmm_model_message << std::endl;
std::cout << " -labels \"<path>\" " << rh_labels_message << std::endl;
std::cout << " -g \"<path>\" " << rh_g_model_message << std::endl;
std::cout << " -cl \"<path>\" " << rh_cl_model_message << std::endl;
std::cout << " -amsf \"<double>\" " << rh_am_scale_factor_message << std::endl;
std::cout << " -beam_width \"<double>\" " << rh_beam_width_message << std::endl;
std::cout << " -nbest \"<integer>\" " << rh_nbest_message << std::endl;
std::cout << " -gcls \"<integer>\" " << rh_g_cache_log_size_message << std::endl;
std::cout << " -tbls \"<integer>\" " << rh_trace_back_log_size_message << std::endl;
std::cout << " -msf \"<integer>\" " << rh_min_stable_frames_message << std::endl;
std::cout << " -tbs \"<integer>\" " << rh_token_buffer_size_message << std::endl;
}