Commit 8bde818c authored by dmyershov, committed by Robert Kimball

Backend/API: Implementation of the call method for IntelGPU (#1199)

* Backend/API: Implementation of the call method for IntelGPU

* intel_gpu_style_fix_1199

* Copy memory from clDNN to Tensor

* Code style fix in 1199.2
parent 83e7dba5
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
@@ -21,6 +21,7 @@
 #include <CPP/network.hpp>
 #include <CPP/reorder.hpp>
 #include <CPP/scale.hpp>
+#include <CPP/topology.hpp>
 
 #include "ngraph/runtime/intelgpu/intelgpu_backend.hpp"
 #include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
@@ -157,5 +158,46 @@ bool runtime::intelgpu::IntelGPUBackend::call(
     const vector<shared_ptr<runtime::TensorView>>& outputs,
     const vector<shared_ptr<runtime::TensorView>>& inputs)
 {
-    throw runtime_error("IntelGPUBackend::call: Not implemented yet");
+    validate_call(func, outputs, inputs);
+
+    FunctionInstance& instance = ocl_networks[func];
+    if (instance.ocl_network == nullptr)
+    {
+        if (!compile(func))
+        {
+            return false;
+        }
+    }
+
+    std::shared_ptr<cldnn::network> network = instance.ocl_network;
+
+    // Process input parameters. Correctness of the parameters was validated by validate_call.
+    // Since there is no explicit correlation between Function::m_parameters and inputs,
+    // we match them by index in the vectors.
+    for (size_t i = 0; i < inputs.size(); i++)
+    {
+        shared_ptr<runtime::intelgpu::IntelGPUTensorView> tv =
+            static_pointer_cast<runtime::intelgpu::IntelGPUTensorView>(inputs[i]);
+        const op::ParameterVector& input_params = func->get_parameters();
+
+        network->set_input_data(input_params[i]->get_output_tensor().get_name(),
+                                *tv->get_data_ptr());
+    }
+
+    // Execute the network
+    std::map<cldnn::primitive_id, cldnn::network_output> result = network->execute();
+
+    // Process output parameters. Correctness of the parameters was validated by validate_call.
+    // Since there is no explicit correlation between Function::m_results and outputs,
+    // we match them by index in the vectors.
+    for (size_t i = 0; i < func->get_output_size(); i++)
+    {
+        shared_ptr<runtime::intelgpu::IntelGPUTensorView> ngraph_res =
+            static_pointer_cast<runtime::intelgpu::IntelGPUTensorView>(outputs[i]);
+        const std::string& tensor_name = func->get_output_op(i)->get_output_tensor().get_name();
+
+        auto result_memory = result.at(tensor_name).get_memory().pointer<char>();
+        ngraph_res->write(result_memory.data(), 0, result_memory.size());
+    }
+
+    return true;
 }
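For orientation, here is a minimal usage sketch of the path this commit enables, written against nGraph's public backend API of this period (runtime::Backend::create, create_tensor, call). The element-wise Add graph and all values are illustrative assumptions, not part of this commit:

// Hedged sketch: builds a tiny Function and dispatches it through
// IntelGPUBackend::call as implemented above.
#include <memory>
#include <vector>
#include "ngraph/ngraph.hpp"

using namespace ngraph;

int main()
{
    Shape shape{2, 2};
    auto A = std::make_shared<op::Parameter>(element::f32, shape);
    auto B = std::make_shared<op::Parameter>(element::f32, shape);
    auto f = std::make_shared<Function>(std::make_shared<op::Add>(A, B),
                                        op::ParameterVector{A, B});

    auto backend = runtime::Backend::create("INTELGPU");
    auto a = backend->create_tensor(element::f32, shape);
    auto b = backend->create_tensor(element::f32, shape);
    auto result = backend->create_tensor(element::f32, shape);

    std::vector<float> va{1, 2, 3, 4};
    std::vector<float> vb{5, 6, 7, 8};
    a->write(va.data(), 0, va.size() * sizeof(float));
    b->write(vb.data(), 0, vb.size() * sizeof(float));

    // First call compiles and caches the cldnn::network, then executes it;
    // outputs/inputs are matched to m_results/m_parameters by index.
    backend->call(f, {result}, {a, b});

    std::vector<float> out(4);
    result->read(out.data(), 0, out.size() * sizeof(float)); // expect {6, 8, 10, 12}
    return 0;
}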
src/ngraph/runtime/intelgpu/intelgpu_backend.hpp
@@ -53,5 +53,12 @@ public:
     const std::vector<std::shared_ptr<runtime::TensorView>>& inputs) override;
 
 private:
+    class FunctionInstance
+    {
+    public:
+        std::shared_ptr<cldnn::network> ocl_network = nullptr;
+    };
+
+    std::map<std::shared_ptr<Function>, FunctionInstance> ocl_networks;
+
     std::shared_ptr<cldnn::engine> ocl_engine;
 };
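A side note on the cache added here: std::map::operator[] default-constructs the FunctionInstance on first lookup, so a null ocl_network doubles as the "not compiled yet" flag that call() checks. A standalone sketch of that pattern, with Network standing in for cldnn::network:

#include <iostream>
#include <map>
#include <memory>

struct Network {}; // stand-in for cldnn::network

struct FunctionInstance
{
    std::shared_ptr<Network> ocl_network = nullptr;
};

int main()
{
    std::map<int, FunctionInstance> cache; // int key stands in for shared_ptr<Function>

    // operator[] inserts a default-constructed entry: ocl_network starts null.
    FunctionInstance& instance = cache[42];
    if (instance.ocl_network == nullptr)
    {
        instance.ocl_network = std::make_shared<Network>(); // "compile" once
    }

    // Subsequent lookups reuse the cached, already-compiled network.
    std::cout << (cache[42].ocl_network != nullptr) << std::endl; // prints 1
    return 0;
}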
src/ngraph/runtime/intelgpu/intelgpu_tensor_view.hpp
@@ -54,6 +54,7 @@ public:
     /// @param n Number of bytes to read, must be integral number of elements.
     void read(void* p, size_t tensor_offset, size_t n) const override;
 
+    cldnn::memory* get_data_ptr() { return ocl_memory.get(); }
 private:
     cldnn::data_types get_cldnn_type(const ngraph::element::Type& element_type) const;
...
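The new accessor exists because call() must hand the raw device buffer to cldnn::network::set_input_data without copying; get_data_ptr exposes the cldnn::memory owned by the tensor view as a non-owning pointer. A minimal sketch of that idiom, with OclMemory standing in for cldnn::memory:

#include <memory>

struct OclMemory {}; // stand-in for cldnn::memory

class TensorView
{
public:
    // Borrowed pointer: ownership stays with the TensorView's shared_ptr,
    // so callers must not use it past the tensor's lifetime.
    OclMemory* get_data_ptr() { return ocl_memory.get(); }

private:
    std::shared_ptr<OclMemory> ocl_memory = std::make_shared<OclMemory>();
};

int main()
{
    TensorView tv;
    OclMemory* raw = tv.get_data_ptr(); // valid while tv is alive
    return raw != nullptr ? 0 : 1;
}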