Commit 8bde818c authored by dmyershov, committed by Robert Kimball

Backend/API: Implementation of the call method for IntelGPU (#1199)

* Backend/API: Implementation of the call method for IntelGPU

* intel_gpu_style_fix_1199

* Copy memory from clDNN to Tensor

* Code style fix in 1199.2
parent 83e7dba5
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
@@ -21,6 +21,7 @@
 #include <CPP/network.hpp>
 #include <CPP/reorder.hpp>
 #include <CPP/scale.hpp>
+#include <CPP/topology.hpp>
 
 #include "ngraph/runtime/intelgpu/intelgpu_backend.hpp"
 #include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
@@ -157,5 +158,46 @@ bool runtime::intelgpu::IntelGPUBackend::call(
     const vector<shared_ptr<runtime::TensorView>>& outputs,
     const vector<shared_ptr<runtime::TensorView>>& inputs)
 {
-    throw runtime_error("IntelGPUBackend::call: Not implemented yet");
+    validate_call(func, outputs, inputs);
+
+    FunctionInstance& instance = ocl_networks[func];
+    if (instance.ocl_network == nullptr)
+    {
+        if (!compile(func))
+        {
+            return false;
+        }
+    }
+
+    std::shared_ptr<cldnn::network> network = instance.ocl_network;
+
+    // Process input parameters. Correctness of the parameters was validated by validate_call.
+    // Since there is no explicit correlation between Function::m_parameters and inputs,
+    // we match them by index in the vectors.
+    for (size_t i = 0; i < inputs.size(); i++)
+    {
+        shared_ptr<runtime::intelgpu::IntelGPUTensorView> tv =
+            static_pointer_cast<runtime::intelgpu::IntelGPUTensorView>(inputs[i]);
+        const op::ParameterVector& input_params = func->get_parameters();
+
+        network->set_input_data(input_params[i]->get_output_tensor().get_name(),
+                                *tv->get_data_ptr());
+    }
+
+    // Execute the network
+    std::map<cldnn::primitive_id, cldnn::network_output> result = network->execute();
+
+    // Process output parameters. Correctness of the parameters was validated by validate_call.
+    // Since there is no explicit correlation between Function::m_results and outputs,
+    // we match them by index in the vectors.
+    for (size_t i = 0; i < func->get_output_size(); i++)
+    {
+        shared_ptr<runtime::intelgpu::IntelGPUTensorView> ngraph_res =
+            static_pointer_cast<runtime::intelgpu::IntelGPUTensorView>(outputs[i]);
+        const std::string& tensor_name = func->get_output_op(i)->get_output_tensor().get_name();
+
+        auto result_memory = result.at(tensor_name).get_memory().pointer<char>();
+        ngraph_res->write(result_memory.data(), 0, result_memory.size());
+    }
+
+    return true;
 }
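For orientation, here is a minimal usage sketch of the path this commit enables, written against nGraph's public backend API of this period (runtime::Backend::create, create_tensor, call). The element-wise Add graph and all values are illustrative assumptions, not part of this commit:

// Hedged sketch: builds a tiny Function and dispatches it through
// IntelGPUBackend::call as implemented above.
#include <memory>
#include <vector>
#include "ngraph/ngraph.hpp"

using namespace ngraph;

int main()
{
    Shape shape{2, 2};
    auto A = std::make_shared<op::Parameter>(element::f32, shape);
    auto B = std::make_shared<op::Parameter>(element::f32, shape);
    auto f = std::make_shared<Function>(std::make_shared<op::Add>(A, B),
                                        op::ParameterVector{A, B});

    auto backend = runtime::Backend::create("INTELGPU");
    auto a = backend->create_tensor(element::f32, shape);
    auto b = backend->create_tensor(element::f32, shape);
    auto result = backend->create_tensor(element::f32, shape);

    std::vector<float> va{1, 2, 3, 4};
    std::vector<float> vb{5, 6, 7, 8};
    a->write(va.data(), 0, va.size() * sizeof(float));
    b->write(vb.data(), 0, vb.size() * sizeof(float));

    // First call compiles and caches the cldnn::network, then executes it;
    // outputs/inputs are matched to m_results/m_parameters by index.
    backend->call(f, {result}, {a, b});

    std::vector<float> out(4);
    result->read(out.data(), 0, out.size() * sizeof(float)); // expect {6, 8, 10, 12}
    return 0;
}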
src/ngraph/runtime/intelgpu/intelgpu_backend.hpp
@@ -53,5 +53,12 @@ public:
     const std::vector<std::shared_ptr<runtime::TensorView>>& inputs) override;
 
 private:
+    class FunctionInstance
+    {
+    public:
+        std::shared_ptr<cldnn::network> ocl_network = nullptr;
+    };
+
+    std::map<std::shared_ptr<Function>, FunctionInstance> ocl_networks;
+
     std::shared_ptr<cldnn::engine> ocl_engine;
 };
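A side note on the cache added here: std::map::operator[] default-constructs the FunctionInstance on first lookup, so a null ocl_network doubles as the "not compiled yet" flag that call() checks. A standalone sketch of that pattern, with Network standing in for cldnn::network:

#include <iostream>
#include <map>
#include <memory>

struct Network {}; // stand-in for cldnn::network

struct FunctionInstance
{
    std::shared_ptr<Network> ocl_network = nullptr;
};

int main()
{
    std::map<int, FunctionInstance> cache; // int key stands in for shared_ptr<Function>

    // operator[] inserts a default-constructed entry: ocl_network starts null.
    FunctionInstance& instance = cache[42];
    if (instance.ocl_network == nullptr)
    {
        instance.ocl_network = std::make_shared<Network>(); // "compile" once
    }

    // Subsequent lookups reuse the cached, already-compiled network.
    std::cout << (cache[42].ocl_network != nullptr) << std::endl; // prints 1
    return 0;
}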
src/ngraph/runtime/intelgpu/intelgpu_tensor_view.hpp
@@ -54,6 +54,7 @@ public:
     /// @param n Number of bytes to read, must be integral number of elements.
     void read(void* p, size_t tensor_offset, size_t n) const override;
 
+    cldnn::memory* get_data_ptr() { return ocl_memory.get(); }
 private:
     cldnn::data_types get_cldnn_type(const ngraph::element::Type& element_type) const;
...
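The new accessor exists because call() must hand the raw device buffer to cldnn::network::set_input_data without copying; get_data_ptr exposes the cldnn::memory owned by the tensor view as a non-owning pointer. A minimal sketch of that idiom, with OclMemory standing in for cldnn::memory:

#include <memory>

struct OclMemory {}; // stand-in for cldnn::memory

class TensorView
{
public:
    // Borrowed pointer: ownership stays with the TensorView's shared_ptr,
    // so callers must not use it past the tensor's lifetime.
    OclMemory* get_data_ptr() { return ocl_memory.get(); }

private:
    std::shared_ptr<OclMemory> ocl_memory = std::make_shared<OclMemory>();
};

int main()
{
    TensorView tv;
    OclMemory* raw = tv.get_data_ptr(); // valid while tv is alive
    return raw != nullptr ? 0 : 1;
}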