Commit 8908c9df authored by shssf, committed by Robert Kimball

IntelGPUBackend: const, div, maxpool and max operations (#1234)

* IntelGPUBackend: const, div, maxpool and max operations

* IntelGPUBackend: negative, abs, relu, sqrt, tanh and subtract operations

* Update intelgpu_backend.cpp
parent e2255fbd
@@ -14,23 +14,39 @@
* limitations under the License.
*******************************************************************************/
#include <CPP/activation.hpp>
#include <CPP/batch_norm.hpp>
#include <CPP/concatenation.hpp>
#include <CPP/convolution.hpp>
#include <CPP/data.hpp>
#include <CPP/eltwise.hpp>
#include <CPP/input_layout.hpp>
#include <CPP/layout.hpp>
#include <CPP/network.hpp>
#include <CPP/permute.hpp>
#include <CPP/pooling.hpp>
#include <CPP/reorder.hpp>
#include <CPP/reshape.hpp>
#include <CPP/scale.hpp>
#include <CPP/topology.hpp>
#include "ngraph/runtime/intelgpu/intelgpu_backend.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_tensor_view.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/reshape.hpp"
using namespace std;
using namespace ngraph;
void arguments_check(const shared_ptr<Node>& op, size_t input, size_t output)
static void arguments_check(const shared_ptr<Node>& op, size_t input, size_t output)
{
if (op->get_input_size() != input || op->get_output_size() != output)
{
@@ -42,25 +58,39 @@ void arguments_check(const shared_ptr<Node>& op, size_t input, size_t output)
}
}
void do_eltwise_operation(cldnn::topology& topology,
const shared_ptr<Node>& op,
cldnn::eltwise_mode mode)
static void do_eltwise_operation(cldnn::topology& topology,
const shared_ptr<Node>& op,
cldnn::eltwise_mode mode)
{
arguments_check(op, 2, 1);
std::vector<cldnn::primitive_id> op_add_inputs;
vector<cldnn::primitive_id> op_add_inputs;
for (const descriptor::Input& op_input : op->get_inputs())
{
const std::string& element_name = op_input.get_tensor().get_name();
const string& element_name = op_input.get_tensor().get_name();
op_add_inputs.push_back(element_name);
}
const std::string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const cldnn::eltwise op_add(output_name, op_add_inputs, mode);
topology.add(op_add);
}
static void do_unary_operation(cldnn::topology& topology,
const shared_ptr<Node>& op,
cldnn_activation_func mode,
const cldnn_activation_additional_params& param = {0.f, 0.f})
{
arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const cldnn::activation cldnn_unary(output_name, input_name, mode, param);
topology.add(cldnn_unary);
}
extern "C" const char* get_ngraph_version_string()
{
return NGRAPH_VERSION;
@@ -78,7 +108,7 @@ extern "C" void delete_backend(runtime::Backend* backend)
runtime::intelgpu::IntelGPUBackend::IntelGPUBackend()
{
ocl_engine = std::make_shared<cldnn::engine>();
ocl_engine = make_shared<cldnn::engine>();
}
shared_ptr<runtime::TensorView>
@@ -111,7 +141,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{
arguments_check(op, 0, 1);
const std::string& element_name = op->get_output_tensor_view()->get_tensor().get_name();
const string& element_name = op->get_output_tensor_view()->get_tensor().get_name();
const cldnn::layout element_layout =
IntelGPULayout::create_cldnn_layout(op->get_element_type(), op->get_shape());
@@ -124,8 +154,8 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
const descriptor::Tensor& input_tensor = op->get_inputs().begin()->get_tensor();
const descriptor::Tensor& output_tensor = op->get_outputs().begin()->get_tensor();
const std::string& input_name = input_tensor.get_name();
const std::string& output_name = output_tensor.get_name();
const string& input_name = input_tensor.get_name();
const string& output_name = output_tensor.get_name();
const cldnn::layout input_layout = IntelGPULayout::create_cldnn_layout(
input_tensor.get_element_type(), op->get_inputs().begin()->get_shape());
@@ -140,6 +170,122 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::prod);
}
else if ("Divide" == op->description())
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::div);
}
else if ("Maximum" == op->description())
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::max);
}
else if ("Constant" == op->description())
{
arguments_check(op, 0, 1);
auto input_it = op->get_outputs().cbegin();
const descriptor::Tensor& output_tensor = input_it->get_tensor();
const string& output_name = output_tensor.get_name();
const shared_ptr<op::Constant> constant_inst = static_pointer_cast<op::Constant>(op);
void* memory_pointer = const_cast<void*>(constant_inst->get_data_ptr());
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(
output_tensor.get_element_type(), input_it->get_shape());
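// Wrap the constant's host buffer as cldnn memory; attach is assumed to reference it in place rather than copy it.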
const cldnn::memory mem(
cldnn::memory::attach<void>(layout, memory_pointer, layout.bytes_count()));
const cldnn::data op_const(output_name, mem);
topology.add(op_const);
}
else if ("MaxPool" == op->description())
{
arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& out_shape = op->get_outputs().begin()->get_shape();
const cldnn::tensor output_size =
runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(out_shape);
const shared_ptr<op::MaxPool> max_pool = static_pointer_cast<op::MaxPool>(op);
const Shape& pool_shape = max_pool->get_window_shape();
const Strides& pool_strides = max_pool->get_window_movement_strides();
const Shape& pad = max_pool->get_padding_below();
vector<cldnn::tensor::value_type> offset({0, 0, 0, 0}); // No action by default
size_t ridx = 4;
for (auto i = pad.rbegin(); i != pad.rend() && ridx > 0; ++i, --ridx)
{
offset.at(ridx - 1) = -(*i);
}
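// Example: padding_below {1, 2} fills offset with {0, 0, -1, -2}; the constructor below reorders it to (batch, feature, x, y) = (0, 0, -2, -1).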
const cldnn::tensor input_offset(
offset.at(0), offset.at(1), offset.at(3), offset.at(2));
const cldnn::tensor size =
runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(pool_shape);
const cldnn::tensor strides =
runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(pool_strides);
const cldnn::pooling cldd_pooling(output_name,
input_name,
cldnn::pooling_mode::max,
size,
strides,
input_offset,
output_size);
topology.add(cldd_pooling);
}
else if ("Reshape" == op->description())
{
arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const shared_ptr<op::Reshape> op_broadcast = static_pointer_cast<op::Reshape>(op);
const AxisVector& broadcast_axes = op_broadcast->get_input_order();
vector<uint16_t> permute_order({0, 1, 2, 3}); // No action by default
const size_t max_dim = 4;
const size_t scale =
broadcast_axes.size() < max_dim ? max_dim - broadcast_axes.size() : 0;
// Need to scale the indexes up according to the array rank.
// For example, in a 2D array the indexes are 0,1, but in a 4D array they should be 2,3
// because cldnn::tensor is always 4D, assuming the cldnn::bfyx model.
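// Example: a 2D transpose (input_order {1, 0}) becomes permute_order {0, 1, 3, 2}.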
size_t rindex = max_dim;
for (auto i = broadcast_axes.rbegin(); i != broadcast_axes.rend() && rindex > 0;
++i, --rindex)
{
permute_order.at(rindex - 1) = *i + scale;
}
const cldnn::permute cldnn_permute(output_name, input_name, permute_order);
topology.add(cldnn_permute);
}
else if ("Negative" == op->description())
{
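// Negation is expressed as the linear activation a * x + b with a = -1 and b = 0.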
const cldnn_activation_additional_params param = {-1.f, 0.f};
do_unary_operation(topology, op, activation_linear, param);
}
else if ("Relu" == op->description())
{
do_unary_operation(topology, op, activation_relu);
}
else if ("Abs" == op->description())
{
do_unary_operation(topology, op, activation_abs);
}
else if ("Sqrt" == op->description())
{
do_unary_operation(topology, op, activation_sqrt);
}
else if ("Tanh" == op->description())
{
do_unary_operation(topology, op, activation_hyperbolic_tan);
}
else if ("Subtract" == op->description())
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::sub);
}
else if ("BatchNorm" == op->description())
{
const shared_ptr<op::BatchNorm> batch_norm = static_pointer_cast<op::BatchNorm>(op);
@@ -189,11 +335,11 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{
ostringstream os;
os << "Unsupported operation \"" << op->description() << '\"';
throw std::invalid_argument(os.str());
throw invalid_argument(os.str());
}
}
instance.ocl_network = std::make_shared<cldnn::network>(*ocl_engine, topology);
instance.ocl_network = make_shared<cldnn::network>(*ocl_engine, topology);
return true;
}
@@ -214,7 +360,7 @@ bool runtime::intelgpu::IntelGPUBackend::call(
}
}
std::shared_ptr<cldnn::network> network = instance.ocl_network;
shared_ptr<cldnn::network> network = instance.ocl_network;
// Process input parameters. Correctness of parameters was validated by validate_call.
// Since we have no correlation between Function::m_parameters and inputs, there is
@@ -224,12 +370,12 @@ bool runtime::intelgpu::IntelGPUBackend::call(
shared_ptr<runtime::intelgpu::IntelGPUTensorView> tv =
static_pointer_cast<runtime::intelgpu::IntelGPUTensorView>(inputs[i]);
const op::ParameterVector& input_params = func->get_parameters();
network->set_input_data(input_params[i]->get_output_tensor().get_name(),
*tv->get_data_ptr());
const string& tensor_name = input_params[i]->get_output_tensor().get_name();
network->set_input_data(tensor_name, *tv->get_data_ptr());
}
// Execute network
std::map<cldnn::primitive_id, cldnn::network_output> result = network->execute();
map<cldnn::primitive_id, cldnn::network_output> result = network->execute();
// Process output parameters. Correctness of parameters was validated by validate_call.
// Since we have no correlation between Function::m_results and outputs, there is
@@ -238,7 +384,7 @@ bool runtime::intelgpu::IntelGPUBackend::call(
{
shared_ptr<runtime::intelgpu::IntelGPUTensorView> ngraph_res =
static_pointer_cast<runtime::intelgpu::IntelGPUTensorView>(outputs[i]);
const std::string& tensor_name = func->get_output_op(i)->get_output_tensor().get_name();
const string& tensor_name = func->get_output_op(i)->get_output_tensor().get_name();
auto result_memory = result.at(tensor_name).get_memory().pointer<char>();
ngraph_res->write(result_memory.data(), 0, result_memory.size());
......
@@ -20,6 +20,7 @@
#include <memory>
#include <CPP/engine.hpp>
#include <CPP/network.hpp>
#include "ngraph/runtime/backend.hpp"
......
@@ -73,18 +73,39 @@ cldnn::data_types
else
{
ostringstream os;
os << "IntelGPUTensorView::get_cldnn_type: Unknown type " << element_type;
os << "IntelGPULayout::get_cldnn_type: Unknown type " << element_type;
throw std::invalid_argument(os.str());
}
}
cldnn::tensor runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(const Shape& element_shape)
{
std::vector<size_t> idx(4, 1);
size_t index = 0;
for (auto i = element_shape.rbegin(); i != element_shape.rend() && index < 3; ++i, ++index)
{
idx.at(index) = *i;
}
if (element_shape.size() > 3)
{
idx.at(3) =
accumulate(element_shape.rbegin() + 3, element_shape.rend(), 1, multiplies<size_t>());
}
// Parameters for this ctor: batch, feature, spatial_x, spatial_y
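// Example: Shape{2, 3, 4, 5} gives idx = {5, 4, 3, 2} and a tensor of (batch=2, feature=3, x=5, y=4).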
const cldnn::tensor tns(idx.at(3), idx.at(2), idx.at(0), idx.at(1));
return tns;
}
cldnn::layout runtime::intelgpu::IntelGPULayout::create_cldnn_layout(
const ngraph::element::Type& element_type, const Shape& element_shape)
{
const size_t mem_size = shape_size(element_shape);
const cldnn::data_types data_type = get_cldnn_type(element_type);
const cldnn::tensor tensor(1, mem_size, 1, 1);
const cldnn::format::type format = cldnn::format::yxfb;
const cldnn::format::type format = cldnn::format::bfyx;
const cldnn::tensor tensor = create_cldnn_tensor(element_shape);
return cldnn::layout(data_type, format, tensor);
}
@@ -17,6 +17,7 @@
#pragma once
#include <CPP/layout.hpp>
#include <CPP/tensor.hpp>
#include "ngraph/descriptor/layout/tensor_view_layout.hpp"
@@ -48,6 +49,7 @@ public:
static cldnn::data_types get_cldnn_type(const ngraph::element::Type& element_type);
static cldnn::layout create_cldnn_layout(const ngraph::element::Type& element_type,
const Shape& element_shape);
static cldnn::tensor create_cldnn_tensor(const Shape& element_shape);
private:
Strides strides;
......
@@ -41,7 +41,7 @@ runtime::intelgpu::IntelGPUTensorView::IntelGPUTensorView(const ngraph::element:
if (nullptr != memory_pointer)
{
ocl_memory = make_shared<cldnn::memory>(
cldnn::memory::attach<void>(layout, memory_pointer, layout.get_linear_size()));
cldnn::memory::attach<void>(layout, memory_pointer, layout.bytes_count()));
}
else
{
......
@@ -17,11 +17,9 @@
#pragma once
#include <CPP/engine.hpp>
#include <CPP/layout.hpp>
#include <CPP/memory.hpp>
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/type/element_type.hpp"
namespace ngraph
{
@@ -56,7 +54,5 @@ public:
cldnn::memory* get_data_ptr() { return ocl_memory.get(); }
private:
cldnn::data_types get_cldnn_type(const ngraph::element::Type& element_type) const;
std::shared_ptr<cldnn::memory> ocl_memory;
};
ab
abc
abc_int64
abs
acos
@@ -18,11 +16,10 @@ avg_pool_2d_2channel_2image_padded_3x3_strided
avg_pool_2d_2channel_2image_padded_3x3_strided_uneven
avg_pool_2d_2channel_2image_padded_only_above
avg_pool_2d_2channel_2image_padded_only_below
backwards_abc
avg_pool_3d
backwards_abs
backwards_acos
backwards_add
backwards_add_nested
backwards_asin
backwards_atan
backwards_avgpool_n1_c1_hw2x2
@@ -56,18 +53,17 @@ backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_maxpool_n4c1h4w4_kh2kw2_sh1sw1
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_minimum
backwards_multiply
backwards_negative
backwards_parameter
backwards_power
backwards_relu
backwards_replace_slice
backwards_reshape
backwards_reverse_3d_02
backwards_reverse_sequence_n3_c2_h3
backwards_reverse_sequence_n4d2c3h2w2
backwards_select
backwards_select_nested
backwards_sigmoid
backwards_sign
backwards_sin
backwards_sinh
@@ -152,9 +148,7 @@ convolution_4d_4items_strided_dilated_padded_same
convolution_outlining
cos
cosh
divide
divide_adjoint_stability
divide_by_zero_float32
divide_by_zero_int32
dot_0_0
dot1d
@@ -176,8 +170,6 @@ equal
exp
floor
function_call
function_name
fuse_max_with_constant_zero_input_as_relu
greater
greatereq
kahan_sum_3d_to_vector
@@ -193,20 +185,16 @@ max_3d_to_matrix_least_sig
max_3d_to_matrix_most_sig
max_3d_to_scalar
max_3d_to_vector
maximum
max_matrix_cols_zero
max_matrix_columns
max_matrix_rows
max_matrix_rows_zero
max_matrix_to_scalar_zero_by_zero
max_pool_1d_1channel_1image
max_pool_1d_1channel_2image
max_pool_1d_2channel_2image
max_pool_2d_1channel_1image_overpadded
max_pool_2d_1channel_1image_padded
max_pool_2d_1channel_1image_padded_negative_values
max_pool_2d_1channel_1image_strided
max_pool_2d_2channel_2image
max_pool_3d
max_to_scalar
max_trivial
max_trivial_5d
@@ -226,10 +214,10 @@ min_to_scalar
min_trivial
min_trivial_5d
min_vector_zero
mkldnn_layouts
multiple_backends
multiple_result
negative
node_name
not
notequal
numeric_double_inf
@@ -257,7 +245,6 @@ pad_interior_1d
pad_interior_exterior_1d
pad_interior_exterior_2d
pad_interior_exterior_4d_2x0x3x2
parameter_as_output
power
product_3d_eliminate_zero_dim
product_3d_to_matrix_least_sig
@@ -297,19 +284,7 @@ replace_slice_3d_strided_different_strides
replace_slice_matrix
replace_slice_scalar
replace_slice_vector
reshape_3d_transpose
reshape_4d_transpose
reshape_6d
reshape_m2m_dim_change_transpose
reshape_m2m_same
reshape_m2m_transpose
reshape_s2t
reshape_t2s_012
reshape_t2s_120
reshape_t2v_012
reshape_v2m_col
reshape_v2m_row
reshape_v2t_middle
reverse_0d
reverse_1d_0
reverse_1d_nochange
@@ -328,12 +303,14 @@ reverse_3d_nochange
reverse_sequence_n2c3h4w2
reverse_sequence_n4c3h2w2
reverse_sequence_n4d2c3h2w2
scalar_constant_float32
scalar_constant_int64
select
select_and_scatter_3d_without_overlap
select_and_scatter_without_overlap
select_and_scatter_with_overlap
sigmoid_bprop_n1c1h4
sigmoid_n1c1h2w2
sigmoid_n1c1h4
sign
sin
sinh
@@ -369,11 +346,8 @@ sum_trivial_5d
sum_vector_zero
tan
tanh
tensor_constant
tensor_constant_float32
tensor_constant_int64
tensor_constant_with_op
tensorview_custom_mem
validate_call_input_type
validate_call_output_type
zero_sized_abs
......