Commit d4349db8 authored by shssf, committed by Robert Kimball

IntelGPU backend: broadcast operation (#1252)

* IntelGPUBackend: Broadcast operation

* IntelGPUBackend: more tests for Broadcast operation

* Move macro to static C function in Broadcast tests
parent 8c1aad8f
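
File: CMakeLists.txt (IntelGPU runtime)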
@@ -19,6 +19,7 @@ set(SRC
    intelgpu_tensor_view.cpp
    intelgpu_layout.cpp
    intelgpu_op_batchnorm.cpp
+   intelgpu_op_broadcast.cpp
    )

if (NGRAPH_INTELGPU_ENABLE)
......
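
File: intelgpu_backend.cpp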
@@ -16,7 +16,6 @@
#include <CPP/activation.hpp>
#include <CPP/batch_norm.hpp>
-#include <CPP/concatenation.hpp>
#include <CPP/convolution.hpp>
#include <CPP/data.hpp>
#include <CPP/eltwise.hpp>

@@ -25,13 +24,13 @@
#include <CPP/permute.hpp>
#include <CPP/pooling.hpp>
#include <CPP/reorder.hpp>
-#include <CPP/reshape.hpp>
#include <CPP/scale.hpp>
#include <CPP/topology.hpp>
#include "ngraph/runtime/intelgpu/intelgpu_backend.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
+#include "ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_tensor_view.hpp"
#include "ngraph/node.hpp"

@@ -42,6 +41,7 @@
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/reshape.hpp"
+#include "ngraph/util.hpp"

using namespace std;
using namespace ngraph;
@@ -91,6 +91,18 @@ static void do_unary_operation(cldnn::topology& topology,
    topology.add(cldnn_unary);
}

+// This function is needed only to change the name of the data in the topology;
+// no real data copy is performed.
+static void do_equal_propagation(cldnn::topology& topology,
+                                 const string& input_name,
+                                 const string& output_name)
+{
+    const vector<cldnn::primitive_id> input_names(1, input_name);
+    const cldnn::concatenation op_concat(output_name, input_names, cldnn::concatenation::along_x);
+    topology.add(op_concat);
+}
extern "C" const char* get_ngraph_version_string() extern "C" const char* get_ngraph_version_string()
{ {
return NGRAPH_VERSION; return NGRAPH_VERSION;
...@@ -152,15 +164,21 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -152,15 +164,21 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 1, 1); arguments_check(op, 1, 1);
const descriptor::Tensor& input_tensor = op->get_inputs().begin()->get_tensor(); const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const descriptor::Tensor& output_tensor = op->get_outputs().begin()->get_tensor(); const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const string& input_name = input_tensor.get_name();
const string& output_name = output_tensor.get_name();
const cldnn::layout input_layout = IntelGPULayout::create_cldnn_layout(
input_tensor.get_element_type(), op->get_inputs().begin()->get_shape());
const cldnn::reorder op_reorder(output_name, input_name, input_layout); do_equal_propagation(topology, input_name, output_name);
topology.add(op_reorder); }
else if ("GetOutputElement" == op->description())
{
arguments_check(op, 3, 1);
const shared_ptr<op::GetOutputElement> elem =
static_pointer_cast<op::GetOutputElement>(op);
const string& input_name = op->get_inputs().at(elem->get_n()).get_tensor().get_name();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
do_equal_propagation(topology, input_name, output_name);
} }
else if ("Add" == op->description()) else if ("Add" == op->description())
{ {
@@ -213,7 +231,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
    vector<cldnn::tensor::value_type> offset({0, 0, 0, 0}); // No action by default
    size_t ridx = 4;
-   for (auto i = pad.rbegin(); i != pad.rend() && ridx > 0; ++i, --ridx)
+   for (auto i = pad.crbegin(); i != pad.crend() && ridx > 0; ++i, --ridx)
    {
        offset.at(ridx - 1) = -(*i);
    }
@@ -234,6 +252,29 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
        output_size);
    topology.add(cldd_pooling);
}
+else if ("Broadcast" == op->description())
+{
+    arguments_check(op, 1, 1);
+
+    const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
+    const Shape& input_shape = op->get_inputs().begin()->get_shape();
+    const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
+    const Shape& output_shape = op->get_outputs().begin()->get_shape();
+    const shared_ptr<op::Broadcast> broadcast = static_pointer_cast<op::Broadcast>(op);
+    const AxisSet& axis = broadcast->get_broadcast_axes();
+
+    if (axis.empty())
+    {
+        do_equal_propagation(topology, input_name, output_name);
+    }
+    else
+    {
+        do_broadcast_operation(
+            topology, input_name, input_shape, output_name, output_shape, axis);
+    }
+}
else if ("Reshape" == op->description())
{
    arguments_check(op, 1, 1);
@@ -252,7 +293,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
    // For example, in a 2D array the indexes are 0,1, but in a 4D array they should be 2,3,
    // because cldnn::tensor is always 4D assuming the cldnn::bfyx model
    size_t rindex = max_dim;
-   for (auto i = broadcast_axes.rbegin(); i != broadcast_axes.rend() && rindex > 0;
+   for (auto i = broadcast_axes.crbegin(); i != broadcast_axes.crend() && rindex > 0;
        ++i, --rindex)
    {
        permute_order.at(rindex - 1) = *i + scale;
......
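
File: intelgpu_layout.cpp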
@@ -29,7 +29,7 @@ runtime::intelgpu::IntelGPULayout::IntelGPULayout(const descriptor::TensorView&
{
}

-size_t runtime::intelgpu::IntelGPULayout::get_index_offset(const std::vector<size_t>& indices)
+size_t runtime::intelgpu::IntelGPULayout::get_index_offset(const vector<size_t>& indices)
{
    if (indices.size() != strides.size())
    {

@@ -74,16 +74,16 @@ cldnn::data_types
    {
        ostringstream os;
        os << "IntelGPULayout::get_cldnn_type: Unknown type " << element_type;
-       throw std::invalid_argument(os.str());
+       throw invalid_argument(os.str());
    }
}

cldnn::tensor runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(const Shape& element_shape)
{
-   std::vector<size_t> idx(4, 1);
+   vector<size_t> idx(4, 1);
    size_t index = 0;
-   for (auto i = element_shape.rbegin(); i != element_shape.rend() && index < 3; ++i, ++index)
+   for (auto i = element_shape.crbegin(); i != element_shape.crend() && index < 3; ++i, ++index)
    {
        idx.at(index) = *i;
    }

@@ -109,3 +109,21 @@ cldnn::layout runtime::intelgpu::IntelGPULayout::create_cldnn_layout(
    return cldnn::layout(data_type, format, tensor);
}

+cldnn::concatenation::concatenation_axis
+    runtime::intelgpu::IntelGPULayout::get_cldnn_axis(size_t tensor_channel)
+{
+    switch (tensor_channel)
+    {
+    case 0: return cldnn::concatenation::along_b;
+    case 1: return cldnn::concatenation::along_f;
+    case 2: return cldnn::concatenation::along_y;
+    case 3: return cldnn::concatenation::along_x;
+    default:
+    {
+        ostringstream os;
+        os << "IntelGPULayout::get_cldnn_axis: wrong tensor channel " << tensor_channel;
+        throw invalid_argument(os.str());
+    }
+    }
+}
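
File: intelgpu_layout.hpp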
@@ -16,6 +16,7 @@
#pragma once

+#include <CPP/concatenation.hpp>
#include <CPP/layout.hpp>
#include <CPP/tensor.hpp>

@@ -51,6 +52,9 @@ public:
        const Shape& element_shape);
    static cldnn::tensor create_cldnn_tensor(const Shape& element_shape);

+   // This function converts a Shape dimension id into a cldnn::concatenation axis id
+   static cldnn::concatenation::concatenation_axis get_cldnn_axis(size_t tensor_channel);

private:
    Strides strides;
    cldnn::layout cldnn_layout;
......
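
File: intelgpu_op_batchnorm.cpp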
@@ -19,6 +19,7 @@
#include <CPP/scale.hpp>
#include <CPP/split.hpp>

+#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
#include "ngraph/op/batch_norm.hpp"

@@ -26,19 +27,6 @@
using namespace std;
using namespace ngraph;

-// This function converts Shape dimension id into cldnn::concatenation id
-static cldnn::concatenation::concatenation_axis get_cldnn_axis(size_t tensor_channel)
-{
-    switch (tensor_channel)
-    {
-    case 0: return cldnn::concatenation::along_b;
-    case 1: return cldnn::concatenation::along_f;
-    case 2: return cldnn::concatenation::along_y;
-    case 3: return cldnn::concatenation::along_x;
-    default: throw invalid_argument("intelgpu::get_cldnn_axis() wrong input tensor channel.");
-    }
-}

static string do_matrix_split(cldnn::topology& topology,
                              const string& name,
                              const vector<pair<cldnn::primitive_id, cldnn::tensor>>& offsets)

@@ -88,6 +76,8 @@ void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
    // Also, input data must be at least 2D array
    const size_t shape_channel = 1;
    const size_t cldnn_channel = 4 - input_shape.size() + shape_channel;
+   const cldnn::concatenation::concatenation_axis direction =
+       runtime::intelgpu::IntelGPULayout::get_cldnn_axis(cldnn_channel);

    const size_t split_arr_count = input_shape.at(shape_channel);
    for (size_t i = 0; i < split_arr_count; ++i)

@@ -99,7 +89,6 @@ void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
        vector<cldnn::tensor::value_type> offset({0, 0, 0, 0}); // No action by default
        offset.at(cldnn_channel) = i;
-       cout << "Splitted to " << i << " with " << vector_to_string(offset) << "\n";
        const cldnn::tensor input_offset(offset.at(0), offset.at(1), offset.at(3), offset.at(2));
        split_offsets.push_back(pair<cldnn::primitive_id, cldnn::tensor>(str_i, input_offset));
    }

@@ -141,6 +130,6 @@ void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
        dim_set.push_back(output_name + suf);
    }

-   const cldnn::concatenation op_concat(output_name, dim_set, get_cldnn_axis(cldnn_channel));
+   const cldnn::concatenation op_concat(output_name, dim_set, direction);
    topology.add(op_concat);
}
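
New file: intelgpu_op_broadcast.cpp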
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <CPP/concatenation.hpp>
#include <CPP/reshape.hpp>
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
static const string reshape_suf("_reshape");
static Shape propagate_backward(const Shape& input)
{
Shape result({0, 0, 0, 0});
size_t idx = result.size() - 1;
for (auto i = input.crbegin(); i != input.crend(); ++i, --idx)
{
result.at(idx) = *i;
}
return result;
}
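// For example (hypothetical values): propagate_backward(Shape{2, 3})
// right-aligns the dimensions into the 4-slot result and returns {0, 0, 2, 3}.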
static Shape propagate_forward(const Shape& input)
{
Shape result({0, 0, 0, 0});
size_t idx = 0;
for (auto i = input.cbegin(); i != input.cend(); ++i, ++idx)
{
result.at(idx) = *i;
}
return result;
}
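// For example (hypothetical values): propagate_forward(Shape{2, 3})
// left-aligns the dimensions into the 4-slot result and returns {2, 3, 0, 0}.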
static Shape apply_axis(const Shape& input, const AxisSet& axis)
{
Shape result = input;
for (auto const& i : axis)
{
result.at(i) = 0;
}
return result;
}
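// For example (hypothetical values): apply_axis({2, 3, 4, 5}, AxisSet{0, 2})
// zeroes the dimensions named by the axis set and returns {0, 3, 0, 5}.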
// This function broadcasts input data to all other dimensions of the output.
// It operates in two modes only (controlled by the is_forward flag):
// [forward]: propagates data from left to right in Shape array terms
//            in[2], out[2,3,4,5], axis[1,2,3]
// [backward]: propagates data from right to left in Shape array terms
//            in[5], out[2,3,4,5], axis[0,1,2]
// Input and output shapes can be up to 4 dimensions.
// Other variants, like in[4] out[2,3,4,5] axis[0,1,3], are not supported yet.
static void do_propagation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const string& output_name,
const Shape& output_shape,
const AxisSet& axis,
bool is_forward)
{
    // Default value; used in "forward" mode
cldnn::concatenation::concatenation_axis direction =
runtime::intelgpu::IntelGPULayout::get_cldnn_axis(3);
string input_name_it = input_name;
string output_name_it = output_name;
Shape input_shape_it = input_shape;
for (auto axis_id = axis.crbegin(); axis_id != axis.crend();)
{
const size_t input_count = output_shape.at(*axis_id);
if (is_forward)
{
input_shape_it.push_back(1);
const cldnn::tensor my_tensor =
runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(input_shape_it);
const cldnn::reshape op_reshape(input_name_it + reshape_suf, input_name_it, my_tensor);
topology.add(op_reshape);
input_shape_it.back() = input_count;
input_name_it += reshape_suf;
}
else
{
direction = runtime::intelgpu::IntelGPULayout::get_cldnn_axis(*axis_id);
}
const vector<cldnn::primitive_id> input_names(input_count, input_name_it);
++axis_id;
if (axis_id == axis.crend())
{
output_name_it = output_name;
}
else
{
output_name_it += ":_";
input_name_it = output_name_it;
}
const cldnn::concatenation op_concat(output_name_it, input_names, direction);
topology.add(op_concat);
}
}
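// Illustrative traces of the two modes (primitive names shortened):
//   forward:  in[2] -> out[2,3], axis{1}: reshape "in" into [2,1] as
//             "in_reshape", then concatenate 3 copies of it along x.
//   backward: in[2] -> out[3,2], axis{0}: concatenate 3 copies of "in"
//             along b; no reshape is needed.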
// Assumes the input is a scalar; all output data will be populated by that scalar.
// This function is extremely suboptimal from a performance perspective.
static void do_scalar_propagation(cldnn::topology& topology,
const string& input_name,
const string& output_name,
const Shape& output_shape)
{
const size_t input_count = shape_size<const Shape>(output_shape);
const vector<cldnn::primitive_id> input_names(input_count, input_name);
const cldnn::concatenation op_concat(output_name, input_names, cldnn::concatenation::along_x);
topology.add(op_concat);
}
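// For example, broadcasting a scalar to Shape{2, 3} emits a single
// concatenation of shape_size({2, 3}) == 6 copies of the input along x.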
void runtime::intelgpu::do_broadcast_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const string& output_name,
const Shape& output_shape,
const AxisSet& axis)
{
if (input_shape.size() > 4 || output_shape.size() > 4)
{
throw invalid_argument("IntelGPU::Broadcast supports 4D shapes maximum.");
}
if (input_shape.empty())
{
do_scalar_propagation(topology, input_name, output_name, output_shape);
return;
}
const Shape output_shape_axis = apply_axis(output_shape, axis);
const Shape input_shape_forward = propagate_forward(input_shape);
const Shape output_shape_forward = propagate_forward(output_shape_axis);
const Shape input_shape_backward = propagate_backward(input_shape);
const Shape output_shape_backward = propagate_backward(output_shape_axis);
if (input_shape_forward == output_shape_forward)
{
do_propagation(topology, input_name, input_shape, output_name, output_shape, axis, true);
}
else if (input_shape_backward == output_shape_backward)
{
do_propagation(topology, input_name, input_shape, output_name, output_shape, axis, false);
}
else
{
ostringstream os;
os << "IntelGP::Broadcast unsupported mode. input" << vector_to_string(input_shape)
<< " output" << vector_to_string(output_shape) << " axis" << vector_to_string(axis);
throw invalid_argument(os.str());
}
}
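
New file: intelgpu_op_broadcast.hpp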
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/axis_set.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
// This implements the Broadcast nGraph operation
void do_broadcast_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const std::string& output_name,
const Shape& output_shape,
const AxisSet& axis);
}
}
}
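
A minimal usage sketch for the new entry point (the topology setup and the primitive ids "A" and "result" are illustrative assumptions, not part of the commit):

    // Broadcast a 2-element vector into a 2x3 matrix along axis 1
    // (the forward-propagation case described in intelgpu_op_broadcast.cpp).
    cldnn::topology topology;
    ngraph::runtime::intelgpu::do_broadcast_operation(
        topology, "A", ngraph::Shape{2}, "result", ngraph::Shape{2, 3}, ngraph::AxisSet{1});

File: unit test manifest. Removed entries are tests the IntelGPU backend is now expected to pass; the added broadcast_algo_* entries cover the stride and middle-axis cases that do_broadcast_operation does not support yet.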
abc_int64
-abs
acos
aliased_output
asin

@@ -19,7 +18,6 @@ avg_pool_2d_2channel_2image_padded_only_below
avg_pool_3d
backwards_abs
backwards_acos
-backwards_add
backwards_asin
backwards_atan
backwards_avgpool_n1_c1_hw2x2

@@ -36,7 +34,6 @@ backwards_concat_axis_1
backwards_concat_vector
backwards_cos
backwards_cosh
-backwards_divide
backwards_dot_scalar_scalar
backwards_dot_scalar_tensor
backwards_dot_tensor2_tensor2

@@ -53,8 +50,6 @@ backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_maxpool_n4c1h4w4_kh2kw2_sh1sw1
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_minimum
-backwards_negative
-backwards_parameter
backwards_power
backwards_relu
backwards_replace_slice

@@ -72,29 +67,23 @@ backwards_softmax_3d
backwards_softmax_all
backwards_softmax_axis
backwards_softmax_underflow
-backwards_subtract
backwards_sum_m2s
backwards_sum_m2v_0
backwards_sum_m2v_1
backwards_sum_v2s
backwards_tan
-backwards_tanh
batchnorm_bprop_n4c3h2w2
batchnorm_fprop_b1c2h2w2
batchnorm_fprop_b2c2h2w1
-batchnorm_fprop_globalstats_b2c2w2h1
-batchnorm_fprop_inference_b2c2h2w1
batch_norm_one_output
batch_norm_three_outputs
-broadcast_matrix_0
+broadcast_algo_3d_stride_1
+broadcast_algo_3d_stride_2
+broadcast_algo_matrix_stride_1
+broadcast_algo_matrix_stride_2
+broadcast_algo_matrix_stride_3
+broadcast_algo_vector_middle
broadcast_matrix_1
-broadcast_matrix_2
-broadcast_scalar_matrix
-broadcast_scalar_tensor
-broadcast_scalar_vector
-broadcast_trivial
-broadcast_vector_colwise
-broadcast_vector_rowwise
broadcast_vector_rowwise_int64
broadcast_vector_rowwise_reversed
ceiling

@@ -148,7 +137,6 @@ convolution_4d_4items_strided_dilated_padded_same
convolution_outlining
cos
cosh
-divide_adjoint_stability
divide_by_zero_int32
dot_0_0
dot1d

@@ -190,10 +178,6 @@ max_matrix_columns
max_matrix_rows
max_matrix_rows_zero
max_matrix_to_scalar_zero_by_zero
-max_pool_2d_1channel_1image_overpadded
-max_pool_2d_1channel_1image_padded
-max_pool_2d_1channel_1image_padded_negative_values
-max_pool_2d_1channel_1image_strided
max_pool_3d
max_to_scalar
max_trivial

@@ -214,10 +198,6 @@ min_to_scalar
min_trivial
min_trivial_5d
min_vector_zero
-mkldnn_layouts
-multiple_backends
-multiple_result
-negative
not
notequal
numeric_double_inf

@@ -275,9 +255,7 @@ reduce_window_emulating_max_pool_1d_2channel_2image
reduce_window_emulating_max_pool_2d_1channel_1image_strided
reduce_window_emulating_max_pool_2d_2channel_2image
relu_2Dbackprop
-relu_2Dfprop
relu_4Dbackprop
-relu_4Dfprop
replace_slice_3d
replace_slice_3d_strided
replace_slice_3d_strided_different_strides

@@ -327,8 +305,6 @@ softmax_axis_2
softmax_axis_3d
softmax_axis_3d_trivial
softmax_underflow
-sqrt
-subtract
sum_3d_eliminate_zero_dim
sum_3d_to_matrix_least_sig
sum_3d_to_matrix_most_sig

@@ -345,9 +321,7 @@ sum_trivial
sum_trivial_5d
sum_vector_zero
tan
-tanh
tensor_constant_int64
-tensor_constant_with_op
validate_call_input_type
validate_call_output_type
zero_sized_abs
......
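
File: test/backend_test.in.cpp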
@@ -1908,6 +1908,152 @@ NGRAPH_TEST(${BACKEND_NAME}, broadcast_vector_rowwise_int64)
    EXPECT_EQ((vector<int64_t>{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}), read_vector<int64_t>(result));
}
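
// broadcast_test_helper: builds a Broadcast graph, runs it on the backend
// under test and on the INTERPRETER reference backend, and compares results.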
static void broadcast_test_helper(const Shape& shape_a, const Shape& shape_r, const AxisSet& axis)
{
auto A = make_shared<op::Parameter>(element::f32, shape_a);
vector<float> inp_data(shape_size<const Shape>(shape_a));
iota(inp_data.begin(), inp_data.end(), 1);
auto f =
make_shared<Function>(make_shared<op::Broadcast>(A, shape_r, axis), op::ParameterVector{A});
auto ref_backend = runtime::Backend::create("INTERPRETER");
auto wrk_backend = runtime::Backend::create("${BACKEND_NAME}");
auto wrk_a = wrk_backend->create_tensor(element::f32, shape_a);
copy_data(wrk_a, inp_data);
auto ref_a = ref_backend->create_tensor(element::f32, shape_a);
copy_data(ref_a, inp_data);
auto wrk_result = wrk_backend->create_tensor(element::f32, shape_r);
auto ref_result = ref_backend->create_tensor(element::f32, shape_r);
wrk_backend->call(f, {wrk_result}, {wrk_a});
ref_backend->call(f, {ref_result}, {ref_a});
EXPECT_EQ(read_vector<float>(ref_result), read_vector<float>(wrk_result));
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_middle)
{
Shape shape_a{2};
Shape shape_r{3, 2, 4};
AxisSet axis{0, 2};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_forward_2)
{
Shape shape_a{2};
Shape shape_r{3, 2};
AxisSet axis{0};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_forward_3)
{
Shape shape_a{2};
Shape shape_r{4, 3, 2};
AxisSet axis{0, 1};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_forward_4)
{
Shape shape_a{2};
Shape shape_r{5, 4, 3, 2};
AxisSet axis{0, 1, 2};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_scalar)
{
Shape shape_a{};
Shape shape_r{5, 4, 3, 2};
AxisSet axis{0, 1, 2, 3};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_backward_2)
{
Shape shape_a{2};
Shape shape_r{2, 3};
AxisSet axis{1};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_backward_3)
{
Shape shape_a{2};
Shape shape_r{2, 3, 4};
AxisSet axis{1, 2};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_backward_4)
{
Shape shape_a{2};
Shape shape_r{2, 3, 4, 5};
AxisSet axis{1, 2, 3};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_matrix_backward_4)
{
Shape shape_a{4, 5};
Shape shape_r{2, 3, 4, 5};
AxisSet axis{0, 1};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_matrix_stride_1)
{
Shape shape_a{3, 5};
Shape shape_r{2, 3, 4, 5};
AxisSet axis{0, 2};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_matrix_stride_2)
{
Shape shape_a{3, 4};
Shape shape_r{2, 3, 4, 5};
AxisSet axis{0, 3};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_matrix_stride_3)
{
Shape shape_a{2, 4};
Shape shape_r{2, 3, 4, 5};
AxisSet axis{1, 3};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_3d_backward)
{
Shape shape_a{2, 3, 4};
Shape shape_r{5, 2, 3, 4};
AxisSet axis{0};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_3d_stride_1)
{
Shape shape_a{2, 3, 4};
Shape shape_r{2, 5, 3, 4};
AxisSet axis{1};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_3d_stride_2)
{
Shape shape_a{2, 3, 4};
Shape shape_r{2, 3, 5, 4};
AxisSet axis{2};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_matrix_0)
{
    Shape shape_a{2, 2};
......