Commit d4349db8 authored by shssf, committed by Robert Kimball

IntelGPU backend: broadcast operation (#1252)

* IntelGPUBackend: Broadcast operation

* IntelGPUBackend: more tests for Broadcast operation

* Move macro to static C function in Broadcast tests
parent 8c1aad8f
......@@ -19,6 +19,7 @@ set(SRC
intelgpu_tensor_view.cpp
intelgpu_layout.cpp
intelgpu_op_batchnorm.cpp
intelgpu_op_broadcast.cpp
)
if (NGRAPH_INTELGPU_ENABLE)
......
......@@ -16,7 +16,6 @@
#include <CPP/activation.hpp>
#include <CPP/batch_norm.hpp>
#include <CPP/concatenation.hpp>
#include <CPP/convolution.hpp>
#include <CPP/data.hpp>
#include <CPP/eltwise.hpp>
......@@ -25,13 +24,13 @@
#include <CPP/permute.hpp>
#include <CPP/pooling.hpp>
#include <CPP/reorder.hpp>
#include <CPP/reshape.hpp>
#include <CPP/scale.hpp>
#include <CPP/topology.hpp>
#include "ngraph/runtime/intelgpu/intelgpu_backend.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_tensor_view.hpp"
#include "ngraph/node.hpp"
......@@ -42,6 +41,7 @@
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
......@@ -91,6 +91,18 @@ static void do_unary_operation(cldnn::topology& topology,
topology.add(cldnn_unary);
}
// This function is needed only to rename the data in the topology;
// no real data copy is required
static void do_equal_propagation(cldnn::topology& topology,
const string& input_name,
const string& output_name)
{
const vector<cldnn::primitive_id> input_names(1, input_name);
const cldnn::concatenation op_concat(output_name, input_names, cldnn::concatenation::along_x);
topology.add(op_concat);
}
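As a hedged illustration (the primitive ids below are invented for the example), the rename costs one trivial primitive: a one-element concatenation simply re-emits the same buffer under a new primitive_id.
// Hypothetical usage; "in" and "out" are illustrative primitive ids.
do_equal_propagation(topology, "in", "out");
// Downstream primitives can now reference "out" as if it were "in".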
extern "C" const char* get_ngraph_version_string()
{
return NGRAPH_VERSION;
......@@ -152,15 +164,21 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{
arguments_check(op, 1, 1);
const descriptor::Tensor& input_tensor = op->get_inputs().begin()->get_tensor();
const descriptor::Tensor& output_tensor = op->get_outputs().begin()->get_tensor();
const string& input_name = input_tensor.get_name();
const string& output_name = output_tensor.get_name();
const cldnn::layout input_layout = IntelGPULayout::create_cldnn_layout(
input_tensor.get_element_type(), op->get_inputs().begin()->get_shape());
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const cldnn::reorder op_reorder(output_name, input_name, input_layout);
topology.add(op_reorder);
do_equal_propagation(topology, input_name, output_name);
}
else if ("GetOutputElement" == op->description())
{
arguments_check(op, 3, 1);
const shared_ptr<op::GetOutputElement> elem =
static_pointer_cast<op::GetOutputElement>(op);
const string& input_name = op->get_inputs().at(elem->get_n()).get_tensor().get_name();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
do_equal_propagation(topology, input_name, output_name);
}
else if ("Add" == op->description())
{
......@@ -213,7 +231,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
vector<cldnn::tensor::value_type> offset({0, 0, 0, 0}); // No action by default
size_t ridx = 4;
for (auto i = pad.rbegin(); i != pad.rend() && ridx > 0; ++i, --ridx)
for (auto i = pad.crbegin(); i != pad.crend() && ridx > 0; ++i, --ridx)
{
offset.at(ridx - 1) = -(*i);
}
......@@ -234,6 +252,29 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
output_size);
topology.add(cldd_pooling);
}
else if ("Broadcast" == op->description())
{
arguments_check(op, 1, 1);
const string& input_name = op->get_inputs().begin()->get_tensor().get_name();
const Shape& input_shape = op->get_inputs().begin()->get_shape();
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const shared_ptr<op::Broadcast> broadcast = static_pointer_cast<op::Broadcast>(op);
const AxisSet& axis = broadcast->get_broadcast_axes();
if (axis.empty())
{
do_equal_propagation(topology, input_name, output_name);
}
else
{
do_broadcast_operation(
topology, input_name, input_shape, output_name, output_shape, axis);
}
}
else if ("Reshape" == op->description())
{
arguments_check(op, 1, 1);
......@@ -252,7 +293,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
// For example, in a 2D array the indexes are 0,1, but in a 4D array they should be 2,3
// because cldnn::tensor is always 4D, assuming the cldnn::bfyx model
size_t rindex = max_dim;
for (auto i = broadcast_axes.rbegin(); i != broadcast_axes.rend() && rindex > 0;
for (auto i = broadcast_axes.crbegin(); i != broadcast_axes.crend() && rindex > 0;
++i, --rindex)
{
permute_order.at(rindex - 1) = *i + scale;
......
......@@ -29,7 +29,7 @@ runtime::intelgpu::IntelGPULayout::IntelGPULayout(const descriptor::TensorView&
{
}
size_t runtime::intelgpu::IntelGPULayout::get_index_offset(const std::vector<size_t>& indices)
size_t runtime::intelgpu::IntelGPULayout::get_index_offset(const vector<size_t>& indices)
{
if (indices.size() != strides.size())
{
......@@ -74,16 +74,16 @@ cldnn::data_types
{
ostringstream os;
os << "IntelGPULayout::get_cldnn_type: Unknown type " << element_type;
throw std::invalid_argument(os.str());
throw invalid_argument(os.str());
}
}
cldnn::tensor runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(const Shape& element_shape)
{
std::vector<size_t> idx(4, 1);
vector<size_t> idx(4, 1);
size_t index = 0;
for (auto i = element_shape.rbegin(); i != element_shape.rend() && index < 3; ++i, ++index)
for (auto i = element_shape.crbegin(); i != element_shape.crend() && index < 3; ++i, ++index)
{
idx.at(index) = *i;
}
......@@ -109,3 +109,21 @@ cldnn::layout runtime::intelgpu::IntelGPULayout::create_cldnn_layout(
return cldnn::layout(data_type, format, tensor);
}
cldnn::concatenation::concatenation_axis
runtime::intelgpu::IntelGPULayout::get_cldnn_axis(size_t tensor_channel)
{
switch (tensor_channel)
{
case 0: return cldnn::concatenation::along_b;
case 1: return cldnn::concatenation::along_f;
case 2: return cldnn::concatenation::along_y;
case 3: return cldnn::concatenation::along_x;
default:
{
ostringstream os;
os << "IntelGPULayout::get_cldnn_axis: wrong tensor channel " << tensor_channel;
throw invalid_argument(os.str());
}
}
}
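A short hedged sketch of the resulting mapping, assuming cldnn's bfyx layout as used throughout this file; do_batch_norm_operation below relies on it to pick the feature axis of a 4D input (cldnn_channel = 4 - 4 + 1 = 1):
// Dimension index of a 4D (bfyx) tensor -> concatenation axis:
//   get_cldnn_axis(0) == cldnn::concatenation::along_b   // batch
//   get_cldnn_axis(1) == cldnn::concatenation::along_f   // feature
//   get_cldnn_axis(2) == cldnn::concatenation::along_y
//   get_cldnn_axis(3) == cldnn::concatenation::along_x
//   get_cldnn_axis(4)  -> throws invalid_argument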
......@@ -16,6 +16,7 @@
#pragma once
#include <CPP/concatenation.hpp>
#include <CPP/layout.hpp>
#include <CPP/tensor.hpp>
......@@ -51,6 +52,9 @@ public:
const Shape& element_shape);
static cldnn::tensor create_cldnn_tensor(const Shape& element_shape);
// This function converts a Shape dimension index into a cldnn::concatenation axis id
static cldnn::concatenation::concatenation_axis get_cldnn_axis(size_t tensor_channel);
private:
Strides strides;
cldnn::layout cldnn_layout;
......
......@@ -19,6 +19,7 @@
#include <CPP/scale.hpp>
#include <CPP/split.hpp>
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
#include "ngraph/op/batch_norm.hpp"
......@@ -26,19 +27,6 @@
using namespace std;
using namespace ngraph;
// This function converts Shape dimension id into cldnn::concatenation id
static cldnn::concatenation::concatenation_axis get_cldnn_axis(size_t tensor_channel)
{
switch (tensor_channel)
{
case 0: return cldnn::concatenation::along_b;
case 1: return cldnn::concatenation::along_f;
case 2: return cldnn::concatenation::along_y;
case 3: return cldnn::concatenation::along_x;
default: throw invalid_argument("intelgpu::get_cldnn_axis() wrong input tensor channel.");
}
}
static string do_matrix_split(cldnn::topology& topology,
const string& name,
const vector<pair<cldnn::primitive_id, cldnn::tensor>>& offsets)
......@@ -88,6 +76,8 @@ void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
// Also, the input data must be at least a 2D array
const size_t shape_channel = 1;
const size_t cldnn_channel = 4 - input_shape.size() + shape_channel;
const cldnn::concatenation::concatenation_axis direction =
runtime::intelgpu::IntelGPULayout::get_cldnn_axis(cldnn_channel);
const size_t split_arr_count = input_shape.at(shape_channel);
for (size_t i = 0; i < split_arr_count; ++i)
......@@ -99,7 +89,6 @@ void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
vector<cldnn::tensor::value_type> offset({0, 0, 0, 0}); // No action by default
offset.at(cldnn_channel) = i;
cout << "Splitted to " << i << " with " << vector_to_string(offset) << "\n";
const cldnn::tensor input_offset(offset.at(0), offset.at(1), offset.at(3), offset.at(2));
split_offsets.push_back(pair<cldnn::primitive_id, cldnn::tensor>(str_i, input_offset));
}
......@@ -141,6 +130,6 @@ void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
dim_set.push_back(output_name + suf);
}
const cldnn::concatenation op_concat(output_name, dim_set, get_cldnn_axis(cldnn_channel));
const cldnn::concatenation op_concat(output_name, dim_set, direction);
topology.add(op_concat);
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <CPP/concatenation.hpp>
#include <CPP/reshape.hpp>
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
static const string reshape_suf("_reshape");
static Shape propagate_backward(const Shape& input)
{
Shape result({0, 0, 0, 0});
size_t idx = result.size() - 1;
for (auto i = input.crbegin(); i != input.crend(); ++i, --idx)
{
result.at(idx) = *i;
}
return result;
}
static Shape propagate_forward(const Shape& input)
{
Shape result({0, 0, 0, 0});
size_t idx = 0;
for (auto i = input.cbegin(); i != input.cend(); ++i, ++idx)
{
result.at(idx) = *i;
}
return result;
}
static Shape apply_axis(const Shape& input, const AxisSet& axis)
{
Shape result = input;
for (auto const& i : axis)
{
result.at(i) = 0;
}
return result;
}
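A minimal standalone trace of these three helpers (hypothetical test main; in nGraph, Shape and AxisSet behave as std::vector<size_t> and std::set<size_t>, so the comparisons below are element-wise):
#include <cassert>

int main()
{
    // propagate_forward left-aligns into a 4-slot shape, zero-padding the tail:
    assert(propagate_forward({2, 3}) == (Shape{2, 3, 0, 0}));
    // propagate_backward right-aligns, zero-padding the head:
    assert(propagate_backward({2, 3}) == (Shape{0, 0, 2, 3}));
    // apply_axis zeroes the broadcast axes so they compare equal to that padding:
    assert(apply_axis({2, 3, 4, 5}, {1, 3}) == (Shape{2, 0, 4, 0}));
    return 0;
}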
// This function broadcasts the input data across all other dimensions of the output.
// It operates in two modes only (controlled by the is_forward flag):
// [forward]: propagate data from left to right, in Shape array terms
//            in[2], out[2,3,4,5], axis[1,2,3]
// [backward]: propagate data from right to left, in Shape array terms
//            in[5], out[2,3,4,5], axis[0,1,2]
// Input and output shapes can have up to 4 dimensions.
// Other variants, like in[4] out[2,3,4,5] axis[0,1,3], are not supported yet.
static void do_propagation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const string& output_name,
const Shape& output_shape,
const AxisSet& axis,
bool is_forward)
{
// Default value, used in "forward" mode
cldnn::concatenation::concatenation_axis direction =
runtime::intelgpu::IntelGPULayout::get_cldnn_axis(3);
string input_name_it = input_name;
string output_name_it = output_name;
Shape input_shape_it = input_shape;
for (auto axis_id = axis.crbegin(); axis_id != axis.crend();)
{
const size_t input_count = output_shape.at(*axis_id);
if (is_forward)
{
input_shape_it.push_back(1);
const cldnn::tensor my_tensor =
runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(input_shape_it);
const cldnn::reshape op_reshape(input_name_it + reshape_suf, input_name_it, my_tensor);
topology.add(op_reshape);
input_shape_it.back() = input_count;
input_name_it += reshape_suf;
}
else
{
direction = runtime::intelgpu::IntelGPULayout::get_cldnn_axis(*axis_id);
}
const vector<cldnn::primitive_id> input_names(input_count, input_name_it);
++axis_id;
if (axis_id == axis.crend())
{
output_name_it = output_name;
}
else
{
output_name_it += ":_";
input_name_it = output_name_it;
}
const cldnn::concatenation op_concat(output_name_it, input_names, direction);
topology.add(op_concat);
}
}
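A hedged trace of the forward mode for in[2] -> out[2,3] with axis[1] ("input" and "output" are illustrative primitive ids):
// do_propagation(topology, "input", Shape{2}, "output", Shape{2, 3}, AxisSet{1}, true);
//
// Single pass over axis {1}: input_count = output_shape.at(1) = 3.
//   1. reshape "input" into "input_reshape" with the cldnn tensor for shape {2, 1};
//   2. this is the last axis, so 3 copies of "input_reshape" are concatenated
//      along_x (the forward-mode default direction) straight into "output".
// With more axes, each pass writes an intermediate named with the ":_" suffix
// and the next pass consumes it as its input.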
// Assumes the input is a scalar; every element of the output is populated with it.
// This function is extremely suboptimal from a performance perspective.
static void do_scalar_propagation(cldnn::topology& topology,
const string& input_name,
const string& output_name,
const Shape& output_shape)
{
const size_t input_count = shape_size<const Shape>(output_shape);
const vector<cldnn::primitive_id> input_names(input_count, input_name);
const cldnn::concatenation op_concat(output_name, input_names, cldnn::concatenation::along_x);
topology.add(op_concat);
}
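For instance (hedged, with illustrative names), broadcasting a scalar "s" into shape {2, 3} concatenates shape_size({2, 3}) = 6 copies of the same one-element buffer along_x; because every element is identical, the result is valid whatever the output's logical shape is.
// Hypothetical call: all 6 output elements become copies of the scalar.
do_scalar_propagation(topology, "s", "out", Shape{2, 3});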
void runtime::intelgpu::do_broadcast_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
const string& output_name,
const Shape& output_shape,
const AxisSet& axis)
{
if (input_shape.size() > 4 || output_shape.size() > 4)
{
throw invalid_argument("IntelGPU::Broadcast supports shapes of at most 4 dimensions.");
}
if (input_shape.empty())
{
do_scalar_propagation(topology, input_name, output_name, output_shape);
return;
}
const Shape output_shape_axis = apply_axis(output_shape, axis);
const Shape input_shape_forward = propagate_forward(input_shape);
const Shape output_shape_forward = propagate_forward(output_shape_axis);
const Shape input_shape_backward = propagate_backward(input_shape);
const Shape output_shape_backward = propagate_backward(output_shape_axis);
if (input_shape_forward == output_shape_forward)
{
do_propagation(topology, input_name, input_shape, output_name, output_shape, axis, true);
}
else if (input_shape_backward == output_shape_backward)
{
do_propagation(topology, input_name, input_shape, output_name, output_shape, axis, false);
}
else
{
ostringstream os;
os << "IntelGP::Broadcast unsupported mode. input" << vector_to_string(input_shape)
<< " output" << vector_to_string(output_shape) << " axis" << vector_to_string(axis);
throw invalid_argument(os.str());
}
}
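A hedged worked example of the mode selection above, using the shapes from the do_propagation comment:
// in[5], out[2,3,4,5], axis[0,1,2] (the backward case):
//   output_shape_axis = apply_axis({2,3,4,5}, {0,1,2})    = {0, 0, 0, 5}
//   forward : propagate_forward({5})  = {5,0,0,0} vs {0,0,0,5} -> no match
//   backward: propagate_backward({5}) = {0,0,0,5} vs {0,0,0,5} -> backward mode
//
// in[4], out[2,3,4,5], axis[0,1,3] (the unsupported variant):
//   output_shape_axis = {0, 0, 4, 0}; neither {4,0,0,0} nor {0,0,0,4}
//   matches it, so invalid_argument is thrown.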
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/axis_set.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
// This implements the nGraph Broadcast operation
void do_broadcast_operation(cldnn::topology& topology,
const std::string& input_name,
const Shape& input_shape,
const std::string& output_name,
const Shape& output_shape,
const AxisSet& axis);
}
}
}
abc_int64
abs
acos
aliased_output
asin
......@@ -19,7 +18,6 @@ avg_pool_2d_2channel_2image_padded_only_below
avg_pool_3d
backwards_abs
backwards_acos
backwards_add
backwards_asin
backwards_atan
backwards_avgpool_n1_c1_hw2x2
......@@ -36,7 +34,6 @@ backwards_concat_axis_1
backwards_concat_vector
backwards_cos
backwards_cosh
backwards_divide
backwards_dot_scalar_scalar
backwards_dot_scalar_tensor
backwards_dot_tensor2_tensor2
......@@ -53,8 +50,6 @@ backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_maxpool_n4c1h4w4_kh2kw2_sh1sw1
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_minimum
backwards_negative
backwards_parameter
backwards_power
backwards_relu
backwards_replace_slice
......@@ -72,29 +67,23 @@ backwards_softmax_3d
backwards_softmax_all
backwards_softmax_axis
backwards_softmax_underflow
backwards_subtract
backwards_sum_m2s
backwards_sum_m2v_0
backwards_sum_m2v_1
backwards_sum_v2s
backwards_tan
backwards_tanh
batchnorm_bprop_n4c3h2w2
batchnorm_fprop_b1c2h2w2
batchnorm_fprop_b2c2h2w1
batchnorm_fprop_globalstats_b2c2w2h1
batchnorm_fprop_inference_b2c2h2w1
batch_norm_one_output
batch_norm_three_outputs
broadcast_matrix_0
broadcast_algo_3d_stride_1
broadcast_algo_3d_stride_2
broadcast_algo_matrix_stride_1
broadcast_algo_matrix_stride_2
broadcast_algo_matrix_stride_3
broadcast_algo_vector_middle
broadcast_matrix_1
broadcast_matrix_2
broadcast_scalar_matrix
broadcast_scalar_tensor
broadcast_scalar_vector
broadcast_trivial
broadcast_vector_colwise
broadcast_vector_rowwise
broadcast_vector_rowwise_int64
broadcast_vector_rowwise_reversed
ceiling
......@@ -148,7 +137,6 @@ convolution_4d_4items_strided_dilated_padded_same
convolution_outlining
cos
cosh
divide_adjoint_stability
divide_by_zero_int32
dot_0_0
dot1d
......@@ -190,10 +178,6 @@ max_matrix_columns
max_matrix_rows
max_matrix_rows_zero
max_matrix_to_scalar_zero_by_zero
max_pool_2d_1channel_1image_overpadded
max_pool_2d_1channel_1image_padded
max_pool_2d_1channel_1image_padded_negative_values
max_pool_2d_1channel_1image_strided
max_pool_3d
max_to_scalar
max_trivial
......@@ -214,10 +198,6 @@ min_to_scalar
min_trivial
min_trivial_5d
min_vector_zero
mkldnn_layouts
multiple_backends
multiple_result
negative
not
notequal
numeric_double_inf
......@@ -275,9 +255,7 @@ reduce_window_emulating_max_pool_1d_2channel_2image
reduce_window_emulating_max_pool_2d_1channel_1image_strided
reduce_window_emulating_max_pool_2d_2channel_2image
relu_2Dbackprop
relu_2Dfprop
relu_4Dbackprop
relu_4Dfprop
replace_slice_3d
replace_slice_3d_strided
replace_slice_3d_strided_different_strides
......@@ -327,8 +305,6 @@ softmax_axis_2
softmax_axis_3d
softmax_axis_3d_trivial
softmax_underflow
sqrt
subtract
sum_3d_eliminate_zero_dim
sum_3d_to_matrix_least_sig
sum_3d_to_matrix_most_sig
......@@ -345,9 +321,7 @@ sum_trivial
sum_trivial_5d
sum_vector_zero
tan
tanh
tensor_constant_int64
tensor_constant_with_op
validate_call_input_type
validate_call_output_type
zero_sized_abs
......
......@@ -1908,6 +1908,152 @@ NGRAPH_TEST(${BACKEND_NAME}, broadcast_vector_rowwise_int64)
EXPECT_EQ((vector<int64_t>{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}), read_vector<int64_t>(result));
}
static void broadcast_test_helper(const Shape& shape_a, const Shape& shape_r, const AxisSet& axis)
{
auto A = make_shared<op::Parameter>(element::f32, shape_a);
vector<float> inp_data(shape_size<const Shape>(shape_a));
iota(inp_data.begin(), inp_data.end(), 1);
auto f =
make_shared<Function>(make_shared<op::Broadcast>(A, shape_r, axis), op::ParameterVector{A});
auto ref_backend = runtime::Backend::create("INTERPRETER");
auto wrk_backend = runtime::Backend::create("${BACKEND_NAME}");
auto wrk_a = wrk_backend->create_tensor(element::f32, shape_a);
copy_data(wrk_a, inp_data);
auto ref_a = ref_backend->create_tensor(element::f32, shape_a);
copy_data(ref_a, inp_data);
auto wrk_result = wrk_backend->create_tensor(element::f32, shape_r);
auto ref_result = ref_backend->create_tensor(element::f32, shape_r);
wrk_backend->call(f, {wrk_result}, {wrk_a});
ref_backend->call(f, {ref_result}, {ref_a});
EXPECT_EQ(read_vector<float>(ref_result), read_vector<float>(wrk_result));
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_middle)
{
Shape shape_a{2};
Shape shape_r{3, 2, 4};
AxisSet axis{0, 2};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_forward_2)
{
Shape shape_a{2};
Shape shape_r{3, 2};
AxisSet axis{0};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_forward_3)
{
Shape shape_a{2};
Shape shape_r{4, 3, 2};
AxisSet axis{0, 1};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_forward_4)
{
Shape shape_a{2};
Shape shape_r{5, 4, 3, 2};
AxisSet axis{0, 1, 2};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_scalar)
{
Shape shape_a{};
Shape shape_r{5, 4, 3, 2};
AxisSet axis{0, 1, 2, 3};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_backward_2)
{
Shape shape_a{2};
Shape shape_r{2, 3};
AxisSet axis{1};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_backward_3)
{
Shape shape_a{2};
Shape shape_r{2, 3, 4};
AxisSet axis{1, 2};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_vector_backward_4)
{
Shape shape_a{2};
Shape shape_r{2, 3, 4, 5};
AxisSet axis{1, 2, 3};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_matrix_backward_4)
{
Shape shape_a{4, 5};
Shape shape_r{2, 3, 4, 5};
AxisSet axis{0, 1};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_matrix_stride_1)
{
Shape shape_a{3, 5};
Shape shape_r{2, 3, 4, 5};
AxisSet axis{0, 2};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_matrix_stride_2)
{
Shape shape_a{3, 4};
Shape shape_r{2, 3, 4, 5};
AxisSet axis{0, 3};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_matrix_stride_3)
{
Shape shape_a{2, 4};
Shape shape_r{2, 3, 4, 5};
AxisSet axis{1, 3};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_3d_backward)
{
Shape shape_a{2, 3, 4};
Shape shape_r{5, 2, 3, 4};
AxisSet axis{0};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_3d_stride_1)
{
Shape shape_a{2, 3, 4};
Shape shape_r{2, 5, 3, 4};
AxisSet axis{1};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_algo_3d_stride_2)
{
Shape shape_a{2, 3, 4};
Shape shape_r{2, 3, 5, 4};
AxisSet axis{2};
broadcast_test_helper(shape_a, shape_r, axis);
}
NGRAPH_TEST(${BACKEND_NAME}, broadcast_matrix_0)
{
Shape shape_a{2, 2};
......