Commit 66f6331b authored by Amy Zhuang's avatar Amy Zhuang Committed by Scott Cyphers

Use Eigen kernel for Gather for any axis value. (#3025)

parent 94df1977
......@@ -53,10 +53,9 @@ namespace ngraph
if (is_int64)
{
if ((args[0].get_element_type() == element::f32 ||
if (args[0].get_element_type() == element::f32 ||
args[0].get_element_type() == element::f64 ||
args[0].get_element_type() == element::u8) &&
axis == 0)
args[0].get_element_type() == element::u8)
{
std::function<decltype(runtime::cpu::kernel::gather_i64<float, 2, 2>)>
kernel;
......@@ -72,6 +71,7 @@ namespace ngraph
params_shape,
indices_shape,
out_shape,
axis,
params_buffer_index,
indices_buffer_index,
out_buffer_index](CPURuntimeContext* ctx,
......@@ -82,6 +82,7 @@ namespace ngraph
params_shape,
indices_shape,
out_shape,
axis,
ectx->arena);
};
}
......@@ -110,10 +111,9 @@ namespace ngraph
else
{
if ((args[0].get_element_type() == element::f32 ||
if (args[0].get_element_type() == element::f32 ||
args[0].get_element_type() == element::f64 ||
args[0].get_element_type() == element::u8) &&
axis == 0)
args[0].get_element_type() == element::u8)
{
std::function<decltype(runtime::cpu::kernel::gather_i32<float, 2, 2>)>
kernel;
......@@ -129,6 +129,7 @@ namespace ngraph
params_shape,
indices_shape,
out_shape,
axis,
params_buffer_index,
indices_buffer_index,
out_buffer_index](CPURuntimeContext* ctx,
......@@ -139,6 +140,7 @@ namespace ngraph
params_shape,
indices_shape,
out_shape,
axis,
ectx->arena);
};
}
......
......@@ -1846,6 +1846,7 @@ namespace ngraph
writer << " {" << join(args[0].get_shape()) << "},\n";
writer << " {" << join(args[1].get_shape()) << "},\n";
writer << " {" << join(out[0].get_shape()) << "},\n";
writer << " " << gather->get_axis() << ",\n";
writer << " 0);\n";
}
else
......
......@@ -247,6 +247,7 @@ namespace ngraph
const Shape& inputs_shape,
const Shape& indices_shape,
const Shape& output_shape,
size_t axis,
int arena);
template <typename ElementType,
......
......@@ -31,12 +31,15 @@ namespace ngraph
{
namespace kernel
{
// Calculate the indices from position 0 to rank-1.
static void
get_leading_indices(const Shape& shape, int index, std::vector<int>& indices)
get_indices(const Shape& shape, int index, std::vector<int>& indices, int rank)
{
auto rank = shape.size();
if (rank == 0)
{
return;
}
std::vector<int> partial_sum(rank);
partial_sum[rank - 1] = 1;
for (int j = rank - 2; j >= 0; j--)
{
......@@ -49,6 +52,7 @@ namespace ngraph
}
}
// Gather use indices to get slices of inputs.
template <typename ElementType,
typename IndicesType,
unsigned int Rank1,
......@@ -59,6 +63,7 @@ namespace ngraph
const Shape& inputs_shape,
const Shape& indices_shape,
const Shape& output_shape,
size_t axis,
int arena)
{
Eigen::array<Eigen::Index, Rank1> in_dims;
......@@ -80,60 +85,119 @@ namespace ngraph
auto indices_ptr = static_cast<IndicesType*>(indices);
auto indices_rank = indices_shape.size();
auto outer_loop_num = 1;
for (int i = 0; i < axis; i++)
{
outer_loop_num *= inputs_shape[i];
}
if (indices_rank == 0)
{
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int i = 0; i < outer_loop_num; i++)
{
Eigen::array<Eigen::Index, Rank1> in_extents, in_offsets;
Eigen::array<Eigen::Index, Rank2> out_extents, out_offsets;
// indices_before_axis depends on inputs_shape[0,..., axis-1] and i.
// if axis is 0, indices_before_axis is empty.
std::vector<int> indices_before_axis(axis);
get_indices(inputs_shape, i, indices_before_axis, axis);
for (int i = 0; i < Rank1; i++)
// before axis
for (int r = 0; r < axis; r++)
{
in_extents[i] = inputs_shape[i];
in_offsets[i] = 0;
in_extents[r] = 1;
in_offsets[r] = indices_before_axis[r];
}
in_extents[0] = 1;
in_offsets[0] = indices_ptr[0];
for (int i = 0; i < Rank2; i++)
// from axis
for (int r = axis; r < Rank1; r++)
{
out_extents[i] = output_shape[i];
out_offsets[i] = 0;
in_extents[r] = inputs_shape[r];
in_offsets[r] = 0;
}
// at axis
in_extents[axis] = 1;
// at axis, get the value from indices arg
in_offsets[axis] = indices_ptr[0];
// before axis
for (int r = 0; r < axis; r++)
{
out_extents[r] = 1;
out_offsets[r] = indices_before_axis[r];
}
// after axis
for (int r = axis; r < Rank2; r++)
{
out_extents[r] = output_shape[r];
out_offsets[r] = 0;
}
out.slice(out_offsets, out_extents)
.device(ngraph::runtime::cpu::executor::GetCPUExecutor().get_device(
arena)) = in.slice(in_offsets, in_extents).reshape(out_extents);
}
}
else
{
auto num_indices = shape_size(indices_shape);
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int i = 0; i < shape_size(indices_shape); i++)
for (int i = 0; i < outer_loop_num * num_indices; i++)
{
// Declare these inside the loop for omp parallel
Eigen::array<Eigen::Index, Rank1> in_extents, in_offsets;
Eigen::array<Eigen::Index, Rank2> out_extents, out_offsets;
std::vector<int> leading_indices(indices_rank);
std::vector<int> indices_before_axis(axis);
// indices_before_axis depends on inputs_shape[0,..., axis-1] and i / num_indices.
// if axis is 0, indices_before_axis is empty.
get_indices(inputs_shape, i / num_indices, indices_before_axis, axis);
std::vector<int> indices_from_indices_arg(indices_rank);
for (int r = 0; r < Rank1; r++)
// before axis
for (int r = 0; r < axis; r++)
{
in_extents[r] = 1;
in_offsets[r] = indices_before_axis[r];
}
// from axis
for (int r = axis; r < Rank1; r++)
{
in_extents[r] = inputs_shape[r];
in_offsets[r] = 0;
}
in_extents[0] = 1;
in_offsets[0] = indices_ptr[i];
for (int r = 0; r < Rank2; r++)
// at axis
in_extents[axis] = 1;
// before axis
for (int r = 0; r < axis; r++)
{
out_extents[r] = 1;
out_offsets[r] = indices_before_axis[r];
}
// from axis
for (int r = axis; r < Rank2; r++)
{
out_extents[r] = output_shape[r];
out_offsets[r] = 0;
}
get_leading_indices(indices_shape, i, leading_indices);
// at axis, get the value from indices arg
int k = i % num_indices;
in_offsets[axis] = indices_ptr[k];
// indices_from_indices_arg depends on indices_shape and k.
// suppose the inputs has shape {3, 3, 3}, indices has shape {2, 2}, and axis is 1,
// the output would have shape {3, 2, 2, 3} and
// indices_from_indices_arg would contain indices at position 1 and 2 for output slice offsets.
get_indices(indices_shape, k, indices_from_indices_arg, indices_rank);
for (int j = 0; j < indices_rank; j++)
{
out_extents[j] = 1;
out_offsets[j] = leading_indices[j];
out_extents[j + axis] = 1;
out_offsets[j + axis] = indices_from_indices_arg[j];
}
out.slice(out_offsets, out_extents)
.device(ngraph::runtime::cpu::executor::GetCPUExecutor().get_device(
arena)) = in.slice(in_offsets, in_extents).reshape(out_extents);
......@@ -148,10 +212,17 @@ namespace ngraph
const Shape& inputs_shape,
const Shape& indices_shape,
const Shape& output_shape,
size_t axis,
int arena)
{
gather<ElementType, int64_t, Rank1, Rank2>(
inputs, indices, output, inputs_shape, indices_shape, output_shape, arena);
gather<ElementType, int64_t, Rank1, Rank2>(inputs,
indices,
output,
inputs_shape,
indices_shape,
output_shape,
axis,
arena);
}
template <typename ElementType, unsigned int Rank1, unsigned int Rank2>
......@@ -161,10 +232,17 @@ namespace ngraph
const Shape& inputs_shape,
const Shape& indices_shape,
const Shape& output_shape,
size_t axis,
int arena)
{
gather<ElementType, int32_t, Rank1, Rank2>(
inputs, indices, output, inputs_shape, indices_shape, output_shape, arena);
gather<ElementType, int32_t, Rank1, Rank2>(inputs,
indices,
output,
inputs_shape,
indices_shape,
output_shape,
axis,
arena);
}
}
}
......
......@@ -159,7 +159,6 @@ erf
zero_sized_erf
model_erf
model_erf_int32
gather
gather_nd_scalar_from_2d
gather_nd_1d_from_2d
gather_nd_scalar_from_3d
......@@ -171,12 +170,15 @@ gather_nd_batch_scalar_from_3d
gather_nd_batch_1d_from_3d
gather_nd_batch_2d_from_3d
gather_nd_single_indices
gather_scalar_indices
gather_scalar_indices_no_axis
gather_2d_indices_no_axis
gather_3d_indices_no_axis
gather_4d_indices_no_axis
gather_4d_indices_no_axis_uint8
gather_scalar_indices_axis_1_2d_input
gather_1d_indices_axis_2_4d_input
gather_2d_indices_axis_1_2d_input
gather_scalar_indices_no_axis_2d_input
gather_1d_indices_no_axis_1d_input
gather_2d_indices_no_axis_2d_input
gather_3d_indices_no_axis_2d_input
gather_4d_indices_no_axis_2d_input
gemm
gemm_broadcast_input_C
model_hardmax
......
......@@ -44,7 +44,6 @@ pad_reflect_2d_with_neg
batch_mat_mul_forward
backwards_batchmatmul_tensor2_tensor2
erf
gather
gather_nd_scalar_from_2d
gather_nd_1d_from_2d
gather_nd_scalar_from_3d
......@@ -56,12 +55,15 @@ gather_nd_batch_scalar_from_3d
gather_nd_batch_1d_from_3d
gather_nd_batch_2d_from_3d
gather_nd_single_indices
gather_scalar_indices
gather_scalar_indices_no_axis
gather_2d_indices_no_axis
gather_3d_indices_no_axis
gather_4d_indices_no_axis
gather_4d_indices_no_axis_uint8
gather_scalar_indices_axis_1_2d_input
gather_1d_indices_axis_2_4d_input
gather_2d_indices_axis_1_2d_input
gather_scalar_indices_no_axis_2d_input
gather_1d_indices_no_axis_1d_input
gather_2d_indices_no_axis_2d_input
gather_3d_indices_no_axis_2d_input
gather_4d_indices_no_axis_2d_input
gemm
gemm_broadcast_input_C
normalize_across_chw_scalar_scale_4d
......
......@@ -103,7 +103,6 @@ embedding_lookup_10x1_arbitrary
embedding_lookup_10x1_arbitrary_index_type_int
embedding_lookup_10x1_arbitrary_index_type_int64
floor_int32
gather
gather_nd_scalar_from_2d
gather_nd_1d_from_2d
gather_nd_scalar_from_3d
......@@ -115,12 +114,15 @@ gather_nd_batch_scalar_from_3d
gather_nd_batch_1d_from_3d
gather_nd_batch_2d_from_3d
gather_nd_single_indices
gather_scalar_indices
gather_scalar_indices_no_axis
gather_2d_indices_no_axis
gather_3d_indices_no_axis
gather_4d_indices_no_axis
gather_4d_indices_no_axis_uint8
gather_scalar_indices_axis_1_2d_input
gather_1d_indices_axis_2_4d_input
gather_2d_indices_axis_1_2d_input
gather_scalar_indices_no_axis_2d_input
gather_1d_indices_no_axis_1d_input
gather_2d_indices_no_axis_2d_input
gather_3d_indices_no_axis_2d_input
gather_4d_indices_no_axis_2d_input
scatter_add_4d_indices
scatter_add_3d_indices
scatter_add_2d_indices
......
......@@ -69,7 +69,7 @@ NGRAPH_TEST(${BACKEND_NAME}, gather_4d_indices_no_axis_uint8)
read_vector<uint8_t>(result)));
}
NGRAPH_TEST(${BACKEND_NAME}, gather_4d_indices_no_axis)
NGRAPH_TEST(${BACKEND_NAME}, gather_4d_indices_no_axis_2d_input)
{
Shape params_shape{3, 2};
Shape indices_shape{2, 2, 3, 4};
......@@ -105,7 +105,7 @@ NGRAPH_TEST(${BACKEND_NAME}, gather_4d_indices_no_axis)
MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, gather_3d_indices_no_axis)
NGRAPH_TEST(${BACKEND_NAME}, gather_3d_indices_no_axis_2d_input)
{
Shape params_shape{3, 2};
Shape indices_shape{2, 3, 4};
......@@ -136,7 +136,7 @@ NGRAPH_TEST(${BACKEND_NAME}, gather_3d_indices_no_axis)
MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, gather_2d_indices_no_axis)
NGRAPH_TEST(${BACKEND_NAME}, gather_2d_indices_no_axis_2d_input)
{
Shape params_shape{3, 2};
Shape indices_shape{2, 2};
......@@ -162,7 +162,32 @@ NGRAPH_TEST(${BACKEND_NAME}, gather_2d_indices_no_axis)
MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, gather_scalar_indices_no_axis)
NGRAPH_TEST(${BACKEND_NAME}, gather_1d_indices_no_axis_1d_input)
{
Shape params_shape{3};
Shape indices_shape{2};
Shape out_shape{2};
auto P = make_shared<op::Parameter>(element::f32, params_shape);
auto I = make_shared<op::Parameter>(element::i32, indices_shape);
auto G = make_shared<op::Gather>(P, I);
auto f = make_shared<Function>(make_shared<op::GetOutputElement>(G, 0), ParameterVector{P, I});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto p = backend->create_tensor(element::f32, params_shape);
copy_data(p, vector<float>{1.0f, 2.0f, 3.0f});
auto i = backend->create_tensor(element::i32, indices_shape);
copy_data(i, vector<int32_t>{1, 0});
auto result = backend->create_tensor(element::f32, out_shape);
auto c = backend->compile(f);
c->call_with_validate({result}, {p, i});
EXPECT_TRUE(test::all_close_f(
(vector<float>{2.0f, 1.0f}), read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, gather_scalar_indices_no_axis_2d_input)
{
Shape params_shape{3, 2};
Shape indices_shape{};
......@@ -187,7 +212,7 @@ NGRAPH_TEST(${BACKEND_NAME}, gather_scalar_indices_no_axis)
(vector<float>{2.0f, 2.1f}), read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, gather)
NGRAPH_TEST(${BACKEND_NAME}, gather_2d_indices_axis_1_2d_input)
{
Shape params_shape{3, 3};
Shape indices_shape{1, 2};
......@@ -213,7 +238,38 @@ NGRAPH_TEST(${BACKEND_NAME}, gather)
MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, gather_scalar_indices)
NGRAPH_TEST(${BACKEND_NAME}, gather_1d_indices_axis_2_4d_input)
{
Shape params_shape{2, 2, 3, 3};
Shape indices_shape{2};
Shape out_shape{2, 2, 2, 3};
auto P = make_shared<op::Parameter>(element::f32, params_shape);
auto I = make_shared<op::Parameter>(element::i32, indices_shape);
auto G = make_shared<op::Gather>(P, I, 2);
auto f = make_shared<Function>(make_shared<op::GetOutputElement>(G, 0), ParameterVector{P, I});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto p = backend->create_tensor(element::f32, params_shape);
copy_data(p, vector<float>{1.0f, 1.1f, 1.2f, 2.0f, 2.1f, 2.2f, 3.0f, 3.1f, 3.2f,
1.0f, 1.1f, 1.2f, 2.0f, 2.1f, 2.2f, 3.0f, 3.1f, 3.2f,
1.0f, 1.1f, 1.2f, 2.0f, 2.1f, 2.2f, 3.0f, 3.1f, 3.2f,
1.0f, 1.1f, 1.2f, 2.0f, 2.1f, 2.2f, 3.0f, 3.1f, 3.2f});
auto i = backend->create_tensor(element::i32, indices_shape);
copy_data(i, vector<int32_t>{0, 2});
auto result = backend->create_tensor(element::f32, out_shape);
auto c = backend->compile(f);
c->call_with_validate({result}, {p, i});
EXPECT_TRUE(test::all_close_f(
(vector<float>{1.0f, 1.1f, 1.2f, 3.0f, 3.1f, 3.2f, 1.0f, 1.1f, 1.2f, 3.0f, 3.1f, 3.2f,
1.0f, 1.1f, 1.2f, 3.0f, 3.1f, 3.2f, 1.0f, 1.1f, 1.2f, 3.0f, 3.1f, 3.2f}),
read_vector<float>(result),
MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, gather_scalar_indices_axis_1_2d_input)
{
Shape params_shape{3, 3};
Shape indices_shape{};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment