Commit eb6bca0e authored by Amy Zhuang, committed by Scott Cyphers

Add CPU backend support for Tile op using Eigen kernel. (#2977)

* Add CPU backend support for Tile op using Eigen kernel.

* Emit Tile code for CODEGEN.

* Fix a bug.

* Add one comment and fix error message typo.
parent 78d6a880
@@ -51,7 +51,7 @@ void op::Tile::validate_and_infer_types()
// Repeats shapes should be of form {arg_rank} or dynamic
NODE_VALIDATION_CHECK(this,
repeats_shape.compatible(PartialShape{arg_rank}),
"Arg and padding below ranks mismatch");
"Arg and repeats ranks mismatch");
output_rank = arg_rank;
}
......
@@ -82,6 +82,7 @@ set(SRC
builder/softmax.cpp
builder/get_output_element.cpp
builder/sum.cpp
builder/tile.cpp
builder/topk.cpp
builder/update_slice.cpp
kernel/pad.cpp
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/kernel/tile.hpp"
#include "ngraph/op/experimental/tile.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace runtime
{
namespace cpu
{
template <>
void Builder::BUILDER_DECL(ngraph::op::Tile)
{
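// Direct-execution (DEX) path: build a functor that calls the tile kernel at run time.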
auto arg_shape = args[0].get_shape();
auto arg_rank = arg_shape.size();
auto& functors = external_function->get_functors();
auto arg_buffer_index = external_function->get_buffer_index(args[0].get_name());
auto out_buffer_index = external_function->get_buffer_index(out[0].get_name());
auto out_shape = out[0].get_shape();
// Keep this here in case we want to support scalar input in the future.
if (arg_rank == 0)
{
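// For a rank-0 (scalar) input, every output element is a copy of that scalar,
// so the repeat count is just the output volume.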
size_t repeats = shape_size(out_shape);
std::function<decltype(runtime::cpu::kernel::tile_rank_0<float>)> kernel;
SELECT_KERNEL(
kernel, out[0].get_element_type(), runtime::cpu::kernel::tile_rank_0);
auto functor = [&, kernel, repeats, arg_buffer_index, out_buffer_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
kernel(ctx->buffer_data[arg_buffer_index],
ctx->buffer_data[out_buffer_index],
repeats);
};
functors.emplace_back(functor);
}
else
{
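// Rank >= 1: dispatch to the Eigen-based kernel specialized on element type and input rank.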
std::function<decltype(runtime::cpu::kernel::tile<float, 2>)> kernel;
SELECT_KERNEL_BY_RANK(
kernel, out[0].get_element_type(), arg_rank, runtime::cpu::kernel::tile);
auto functor =
[&, kernel, arg_shape, out_shape, arg_buffer_index, out_buffer_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
kernel(ctx->buffer_data[arg_buffer_index],
ctx->buffer_data[out_buffer_index],
arg_shape,
out_shape,
ectx->arena);
};
functors.emplace_back(functor);
}
}
REGISTER_OP_BUILDER(Tile);
}
}
}
@@ -61,6 +61,7 @@
#include "ngraph/op/experimental/quantized_dot.hpp"
#include "ngraph/op/experimental/quantized_dot_bias.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/experimental/tile.hpp"
#include "ngraph/op/floor.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/fused/group_conv.hpp"
@@ -4062,6 +4063,37 @@ namespace ngraph
}
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Tile)
{
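// CODEGEN path: emit a direct call to the same cpu::kernel::tile helpers used by the builder.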
auto arg_shape = args[0].get_shape();
auto arg_rank = arg_shape.size();
auto out_shape = out[0].get_shape();
const element::Type& et = args[0].get_element_type();
if (arg_rank == 0)
{
size_t repeats = shape_size(out_shape);
writer.block_begin();
writer << "cpu::kernel::tile_rank_0<" << et.c_type_string() << ">("
<< args[0].get_name() << ", " << out[0].get_name() << ", "
<< std::to_string(repeats) << ");\n";
writer.block_end();
}
else
{
writer.block_begin();
writer << "cpu::kernel::tile<" << et.c_type_string() << ", "
<< std::to_string(arg_rank) << ">(" << args[0].get_name() << ", "
<< out[0].get_name() << ", {" << join(arg_shape) << "}, {"
<< join(out_shape) << "}, 0);\n";
writer.block_end();
}
}
#undef TI
} // namespace cpu
} // namespace runtime
......
@@ -78,6 +78,7 @@
#include "ngraph/op/experimental/quantized_dot.hpp"
#include "ngraph/op/experimental/quantized_dot_bias.hpp"
#include "ngraph/op/experimental/quantized_max_pool.hpp"
#include "ngraph/op/experimental/tile.hpp"
#include "ngraph/op/floor.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/fused/group_conv.hpp"
@@ -437,6 +438,7 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::DeconvolutionBias),
&runtime::cpu::CPU_Emitter::emit<ngraph::op::DeconvolutionBias>},
{TI(ngraph::op::QuantizedConcat), &runtime::cpu::CPU_Emitter::emit<op::QuantizedConcat>},
{TI(ngraph::op::Tile), &runtime::cpu::CPU_Emitter::emit<op::Tile>},
};
static void
......
@@ -225,6 +225,16 @@ namespace ngraph
template <typename ElementType>
void reference_erf(void* arg, void* out, size_t count);
template <typename ElementType>
void tile_rank_0(void* input, void* output, size_t repeats);
template <typename ElementType, unsigned int Rank>
void tile(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape,
int arena);
template <typename ElementType,
typename IndicesType,
unsigned int Rank1,
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/axis_set.hpp"
#include "ngraph/runtime/cpu/cpu_executor.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void tile_rank_0(void* input, void* output, size_t repeats)
{
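// Replicate the single input element across the entire output buffer.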
auto data = (static_cast<ElementType*>(input))[0];
auto output_ptr = static_cast<ElementType*>(output);
for (size_t i = 0; i < repeats; i++)
{
output_ptr[i] = data;
}
}
template <typename ElementType, unsigned int Rank>
void tile(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape,
int arena)
{
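// Express tiling as an Eigen broadcast: map both buffers as row-major
// tensors of static rank and replicate the input along each axis.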
Eigen::array<Eigen::Index, Rank> out_dims;
Eigen::array<Eigen::Index, Rank> in_dims;
for (int i = 0; i < Rank; i++)
{
out_dims[i] = output_shape[i];
in_dims[i] = input_shape[i];
}
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
static_cast<ElementType*>(input), in_dims);
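// Tile semantics guarantee output_shape[i] is an exact multiple of
// input_shape[i]; the quotient is the per-axis broadcast factor.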
Eigen::array<ptrdiff_t, Rank> factors;
for (int i = 0; i < Rank; i++)
{
factors[i] = output_shape[i] / input_shape[i];
}
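// Eigen's broadcast() repeats the tensor factors[i] times along axis i;
// evaluate on the executor's device (Eigen thread pool) selected by the arena index.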
out.device(ngraph::runtime::cpu::executor::GetCPUExecutor().get_device(arena)) =
in.broadcast(factors);
}
}
}
}
}
@@ -30,6 +30,7 @@
#include "ngraph/ngraph.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/erf.hpp"
#include "ngraph/op/experimental/tile.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/parameter.hpp"
@@ -1731,6 +1732,168 @@ TEST(cpu_test, avg_pool_bprop_2d_2channel_2image)
MIN_FLOAT_TOLERANCE_BITS));
}
TEST(cpu_test, tile_1d_with_zero_repeats)
{
Shape shape_a{2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_re{1};
auto repeats = make_shared<op::Constant>(element::i64, shape_re, vector<int>{0});
Shape shape_r{0};
auto tile = make_shared<op::Tile>(A, repeats);
auto f = make_shared<Function>(tile, ParameterVector{A});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a, vector<float>{1, 2});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a});
EXPECT_TRUE(
test::all_close_f(vector<float>{}, read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
TEST(cpu_test, tile_1d)
{
Shape shape_a{2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_re{1};
auto repeats = make_shared<op::Constant>(element::i64, shape_re, vector<int>{2});
Shape shape_r{4};
auto tile = make_shared<op::Tile>(A, repeats);
auto f = make_shared<Function>(tile, ParameterVector{A});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a, vector<float>{1, 2});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a});
EXPECT_TRUE(test::all_close_f(
vector<float>{1, 2, 1, 2}, read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
TEST(cpu_test, tile_2d_with_zero_repeats)
{
Shape shape_a{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_re{2};
auto repeats = make_shared<op::Constant>(element::i64, shape_re, vector<int>{2, 0});
Shape shape_r{4, 0};
auto tile = make_shared<op::Tile>(A, repeats);
auto f = make_shared<Function>(tile, ParameterVector{A});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a, vector<float>{1, 2, 3, 4});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a});
EXPECT_TRUE(
test::all_close_f(vector<float>{}, read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
TEST(cpu_test, tile_2d_1axis)
{
Shape shape_a{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_re{2};
auto repeats = make_shared<op::Constant>(element::i64, shape_re, vector<int>{3, 1});
Shape shape_r{6, 2};
auto tile = make_shared<op::Tile>(A, repeats);
auto f = make_shared<Function>(tile, ParameterVector{A});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a, vector<float>{1, 2, 3, 4});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a});
EXPECT_TRUE(test::all_close_f(vector<float>{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4},
read_vector<float>(result),
MIN_FLOAT_TOLERANCE_BITS));
}
TEST(cpu_test, tile_2d_2axes)
{
Shape shape_a{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_re{2};
auto repeats = make_shared<op::Constant>(element::i64, shape_re, vector<int>{3, 3});
Shape shape_r{6, 6};
auto tile = make_shared<op::Tile>(A, repeats);
auto f = make_shared<Function>(tile, ParameterVector{A});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a, vector<float>{1, 2, 3, 4});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a});
EXPECT_TRUE(
test::all_close_f(vector<float>{1, 2, 1, 2, 1, 2, 3, 4, 3, 4, 3, 4, 1, 2, 1, 2, 1, 2,
3, 4, 3, 4, 3, 4, 1, 2, 1, 2, 1, 2, 3, 4, 3, 4, 3, 4},
read_vector<float>(result),
MIN_FLOAT_TOLERANCE_BITS));
}
TEST(cpu_test, tile_3d)
{
Shape shape_a{2, 1, 3};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_re{3};
auto repeats = make_shared<op::Constant>(element::i64, shape_re, vector<int>{2, 2, 1});
Shape shape_r{4, 2, 3};
auto tile = make_shared<op::Tile>(A, repeats);
auto f = make_shared<Function>(tile, ParameterVector{A});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a, vector<float>{1, 2, 3, 4, 5, 6});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a});
EXPECT_TRUE(test::all_close_f(
vector<float>{1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6, 1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6},
read_vector<float>(result),
MIN_FLOAT_TOLERANCE_BITS));
}
TEST(cpu_test, scatter_add_1d_indices_in_place)
{
Shape ref_shape{2, 3, 3};
......