Commit 519b18ac authored by Jayaram Bobba's avatar Jayaram Bobba Committed by Robert Kimball

IAT: Skip reshapes that are removing or adding size-1 dimensions (#1684)

* Reshape optimizations for cases where unit-sized dimensions are added to or removed from tensors

* Added unit tests for eliminating squeeze and expand_dims operations

* Bug fix to expand dims layout

* Style fix
parent 00b4453d
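The reshapes this change targets only add or remove unit-sized dimensions, so the element order is unchanged and the input buffer can be forwarded as-is. A standalone sketch of the idea on plain shapes (hypothetical squeeze_shape helper, not ngraph code):

#include <cstddef>
#include <iostream>
#include <vector>

// A squeeze-style reshape drops unit axes without reordering elements:
// {1, 16, 2, 2} -> {16, 2, 2}. An expand-style reshape is the inverse:
// {16, 2, 2} -> {1, 16, 2, 1, 2}. Either way the buffer can be forwarded.
std::vector<std::size_t> squeeze_shape(const std::vector<std::size_t>& shape)
{
    std::vector<std::size_t> out;
    for (std::size_t d : shape)
        if (d != 1)
            out.push_back(d);
    return out;
}

int main()
{
    for (std::size_t d : squeeze_shape({1, 16, 2, 2}))
        std::cout << d << ' '; // prints: 16 2 2
    std::cout << '\n';
}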
......@@ -44,7 +44,7 @@ namespace ngraph
s *= shape[shape.size() - (i + 1)];
}
std::reverse(m_strides.begin(), m_strides.end());
m_mkldnn_memory_size = shape_size(tv.get_shape()) * tv.get_element_type().size();
m_buffer_size = shape_size(tv.get_shape()) * tv.get_element_type().size();
}
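The constructor derives row-major strides by accumulating extents from the innermost axis outward; the same computation on plain vectors (illustration only):

#include <algorithm>
#include <cstddef>
#include <vector>

// Row-major strides: the innermost axis has stride 1 and each outer axis
// strides over the product of the inner extents. Shape {2, 3, 4} yields
// strides {12, 4, 1}.
std::vector<std::size_t> row_major_strides(const std::vector<std::size_t>& shape)
{
    std::vector<std::size_t> strides;
    std::size_t s = 1;
    for (std::size_t i = 0; i < shape.size(); i++)
    {
        strides.push_back(s);
        s *= shape[shape.size() - (i + 1)];
    }
    std::reverse(strides.begin(), strides.end());
    return strides;
}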
size_t LayoutDescriptor::get_index_offset(const std::vector<size_t>& indices)
......@@ -109,7 +109,7 @@ namespace ngraph
{
auto mem_prim_desc =
mkldnn::memory::primitive_desc(md, mkldnn_utils::global_cpu_engine);
m_mkldnn_memory_size = mem_prim_desc.get_size();
m_buffer_size = mem_prim_desc.get_size();
}
catch (const mkldnn::error& e)
{
......@@ -118,6 +118,15 @@ namespace ngraph
e.message);
}
}
bool LayoutDescriptor::is_row_major_layout()
{
if (!is_mkldnn_layout())
return true;
auto native_md = runtime::cpu::mkldnn_utils::create_blocked_mkldnn_md(
get_shape(), m_strides, get_element_type());
return runtime::cpu::mkldnn_utils::compare_mkldnn_mds(m_mkldnn_md, native_md);
}
}
}
}
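is_row_major_layout rebuilds the descriptor a dense row-major tensor would have and compares it against the actual one. The equivalent check expressed directly on strides (hypothetical helper, bypassing the mkldnn descriptors):

#include <cstddef>
#include <vector>

// A layout is row-major when every stride equals the canonical row-major
// stride for its shape: expected stride grows by each extent, innermost out.
bool is_row_major(const std::vector<std::size_t>& shape,
                  const std::vector<std::size_t>& strides)
{
    std::size_t expected = 1;
    for (std::size_t i = shape.size(); i-- > 0;)
    {
        if (strides[i] != expected)
            return false;
        expected *= shape[i];
    }
    return true;
}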
......@@ -37,7 +37,7 @@ namespace ngraph
public:
LayoutDescriptor(const ngraph::descriptor::Tensor& tv);
~LayoutDescriptor() override {}
virtual size_t get_allocated_size() override { return m_mkldnn_memory_size; }
virtual size_t get_allocated_size() override { return m_buffer_size; }
size_t get_offset() const { return m_offset; }
size_t get_index_offset(const std::vector<size_t>& indices) override;
......@@ -51,6 +51,7 @@ namespace ngraph
{
return m_mkldnn_md.data.format != mkldnn::memory::format::format_undef;
}
bool is_row_major_layout();
static const mkldnn::memory::desc DummyDesc;
......@@ -64,7 +65,7 @@ namespace ngraph
// Otherwise, physical layout is assumed to be in row-major
// format represented by m_strides
mkldnn::memory::desc m_mkldnn_md;
size_t m_mkldnn_memory_size;
size_t m_buffer_size;
};
typedef std::vector<std::shared_ptr<ngraph::runtime::cpu::LayoutDescriptor>>
......
......@@ -371,31 +371,10 @@ mkldnn::memory::desc runtime::cpu::mkldnn_utils::create_blocked_mkldnn_md(
return memory::desc(md);
}
memory::desc runtime::cpu::mkldnn_utils::rotate_blocked_md(const memory::desc& in,
AxisVector& axis_order)
// MKLDNN kernel selection sometimes relies on named layouts like "mkldnn_nchw"
// Try to convert a blocked layout into a named layout
memory::desc runtime::cpu::mkldnn_utils::try_get_named_md(mkldnn_memory_desc_t md)
{
mkldnn_memory_desc_t md;
md.primitive_kind = in.data.primitive_kind;
md.ndims = in.data.ndims;
md.format = mkldnn_blocked;
md.data_type = in.data.data_type;
for (size_t i = 0; i < in.data.ndims; i++)
{
md.layout_desc.blocking.block_dims[i] =
in.data.layout_desc.blocking.block_dims[axis_order[i]];
md.layout_desc.blocking.strides[1][i] =
in.data.layout_desc.blocking.strides[1][axis_order[i]];
md.layout_desc.blocking.strides[0][i] =
in.data.layout_desc.blocking.strides[0][axis_order[i]];
md.layout_desc.blocking.padding_dims[i] =
in.data.layout_desc.blocking.padding_dims[axis_order[i]];
md.layout_desc.blocking.offset_padding_to_data[i] =
in.data.layout_desc.blocking.offset_padding_to_data[axis_order[i]];
md.dims[i] = in.data.dims[axis_order[i]];
}
md.layout_desc.blocking.offset_padding = in.data.layout_desc.blocking.offset_padding;
auto out_md = memory::desc(md);
auto get_named_md = [](const mkldnn_memory_desc_t& blk, const mkldnn_memory_format_t format) {
......@@ -448,12 +427,132 @@ memory::desc runtime::cpu::mkldnn_utils::rotate_blocked_md(const memory::desc& i
return out_md;
}
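try_get_named_md works by testing whether the blocked descriptor is equivalent to one produced from a named format. A descriptor-free sketch of that probing loop (Desc and the format table are stand-ins, not the mkldnn API):

#include <functional>
#include <string>
#include <utility>
#include <vector>

struct Desc
{
    std::vector<std::size_t> dims, strides;
};

// Hypothetical named-format table: each entry builds the strides that the
// named layout implies for the given dims.
using StrideRule = std::function<std::vector<std::size_t>(const std::vector<std::size_t>&)>;

std::string try_get_named(const Desc& d,
                          const std::vector<std::pair<std::string, StrideRule>>& table)
{
    for (const auto& entry : table)
        if (entry.second(d.dims) == d.strides)
            return entry.first; // e.g. "nchw"
    return "blocked";           // fall back to the generic blocked description
}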
bool runtime::cpu::mkldnn_utils::use_mkldnn_kernel(const ngraph::Node* node)
memory::desc runtime::cpu::mkldnn_utils::rotate_blocked_md(const memory::desc& in,
const AxisVector& axis_order)
{
auto op_annotations = static_cast<const ngraph::op::Op*>(node)->get_op_annotations();
return (op_annotations &&
static_pointer_cast<ngraph::runtime::cpu::CPUOpAnnotations>(op_annotations)
->is_mkldnn_op());
mkldnn_memory_desc_t md;
md.primitive_kind = in.data.primitive_kind;
md.ndims = in.data.ndims;
md.format = mkldnn_blocked;
md.data_type = in.data.data_type;
for (size_t i = 0; i < in.data.ndims; i++)
{
md.layout_desc.blocking.block_dims[i] =
in.data.layout_desc.blocking.block_dims[axis_order[i]];
md.layout_desc.blocking.strides[1][i] =
in.data.layout_desc.blocking.strides[1][axis_order[i]];
md.layout_desc.blocking.strides[0][i] =
in.data.layout_desc.blocking.strides[0][axis_order[i]];
md.layout_desc.blocking.padding_dims[i] =
in.data.layout_desc.blocking.padding_dims[axis_order[i]];
md.layout_desc.blocking.offset_padding_to_data[i] =
in.data.layout_desc.blocking.offset_padding_to_data[axis_order[i]];
md.dims[i] = in.data.dims[axis_order[i]];
}
md.layout_desc.blocking.offset_padding = in.data.layout_desc.blocking.offset_padding;
return try_get_named_md(md);
}
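Rotation is a pure gather: output slot i copies every per-axis field from input slot axis_order[i], so a transpose becomes a relabeling of axes over the same buffer. On plain dims/strides vectors (illustration):

#include <cstddef>
#include <vector>

struct Layout
{
    std::vector<std::size_t> dims, strides;
};

// Permute a layout by axis_order without touching the underlying memory.
// For NCHW dims {1, 32, 2, 2}, strides {128, 4, 2, 1} and order {0, 2, 3, 1},
// the result {1, 2, 2, 32} / {128, 2, 1, 4} is the NHWC view of that buffer.
Layout rotate(const Layout& in, const std::vector<std::size_t>& axis_order)
{
    Layout out;
    for (std::size_t i = 0; i < axis_order.size(); i++)
    {
        out.dims.push_back(in.dims[axis_order[i]]);
        out.strides.push_back(in.strides[axis_order[i]]);
    }
    return out;
}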
memory::desc runtime::cpu::mkldnn_utils::squeeze_blocked_md(const memory::desc& in,
AxisVector& axis_list)
{
if (in.data.ndims <= axis_list.size())
{
throw ngraph_error("Squeezing too many axes: input " + to_string(in.data.ndims) +
" , removing " + to_string(axis_list.size()));
}
for (auto axis : axis_list)
{
if (in.data.dims[axis] != 1)
{
throw ngraph_error("Cannot squeeze axis on non unit-size, axis: " + to_string(axis) +
" size: " + to_string(in.data.dims[axis]));
}
}
mkldnn_memory_desc_t md;
md.primitive_kind = in.data.primitive_kind;
md.ndims = in.data.ndims - static_cast<int>(axis_list.size());
md.format = mkldnn_blocked;
md.data_type = in.data.data_type;
size_t k = 0;
for (size_t i = 0, j = 0; i < in.data.ndims; i++)
{
if (k < axis_list.size() && i == axis_list[k])
{
k++;
continue;
}
md.layout_desc.blocking.block_dims[j] = in.data.layout_desc.blocking.block_dims[i];
md.layout_desc.blocking.strides[1][j] = in.data.layout_desc.blocking.strides[1][i];
md.layout_desc.blocking.strides[0][j] = in.data.layout_desc.blocking.strides[0][i];
md.layout_desc.blocking.padding_dims[j] = in.data.layout_desc.blocking.padding_dims[i];
md.layout_desc.blocking.offset_padding_to_data[j] =
in.data.layout_desc.blocking.offset_padding_to_data[i];
md.dims[j] = in.data.dims[i];
j++;
}
md.layout_desc.blocking.offset_padding = in.data.layout_desc.blocking.offset_padding;
return try_get_named_md(md);
}
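Squeezing deletes the per-axis bookkeeping for the unit axes and leaves the surviving strides untouched, which is why it is only legal when each squeezed axis has size 1 and carries no padding. A vector-level sketch:

#include <cstddef>
#include <vector>

struct Layout
{
    std::vector<std::size_t> dims, strides;
};

// Drop the (size-1) axes listed in axis_list, keeping remaining strides as-is.
// {1, 32, 2, 2} with axis_list {0} becomes {32, 2, 2} over the same buffer.
Layout squeeze(const Layout& in, const std::vector<std::size_t>& axis_list)
{
    Layout out;
    std::size_t k = 0;
    for (std::size_t i = 0; i < in.dims.size(); i++)
    {
        if (k < axis_list.size() && i == axis_list[k])
        {
            k++; // skip a squeezed axis (caller guarantees dims[i] == 1)
            continue;
        }
        out.dims.push_back(in.dims[i]);
        out.strides.push_back(in.strides[i]);
    }
    return out;
}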
memory::desc runtime::cpu::mkldnn_utils::expand_blocked_md(const memory::desc& in,
AxisVector& axis_list)
{
mkldnn_memory_desc_t md;
md.primitive_kind = in.data.primitive_kind;
md.ndims = in.data.ndims + static_cast<int>(axis_list.size());
md.format = mkldnn_blocked;
md.data_type = in.data.data_type;
size_t k = 0;
for (size_t i = 0, j = 0; j < md.ndims; j++)
{
if (k < axis_list.size() && j == axis_list[k])
{
k++;
md.dims[j] = 1;
md.layout_desc.blocking.block_dims[j] = 1;
md.layout_desc.blocking.padding_dims[j] = 1;
md.layout_desc.blocking.offset_padding_to_data[j] = 0;
if (i > 0)
{
md.layout_desc.blocking.strides[1][j] =
in.data.layout_desc.blocking.strides[1][i - 1];
md.layout_desc.blocking.strides[0][j] =
in.data.layout_desc.blocking.strides[0][i - 1];
}
else
{
md.layout_desc.blocking.strides[1][j] = 0;
size_t nelems = 1;
for (size_t idx = 0; idx < in.data.ndims; idx++)
nelems *= in.data.dims[idx];
md.layout_desc.blocking.strides[0][j] = nelems;
}
}
else
{
md.dims[j] = in.data.dims[i];
md.layout_desc.blocking.strides[1][j] = in.data.layout_desc.blocking.strides[1][i];
md.layout_desc.blocking.strides[0][j] = in.data.layout_desc.blocking.strides[0][i];
md.layout_desc.blocking.block_dims[j] = in.data.layout_desc.blocking.block_dims[i];
md.layout_desc.blocking.padding_dims[j] = in.data.layout_desc.blocking.padding_dims[i];
md.layout_desc.blocking.offset_padding_to_data[j] =
in.data.layout_desc.blocking.offset_padding_to_data[i];
i++;
}
}
md.layout_desc.blocking.offset_padding = in.data.layout_desc.blocking.offset_padding;
return try_get_named_md(md);
}
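Expansion mirrors squeezing: each inserted axis gets extent 1 and borrows a neighbor's stride, and since a unit axis is never stepped, the borrowed value is effectively arbitrary. A sketch with a simplified stride choice relative to the code above:

#include <cstddef>
#include <vector>

struct Layout
{
    std::vector<std::size_t> dims, strides;
};

// Insert size-1 axes at the positions in axis_list (sorted positions in the
// output shape). {32, 2, 2} with axis_list {0, 3} -> {1, 32, 2, 1, 2}.
Layout expand(const Layout& in, const std::vector<std::size_t>& axis_list)
{
    Layout out;
    std::size_t k = 0;
    for (std::size_t i = 0, j = 0; j < in.dims.size() + axis_list.size(); j++)
    {
        if (k < axis_list.size() && j == axis_list[k])
        {
            k++;
            out.dims.push_back(1);
            // Stride of a unit axis is never used; mirror the left neighbor.
            out.strides.push_back(j == 0 ? 1 : out.strides.back());
        }
        else
        {
            out.dims.push_back(in.dims[i]);
            out.strides.push_back(in.strides[i]);
            i++;
        }
    }
    return out;
}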
bool runtime::cpu::mkldnn_utils::compare_mkldnn_formats(mkldnn::memory::format lhs,
......@@ -493,3 +592,38 @@ bool runtime::cpu::mkldnn_utils::is_mkldnn_blocked_data_format(mkldnn::memory::f
}
return false;
}
bool runtime::cpu::mkldnn_utils::is_mkldnn_padded_layout(const mkldnn::memory::desc& in,
const AxisVector& axis_list)
{
for (size_t i = 0; i < in.data.ndims; i++)
{
if (std::find(axis_list.begin(), axis_list.end(), i) == axis_list.end())
{
continue;
}
if (in.data.layout_desc.blocking.padding_dims[i] != in.data.dims[i])
{
return true;
}
if (in.data.layout_desc.blocking.offset_padding_to_data[i] != 0)
{
return true;
}
}
if (in.data.layout_desc.blocking.offset_padding != 0)
{
return true;
}
return false;
}
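Padding is what rules out squeezing in some cases: blocked formats such as nChw8c round an extent up to the block size, so the buffer contains gaps that a plain reshape cannot forward. The core of the check on stand-in fields (mirroring padding_dims vs. dims above):

#include <cstddef>
#include <vector>

struct Blocking
{
    std::vector<std::size_t> dims;         // logical extents
    std::vector<std::size_t> padding_dims; // extents rounded up for blocking
};

// An axis is padded when its stored extent differs from its logical one,
// e.g. dims {1, 1, 2, 2} stored as padding_dims {1, 8, 2, 2} in nChw8c.
bool is_padded(const Blocking& b, const std::vector<std::size_t>& axes)
{
    for (std::size_t axis : axes)
        if (b.padding_dims[axis] != b.dims[axis])
            return true;
    return false;
}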
bool runtime::cpu::mkldnn_utils::use_mkldnn_kernel(const ngraph::Node* node)
{
auto op_annotations = static_cast<const ngraph::op::Op*>(node)->get_op_annotations();
return (op_annotations &&
static_pointer_cast<ngraph::runtime::cpu::CPUOpAnnotations>(op_annotations)
->is_mkldnn_op());
}
......@@ -43,6 +43,7 @@ namespace ngraph
const mkldnn::memory::desc& get_input_mkldnn_md(const Node* node, size_t index);
const mkldnn::memory::desc& get_output_mkldnn_md(const Node* node, size_t index);
mkldnn::memory::desc create_default_mkldnn_md(const Node* node,
size_t index,
bool is_output,
......@@ -54,14 +55,24 @@ namespace ngraph
mkldnn::memory::desc create_blocked_mkldnn_md(const Shape& dims,
const Strides& strides,
const ngraph::element::Type type);
mkldnn::memory::desc try_get_named_md(mkldnn_memory_desc_t md);
mkldnn::memory::desc rotate_blocked_md(const mkldnn::memory::desc& in,
AxisVector& axis_order);
bool use_mkldnn_kernel(const ngraph::Node* node);
const AxisVector& axis_order);
mkldnn::memory::desc squeeze_blocked_md(const mkldnn::memory::desc& in,
AxisVector& axis_list);
mkldnn::memory::desc expand_blocked_md(const mkldnn::memory::desc& in,
AxisVector& axis_list);
bool compare_mkldnn_formats(mkldnn::memory::format lhs, mkldnn::memory::format rhs);
bool compare_mkldnn_mds(const mkldnn::memory::desc& lhs,
const mkldnn::memory::desc& rhs);
bool is_mkldnn_padded_layout(const mkldnn::memory::desc& in,
const AxisVector& axis_list);
bool is_mkldnn_filter_format(mkldnn::memory::format fmt);
bool is_mkldnn_blocked_data_format(mkldnn::memory::format fmt);
bool use_mkldnn_kernel(const ngraph::Node* node);
std::unordered_set<std::type_index>& get_op_registry();
std::map<element::Type, const mkldnn::memory::data_type>&
get_mkldnn_data_type_map();
......
......@@ -483,7 +483,7 @@ namespace ngraph
auto arg0_rank = arg0_shape.size();
auto result_shape = node->get_output_shape(0);
if ((arg0_rank == 4 || arg0_rank == 2) &&
if ((arg0_rank == 4 || arg0_rank == 3 || arg0_rank == 2) &&
node->get_input_element_type(0) == element::f32)
{
auto op_annotations =
......
......@@ -1322,84 +1322,182 @@ namespace ngraph
}
}
static bool can_be_rotated(const ngraph::op::Reshape* reshape,
const mkldnn::memory::desc& md)
{
auto axis_order = reshape->get_input_order();
auto input_shape = reshape->get_input_shape(0);
auto output_shape = reshape->get_output_shape();
if (input_shape.size() != output_shape.size())
return false;
if (shape_size(input_shape) == 1)
return false;
for (size_t i = 0; i < output_shape.size(); i++)
{
if (input_shape[axis_order[i]] != output_shape[i])
return false;
}
return true;
}
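can_be_rotated admits exactly the pure transposes: equal rank, and every output extent equal to the correspondingly permuted input extent. The predicate in isolation:

#include <cstddef>
#include <vector>

// A reshape is a pure transpose when out[i] == in[order[i]] for all i.
// {2, 3, 4} with order {2, 0, 1} -> {4, 2, 3}: rotatable.
// {2, 3, 4} -> {2, 12}: rank changes, not rotatable.
bool pure_transpose(const std::vector<std::size_t>& in,
                    const std::vector<std::size_t>& order,
                    const std::vector<std::size_t>& out)
{
    if (in.size() != out.size())
        return false;
    for (std::size_t i = 0; i < out.size(); i++)
        if (in[order[i]] != out[i])
            return false;
    return true;
}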
static bool can_be_squeezed(const ngraph::op::Reshape* reshape,
const mkldnn::memory::desc& md,
AxisVector& squeezed_axis)
{
auto input_shape = reshape->get_input_shape(0);
auto output_shape = reshape->get_output_shape();
if (input_shape.size() <= output_shape.size())
return false;
if (shape_size(input_shape) == 1)
return false;
for (size_t i = 0, j = 0; i < input_shape.size(); i++)
{
if (j >= output_shape.size() || input_shape[i] != output_shape[j])
{
// Squeezed axis
if (input_shape[i] != 1)
return false;
squeezed_axis.push_back(i);
}
else
{
j++;
}
}
if (mkldnn_utils::is_mkldnn_padded_layout(md, squeezed_axis))
return false;
return true;
}
static bool can_be_expanded(const ngraph::op::Reshape* reshape,
const mkldnn::memory::desc& md,
AxisVector& expanded_axis)
{
auto input_shape = reshape->get_input_shape(0);
auto output_shape = reshape->get_output_shape();
if (input_shape.size() >= output_shape.size())
return false;
if (shape_size(input_shape) == 1)
return false;
for (size_t i = 0, j = 0; j < output_shape.size(); j++)
{
if (i >= input_shape.size() || input_shape[i] != output_shape[j])
{
// Expanded axis
if (output_shape[j] != 1)
return false;
expanded_axis.push_back(j);
}
else
{
i++;
}
}
return true;
}
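can_be_squeezed and can_be_expanded are the two directions of one two-pointer scan: advance through both shapes on matching extents and require every unmatched extent on the longer side to be 1. The squeeze direction in isolation (standalone sketch):

#include <cstddef>
#include <vector>

// Returns true and fills `axes` when `out` is `in` minus some size-1 axes.
// {1, 32, 2, 2} vs {32, 2, 2} -> true, axes = {0}.
// {1, 32, 2, 2} vs {32, 4}    -> false (a non-unit mismatch cannot be squeezed).
bool squeezable(const std::vector<std::size_t>& in,
                const std::vector<std::size_t>& out,
                std::vector<std::size_t>& axes)
{
    std::size_t j = 0;
    for (std::size_t i = 0; i < in.size(); i++)
    {
        if (j < out.size() && in[i] == out[j])
            j++;
        else if (in[i] == 1)
            axes.push_back(i);
        else
            return false;
    }
    return j == out.size();
}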
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::Reshape)
{
auto reshape = static_cast<ngraph::op::Reshape*>(node.get());
if (reshape->get_is_transpose())
bool skip_reshape = false;
bool skip_input_reorder = false;
auto tvl =
node->get_inputs()[0].get_output().get_tensor_ptr()->get_tensor_layout();
auto cpu_tvl = dynamic_cast<runtime::cpu::LayoutDescriptor*>(tvl.get());
if (cpu_tvl && cpu_tvl->is_mkldnn_layout())
{
if (reshape->get_output_shape().size() ==
reshape->get_argument(0)->get_shape().size())
auto input_md = mkldnn_utils::get_input_mkldnn_md(node.get(), 0);
auto input_shape = reshape->get_input_shape(0);
auto output_shape = reshape->get_output_shape();
AxisVector squeezed_axis;
AxisVector expanded_axis;
// Case 1: Transpose only. Rotate layouts
// Case 2: Squeeze dims. Remove size-1 dimensions. Squeeze mkldnn layout
// Case 3: Expand dims. Add size-1 dimensions. Expand mkldnn layout
// Default: Convert to row-major if needed
if (can_be_rotated(reshape, input_md))
{
auto axis_order = reshape->get_input_order();
auto tvl = node->get_inputs()[0]
.get_output()
.get_tensor_ptr()
->get_tensor_layout();
auto cpu_tvl = dynamic_cast<runtime::cpu::LayoutDescriptor*>(tvl.get());
if (cpu_tvl && cpu_tvl->is_mkldnn_layout())
{
// Rotate MKLDNN memory descriptor
auto input_md = mkldnn_utils::get_input_mkldnn_md(node.get(), 0);
auto output_md =
mkldnn_utils::rotate_blocked_md(input_md, axis_order);
set_output_layouts(node, {output_md});
auto op_annotations = reshape->get_op_annotations();
if (op_annotations)
{
// pass-through
op_annotations->add_in_place_oi_pair({0, 0, false});
}
else
{
op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
// pass-through
op_annotations->add_in_place_oi_pair({0, 0, false});
reshape->set_op_annotations(op_annotations);
}
}
else
{
auto input_strides = cpu_tvl->get_strides();
Strides output_strides(input_strides.size());
for (size_t i = 0; i < input_strides.size(); i++)
{
output_strides[i] = input_strides[axis_order[i]];
}
set_native_layouts(external_function, node);
auto output_tvl =
dynamic_pointer_cast<runtime::cpu::LayoutDescriptor>(
node->get_output_tensor_ptr()->get_tensor_layout());
// TODO (jbobba): For now non-MKLDNN layouts are always in row-major format
// Enable this once we support non row-major strided formats
// output_tvl->set_strides(output_strides);
}
auto output_md = mkldnn_utils::rotate_blocked_md(
input_md, reshape->get_input_order());
set_output_layouts(node, {output_md});
skip_reshape = true;
skip_input_reorder = true;
}
else if (can_be_squeezed(reshape, input_md, squeezed_axis))
{
auto output_md =
mkldnn_utils::squeeze_blocked_md(input_md, squeezed_axis);
set_output_layouts(node, {output_md});
skip_reshape = true;
skip_input_reorder = true;
}
else if (can_be_expanded(reshape, input_md, expanded_axis))
{
auto output_md =
mkldnn_utils::expand_blocked_md(input_md, expanded_axis);
set_output_layouts(node, {output_md});
skip_reshape = true;
skip_input_reorder = true;
}
else
{
set_native_layouts(external_function, node);
return;
if (!reshape->get_is_transpose())
skip_reshape = true;
}
}
else
{
// Shape change only, tensor in native layout can be
// forwarded to output
auto op_annotations = reshape->get_op_annotations();
if (op_annotations)
// Input is in row-major layout
if (reshape->get_is_transpose())
{
// pass-through
op_annotations->add_in_place_oi_pair({0, 0, false});
auto input_strides = cpu_tvl->get_strides();
auto axis_order = reshape->get_input_order();
Strides output_strides(input_strides.size());
for (size_t i = 0; i < input_strides.size(); i++)
{
output_strides[i] = input_strides[axis_order[i]];
}
auto output_tvl = dynamic_pointer_cast<runtime::cpu::LayoutDescriptor>(
node->get_output_tensor_ptr()->get_tensor_layout());
// TODO (jbobba): For now non-MKLDNN layouts are always in row-major format
// Enable this once we support non row-major strided formats
// output_tvl->set_strides(output_strides);
}
else
{
skip_reshape = true;
}
}
if (skip_reshape)
{
auto op_annotations = reshape->get_op_annotations();
if (!op_annotations)
{
op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
// pass-through
op_annotations->add_in_place_oi_pair({0, 0, false});
reshape->set_op_annotations(op_annotations);
}
// pass-through
op_annotations->add_in_place_oi_pair({0, 0, false});
}
if (!skip_input_reorder)
{
set_native_layouts(external_function, node);
}
}
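Taken together, the handler reduces to a small decision table; a compilable outline (the boolean parameters stand in for the can_be_* predicates and reshape->get_is_transpose()):

enum class Action { Rotate, Squeeze, Expand, ReorderToRowMajor, Forward, Copy };

// Decision table of the Reshape layout handler above.
Action choose_layout_action(bool mkldnn_input, bool can_rotate, bool can_squeeze,
                            bool can_expand, bool is_transpose)
{
    if (mkldnn_input)
    {
        if (can_rotate) return Action::Rotate;   // transpose: relabel axes in place
        if (can_squeeze) return Action::Squeeze; // drop unit axes, forward buffer
        if (can_expand) return Action::Expand;   // insert unit axes, forward buffer
        return Action::ReorderToRowMajor;        // fall back to an input reorder
    }
    return is_transpose ? Action::Copy : Action::Forward; // row-major input
}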
......
......@@ -31,7 +31,10 @@
#include "ngraph/op/parameter.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/visualize_tree.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/pass/cpu_assignment.hpp"
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_layout.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
#include "nlohmann/json.hpp"
......@@ -191,3 +194,166 @@ TEST(cpu_test, mkldnn_layouts)
EXPECT_EQ(vector<float>{expected_result}, rv);
}
TEST(cpu_test, reshape_squeeze)
{
auto make_function = []() -> std::shared_ptr<Function> {
auto A = make_shared<op::Parameter>(element::f32, Shape{1, 16, 2, 2});
auto B = make_shared<op::Parameter>(element::f32, Shape{32, 16, 1, 1});
auto conv = make_shared<op::Convolution>(A,
B,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
auto squeeze = make_shared<op::Reshape>(conv, AxisVector{0, 1, 2, 3}, Shape{32, 2, 2});
return make_shared<Function>(NodeVector{squeeze}, op::ParameterVector{A, B});
};
auto backend = runtime::Backend::create("CPU");
auto cpu_f = make_function();
auto int_f = make_function();
test::Uniform<float> rng(-100.0f, 100.0f);
vector<vector<float>> args;
for (shared_ptr<op::Parameter> param : cpu_f->get_parameters())
{
vector<float> tensor_val(shape_size(param->get_shape()));
rng.initialize(tensor_val);
args.push_back(tensor_val);
}
auto int_results = execute(int_f, args, "INTERPRETER");
auto cpu_results = execute(cpu_f, args, "CPU");
// Two convert layouts for inputs and weights of convolution.
EXPECT_EQ(count_ops_of_type<runtime::cpu::op::ConvertLayout>(cpu_f), 2);
for (size_t i = 0; i < cpu_results.size(); i++)
{
EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f));
}
}
TEST(cpu_test, reshape_expand)
{
auto make_function = []() -> std::shared_ptr<Function> {
auto A = make_shared<op::Parameter>(element::f32, Shape{1, 16, 2, 2});
auto B = make_shared<op::Parameter>(element::f32, Shape{32, 16, 1, 1});
auto conv = make_shared<op::Convolution>(A,
B,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
auto expand =
make_shared<op::Reshape>(conv, AxisVector{0, 1, 2, 3}, Shape{1, 32, 2, 1, 2, 1});
return make_shared<Function>(NodeVector{expand}, op::ParameterVector{A, B});
};
auto backend = runtime::Backend::create("CPU");
auto cpu_f = make_function();
auto int_f = make_function();
test::Uniform<float> rng(-100.0f, 100.0f);
vector<vector<float>> args;
for (shared_ptr<op::Parameter> param : cpu_f->get_parameters())
{
vector<float> tensor_val(shape_size(param->get_shape()));
rng.initialize(tensor_val);
args.push_back(tensor_val);
}
auto int_results = execute(int_f, args, "INTERPRETER");
auto cpu_results = execute(cpu_f, args, "CPU");
EXPECT_EQ(count_ops_of_type<runtime::cpu::op::ConvertLayout>(cpu_f), 2);
for (size_t i = 0; i < cpu_results.size(); i++)
{
EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f));
}
}
TEST(cpu_test, reshape_squeeze_padded)
{
auto make_function = []() -> std::shared_ptr<Function> {
auto A = make_shared<op::Parameter>(element::f32, Shape{1, 16, 2, 2});
auto B = make_shared<op::Parameter>(element::f32, Shape{1, 16, 1, 1});
auto conv = make_shared<op::Convolution>(A,
B,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
auto squeeze = make_shared<op::Reshape>(conv, AxisVector{0, 1, 2, 3}, Shape{2, 2});
return make_shared<Function>(NodeVector{squeeze}, op::ParameterVector{A, B});
};
auto backend = runtime::Backend::create("CPU");
auto cpu_f = make_function();
auto int_f = make_function();
test::Uniform<float> rng(-100.0f, 100.0f);
vector<vector<float>> args;
for (shared_ptr<op::Parameter> param : cpu_f->get_parameters())
{
vector<float> tensor_val(shape_size(param->get_shape()));
rng.initialize(tensor_val);
args.push_back(tensor_val);
}
auto int_results = execute(int_f, args, "INTERPRETER");
auto cpu_results = execute(cpu_f, args, "CPU");
// Two convert layouts for inputs and weights of convolution.
// One convert layout after convolution
EXPECT_EQ(count_ops_of_type<runtime::cpu::op::ConvertLayout>(cpu_f), 3);
for (size_t i = 0; i < cpu_results.size(); i++)
{
EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f));
}
}
TEST(cpu_test, reshape_expand_squeeze)
{
auto make_function = []() -> std::shared_ptr<Function> {
auto A = make_shared<op::Parameter>(element::f32, Shape{1, 16, 1, 8});
auto B1 = make_shared<op::Parameter>(element::f32, Shape{32, 16, 1, 1});
auto conv1 = make_shared<op::Convolution>(A,
B1,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
auto squeeze = make_shared<op::Reshape>(conv1, AxisVector{0, 1, 2, 3}, Shape{1, 32, 8});
auto relu = make_shared<op::Relu>(squeeze);
auto expand = make_shared<op::Reshape>(relu, AxisVector{0, 1, 2}, Shape{1, 32, 1, 8});
auto B2 = make_shared<op::Parameter>(element::f32, Shape{8, 32, 1, 1});
auto conv2 = make_shared<op::Convolution>(expand,
B2,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
return make_shared<Function>(NodeVector{conv2}, op::ParameterVector{A, B1, B2});
};
auto backend = runtime::Backend::create("CPU");
auto cpu_f = make_function();
auto int_f = make_function();
test::Uniform<float> rng(-100.0f, 100.0f);
vector<vector<float>> args;
for (shared_ptr<op::Parameter> param : cpu_f->get_parameters())
{
vector<float> tensor_val(shape_size(param->get_shape()));
rng.initialize(tensor_val);
args.push_back(tensor_val);
}
auto int_results = execute(int_f, args, "INTERPRETER");
auto cpu_results = execute(cpu_f, args, "CPU");
for (size_t i = 0; i < cpu_results.size(); i++)
{
EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f));
}
EXPECT_LE(count_ops_of_type<runtime::cpu::op::ConvertLayout>(cpu_f), 4);
}