Commit ac646533 authored by Jayaram Bobba, committed by Scott Cyphers

[SPEC] Support auto broadcast in FakeQuantize op (#3760)

* Support auto broadcast in FakeQuantize op

* style fix

* add pdpd unit test to plaidml manifest
parent d3c2d772
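
The new `auto_broadcast` argument defaults to `AutoBroadcastSpec(AutoBroadcastType::NUMPY)`, so existing call sites keep compiling unchanged. A minimal sketch of the two modes from a caller's point of view, with shapes borrowed from the unit tests below (assumes the usual nGraph headers; not part of this commit):

```cpp
#include "ngraph/ngraph.hpp"

using namespace ngraph;

void fake_quantize_examples()
{
    auto data = std::make_shared<op::Parameter>(element::f32, Shape{1, 2, 5, 5});

    // NUMPY (the default): per-channel limits shaped {2, 1, 1} broadcast
    // against {1, 2, 5, 5} by the usual right-aligned NumPy rules.
    auto lo = std::make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
    auto hi = std::make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
    auto fq_numpy = std::make_shared<op::FakeQuantize>(data, lo, hi, lo, hi, 5);

    // PDPD: rank-1 limits of size 2, explicitly aligned to axis 1 (channels).
    auto lo1 = std::make_shared<op::Parameter>(element::f32, Shape{2});
    auto hi1 = std::make_shared<op::Parameter>(element::f32, Shape{2});
    auto fq_pdpd = std::make_shared<op::FakeQuantize>(
        data, lo1, hi1, lo1, hi1, 5, op::AutoBroadcastSpec(op::AutoBroadcastType::PDPD, 1));
}
```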
@@ -43,58 +43,39 @@ op::FakeQuantize::FakeQuantize(const Output<Node>& data,
                                const Output<Node>& input_low,
                                const Output<Node>& input_high,
                                const Output<Node>& output_low,
                                const Output<Node>& output_high,
-                               size_t levels)
+                               size_t levels,
+                               const AutoBroadcastSpec& auto_broadcast)
     : FusedOp({data, input_low, input_high, output_low, output_high})
     , m_levels(levels)
+    , m_auto_broadcast(auto_broadcast)
 {
     constructor_validate_and_infer_types();
 }
 
 void op::FakeQuantize::pre_validate_and_infer_types()
 {
-    const auto& data_pshape = get_input_partial_shape(0);
-    const auto& input_low_pshape = get_input_partial_shape(1);
-    const auto& input_high_pshape = get_input_partial_shape(2);
-    const auto& output_low_pshape = get_input_partial_shape(3);
-    const auto& output_high_pshape = get_input_partial_shape(4);
-
-    if (data_pshape.is_static() && input_low_pshape.is_static() && input_high_pshape.is_static() &&
-        output_low_pshape.is_static() && output_high_pshape.is_static())
+    PartialShape data_pshape = get_input_partial_shape(0);
+    for (auto i = 1; i <= 4; i++)
     {
-        const Shape data_shape{data_pshape.to_shape()};
-        const Shape input_low_shape{input_low_pshape.to_shape()};
-        const Shape input_high_shape{input_high_pshape.to_shape()};
-        const Shape output_low_shape{output_low_pshape.to_shape()};
-        const Shape output_high_shape{output_high_pshape.to_shape()};
-
-        NODE_VALIDATION_CHECK(
-            this,
-            (input_low_shape.size() == 0 ||
-             (input_low_shape.size() == 1 && input_low_shape.at(0) == data_shape.at(1))),
-            "Input low tensor shape: ",
-            input_low_shape,
-            ", must either be a scalar or a vector of size equal to number of channels.");
-        NODE_VALIDATION_CHECK(
-            this,
-            (input_high_shape.size() == 0 ||
-             (input_high_shape.size() == 1 && input_high_shape.at(0) == data_shape.at(1))),
-            "Input high tensor shape: ",
-            input_high_shape,
-            ", must either be a scalar or a vector of size equal to number of channels.");
-        NODE_VALIDATION_CHECK(
-            this,
-            (output_low_shape.size() == 0 ||
-             (output_low_shape.size() == 1 && output_low_shape.at(0) == data_shape.at(1))),
-            "Output low tensor shape: ",
-            output_low_shape,
-            ", must either be a scalar or a vector of size equal to number of channels.");
-        NODE_VALIDATION_CHECK(
-            this,
-            (output_high_shape.size() == 0 ||
-             (output_high_shape.size() == 1 && output_high_shape.at(0) == data_shape.at(1))),
-            "Output high tensor shape: ",
-            output_high_shape,
-            ", must either be a scalar or a vector of size equal to number of channels.");
+        if (m_auto_broadcast.m_type == op::AutoBroadcastType::NONE)
+        {
+            NODE_VALIDATION_CHECK(this,
+                                  PartialShape::merge_into(data_pshape, get_input_partial_shape(i)),
+                                  "Argument shapes are inconsistent.");
+        }
+        else if (m_auto_broadcast.m_type == op::AutoBroadcastType::NUMPY ||
+                 m_auto_broadcast.m_type == op::AutoBroadcastType::PDPD)
+        {
+            NODE_VALIDATION_CHECK(this,
+                                  PartialShape::broadcast_merge_into(
+                                      data_pshape, get_input_partial_shape(i), m_auto_broadcast),
+                                  "Argument shapes are inconsistent.");
+        }
+        else
+        {
+            NODE_VALIDATION_CHECK(this, false, "Unsupported auto broadcast specification");
+        }
     }
 }
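
In the rewritten `pre_validate_and_infer_types`, each of the four limit inputs is merged into `data_pshape` in turn: `merge_into` demands identical shapes (the NONE mode), while `broadcast_merge_into` applies the NumPy/PDPD rules. A standalone sketch of what the two calls report, assuming the `PartialShape` helpers behave as used above (shapes are illustrative):

```cpp
#include <iostream>
#include "ngraph/op/util/attr_types.hpp"
#include "ngraph/partial_shape.hpp"

using namespace ngraph;

int main()
{
    // NONE: {3, 1} does not equal {1, 2, 3, 4}, so the merge fails.
    PartialShape exact{1, 2, 3, 4};
    bool ok_none = PartialShape::merge_into(exact, PartialShape{3, 1});

    // NUMPY: {3, 1} is right-aligned and the 1 stretches, so the merge
    // succeeds and the accumulated shape stays {1, 2, 3, 4}.
    PartialShape merged{1, 2, 3, 4};
    bool ok_numpy = PartialShape::broadcast_merge_into(
        merged, PartialShape{3, 1}, op::AutoBroadcastSpec(op::AutoBroadcastType::NUMPY));

    // Expected: merge fails (0), broadcast merge succeeds (1), shape {1,2,3,4}.
    std::cout << ok_none << " " << ok_numpy << " " << merged << "\n";
}
```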
@@ -106,7 +87,7 @@ NodeVector op::FakeQuantize::decompose_op() const
     Output<Node> output_low{input_value(3)};
     Output<Node> output_high{input_value(4)};
 
-    if (input_low.get_shape().size() == 0)
+    if (m_auto_broadcast.m_type == AutoBroadcastType::NUMPY)
     {
         OutputVector broadcasted_nodes = numpy_style_broadcast_values(
             OutputVector{data, input_low, input_high, output_low, output_high});
@@ -117,13 +98,17 @@ NodeVector op::FakeQuantize::decompose_op() const
         output_low = broadcasted_nodes.at(3);
         output_high = broadcasted_nodes.at(4);
     }
-    else
+    else if (m_auto_broadcast.m_type == AutoBroadcastType::PDPD)
     {
-        input_low = legacy_style_broadcast_values_for_binary_operation(data, input_low, 1).at(1);
-        input_high = legacy_style_broadcast_values_for_binary_operation(data, input_high, 1).at(1);
-        output_low = legacy_style_broadcast_values_for_binary_operation(data, output_low, 1).at(1);
-        output_high =
-            legacy_style_broadcast_values_for_binary_operation(data, output_high, 1).at(1);
+        OutputVector broadcasted_nodes =
+            pdpd_style_broadcast(OutputVector{data, input_low, input_high, output_low, output_high},
+                                 m_auto_broadcast.m_axis);
+
+        data = broadcasted_nodes.at(0);
+        input_low = broadcasted_nodes.at(1);
+        input_high = broadcasted_nodes.at(2);
+        output_low = broadcasted_nodes.at(3);
+        output_high = broadcasted_nodes.at(4);
     }
 
     const auto input_data_shape = data.get_shape();
@@ -18,6 +18,7 @@
 #include "ngraph/autodiff/adjoints.hpp"
 #include "ngraph/node.hpp"
+#include "ngraph/op/util/attr_types.hpp"
 #include "ngraph/op/util/fused_op.hpp"
 
 namespace ngraph
@@ -47,19 +48,23 @@ namespace ngraph
             ///
             /// \brief Constructs a FakeQuantize operation node.
             ///
-            /// \param[in] data         The input data tensor.
-            /// \param[in] input_low    The minimum limit for input values.
-            /// \param[in] input_high   The maximum limit for input values.
-            /// \param[in] output_low   The minimum quantized value.
-            /// \param[in] output_high  The maximum quantized value.
-            /// \param[in] levels       The number of quantization levels.
+            /// \param[in] data            The input data tensor.
+            /// \param[in] input_low       The minimum limit for input values.
+            /// \param[in] input_high      The maximum limit for input values.
+            /// \param[in] output_low      The minimum quantized value.
+            /// \param[in] output_high     The maximum quantized value.
+            /// \param[in] levels          The number of quantization levels.
+            /// \param[in] auto_broadcast  AutoBroadcast mode to be used for broadcasting
+            ///                            limit values
             ///
             FakeQuantize(const Output<Node>& data,
                          const Output<Node>& input_low,
                          const Output<Node>& input_high,
                          const Output<Node>& output_low,
                          const Output<Node>& output_high,
-                         std::size_t levels);
+                         std::size_t levels,
+                         const AutoBroadcastSpec& auto_broadcast =
+                             AutoBroadcastSpec(AutoBroadcastType::NUMPY));
 
             virtual NodeVector decompose_op() const override;
             virtual void pre_validate_and_infer_types() override;
@@ -68,8 +73,16 @@ namespace ngraph
                 copy_with_new_args(const NodeVector& new_args) const override;
 
             std::size_t get_levels() const { return m_levels; }
             void set_levels(std::size_t levels) { m_levels = levels; }
+            const AutoBroadcastSpec& get_auto_broadcast() const { return m_auto_broadcast; }
+            void set_auto_broadcast(const AutoBroadcastSpec& auto_broadcast)
+            {
+                m_auto_broadcast = auto_broadcast;
+            }
+
         private:
             std::size_t m_levels;
+            AutoBroadcastSpec m_auto_broadcast;
         };
     }
 }
@@ -484,6 +484,22 @@ namespace ngraph
             return broadcasted_inputs;
         }
 
+        OutputVector pdpd_style_broadcast(const OutputVector& inputs, int64_t axis)
+        {
+            if (inputs.size() <= 1)
+            {
+                return inputs;
+            }
+
+            OutputVector broadcasted_inputs{inputs[0]};
+            for (std::size_t i = 1; i < inputs.size(); ++i)
+            {
+                broadcasted_inputs.push_back(
+                    broadcast_value_pdpd_style(inputs[i], inputs[0].get_shape(), axis));
+            }
+            return broadcasted_inputs;
+        }
+
         AxisSet calculate_broadcast_axes(const Shape& output_shape,
                                          const Shape& input_shape,
                                          std::size_t start_match_axis)
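
The `OutputVector` overload mirrors the existing `NodeVector` one: every input after the first is broadcast to the shape of `inputs[0]`, with the PDPD rule placing the operand's dimensions starting at `axis` and treating the remaining target dimensions as broadcast. A standalone sketch of that alignment rule (`pdpd_aligned_shape` is a hypothetical helper of mine, not library code):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Align a PDPD-style operand shape inside the target rank: its dimensions
// occupy positions [axis, axis + rank) and every other position becomes 1,
// ready for an ordinary elementwise broadcast.
std::vector<size_t> pdpd_aligned_shape(const std::vector<size_t>& target,
                                       const std::vector<size_t>& operand,
                                       int64_t axis)
{
    std::vector<size_t> aligned(target.size(), 1);
    for (size_t i = 0; i < operand.size(); ++i)
    {
        aligned[static_cast<size_t>(axis) + i] = operand[i];
    }
    return aligned;
}

int main()
{
    // From the fake_quantize_pdpd test below: data {1, 2, 5, 5}, limits {2}, axis 1.
    auto aligned = pdpd_aligned_shape({1, 2, 5, 5}, {2}, 1);
    assert((aligned == std::vector<size_t>{1, 2, 1, 1})); // then broadcast to {1, 2, 5, 5}
}
```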
@@ -141,6 +141,7 @@ namespace ngraph
         ///
         /// \return     pdpd-style broadcasted list of nodes.
         NodeVector pdpd_style_broadcast(const NodeVector& inputs, int64_t axis);
+        OutputVector pdpd_style_broadcast(const OutputVector& inputs, int64_t axis);
 
         /// \brief      Generate a list of broadcast axes.
         ///
@@ -7,6 +7,7 @@ model_matmul_integer_no_zero_point
 model_matmul_integer_4d_no_zero_point
 fake_quantize
+fake_quantize_pdpd
 fake_quantize_with_clip
 fake_quantize_with_clip_across_channels
@@ -94,12 +94,15 @@ model_lstm_mixed_seq_reverse
 model_reverse_sequence_0_batch_1
 model_reverse_sequence_1_batch_0
 
+# unsupported broadcast mode (pdpd)
+fake_quantize_pdpd
+auto_bcast_binary_elementwise_pdpd
+auto_bcast_binary_elementwise_pdpd_dynamic
+
 # result mismatch
 model_dequantize_linear_scalar_zero_scale_int8
 model_softmax
 avg_pool_3d_uneven_strided_padded
-auto_bcast_binary_elementwise_pdpd
-auto_bcast_binary_elementwise_pdpd_dynamic
 rnn_cell_activation_function
 gru_cell_bias_clip
 gru_cell_linear_before_reset
@@ -1696,6 +1696,49 @@ NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip)
 }
 
 NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip_across_channels)
 {
     Shape data_shape{1, 2, 5, 5};
     size_t levels = 5;
+    auto data = make_shared<op::Parameter>(element::f32, data_shape);
+    auto input_low = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
+    auto input_high = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
+    auto output_low = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
+    auto output_high = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
+
+    auto quantize =
+        make_shared<op::FakeQuantize>(data, input_low, input_high, output_low, output_high, levels);
+    auto function = make_shared<Function>(
+        NodeVector{quantize},
+        ParameterVector{data, input_low, input_high, output_low, output_high});
+    auto test_case = ngraph::test::NgraphTestCase(function, "${BACKEND_NAME}");
+
+    size_t n_elements = shape_size(data_shape);
+    vector<float> input_data(n_elements);
+    iota(begin(input_data), end(input_data), 0);
+
+    test_case.add_input<float>(input_data);
+    // input_low
+    test_case.add_input<float>(vector<float>{5.f, 30.f});
+    // input_high
+    test_case.add_input<float>(vector<float>{10.f, 40.f});
+    // output_low
+    test_case.add_input<float>(vector<float>{0.f, 50.f});
+    // output_high
+    test_case.add_input<float>(vector<float>{20.f, 70.f});
+
+    // expected result
+    test_case.add_expected_output<float>(
+        data_shape,
+        vector<float>{0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  5.0f,  10.0f, 10.0f, 15.0f,
+                      20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f,
+                      20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 50.0f, 50.0f, 50.0f, 50.0f, 50.0f,
+                      50.0f, 50.0f, 55.0f, 55.0f, 60.0f, 60.0f, 60.0f, 65.0f, 65.0f, 70.0f,
+                      70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f});
+
+    test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_pdpd)
+{
+    Shape data_shape{1, 2, 5, 5};
+    size_t levels = 5;
@@ -1706,7 +1749,13 @@ NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip_across_channels)
     auto output_high = make_shared<op::Parameter>(element::f32, Shape{2});
 
     auto quantize =
-        make_shared<op::FakeQuantize>(data, input_low, input_high, output_low, output_high, levels);
+        make_shared<op::FakeQuantize>(data,
+                                      input_low,
+                                      input_high,
+                                      output_low,
+                                      output_high,
+                                      levels,
+                                      op::AutoBroadcastSpec(op::AutoBroadcastType::PDPD, 1));
     auto function = make_shared<Function>(
         NodeVector{quantize},
         ParameterVector{data, input_low, input_high, output_low, output_high});
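
The expected vectors in both tests are consistent with the usual fake-quantize formula: clip the input to [input_low, input_high], snap it to one of `levels` evenly spaced values, then map linearly onto [output_low, output_high]. A small reference check against channel 0 of the test data (my reading of the op's semantics, verified only against the values above):

```cpp
#include <cassert>
#include <cmath>

// Elementwise reference: clip, quantize to `levels` steps, rescale.
float fake_quantize_ref(float x, float in_lo, float in_hi, float out_lo, float out_hi, int levels)
{
    x = std::fmin(std::fmax(x, in_lo), in_hi);
    const float q = std::round((x - in_lo) / (in_hi - in_lo) * (levels - 1));
    return q / (levels - 1) * (out_hi - out_lo) + out_lo;
}

int main()
{
    // Channel 0: input range [5, 10], output range [0, 20], levels = 5.
    assert(fake_quantize_ref(0.f, 5.f, 10.f, 0.f, 20.f, 5) == 0.f);   // clipped low
    assert(fake_quantize_ref(6.f, 5.f, 10.f, 0.f, 20.f, 5) == 5.f);   // round(0.8) -> 1 -> 5
    assert(fake_quantize_ref(7.f, 5.f, 10.f, 0.f, 20.f, 5) == 10.f);  // round(1.6) -> 2 -> 10
    assert(fake_quantize_ref(24.f, 5.f, 10.f, 0.f, 20.f, 5) == 20.f); // clipped high
}
```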
@@ -36,7 +36,22 @@ TEST(type_prop, fake_quantize)
     EXPECT_EQ(fake_quantize->get_shape(), (Shape{1, 2, 3, 4}));
 }
 
-TEST(type_prop, fake_quantize_invalid_rank)
+TEST(type_prop, fake_quantize_autob)
+{
+    const auto data = make_shared<op::Parameter>(element::f32, Shape{1, 2, 3, 4});
+    const auto input_low = make_shared<op::Parameter>(element::f32, Shape{3, 1});
+    const auto input_high = make_shared<op::Parameter>(element::f32, Shape{1, 2, 3, 4});
+    const auto output_low = make_shared<op::Parameter>(element::f32, Shape{4});
+    const auto output_high = make_shared<op::Parameter>(element::f32, Shape{});
+    const int levels = 5;
+
+    const auto fake_quantize =
+        make_shared<op::FakeQuantize>(data, input_low, input_high, output_low, output_high, levels);
+    EXPECT_EQ(fake_quantize->get_element_type(), element::f32);
+    EXPECT_EQ(fake_quantize->get_shape(), (Shape{1, 2, 3, 4}));
+}
+
+TEST(type_prop, fake_quantize_invalid_autob)
 {
     const auto data = make_shared<op::Parameter>(element::f32, Shape{1, 2, 3, 4});
     auto input_low = make_shared<op::Parameter>(element::f32, Shape{3});
@@ -45,58 +60,6 @@ TEST(type_prop, fake_quantize_invalid_rank)
     auto output_high = make_shared<op::Parameter>(element::f32, Shape{});
     const int levels = 5;
 
-    // Invalid input_low dimension
-    try
-    {
-        const auto fake_quantize = make_shared<op::FakeQuantize>(
-            data, input_low, input_high, output_low, output_high, levels);
-        EXPECT_FALSE(fake_quantize.get())
-            << "FakeQuantize validation did not work. Op node was created with incorrect params.";
-    }
-    catch (const NodeValidationFailure& error)
-    {
-        EXPECT_HAS_SUBSTRING(error.what(),
-                             std::string("must either be a scalar or a vector of size equal "
-                                         "to number of channels."));
-    }
-
-    // Invalid input_high dimension
-    input_low = make_shared<op::Parameter>(element::f32, Shape{});
-    input_high = make_shared<op::Parameter>(element::f32, Shape{3});
-    try
-    {
-        const auto fake_quantize = make_shared<op::FakeQuantize>(
-            data, input_low, input_high, output_low, output_high, levels);
-        EXPECT_FALSE(fake_quantize.get())
-            << "FakeQuantize validation did not work. Op node was created with incorrect params.";
-    }
-    catch (const NodeValidationFailure& error)
-    {
-        EXPECT_HAS_SUBSTRING(error.what(),
-                             std::string("must either be a scalar or a vector of size equal "
-                                         "to number of channels."));
-    }
-
-    // Invalid output_low dimension
-    input_high = make_shared<op::Parameter>(element::f32, Shape{});
-    output_low = make_shared<op::Parameter>(element::f32, Shape{3});
-    try
-    {
-        const auto fake_quantize = make_shared<op::FakeQuantize>(
-            data, input_low, input_high, output_low, output_high, levels);
-        EXPECT_FALSE(fake_quantize.get())
-            << "FakeQuantize validation did not work. Op node was created with incorrect params.";
-    }
-    catch (const NodeValidationFailure& error)
-    {
-        EXPECT_HAS_SUBSTRING(error.what(),
-                             std::string("must either be a scalar or a vector of size equal "
-                                         "to number of channels."));
-    }
-
-    // Invalid output_high dimension
-    output_low = make_shared<op::Parameter>(element::f32, Shape{});
-    output_high = make_shared<op::Parameter>(element::f32, Shape{3});
     try
     {
         const auto fake_quantize = make_shared<op::FakeQuantize>(
@@ -106,8 +69,6 @@ TEST(type_prop, fake_quantize_invalid_rank)
     }
     catch (const NodeValidationFailure& error)
    {
-        EXPECT_HAS_SUBSTRING(error.what(),
-                             std::string("must either be a scalar or a vector of size equal "
-                                         "to number of channels."));
+        EXPECT_HAS_SUBSTRING(error.what(), std::string("Argument shapes are inconsistent"));
     }
 }
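
The `fake_quantize_autob` shapes above make the NumPy merge concrete: folding {3, 1}, {1, 2, 3, 4}, {4}, and {} into the data shape leaves {1, 2, 3, 4}. A standalone sketch of the pairwise rule (`numpy_broadcast` is my own helper, not library code):

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// NumPy-style broadcast of two static shapes: right-align them, then each
// pair of dimensions must be equal or one of them must be 1.
std::vector<size_t> numpy_broadcast(std::vector<size_t> a, std::vector<size_t> b)
{
    if (a.size() < b.size())
    {
        std::swap(a, b);
    }
    for (size_t i = 0; i < b.size(); ++i)
    {
        size_t& d = a[a.size() - 1 - i];
        const size_t e = b[b.size() - 1 - i];
        assert(d == e || d == 1 || e == 1); // anything else fails validation
        d = std::max(d, e);
    }
    return a;
}

int main()
{
    std::vector<size_t> shape{1, 2, 3, 4}; // data
    const std::vector<std::vector<size_t>> limits = {{3, 1}, {1, 2, 3, 4}, {4}, {}};
    for (const auto& limit : limits)
    {
        shape = numpy_broadcast(shape, limit);
    }
    assert((shape == std::vector<size_t>{1, 2, 3, 4})); // matches the EXPECT_EQ above
}
```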