Commit ac646533, authored by Jayaram Bobba and committed by Scott Cyphers

[SPEC] Support auto broadcast in FakeQuantize op (#3760)

* Support auto broadcast in FakeQuantize op

* style fix

* add pdpd unit test to plaidml manifest
parent d3c2d772
...@@ -43,58 +43,39 @@ op::FakeQuantize::FakeQuantize(const Output<Node>& data, ...@@ -43,58 +43,39 @@ op::FakeQuantize::FakeQuantize(const Output<Node>& data,
const Output<Node>& input_high, const Output<Node>& input_high,
const Output<Node>& output_low, const Output<Node>& output_low,
const Output<Node>& output_high, const Output<Node>& output_high,
size_t levels) size_t levels,
const AutoBroadcastSpec& auto_broadcast)
: FusedOp({data, input_low, input_high, output_low, output_high}) : FusedOp({data, input_low, input_high, output_low, output_high})
, m_levels(levels) , m_levels(levels)
, m_auto_broadcast(auto_broadcast)
{ {
constructor_validate_and_infer_types(); constructor_validate_and_infer_types();
} }
void op::FakeQuantize::pre_validate_and_infer_types() void op::FakeQuantize::pre_validate_and_infer_types()
{ {
const auto& data_pshape = get_input_partial_shape(0); PartialShape data_pshape = get_input_partial_shape(0);
const auto& input_low_pshape = get_input_partial_shape(1);
const auto& input_high_pshape = get_input_partial_shape(2); for (auto i = 1; i <= 4; i++)
const auto& output_low_pshape = get_input_partial_shape(3);
const auto& output_high_pshape = get_input_partial_shape(4);
if (data_pshape.is_static() && input_low_pshape.is_static() && input_high_pshape.is_static() &&
output_low_pshape.is_static() && output_high_pshape.is_static())
{ {
const Shape data_shape{data_pshape.to_shape()}; if (m_auto_broadcast.m_type == op::AutoBroadcastType::NONE)
const Shape input_low_shape{input_low_pshape.to_shape()}; {
const Shape input_high_shape{input_high_pshape.to_shape()}; NODE_VALIDATION_CHECK(this,
const Shape output_low_shape{output_low_pshape.to_shape()}; PartialShape::merge_into(data_pshape, get_input_partial_shape(i)),
const Shape output_high_shape{output_high_pshape.to_shape()}; "Argument shapes are inconsistent.");
}
NODE_VALIDATION_CHECK( else if (m_auto_broadcast.m_type == op::AutoBroadcastType::NUMPY ||
this, m_auto_broadcast.m_type == op::AutoBroadcastType::PDPD)
(input_low_shape.size() == 0 || {
(input_low_shape.size() == 1 && input_low_shape.at(0) == data_shape.at(1))), NODE_VALIDATION_CHECK(this,
"Input low tensor shape: ", PartialShape::broadcast_merge_into(
input_low_shape, data_pshape, get_input_partial_shape(i), m_auto_broadcast),
", must either be a scalar or a vector of size equal to number of channels."); "Argument shapes are inconsistent.");
NODE_VALIDATION_CHECK( }
this, else
(input_high_shape.size() == 0 || {
(input_high_shape.size() == 1 && input_high_shape.at(0) == data_shape.at(1))), NODE_VALIDATION_CHECK(this, false, "Unsupported auto broadcast specification");
"Input high tensor shape: ", }
input_high_shape,
", must either be a scalar or a vector of size equal to number of channels.");
NODE_VALIDATION_CHECK(
this,
(output_low_shape.size() == 0 ||
(output_low_shape.size() == 1 && output_low_shape.at(0) == data_shape.at(1))),
"Output low tensor shape: ",
output_low_shape,
", must either be a scalar or a vector of size equal to number of channels.");
NODE_VALIDATION_CHECK(
this,
(output_high_shape.size() == 0 ||
(output_high_shape.size() == 1 && output_high_shape.at(0) == data_shape.at(1))),
"Output high tensor shape: ",
output_high_shape,
", must either be a scalar or a vector of size equal to number of channels.");
} }
} }
...@@ -106,7 +87,7 @@ NodeVector op::FakeQuantize::decompose_op() const ...@@ -106,7 +87,7 @@ NodeVector op::FakeQuantize::decompose_op() const
Output<Node> output_low{input_value(3)}; Output<Node> output_low{input_value(3)};
Output<Node> output_high{input_value(4)}; Output<Node> output_high{input_value(4)};
if (input_low.get_shape().size() == 0) if (m_auto_broadcast.m_type == AutoBroadcastType::NUMPY)
{ {
OutputVector broadcasted_nodes = numpy_style_broadcast_values( OutputVector broadcasted_nodes = numpy_style_broadcast_values(
OutputVector{data, input_low, input_high, output_low, output_high}); OutputVector{data, input_low, input_high, output_low, output_high});
...@@ -117,13 +98,17 @@ NodeVector op::FakeQuantize::decompose_op() const ...@@ -117,13 +98,17 @@ NodeVector op::FakeQuantize::decompose_op() const
output_low = broadcasted_nodes.at(3); output_low = broadcasted_nodes.at(3);
output_high = broadcasted_nodes.at(4); output_high = broadcasted_nodes.at(4);
} }
else else if (m_auto_broadcast.m_type == AutoBroadcastType::PDPD)
{ {
input_low = legacy_style_broadcast_values_for_binary_operation(data, input_low, 1).at(1); OutputVector broadcasted_nodes =
input_high = legacy_style_broadcast_values_for_binary_operation(data, input_high, 1).at(1); pdpd_style_broadcast(OutputVector{data, input_low, input_high, output_low, output_high},
output_low = legacy_style_broadcast_values_for_binary_operation(data, output_low, 1).at(1); m_auto_broadcast.m_axis);
output_high =
legacy_style_broadcast_values_for_binary_operation(data, output_high, 1).at(1); data = broadcasted_nodes.at(0);
input_low = broadcasted_nodes.at(1);
input_high = broadcasted_nodes.at(2);
output_low = broadcasted_nodes.at(3);
output_high = broadcasted_nodes.at(4);
} }
const auto input_data_shape = data.get_shape(); const auto input_data_shape = data.get_shape();
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "ngraph/autodiff/adjoints.hpp" #include "ngraph/autodiff/adjoints.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/op/util/attr_types.hpp"
#include "ngraph/op/util/fused_op.hpp" #include "ngraph/op/util/fused_op.hpp"
namespace ngraph namespace ngraph
...@@ -47,19 +48,23 @@ namespace ngraph ...@@ -47,19 +48,23 @@ namespace ngraph
/// ///
/// \brief Constructs a FakeQuantize operation node. /// \brief Constructs a FakeQuantize operation node.
/// ///
/// \param[in] data The input data tensor. /// \param[in] data The input data tensor.
/// \param[in] input_low The minimum limit for input values. /// \param[in] input_low The minimum limit for input values.
/// \param[in] input_high The maximum limit for input values. /// \param[in] input_high The maximum limit for input values.
/// \param[in] output_low The minimum quantized value. /// \param[in] output_low The minimum quantized value.
/// \param[in] output_high The maximum quantized value. /// \param[in] output_high The maximum quantized value.
/// \param[in] levels The number of quantization levels. /// \param[in] levels The number of quantization levels.
/// \param[in] auto_broadcast AutoBroadcast mode to be used for broadcasting
/// limit values
/// ///
FakeQuantize(const Output<Node>& data, FakeQuantize(const Output<Node>& data,
const Output<Node>& input_low, const Output<Node>& input_low,
const Output<Node>& input_high, const Output<Node>& input_high,
const Output<Node>& output_low, const Output<Node>& output_low,
const Output<Node>& output_high, const Output<Node>& output_high,
std::size_t levels); std::size_t levels,
const AutoBroadcastSpec& auto_broadcast =
AutoBroadcastSpec(AutoBroadcastType::NUMPY));
virtual NodeVector decompose_op() const override; virtual NodeVector decompose_op() const override;
virtual void pre_validate_and_infer_types() override; virtual void pre_validate_and_infer_types() override;
...@@ -68,8 +73,16 @@ namespace ngraph ...@@ -68,8 +73,16 @@ namespace ngraph
copy_with_new_args(const NodeVector& new_args) const override; copy_with_new_args(const NodeVector& new_args) const override;
std::size_t get_levels() const { return m_levels; } std::size_t get_levels() const { return m_levels; }
/// \brief Replaces the stored number of quantization levels.
///
/// \param[in] levels The new number of quantization levels.
void set_levels(std::size_t levels) { m_levels = levels; }
/// \brief Returns the auto-broadcast specification stored on this node.
///
/// \return A const reference to the stored AutoBroadcastSpec.
const AutoBroadcastSpec& get_auto_broadcast() const { return m_auto_broadcast; }
/// \brief Replaces the stored auto-broadcast specification.
///
/// \param[in] auto_broadcast The specification to copy into this node.
void set_auto_broadcast(const AutoBroadcastSpec& auto_broadcast)
{
m_auto_broadcast = auto_broadcast;
}
private: private:
std::size_t m_levels; std::size_t m_levels;
AutoBroadcastSpec m_auto_broadcast;
}; };
} }
} }
...@@ -484,6 +484,22 @@ namespace ngraph ...@@ -484,6 +484,22 @@ namespace ngraph
return broadcasted_inputs; return broadcasted_inputs;
} }
/// \brief Broadcasts every value after the first one to the shape of the first
///        value using PDPD-style broadcasting.
///
/// \param[in] inputs The values to process; the first element supplies the target
///                   shape and is passed through unchanged.
/// \param[in] axis   The axis forwarded to broadcast_value_pdpd_style for each
///                   broadcasted value.
///
/// \return The first input followed by the broadcasted remaining inputs. When
///         fewer than two inputs are given, the inputs are returned as they are.
OutputVector pdpd_style_broadcast(const OutputVector& inputs, int64_t axis)
{
    // With zero or one value there is nothing to broadcast against.
    if (inputs.size() < 2)
    {
        return inputs;
    }

    const auto& reference = inputs.front();
    const auto& target_shape = reference.get_shape();

    OutputVector result{reference};
    for (auto it = inputs.begin() + 1; it != inputs.end(); ++it)
    {
        result.push_back(broadcast_value_pdpd_style(*it, target_shape, axis));
    }
    return result;
}
AxisSet calculate_broadcast_axes(const Shape& output_shape, AxisSet calculate_broadcast_axes(const Shape& output_shape,
const Shape& input_shape, const Shape& input_shape,
std::size_t start_match_axis) std::size_t start_match_axis)
......
...@@ -141,6 +141,7 @@ namespace ngraph ...@@ -141,6 +141,7 @@ namespace ngraph
/// ///
/// \return pdpd-style broadcasted list of nodes. /// \return pdpd-style broadcasted list of nodes.
NodeVector pdpd_style_broadcast(const NodeVector& inputs, int64_t axis); NodeVector pdpd_style_broadcast(const NodeVector& inputs, int64_t axis);
OutputVector pdpd_style_broadcast(const OutputVector& inputs, int64_t axis);
/// \brief Generate a list of broadcast axes. /// \brief Generate a list of broadcast axes.
/// ///
......
...@@ -7,6 +7,7 @@ model_matmul_integer_no_zero_point ...@@ -7,6 +7,7 @@ model_matmul_integer_no_zero_point
model_matmul_integer_4d_no_zero_point model_matmul_integer_4d_no_zero_point
fake_quantize fake_quantize
fake_quantize_pdpd
fake_quantize_with_clip fake_quantize_with_clip
fake_quantize_with_clip_across_channels fake_quantize_with_clip_across_channels
......
...@@ -94,12 +94,15 @@ model_lstm_mixed_seq_reverse ...@@ -94,12 +94,15 @@ model_lstm_mixed_seq_reverse
model_reverse_sequence_0_batch_1 model_reverse_sequence_0_batch_1
model_reverse_sequence_1_batch_0 model_reverse_sequence_1_batch_0
# unsupported broadcast mode (pdpd)
fake_quantize_pdpd
auto_bcast_binary_elementwise_pdpd
auto_bcast_binary_elementwise_pdpd_dynamic
# result mismatch # result mismatch
model_dequantize_linear_scalar_zero_scale_int8 model_dequantize_linear_scalar_zero_scale_int8
model_softmax model_softmax
avg_pool_3d_uneven_strided_padded avg_pool_3d_uneven_strided_padded
auto_bcast_binary_elementwise_pdpd
auto_bcast_binary_elementwise_pdpd_dynamic
rnn_cell_activation_function rnn_cell_activation_function
gru_cell_bias_clip gru_cell_bias_clip
gru_cell_linear_before_reset gru_cell_linear_before_reset
......
...@@ -1696,6 +1696,49 @@ NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip) ...@@ -1696,6 +1696,49 @@ NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip)
} }
NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip_across_channels) NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip_across_channels)
{
Shape data_shape{1, 2, 5, 5};
size_t levels = 5;
auto data = make_shared<op::Parameter>(element::f32, data_shape);
auto input_low = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
auto input_high = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
auto output_low = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
auto output_high = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
auto quantize =
make_shared<op::FakeQuantize>(data, input_low, input_high, output_low, output_high, levels);
auto function = make_shared<Function>(
NodeVector{quantize},
ParameterVector{data, input_low, input_high, output_low, output_high});
auto test_case = ngraph::test::NgraphTestCase(function, "${BACKEND_NAME}");
size_t n_elements = shape_size(data_shape);
vector<float> input_data(n_elements);
iota(begin(input_data), end(input_data), 0);
test_case.add_input<float>(input_data);
// input_low
test_case.add_input<float>(vector<float>{5.f, 30.f});
// input_high
test_case.add_input<float>(vector<float>{10.f, 40.f});
// output_low
test_case.add_input<float>(vector<float>{0.f, 50.f});
// output_high
test_case.add_input<float>(vector<float>{20.f, 70.f});
// expected result
test_case.add_expected_output<float>(
data_shape,
vector<float>{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 5.0f, 10.0f, 10.0f, 15.0f,
20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f,
20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 50.0f, 50.0f, 50.0f, 50.0f, 50.0f,
50.0f, 50.0f, 55.0f, 55.0f, 60.0f, 60.0f, 60.0f, 65.0f, 65.0f, 70.0f,
70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f});
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_pdpd)
{ {
Shape data_shape{1, 2, 5, 5}; Shape data_shape{1, 2, 5, 5};
size_t levels = 5; size_t levels = 5;
...@@ -1706,7 +1749,13 @@ NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip_across_channels) ...@@ -1706,7 +1749,13 @@ NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip_across_channels)
auto output_high = make_shared<op::Parameter>(element::f32, Shape{2}); auto output_high = make_shared<op::Parameter>(element::f32, Shape{2});
auto quantize = auto quantize =
make_shared<op::FakeQuantize>(data, input_low, input_high, output_low, output_high, levels); make_shared<op::FakeQuantize>(data,
input_low,
input_high,
output_low,
output_high,
levels,
op::AutoBroadcastSpec(op::AutoBroadcastType::PDPD, 1));
auto function = make_shared<Function>( auto function = make_shared<Function>(
NodeVector{quantize}, NodeVector{quantize},
ParameterVector{data, input_low, input_high, output_low, output_high}); ParameterVector{data, input_low, input_high, output_low, output_high});
......
...@@ -36,7 +36,22 @@ TEST(type_prop, fake_quantize) ...@@ -36,7 +36,22 @@ TEST(type_prop, fake_quantize)
EXPECT_EQ(fake_quantize->get_shape(), (Shape{1, 2, 3, 4})); EXPECT_EQ(fake_quantize->get_shape(), (Shape{1, 2, 3, 4}));
} }
TEST(type_prop, fake_quantize_invalid_rank) TEST(type_prop, fake_quantize_autob)
{
const auto data = make_shared<op::Parameter>(element::f32, Shape{1, 2, 3, 4});
const auto input_low = make_shared<op::Parameter>(element::f32, Shape{3, 1});
const auto input_high = make_shared<op::Parameter>(element::f32, Shape{1, 2, 3, 4});
const auto output_low = make_shared<op::Parameter>(element::f32, Shape{4});
const auto output_high = make_shared<op::Parameter>(element::f32, Shape{});
const int levels = 5;
const auto fake_quantize =
make_shared<op::FakeQuantize>(data, input_low, input_high, output_low, output_high, levels);
EXPECT_EQ(fake_quantize->get_element_type(), element::f32);
EXPECT_EQ(fake_quantize->get_shape(), (Shape{1, 2, 3, 4}));
}
TEST(type_prop, fake_quantize_invalid_autob)
{ {
const auto data = make_shared<op::Parameter>(element::f32, Shape{1, 2, 3, 4}); const auto data = make_shared<op::Parameter>(element::f32, Shape{1, 2, 3, 4});
auto input_low = make_shared<op::Parameter>(element::f32, Shape{3}); auto input_low = make_shared<op::Parameter>(element::f32, Shape{3});
...@@ -45,58 +60,6 @@ TEST(type_prop, fake_quantize_invalid_rank) ...@@ -45,58 +60,6 @@ TEST(type_prop, fake_quantize_invalid_rank)
auto output_high = make_shared<op::Parameter>(element::f32, Shape{}); auto output_high = make_shared<op::Parameter>(element::f32, Shape{});
const int levels = 5; const int levels = 5;
// Invalid input_low dimension
try
{
const auto fake_quantize = make_shared<op::FakeQuantize>(
data, input_low, input_high, output_low, output_high, levels);
EXPECT_FALSE(fake_quantize.get())
<< "FakeQuantize validation did not work. Op node was created with incorrect params.";
}
catch (const NodeValidationFailure& error)
{
EXPECT_HAS_SUBSTRING(error.what(),
std::string("must either be a scalar or a vector of size equal "
"to number of channels."));
}
// Invalid input_high dimension
input_low = make_shared<op::Parameter>(element::f32, Shape{});
input_high = make_shared<op::Parameter>(element::f32, Shape{3});
try
{
const auto fake_quantize = make_shared<op::FakeQuantize>(
data, input_low, input_high, output_low, output_high, levels);
EXPECT_FALSE(fake_quantize.get())
<< "FakeQuantize validation did not work. Op node was created with incorrect params.";
}
catch (const NodeValidationFailure& error)
{
EXPECT_HAS_SUBSTRING(error.what(),
std::string("must either be a scalar or a vector of size equal "
"to number of channels."));
}
// Invalid output_low dimension
input_high = make_shared<op::Parameter>(element::f32, Shape{});
output_low = make_shared<op::Parameter>(element::f32, Shape{3});
try
{
const auto fake_quantize = make_shared<op::FakeQuantize>(
data, input_low, input_high, output_low, output_high, levels);
EXPECT_FALSE(fake_quantize.get())
<< "FakeQuantize validation did not work. Op node was created with incorrect params.";
}
catch (const NodeValidationFailure& error)
{
EXPECT_HAS_SUBSTRING(error.what(),
std::string("must either be a scalar or a vector of size equal "
"to number of channels."));
}
// Invalid output_high dimension
output_low = make_shared<op::Parameter>(element::f32, Shape{});
output_high = make_shared<op::Parameter>(element::f32, Shape{3});
try try
{ {
const auto fake_quantize = make_shared<op::FakeQuantize>( const auto fake_quantize = make_shared<op::FakeQuantize>(
...@@ -106,8 +69,6 @@ TEST(type_prop, fake_quantize_invalid_rank) ...@@ -106,8 +69,6 @@ TEST(type_prop, fake_quantize_invalid_rank)
} }
catch (const NodeValidationFailure& error) catch (const NodeValidationFailure& error)
{ {
EXPECT_HAS_SUBSTRING(error.what(), EXPECT_HAS_SUBSTRING(error.what(), std::string("Argument shapes are inconsistent"));
std::string("must either be a scalar or a vector of size equal "
"to number of channels."));
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment