[SPEC] Support auto broadcast in FakeQuantize op (#3760)

* Support auto broadcast in FakeQuantize op
* style fix
* add pdpd unit test to plaidml manifest

[SPEC] Support auto broadcast in FakeQuantize op (#3760)
* Support auto broadcast in FakeQuantize op
* style fix
* add pdpd unit test to plaidml manifest
ac646533 · Jayaram Bobba · Scott Cyphers · d3c2d772 · ac646533 · ac646533
Commit ac646533, authored Oct 18, 2019 by Jayaram Bobba; committed by Scott Cyphers on Oct 18, 2019.
8 changed files
--- a/src/ngraph/op/fused/fake_quantize.cpp
+++ b/src/ngraph/op/fused/fake_quantize.cpp
@@ -43,58 +43,39 @@ op::FakeQuantize::FakeQuantize(const Output<Node>& data,
                               const Output<Node>& input_high,
                               const Output<Node>& output_low,
                               const Output<Node>& output_high,
-                               size_t levels)
+                               size_t levels,
+                               const AutoBroadcastSpec& auto_broadcast)
    : FusedOp({data, input_low, input_high, output_low, output_high})
    , m_levels(levels)
+    , m_auto_broadcast(auto_broadcast)
 {
    constructor_validate_and_infer_types();
 }
 void op::FakeQuantize::pre_validate_and_infer_types()
 {
-    const auto& data_pshape = get_input_partial_shape(0);
+    PartialShape data_pshape = get_input_partial_shape(0);
-    const auto& input_low_pshape = get_input_partial_shape(1);
-    const auto& input_high_pshape = get_input_partial_shape(2);
+    for (auto i = 1; i <= 4; i++)
-    const auto& output_low_pshape = get_input_partial_shape(3);
-    const auto& output_high_pshape = get_input_partial_shape(4);
-    if (data_pshape.is_static() && input_low_pshape.is_static() && input_high_pshape.is_static() &&
-        output_low_pshape.is_static() && output_high_pshape.is_static())
    {
-        const Shape data_shape{data_pshape.to_shape()};
+        if (m_auto_broadcast.m_type == op::AutoBroadcastType::NONE)
-        const Shape input_low_shape{input_low_pshape.to_shape()};
+        {
-        const Shape input_high_shape{input_high_pshape.to_shape()};
+            NODE_VALIDATION_CHECK(this,
-        const Shape output_low_shape{output_low_pshape.to_shape()};
+                                  PartialShape::merge_into(data_pshape, get_input_partial_shape(i)),
-        const Shape output_high_shape{output_high_pshape.to_shape()};
+                                  "Argument shapes are inconsistent.");
+        }
-        NODE_VALIDATION_CHECK(
+        else if (m_auto_broadcast.m_type == op::AutoBroadcastType::NUMPY ||
-            this,
+                 m_auto_broadcast.m_type == op::AutoBroadcastType::PDPD)
-            (input_low_shape.size() == 0 ||
+        {
-             (input_low_shape.size() == 1 && input_low_shape.at(0) == data_shape.at(1))),
+            NODE_VALIDATION_CHECK(this,
-            "Input low tensor shape: ",
+                                  PartialShape::broadcast_merge_into(
-            input_low_shape,
+                                      data_pshape, get_input_partial_shape(i), m_auto_broadcast),
-            ", must either be a scalar or a vector of size equal to number of channels.");
+                                  "Argument shapes are inconsistent.");
-        NODE_VALIDATION_CHECK(
+        }
-            this,
+        else
-            (input_high_shape.size() == 0 ||
+        {
-             (input_high_shape.size() == 1 && input_high_shape.at(0) == data_shape.at(1))),
+            NODE_VALIDATION_CHECK(this, false, "Unsupported auto broadcast specification");
-            "Input high tensor shape: ",
+        }
-            input_high_shape,
-            ", must either be a scalar or a vector of size equal to number of channels.");
-        NODE_VALIDATION_CHECK(
-            this,
-            (output_low_shape.size() == 0 ||
-             (output_low_shape.size() == 1 && output_low_shape.at(0) == data_shape.at(1))),
-            "Output low tensor shape: ",
-            output_low_shape,
-            ", must either be a scalar or a vector of size equal to number of channels.");
-        NODE_VALIDATION_CHECK(
-            this,
-            (output_high_shape.size() == 0 ||
-             (output_high_shape.size() == 1 && output_high_shape.at(0) == data_shape.at(1))),
-            "Output high tensor shape: ",
-            output_high_shape,
-            ", must either be a scalar or a vector of size equal to number of channels.");
    }
 }
@@ -106,7 +87,7 @@ NodeVector op::FakeQuantize::decompose_op() const
    Output<Node> output_low{input_value(3)};
    Output<Node> output_high{input_value(4)};
-    if (input_low.get_shape().size() == 0)
+    if (m_auto_broadcast.m_type == AutoBroadcastType::NUMPY)
    {
        OutputVector broadcasted_nodes = numpy_style_broadcast_values(
            OutputVector{data, input_low, input_high, output_low, output_high});
@@ -117,13 +98,17 @@ NodeVector op::FakeQuantize::decompose_op() const
        output_low = broadcasted_nodes.at(3);
        output_high = broadcasted_nodes.at(4);
    }
-    else
+    else if (m_auto_broadcast.m_type == AutoBroadcastType::PDPD)
    {
-        input_low = legacy_style_broadcast_values_for_binary_operation(data, input_low, 1).at(1);
+        OutputVector broadcasted_nodes =
-        input_high = legacy_style_broadcast_values_for_binary_operation(data, input_high, 1).at(1);
+            pdpd_style_broadcast(OutputVector{data, input_low, input_high, output_low, output_high},
-        output_low = legacy_style_broadcast_values_for_binary_operation(data, output_low, 1).at(1);
+                                 m_auto_broadcast.m_axis);
-        output_high =
-            legacy_style_broadcast_values_for_binary_operation(data, output_high, 1).at(1);
+        data = broadcasted_nodes.at(0);
+        input_low = broadcasted_nodes.at(1);
+        input_high = broadcasted_nodes.at(2);
+        output_low = broadcasted_nodes.at(3);
+        output_high = broadcasted_nodes.at(4);
    }
    const auto input_data_shape = data.get_shape();

--- a/src/ngraph/op/fused/fake_quantize.hpp
+++ b/src/ngraph/op/fused/fake_quantize.hpp
@@ -18,6 +18,7 @@
 #include "ngraph/autodiff/adjoints.hpp"
 #include "ngraph/node.hpp"
+#include "ngraph/op/util/attr_types.hpp"
 #include "ngraph/op/util/fused_op.hpp"
 namespace ngraph
@@ -47,19 +48,23 @@ namespace ngraph
            ///
            /// \brief      Constructs a FakeQuantize operation node.
            ///
-            /// \param[in]  data         The input data tensor.
+            /// \param[in]  data            The input data tensor.
-            /// \param[in]  input_low    The minimum limit for input values.
+            /// \param[in]  input_low       The minimum limit for input values.
-            /// \param[in]  input_high   The maximum limit for input values.
+            /// \param[in]  input_high      The maximum limit for input values.
-            /// \param[in]  output_low   The minimum quantized value.
+            /// \param[in]  output_low      The minimum quantized value.
-            /// \param[in]  output_high  The maximum quantized value.
+            /// \param[in]  output_high     The maximum quantized value.
-            /// \param[in]  levels       The number of quantization levels.
+            /// \param[in]  levels          The number of quantization levels.
+            /// \param[in]  auto_broadcast  AutoBroadcast mode to be used for broadcasting
+            ///                             limit values
            ///
            FakeQuantize(const Output<Node>& data,
                         const Output<Node>& input_low,
                         const Output<Node>& input_high,
                         const Output<Node>& output_low,
                         const Output<Node>& output_high,
-                         std::size_t levels);
+                         std::size_t levels,
+                         const AutoBroadcastSpec& auto_broadcast =
+                             AutoBroadcastSpec(AutoBroadcastType::NUMPY));
            virtual NodeVector decompose_op() const override;
            virtual void pre_validate_and_infer_types() override;
@@ -68,8 +73,16 @@ namespace ngraph
                copy_with_new_args(const NodeVector& new_args) const override;
            std::size_t get_levels() const { return m_levels; }
+            void set_levels(std::size_t levels) { m_levels = levels; }
+            const AutoBroadcastSpec& get_auto_broadcast() const { return m_auto_broadcast; }
+            void set_auto_broadcast(const AutoBroadcastSpec& auto_broadcast)
+            {
+                m_auto_broadcast = auto_broadcast;
+            }
        private:
            std::size_t m_levels;
+            AutoBroadcastSpec m_auto_broadcast;
        };
    }
 }
--- a/src/ngraph/op/util/broadcasting.cpp
+++ b/src/ngraph/op/util/broadcasting.cpp
@@ -484,6 +484,22 @@ namespace ngraph
            return broadcasted_inputs;
        }
+        OutputVector pdpd_style_broadcast(const OutputVector& inputs, int64_t axis)
+        {
+            if (inputs.size() <= 1)
+            {
+                return inputs;
+            }
+            OutputVector broadcasted_inputs{inputs[0]};
+            for (std::size_t i = 1; i < inputs.size(); ++i)
+            {
+                broadcasted_inputs.push_back(
+                    broadcast_value_pdpd_style(inputs[i], inputs[0].get_shape(), axis));
+            }
+            return broadcasted_inputs;
+        }
        AxisSet calculate_broadcast_axes(const Shape& output_shape,
                                         const Shape& input_shape,
                                         std::size_t start_match_axis)

--- a/src/ngraph/op/util/broadcasting.hpp
+++ b/src/ngraph/op/util/broadcasting.hpp
@@ -141,6 +141,7 @@ namespace ngraph
        ///
        /// \return pdpd-style broadcasted list of nodes.
        NodeVector pdpd_style_broadcast(const NodeVector& inputs, int64_t axis);
+        OutputVector pdpd_style_broadcast(const OutputVector& inputs, int64_t axis);
        /// \brief Generate a list of broadcast axes.
        ///

--- a/src/ngraph/runtime/interpreter/unit_test.manifest
+++ b/src/ngraph/runtime/interpreter/unit_test.manifest
@@ -7,6 +7,7 @@ model_matmul_integer_no_zero_point
 model_matmul_integer_4d_no_zero_point
 fake_quantize
+fake_quantize_pdpd
 fake_quantize_with_clip
 fake_quantize_with_clip_across_channels

--- a/src/ngraph/runtime/plaidml/unit_test.manifest
+++ b/src/ngraph/runtime/plaidml/unit_test.manifest
@@ -94,12 +94,15 @@ model_lstm_mixed_seq_reverse
 model_reverse_sequence_0_batch_1
 model_reverse_sequence_1_batch_0
+# unsupported broadcast mode (pdpd)
+fake_quantize_pdpd
+auto_bcast_binary_elementwise_pdpd
+auto_bcast_binary_elementwise_pdpd_dynamic
 # result mismatch
 model_dequantize_linear_scalar_zero_scale_int8
 model_softmax
 avg_pool_3d_uneven_strided_padded
-auto_bcast_binary_elementwise_pdpd
-auto_bcast_binary_elementwise_pdpd_dynamic
 rnn_cell_activation_function
 gru_cell_bias_clip
 gru_cell_linear_before_reset

--- a/test/backend/fused_op.in.cpp
+++ b/test/backend/fused_op.in.cpp
@@ -1696,6 +1696,49 @@ NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip)
 }
 NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip_across_channels)
+{
+    Shape data_shape{1, 2, 5, 5};
+    size_t levels = 5;
+    auto data = make_shared<op::Parameter>(element::f32, data_shape);
+    auto input_low = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
+    auto input_high = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
+    auto output_low = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
+    auto output_high = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
+    auto quantize =
+        make_shared<op::FakeQuantize>(data, input_low, input_high, output_low, output_high, levels);
+    auto function = make_shared<Function>(
+        NodeVector{quantize},
+        ParameterVector{data, input_low, input_high, output_low, output_high});
+    auto test_case = ngraph::test::NgraphTestCase(function, "${BACKEND_NAME}");
+    size_t n_elements = shape_size(data_shape);
+    vector<float> input_data(n_elements);
+    iota(begin(input_data), end(input_data), 0);
+    test_case.add_input<float>(input_data);
+    // input_low
+    test_case.add_input<float>(vector<float>{5.f, 30.f});
+    // input_high
+    test_case.add_input<float>(vector<float>{10.f, 40.f});
+    // output_low
+    test_case.add_input<float>(vector<float>{0.f, 50.f});
+    // output_high
+    test_case.add_input<float>(vector<float>{20.f, 70.f});
+    // expected result
+    test_case.add_expected_output<float>(
+        data_shape,
+        vector<float>{0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  5.0f,  10.0f, 10.0f, 15.0f,
+                      20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f,
+                      20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 50.0f, 50.0f, 50.0f, 50.0f, 50.0f,
+                      50.0f, 50.0f, 55.0f, 55.0f, 60.0f, 60.0f, 60.0f, 65.0f, 65.0f, 70.0f,
+                      70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f});
+    test_case.run();
+}
+NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_pdpd)
 {
    Shape data_shape{1, 2, 5, 5};
    size_t levels = 5;
@@ -1706,7 +1749,13 @@ NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip_across_channels)
    auto output_high = make_shared<op::Parameter>(element::f32, Shape{2});
    auto quantize =
-        make_shared<op::FakeQuantize>(data, input_low, input_high, output_low, output_high, levels);
+        make_shared<op::FakeQuantize>(data,
+                                      input_low,
+                                      input_high,
+                                      output_low,
+                                      output_high,
+                                      levels,
+                                      op::AutoBroadcastSpec(op::AutoBroadcastType::PDPD, 1));
    auto function = make_shared<Function>(
        NodeVector{quantize},
        ParameterVector{data, input_low, input_high, output_low, output_high});

--- a/test/type_prop/fake_quantize.cpp
+++ b/test/type_prop/fake_quantize.cpp
@@ -36,7 +36,22 @@ TEST(type_prop, fake_quantize)
    EXPECT_EQ(fake_quantize->get_shape(), (Shape{1, 2, 3, 4}));
 }
-TEST(type_prop, fake_quantize_invalid_rank)
+TEST(type_prop, fake_quantize_autob)
+{
+    const auto data = make_shared<op::Parameter>(element::f32, Shape{1, 2, 3, 4});
+    const auto input_low = make_shared<op::Parameter>(element::f32, Shape{3, 1});
+    const auto input_high = make_shared<op::Parameter>(element::f32, Shape{1, 2, 3, 4});
+    const auto output_low = make_shared<op::Parameter>(element::f32, Shape{4});
+    const auto output_high = make_shared<op::Parameter>(element::f32, Shape{});
+    const int levels = 5;
+    const auto fake_quantize =
+        make_shared<op::FakeQuantize>(data, input_low, input_high, output_low, output_high, levels);
+    EXPECT_EQ(fake_quantize->get_element_type(), element::f32);
+    EXPECT_EQ(fake_quantize->get_shape(), (Shape{1, 2, 3, 4}));
+}
+TEST(type_prop, fake_quantize_invalid_autob)
 {
    const auto data = make_shared<op::Parameter>(element::f32, Shape{1, 2, 3, 4});
    auto input_low = make_shared<op::Parameter>(element::f32, Shape{3});
@@ -45,58 +60,6 @@ TEST(type_prop, fake_quantize_invalid_rank)
    auto output_high = make_shared<op::Parameter>(element::f32, Shape{});
    const int levels = 5;
-    // Invalid input_low dimension
-    try
-    {
-        const auto fake_quantize = make_shared<op::FakeQuantize>(
-            data, input_low, input_high, output_low, output_high, levels);
-        EXPECT_FALSE(fake_quantize.get())
-            << "FakeQuantize validation did not work. Op node was created with incorrect params.";
-    }
-    catch (const NodeValidationFailure& error)
-    {
-        EXPECT_HAS_SUBSTRING(error.what(),
-                             std::string("must either be a scalar or a vector of size equal "
-                                         "to number of channels."));
-    }
-    // Invalid input_high dimension
-    input_low = make_shared<op::Parameter>(element::f32, Shape{});
-    input_high = make_shared<op::Parameter>(element::f32, Shape{3});
-    try
-    {
-        const auto fake_quantize = make_shared<op::FakeQuantize>(
-            data, input_low, input_high, output_low, output_high, levels);
-        EXPECT_FALSE(fake_quantize.get())
-            << "FakeQuantize validation did not work. Op node was created with incorrect params.";
-    }
-    catch (const NodeValidationFailure& error)
-    {
-        EXPECT_HAS_SUBSTRING(error.what(),
-                             std::string("must either be a scalar or a vector of size equal "
-                                         "to number of channels."));
-    }
-    // Invalid output_low dimension
-    input_high = make_shared<op::Parameter>(element::f32, Shape{});
-    output_low = make_shared<op::Parameter>(element::f32, Shape{3});
-    try
-    {
-        const auto fake_quantize = make_shared<op::FakeQuantize>(
-            data, input_low, input_high, output_low, output_high, levels);
-        EXPECT_FALSE(fake_quantize.get())
-            << "FakeQuantize validation did not work. Op node was created with incorrect params.";
-    }
-    catch (const NodeValidationFailure& error)
-    {
-        EXPECT_HAS_SUBSTRING(error.what(),
-                             std::string("must either be a scalar or a vector of size equal "
-                                         "to number of channels."));
-    }
-    // Invalid output_high dimension
-    output_low = make_shared<op::Parameter>(element::f32, Shape{});
-    output_high = make_shared<op::Parameter>(element::f32, Shape{3});
    try
    {
        const auto fake_quantize = make_shared<op::FakeQuantize>(
@@ -106,8 +69,6 @@ TEST(type_prop, fake_quantize_invalid_rank)
    }
    catch (const NodeValidationFailure& error)
    {
-        EXPECT_HAS_SUBSTRING(error.what(),
+        EXPECT_HAS_SUBSTRING(error.what(), std::string("Argument shapes are inconsistent"));
-                             std::string("must either be a scalar or a vector of size equal "
-                                         "to number of channels."));
    }
 }