Commit 5a1de88e authored by Tomasz Socha, committed by Scott Cyphers

[Spec] Update fused operator GroupConvolution to new specification (#3543)

* Change method of splitting data and filters for groups

* Add support for groups included in filters shape

* Add UT for groups in weights shape

* Add helper function to check if groups are in filters shape

* Review fix I

* Remove unused variables

* Skip test on PlaidML

* Review fix II

* Internally store groups as Dimension instead of size_t

* Review fix III

* Add updating of m_groups in pre_validate_and_infer_types()
parent 6c31d28f
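With this change, GroupConvolution accepts filters in two layouts: the original 4-D layout combined with an explicit groups argument, and a 5-D layout in which the group count occupies the first filter dimension and is inferred by the op. A minimal usage sketch of the new constructor (shapes mirror the unit test added below; not part of the commit itself):

auto data = make_shared<op::Parameter>(element::f32, Shape{1, 4, 2, 2});
// Filters with groups folded into the shape: [groups, OC/groups, IC/groups, kH, kW].
auto filters = make_shared<op::Parameter>(element::f32, Shape{2, 1, 2, 1, 1});
// No explicit groups argument: groups == 2 is read from filters_shape[0].
auto group_conv = make_shared<op::GroupConvolution>(data,
                                                    filters,
                                                    Strides{1, 1},
                                                    Strides{1, 1},
                                                    CoordinateDiff{0, 0},
                                                    CoordinateDiff{0, 0},
                                                    Strides{1, 1});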
@@ -18,6 +18,8 @@
#include "group_conv.hpp"
#include "ngraph/builder/reshape.hpp"
#include "ngraph/builder/split.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/slice.hpp"
@@ -49,23 +51,53 @@ op::GroupConvolution::GroupConvolution(const Output<Node>& data_batch,
constructor_validate_and_infer_types();
}
op::GroupConvolution::GroupConvolution(const Output<Node>& data_batch,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const PadType& pad_type)
: FusedOp({data_batch, filters})
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
, m_padding_above(padding_above)
, m_data_dilation_strides(data_dilation_strides)
, m_groups(filters.get_partial_shape().rank().is_dynamic() ? Dimension::dynamic()
: filters.get_partial_shape()[0])
, m_pad_type(pad_type)
{
constructor_validate_and_infer_types();
}
void op::GroupConvolution::pre_validate_and_infer_types()
{
auto data_shape = get_input_partial_shape(0);
auto filters_shape = get_input_partial_shape(1);
if (data_shape.is_static() && filters_shape.is_static())
{
// Update groups
if (has_groups_in_filters_shape())
{
m_groups = get_input_partial_shape(1)[0];
}
// Data channels
NODE_VALIDATION_CHECK(this,
- data_shape.to_shape()[1] % m_groups == 0,
+ data_shape.to_shape()[1] % get_groups() == 0,
"Data channels not a multiple of group size");
// Output channels
NODE_VALIDATION_CHECK(this,
- filters_shape.to_shape()[0] % m_groups == 0,
+ filters_shape.to_shape()[0] % get_groups() == 0,
"# Filters not a multiple of group size");
// Input Filters
NODE_VALIDATION_CHECK(this,
- filters_shape.to_shape()[1] * m_groups == data_shape.to_shape()[1],
+ (filters_shape.to_shape()[has_groups_in_filters_shape() ? 2 : 1] *
+  get_groups()) == data_shape.to_shape()[1],
"Incorrect number of channels per filter");
}
}
@@ -95,20 +127,35 @@ void op::GroupConvolution::post_validate_and_infer_types()
Shape op::GroupConvolution::get_weights_dimensions() const
{
auto data_shape = get_input_shape(0);
auto weights_shape = get_input_shape(1);
// check if the weights shape already includes groups
if (has_groups_in_filters_shape())
{
return weights_shape;
}
// reshape weights into a 5-D tensor that includes groups
const size_t OC = 0;
const size_t OC_IN_OUTPUT = 1;
const size_t IC = 1;
- Shape weights_shape_groups{get_input_shape(1)};
+ Shape weights_shape_groups{weights_shape};
// adjust output and input channels given the number of groups
weights_shape_groups.at(OC) = get_shape().at(OC_IN_OUTPUT) / get_groups();
- weights_shape_groups.at(IC) = get_input_shape(0).at(IC) / get_groups();
+ weights_shape_groups.at(IC) = data_shape.at(IC) / get_groups();
// push_front the number of groups
weights_shape_groups.insert(weights_shape_groups.begin(), get_groups());
return weights_shape_groups;
}
size_t ngraph::op::GroupConvolution::get_groups() const
{
NODE_VALIDATION_CHECK(this,
m_groups.is_static(),
"get_groups() can only be called if the number of groups is static.");
return static_cast<size_t>(m_groups);
}
shared_ptr<Node> op::GroupConvolution::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 2)
@@ -131,38 +178,30 @@ NodeVector op::GroupConvolution::decompose_op() const
{
auto data = input_value(0);
auto filters = input_value(1);
+ auto filters_shape = get_input_shape(1);
// Split one convolution op into N ops, where N is the number of groups,
// and concat the results after computation.
// reference:
// https://github.com/NervanaSystems/ngraph-mxnet/blob/fdd692/src/ngraph/ngraph_emitter.cc#L822-L856
- std::size_t n_data_channels{data.get_shape().at(1)};
- std::size_t n_filters_channels{filters.get_shape().at(0)};
- std::size_t data_group_size{n_data_channels / m_groups};
- std::size_t filters_group_size{n_filters_channels / m_groups};
NodeVector convolution_nodes;
- // initial bounds for splice
- std::vector<std::size_t> data_lower_bounds(data.get_shape().size());
- std::vector<std::size_t> data_upper_bounds{data.get_shape()};
- std::vector<std::size_t> filters_lower_bounds(filters.get_shape().size());
- std::vector<std::size_t> filters_upper_bounds{filters.get_shape()};
- for (std::size_t group{0}; group < m_groups; ++group)
+ // slice data
+ auto sliced_data = builder::split(data, get_groups(), 1);
+ // slice filters
+ auto sliced_filters = builder::split(filters, get_groups(), 0);
+ for (std::size_t group{0}; group < get_groups(); ++group)
{
- // slice data
- data_lower_bounds[1] = group * data_group_size;
- data_upper_bounds[1] = (group + 1) * data_group_size;
- auto sliced_data =
-     std::make_shared<ngraph::op::Slice>(data, data_lower_bounds, data_upper_bounds);
- // slice filters
- filters_lower_bounds[0] = group * filters_group_size;
- filters_upper_bounds[0] = (group + 1) * filters_group_size;
- auto sliced_filters = std::make_shared<ngraph::op::Slice>(
-     filters, filters_lower_bounds, filters_upper_bounds);
+ auto sliced_filter = sliced_filters[group];
+ if (has_groups_in_filters_shape())
+ {
+     // Remove group dimension after slicing
+     sliced_filter = builder::reshape(
+         sliced_filters[group],
+         Shape(std::next(std::begin(filters_shape), 1), std::end(filters_shape)));
+ }
convolution_nodes.push_back(
- std::make_shared<ngraph::op::Convolution>(sliced_data,
-                                           sliced_filters,
+ std::make_shared<ngraph::op::Convolution>(sliced_data[group],
+                                           sliced_filter,
m_window_movement_strides,
m_window_dilation_strides,
m_padding_below,
@@ -179,3 +218,10 @@ void op::GroupConvolution::generate_adjoints(autodiff::Adjoints& /* adjoints */,
{
throw ngraph_error("NYI");
}
bool ngraph::op::GroupConvolution::has_groups_in_filters_shape() const
{
// If filters_rank is (data_rank + 1), then filters are divided by groups on first
// dim.
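// e.g. 4-D [N, C, H, W] data with 5-D [groups, OC/groups, IC/groups, kH, kW] filters.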
return ((get_input_shape(0).size() + 1) == get_input_shape(1).size());
}
@@ -43,6 +43,15 @@ namespace ngraph
const size_t groups,
const PadType& pad_type = PadType::EXPLICIT);
// Constructor which accepts groups included in the filters shape.
GroupConvolution(const Output<Node>& data_batch,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const PadType& pad_type = PadType::EXPLICIT);
Shape get_weights_dimensions() const;
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
@@ -51,7 +60,7 @@ namespace ngraph
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
Output<Node> get_filters() { return input_value(1); }
Output<Node> get_data_batch() { return input_value(0); }
- size_t get_groups() const { return m_groups; }
+ size_t get_groups() const;
const PadType& get_pad_type() const { return m_pad_type; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
@@ -70,8 +79,11 @@ namespace ngraph
CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
- size_t m_groups{0};
+ Dimension m_groups;
PadType m_pad_type{PadType::NOTSET};
private:
bool has_groups_in_filters_shape() const;
};
}
}
@@ -263,6 +263,9 @@ dot_2x0_0
auto_bcast_binary_elementwise
max_pool_2d_1channel_1image_overpadded
# node validation error: "Applying function, tensor with mismatching dimensionality: F, expected=4, got=5"
group_conv_groups_included_in_shape
# passes locally, fails in CI
numeric_float_nan
fake_quantize_with_clip_across_channels
......
@@ -540,6 +540,33 @@ NGRAPH_TEST(${BACKEND_NAME}, group_conv_input_data_variation)
EXPECT_EQ(expected, read_vector<float>(result0));
}
NGRAPH_TEST(${BACKEND_NAME}, group_conv_groups_included_in_shape)
{
auto data = make_shared<op::Parameter>(element::f32, Shape{1, 4, 2, 2});
auto filters = make_shared<op::Parameter>(element::f32, Shape{2, 1, 2, 1, 1});
auto group_conv = make_shared<op::GroupConvolution>(data,
filters,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
auto f0 = make_shared<Function>(NodeVector{group_conv}, ParameterVector{data, filters});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, Shape{1, 4, 2, 2});
copy_data(a, vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
auto b = backend->create_tensor(element::f32, Shape{2, 1, 2, 1, 1});
copy_data(b, vector<float>{1, 2, 3, 4});
auto result0 = backend->create_tensor(element::f32, Shape{1, 2, 2, 2});
auto handle = backend->compile(f0);
handle->call_with_validate({result0}, {a, b});
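// With 1x1 kernels: group 0 output = 1*ch0 + 2*ch1; group 1 output = 3*ch2 + 4*ch3.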
vector<float> expected{11, 14, 17, 20, 79, 86, 93, 100};
EXPECT_EQ(expected, read_vector<float>(result0));
}
NGRAPH_TEST(${BACKEND_NAME}, space_to_depth)
{
auto A = make_shared<op::Parameter>(element::f32, Shape{1, 2, 4, 4});
......