Commit 9937f8b5 authored by Denise Kutnick, committed by Scott Cyphers

[r0.25] PlaidML Grouped Convolution Implementation (#3399)

* adding group convolution implementation for plaidml

* update CMakeLists.txt to include new group convolution files

* adding new ngraph unit tests for grouped convolutions

* adding group conv unit test back in for plaidml backend

* specify group convolution support in plaidml

* add denise as codeowner for plaidml runtime dir

* remove commented-out lines of unit test manifest

* style changes

* resolve all discussions in PR

* skip data dilation unit test on cpu backend
parent 1a1edef9
@@ -55,7 +55,7 @@ project/doc-contributor-README.rst @indie
 /src/ngraph/runtime/hybrid/ @rkimballn1
 /src/ngraph/runtime/intelgpu/ @dmyershov
 /src/ngraph/runtime/interpreter/ @rkimballn1
-/src/ngraph/runtime/plaidml/ @earhart
+/src/ngraph/runtime/plaidml/ @earhart @dgkutnic
 /src/ngraph/runtime/reference/ @aprocter
 /src/ngraph/runtime/reference/allreduce.*pp @wenzhe-nrv @aprocter
 /src/ngraph/type/ @diyessi
@@ -10,3 +10,6 @@ max_3d_to_scalar_int32
 # Not implemented
 send_recv
 send_recv_ring
+
+# param not supported in CPU backend
+group_conv_data_dilation
@@ -33,6 +33,7 @@ set(SRC
     plaidml_ops_convolution.cpp
     plaidml_ops_dot.cpp
     plaidml_ops_general.cpp
+    plaidml_ops_group_convolution.cpp
     plaidml_ops_implicit_broadcast.cpp
     plaidml_ops_index_reduction.cpp
     plaidml_ops_io.cpp
@@ -17,6 +17,7 @@
 #include "ngraph/runtime/plaidml/plaidml_compiler.hpp"
 #include "ngraph/graph_util.hpp"
 #include "ngraph/log.hpp"
+#include "ngraph/op/fused/group_conv.hpp"
 #include "ngraph/pass/algebraic_simplification.hpp"
 #include "ngraph/pass/core_fusion.hpp"
 #include "ngraph/pass/cse.hpp"
@@ -66,7 +67,7 @@ namespace
             PLAIDML_DEBUG << "Retire tensor: " << t;
         }
     }
-}
+} // namespace
 
 ngraph::runtime::plaidml::Compiler::Compiler(Config* config)
     : m_config{config}
@@ -87,7 +88,11 @@ std::shared_ptr<ngraph::runtime::plaidml::PlaidML_Executable>
     pass_manager.set_per_pass_validation(false);
 
     // We apply the same general-purpose passes as the CPU backend.
-    pass_manager.register_pass<ngraph::pass::FusedOpDecomposition>();
+    pass_manager.register_pass<ngraph::pass::FusedOpDecomposition>(
+        [](const Node& node) -> bool {
+            // Keep GroupConvolution fused; this backend lowers it directly.
+            return node.description() == ngraph::op::GroupConvolution().description();
+        });
     pass_manager.register_pass<ngraph::pass::LikeReplacement>();
     pass_manager.register_pass<ngraph::pass::NopElimination>();
     pass_manager.register_pass<ngraph::pass::ZeroDimTensorElimination>();
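The predicate form of FusedOpDecomposition acts as a backend veto: when the callback returns true for a node, the pass leaves that fused op intact for the backend to lower natively, instead of expanding it into primitive ops. A minimal standalone sketch of the same pattern follows; the types and names here are hypothetical simplifications, not the actual nGraph pass machinery.

// Sketch: a backend-supplied predicate selecting fused ops to keep.
// Hypothetical, simplified types; the real pass compares node.description().
#include <functional>
#include <string>
#include <unordered_set>

using NodePredicate = std::function<bool(const std::string&)>;

// Ops this backend lowers natively; anything else gets decomposed.
static const std::unordered_set<std::string> native_fused_ops{"GroupConvolution"};

NodePredicate make_backend_predicate()
{
    return [](const std::string& description) -> bool {
        return native_fused_ops.count(description) != 0;
    };
}

Keeping the check keyed on a set rather than a single comparison is one way such a predicate scales as more fused ops gain native lowerings.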
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/except.hpp"
#include "ngraph/log.hpp"
#include "ngraph/op/fused/group_conv.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace plaidml
        {
            NGRAPH_PLAIDML_OP_CLASS(ImplGroupConvolution, OpImpl<::ngraph::op::GroupConvolution>);
        }
    } // namespace runtime
} // namespace ngraph

// GroupConvolution implements a grouped convolution, with optional striding, padding, and dilation.
void ngraph::runtime::plaidml::ImplGroupConvolution::Apply()
{
    this->check_inputs(2);
    this->check_outputs(1);

    const auto& image = op_input(0);
    const auto& filter = op_input(1);

    // Number of spatial dimensions (inputs are N, C, spatial...); the Tile
    // function below handles the two-dimensional case.
    auto rank = op().get_input_shape(0).size() - 2;

    const auto& groups = op().get_groups();
    const auto& padding_above = op().get_padding_above();
    const auto& padding_below = op().get_padding_below();
    const auto& strides = op().get_window_movement_strides();
    const auto& filter_dilation = op().get_window_dilation_strides();
    const auto& data_dilation = op().get_data_dilation_strides();

    // Bind each integer attribute to a PlaidML variable so it can be passed
    // as a scalar input to the Tile function below.
    const auto& grps = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(groups));
    const auto& dd0 = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(data_dilation[0]));
    const auto& dd1 = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(data_dilation[1]));
    const auto& fd0 = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(filter_dilation[0]));
    const auto& fd1 = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(filter_dilation[1]));
    const auto& pxb = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(padding_below[0]));
    const auto& pyb = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(padding_below[1]));
    const auto& pxa = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(padding_above[0]));
    const auto& pya = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(padding_above[1]));
    const auto& sx = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(strides[0]));
    const auto& sy = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(strides[1]));

    // Tile contraction: output channel (CO/G) * g + co reads only group g's
    // CI/G input channels; "co < CO/G" bounds the per-group channel index.
    this->set_output(::vertexai::plaidml::function{R"(
        function (I[N, CI, XI0, XI1], F[CO, FCI, XF0, XF1], DD0, DD1, FD0, FD1, G, PXB, PYB, PXA, PYA, SX, SY) -> (O) {
            O[n, (CO/G) * g + co, x, y: N, CO, ((DD0 * (XI0 - 1) + PXA + PXB) - (FD0 * (XF0 - 1)) + SX) / SX, ((DD1 * (XI1 - 1) + PYA + PYB) - (FD1 * (XF1 - 1)) + SY) / SY] =
                +(I[n, (CI/G) * g + ci, (x + FD0 * xf0 - PXB)/DD0, (y + FD1 * xf1 - PYB)/DD1] * F[(CO/G) * g + co, ci, xf0, xf1]), co < CO/G;
        })"}(image, filter, dd0, dd1, fd0, fd1, grps, pxb, pyb, pxa, pya, sx, sy));
}
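
The output extents in the Tile contraction follow the standard convolution shape formula. A hypothetical standalone helper (not part of the commit; names mirror the Tile inputs) showing the same arithmetic for one spatial dimension:

#include <cstdint>

// Output extent for one spatial dimension, matching the Tile expression above:
//   dilated input  = DD * (XI - 1) + 1
//   dilated filter = FD * (XF - 1) + 1
//   out = (dilated_input + pad_total - dilated_filter) / stride + 1
//       = ((DD * (XI - 1) + PXA + PXB) - FD * (XF - 1) + SX) / SX
int64_t conv_output_extent(int64_t XI, int64_t XF, int64_t DD, int64_t FD,
                           int64_t PB, int64_t PA, int64_t S)
{
    return ((DD * (XI - 1) + PA + PB) - FD * (XF - 1) + S) / S;
}

For example, a 3x3 filter over a 224-wide input with stride 2, padding 1, and no dilation gives ((1 * 223 + 1 + 1) - 1 * 2 + 2) / 2 = 112, the familiar halving.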
......@@ -178,7 +178,6 @@ conv_bias_2d
conv_bias_3d
conv_bias_bprop_2d
conv_bias_add_2d
group_conv
space_to_depth
depth_to_space
normalize_across_chw_scalar_scale_4d
......@@ -278,8 +277,6 @@ lstm_cell_no_bias_no_peepholes
lstm_cell_bias_peepholes
lstm_cell_bias_peepholes_clip_input_forget
lstm_cell_activaction_functions
group_conv_transpose
group_conv_transpose_output_shape
divide_python_rounding_int32
backwards_batchmatmul_tensor2_tensor2
This diff is collapsed.