Commit 9937f8b5 authored by Denise Kutnick, committed by Scott Cyphers

[r0.25] PlaidML Grouped Convolution Implementation (#3399)

* adding group convolution implementation for plaidml

* update CMakeLists.txt to include new group convolution files

* adding new ngraph unit tests for grouped convolutions

* adding group conv unit test back in for plaidml backend

* specify group convolution support in plaidml

* add denise as codeowner for plaidml runtime dir

* remove commented-out lines of unit test manifest

* style changes

* resolve all discussions in PR

* skip data dilation unit test on cpu backend
parent 1a1edef9
@@ -55,7 +55,7 @@ project/doc-contributor-README.rst @indie
 /src/ngraph/runtime/hybrid/ @rkimballn1
 /src/ngraph/runtime/intelgpu/ @dmyershov
 /src/ngraph/runtime/interpreter/ @rkimballn1
-/src/ngraph/runtime/plaidml/ @earhart
+/src/ngraph/runtime/plaidml/ @earhart @dgkutnic
 /src/ngraph/runtime/reference/ @aprocter
 /src/ngraph/runtime/reference/allreduce.*pp @wenzhe-nrv @aprocter
 /src/ngraph/type/ @diyessi
@@ -10,3 +10,6 @@ max_3d_to_scalar_int32
 # Not implemented
 send_recv
 send_recv_ring
+
+# param not supported in CPU backend
+group_conv_data_dilation
@@ -33,6 +33,7 @@ set(SRC
     plaidml_ops_convolution.cpp
     plaidml_ops_dot.cpp
     plaidml_ops_general.cpp
+    plaidml_ops_group_convolution.cpp
     plaidml_ops_implicit_broadcast.cpp
     plaidml_ops_index_reduction.cpp
     plaidml_ops_io.cpp
@@ -17,6 +17,7 @@
 #include "ngraph/runtime/plaidml/plaidml_compiler.hpp"
 #include "ngraph/graph_util.hpp"
 #include "ngraph/log.hpp"
+#include "ngraph/op/fused/group_conv.hpp"
 #include "ngraph/pass/algebraic_simplification.hpp"
 #include "ngraph/pass/core_fusion.hpp"
 #include "ngraph/pass/cse.hpp"
@@ -66,7 +67,7 @@ namespace
             PLAIDML_DEBUG << "Retire tensor: " << t;
         }
     }
-}
+} // namespace
 
 ngraph::runtime::plaidml::Compiler::Compiler(Config* config)
     : m_config{config}
@@ -87,7 +88,11 @@ std::shared_ptr<ngraph::runtime::plaidml::PlaidML_Executable>
     pass_manager.set_per_pass_validation(false);
 
     // We apply the same general-purpose passes as the CPU backend.
-    pass_manager.register_pass<ngraph::pass::FusedOpDecomposition>();
+    pass_manager.register_pass<ngraph::pass::FusedOpDecomposition>(
+        [](const Node& node) -> bool {
+            // Keep GroupConvolution fused; this backend lowers it directly.
+            return node.description() == ngraph::op::GroupConvolution().description();
+        });
     pass_manager.register_pass<ngraph::pass::LikeReplacement>();
     pass_manager.register_pass<ngraph::pass::NopElimination>();
     pass_manager.register_pass<ngraph::pass::ZeroDimTensorElimination>();
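The predicate form of FusedOpDecomposition acts as a backend veto: when the callback returns true for a node, the pass leaves that fused op intact for the backend to lower natively, instead of expanding it into primitive ops. A minimal standalone sketch of the same pattern follows; the types and names here are hypothetical simplifications, not the actual nGraph pass machinery.

// Sketch: a backend-supplied predicate selecting fused ops to keep.
// Hypothetical, simplified types; the real pass compares node.description().
#include <functional>
#include <string>
#include <unordered_set>

using NodePredicate = std::function<bool(const std::string&)>;

// Ops this backend lowers natively; anything else gets decomposed.
static const std::unordered_set<std::string> native_fused_ops{"GroupConvolution"};

NodePredicate make_backend_predicate()
{
    return [](const std::string& description) -> bool {
        return native_fused_ops.count(description) != 0;
    };
}

Keeping the check keyed on a set rather than a single comparison is one way such a predicate scales as more fused ops gain native lowerings.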
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/except.hpp"
#include "ngraph/log.hpp"
#include "ngraph/op/fused/group_conv.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace plaidml
        {
            NGRAPH_PLAIDML_OP_CLASS(ImplGroupConvolution, OpImpl<::ngraph::op::GroupConvolution>);
        }
    } // namespace runtime
} // namespace ngraph

// GroupConvolution implements a grouped convolution, with optional striding, padding, and dilation.
void ngraph::runtime::plaidml::ImplGroupConvolution::Apply()
{
    this->check_inputs(2);
    this->check_outputs(1);

    const auto& image = op_input(0);
    const auto& filter = op_input(1);

    // Number of spatial dimensions (inputs are N, C, spatial...); the Tile
    // function below handles the two-dimensional case.
    auto rank = op().get_input_shape(0).size() - 2;

    const auto& groups = op().get_groups();
    const auto& padding_above = op().get_padding_above();
    const auto& padding_below = op().get_padding_below();
    const auto& strides = op().get_window_movement_strides();
    const auto& filter_dilation = op().get_window_dilation_strides();
    const auto& data_dilation = op().get_data_dilation_strides();

    // Bind each integer attribute to a PlaidML variable so it can be passed
    // as a scalar input to the Tile function below.
    const auto& grps = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(groups));
    const auto& dd0 = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(data_dilation[0]));
    const auto& dd1 = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(data_dilation[1]));
    const auto& fd0 = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(filter_dilation[0]));
    const auto& fd1 = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(filter_dilation[1]));
    const auto& pxb = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(padding_below[0]));
    const auto& pyb = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(padding_below[1]));
    const auto& pxa = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(padding_above[0]));
    const auto& pya = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(padding_above[1]));
    const auto& sx = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(strides[0]));
    const auto& sy = static_cast<::vertexai::plaidml::variable>(static_cast<int64_t>(strides[1]));

    // Tile contraction: output channel (CO/G) * g + co reads only group g's
    // CI/G input channels; "co < CO/G" bounds the per-group channel index.
    this->set_output(::vertexai::plaidml::function{R"(
        function (I[N, CI, XI0, XI1], F[CO, FCI, XF0, XF1], DD0, DD1, FD0, FD1, G, PXB, PYB, PXA, PYA, SX, SY) -> (O) {
            O[n, (CO/G) * g + co, x, y: N, CO, ((DD0 * (XI0 - 1) + PXA + PXB) - (FD0 * (XF0 - 1)) + SX) / SX, ((DD1 * (XI1 - 1) + PYA + PYB) - (FD1 * (XF1 - 1)) + SY) / SY] =
                +(I[n, (CI/G) * g + ci, (x + FD0 * xf0 - PXB)/DD0, (y + FD1 * xf1 - PYB)/DD1] * F[(CO/G) * g + co, ci, xf0, xf1]), co < CO/G;
        })"}(image, filter, dd0, dd1, fd0, fd1, grps, pxb, pyb, pxa, pya, sx, sy));
}
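
The output extents in the Tile contraction follow the standard convolution shape formula. A hypothetical standalone helper (not part of the commit; names mirror the Tile inputs) showing the same arithmetic for one spatial dimension:

#include <cstdint>

// Output extent for one spatial dimension, matching the Tile expression above:
//   dilated input  = DD * (XI - 1) + 1
//   dilated filter = FD * (XF - 1) + 1
//   out = (dilated_input + pad_total - dilated_filter) / stride + 1
//       = ((DD * (XI - 1) + PXA + PXB) - FD * (XF - 1) + SX) / SX
int64_t conv_output_extent(int64_t XI, int64_t XF, int64_t DD, int64_t FD,
                           int64_t PB, int64_t PA, int64_t S)
{
    return ((DD * (XI - 1) + PA + PB) - FD * (XF - 1) + S) / S;
}

For example, a 3x3 filter over a 224-wide input with stride 2, padding 1, and no dilation gives ((1 * 223 + 1 + 1) - 1 * 2 + 2) / 2 = 112, the familiar halving.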
......@@ -178,7 +178,6 @@ conv_bias_2d
conv_bias_3d
conv_bias_bprop_2d
conv_bias_add_2d
group_conv
space_to_depth
depth_to_space
normalize_across_chw_scalar_scale_4d
......@@ -278,8 +277,6 @@ lstm_cell_no_bias_no_peepholes
lstm_cell_bias_peepholes
lstm_cell_bias_peepholes_clip_input_forget
lstm_cell_activaction_functions
group_conv_transpose
group_conv_transpose_output_shape
divide_python_rounding_int32
backwards_batchmatmul_tensor2_tensor2
This diff is collapsed.