Commit 43a1bf1c authored by Nagy Mostafa, committed by Scott Cyphers

[MLIR] Add padding support to Convolution (#3586)

* Enable Padding

* Small fix to match MLIR API change

* WIP

* Subtract pad below from virtual img index

* Fixed integer set inequalities

* style-apply

* remove unneeded padAbove values

* Remove dump call

* PR feedback

* style apply
parent 1b8cda81
@@ -790,16 +790,6 @@ namespace
auto padBelow = convolOp.padBelow().getValue();
auto padAbove = convolOp.padAbove().getValue();
for (auto value : llvm::zip(padBelow, padAbove))
{
auto padAttr = std::get<0>(value);
NGRAPH_CHECK(padAttr.cast<IntegerAttr>().getInt() == 0,
"No support for padding in convolution op");
padAttr = std::get<1>(value);
NGRAPH_CHECK(padAttr.cast<IntegerAttr>().getInt() == 0,
"No support for padding in convolution op");
}
Type elemTy = images->getType().cast<MemRefType>().getElementType();
// Let Images shape be [N, C_IN, D_1, ... D_f]
@@ -839,11 +829,13 @@ namespace
// Output[n, k, r_1, .. r_f] +=
// Images[n, c, i_1 + j_1, .. i_f + j_f] * Filters[k, c, j_1, .. j_f]
// TODO: With padding, we need to check (using IntegerSets) whether each spatial dim in
// Images lies within the padding.
// If yes, we init the value to zero, else load from the MemRef.
// Q: Can this be done using a map from the padded tensor to the unpadded one? Will we
// load zero if OOB?
// With padding, we check (using IntegerSets) whether each spatial dim in Images lies
// inside the non-padded spatial region. If true, we perform the computation:
//
// for <j_1 .. j_f> : <0 .. 0> -> <F_1 .. F_f>
// if(indices in non-padded region):
// Output[n, k, r_1, .. r_f] +=
// Images[n, c, i_1 + j_1, .. i_f + j_f] * Filters[k, c, j_1, .. j_f]
// Create view to write into result.
MemRefView vRes(result), vImages(images), vFilters(filters);
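The integer-set check described above reduces, per spatial dimension, to a simple interval test. A minimal standalone C++ sketch of that predicate (the function name and scalar form are illustrative and not part of the lowering, which builds the equivalent affine constraints instead):

#include <cstdint>

// A virtual (padded-image) index idx lies in the non-padded region iff it is at or
// past the below-padding and strictly before the above-padding. These are exactly
// the two inequalities the IntegerSet below encodes.
bool inNonPaddedRegion(int64_t idx, int64_t padBelow, int64_t imgLb, int64_t imgUb)
{
    return (idx - padBelow - imgLb >= 0) &&   // expr1: past pad-below
           (padBelow + imgUb - idx - 1 >= 0); // expr2: before pad-above
}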
@@ -868,6 +860,9 @@ namespace
auto resSpatialIndices = makeIndexHandles(spatialRank);
auto resSpatialIndicesPtrs = makeIndexHandlePointers(resSpatialIndices);
SmallVector<int64_t, 4> resSteps, filtersSteps;
SmallVector<int, 4> padBelowIntValues;
bool withPadding = false;
for (auto i = 0; i < spatialRank; i++)
{
// result spatial bounds and steps
@@ -877,6 +872,22 @@ namespace
// image spatial bounds
imgSpatialLbs.push_back(vImages.lb(i + 2));
imgSpatialUbs.push_back(vImages.ub(i + 2));
// Check if we have any padding and collect pad values
IntegerAttr iAttr = padBelow[i].cast<IntegerAttr>();
int padValue = iAttr.getInt();
if (padValue)
{
withPadding = true;
}
padBelowIntValues.push_back(padValue);
iAttr = padAbove[i].cast<IntegerAttr>();
padValue = iAttr.getInt();
if (padValue)
{
withPadding = true;
}
}
NGRAPH_CHECK(vImages.rank() == vFilters.rank(), "Images and Filters have unequal ranks");
@@ -895,6 +906,42 @@ namespace
filtersSteps.push_back(vFilters.step(i + 2));
}
IntegerSet nonPaddedRange;
if (withPadding)
{
// Create affine expressions and IntegerSet
// IntegerSet (d0, d1, .. d_N-1)[LB_0, LB_1, .. LB_N-1, UB_0, UB_1, .. UB_N-1], where
// for each dim:
// (d_dim - padBelow[dim] - LB_dim >= 0),
// (padBelow[dim] + UB_dim - d_dim - 1 >= 0)
SmallVector<AffineExpr, 4> affineExprs;
// Bools indicating whether each expr is an equality (true) or an inequality (false)
SmallVector<bool, 4> isEq;
for (unsigned dim = 0; dim < spatialRank; dim++)
{
// i_dim
auto dimExpr = rewriter.getAffineDimExpr(dim);
auto imgLbExpr = rewriter.getAffineSymbolExpr(dim);
// expr1 : i_dim - padBelow[dim] - imgLB >= 0
auto padBelowExpr = rewriter.getAffineConstantExpr(padBelowIntValues[dim]);
affineExprs.push_back(dimExpr - padBelowExpr - imgLbExpr);
isEq.push_back(false);
// expr2: padBelow[dim] + imgUB - i_dim - 1 >= 0
auto imgUbExpr = rewriter.getAffineSymbolExpr(spatialRank + dim);
auto oneExpr = rewriter.getAffineConstantExpr(1);
affineExprs.push_back(padBelowExpr + imgUbExpr - dimExpr - oneExpr);
isEq.push_back(false);
}
NGRAPH_CHECK(affineExprs.size() == isEq.size() && isEq.size() == 2 * spatialRank,
"Invalid number of expressions in the IntegerSet");
nonPaddedRange =
rewriter.getIntegerSet(spatialRank, 2 * spatialRank, affineExprs, isEq);
}
// Initialize output to zero
{
IndexHandle n, k, c;
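To see concretely what the set admits: for one spatial dimension with imgLb = 0, imgUb = 2, and padBelow = 1 (the setup of the 5x5 test at the bottom of this diff), a quick sketch enumerating the virtual index range 0..4 (standalone and illustrative, not part of the commit):

#include <cstdio>

int main()
{
    const long padBelow = 1, imgLb = 0, imgUb = 2; // extent-2 image, pad {1} below, {2} above
    for (long idx = 0; idx < 5; idx++)             // virtual (padded) index range
    {
        bool inside = (idx - padBelow - imgLb >= 0) && (padBelow + imgUb - idx - 1 >= 0);
        std::printf("idx %ld: %s\n", idx, inside ? "load Images[idx - padBelow]" : "padding -> 0");
    }
    return 0; // prints: only idx 1 and 2 fall in the non-padded region
}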
@@ -949,6 +996,8 @@ namespace
filtersSteps)([&] {
SmallVector<IndexHandle, 4> imgIndices, filtersIndices;
// Image indices
// Here we compute the virtual start index into the padded image.
imgIndices.push_back(n);
imgIndices.push_back(c);
for (auto i = 0; i < spatialRank; i++)
@@ -963,8 +1012,46 @@ namespace
filtersSpatialIndices.begin(),
filtersSpatialIndices.end());
iRes(resIndices) =
iRes(resIndices) + (iImages(imgIndices) * iFilters(filtersIndices));
if (withPadding)
{
// affine.if args: img spatial dims, img spatial lbs, img spatial ubs
SmallVector<IndexHandle, 4>::iterator it = imgIndices.begin();
std::advance(it, 2);
SmallVector<Value*, 4> affineIfArgs(it, imgIndices.end());
affineIfArgs.insert(
affineIfArgs.end(), imgSpatialLbs.begin(), imgSpatialLbs.end());
affineIfArgs.insert(
affineIfArgs.end(), imgSpatialUbs.begin(), imgSpatialUbs.end());
auto affineIfOp =
rewriter.create<AffineIfOp>(rewriter.getUnknownLoc(),
nonPaddedRange,
affineIfArgs,
/*withElseRegion=*/false);
{
auto rewriter = affineIfOp.getThenBodyBuilder();
ScopedContext scope(rewriter, loc);
// We must subtract pad below before img load, since the
// physical image is not padded
SmallVector<IndexHandle, 4> adjustedImgIndices;
adjustedImgIndices.push_back(n);
adjustedImgIndices.push_back(c);
for (auto i = 0; i < spatialRank; i++)
{
adjustedImgIndices.push_back(IndexHandle(
imgIndices[2 + i] -
intrinsics::constant_index(padBelowIntValues[i])));
}
iRes(resIndices) =
iRes(resIndices) +
(iImages(adjustedImgIndices) * iFilters(filtersIndices));
}
}
else
{
iRes(resIndices) = iRes(resIndices) +
(iImages(imgIndices) * iFilters(filtersIndices));
}
});
});
});
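Putting the pieces together, the generated loop nest computes the equivalent of the following plain-C++ reference for the 2-D spatial case (a sketch assuming NCHW/KCHW layouts and the unit strides and dilations this lowering requires; all names are illustrative):

#include <cstdint>
#include <vector>

// Reference for a padded 2-D convolution. Indices ih/iw are virtual (padded-image)
// indices; the actual load subtracts padBelow because the physical image is not padded.
// Output extents are RH = H + padBelowH + padAboveH - FH + 1 (likewise for RW).
void convRef(const std::vector<float>& images,  // [N, C, H, W]
             const std::vector<float>& filters, // [K, C, FH, FW]
             std::vector<float>& output,        // [N, K, RH, RW]
             int64_t N, int64_t C, int64_t H, int64_t W,
             int64_t K, int64_t FH, int64_t FW,
             int64_t padBelowH, int64_t padBelowW,
             int64_t RH, int64_t RW)
{
    for (int64_t n = 0; n < N; n++)
        for (int64_t k = 0; k < K; k++)
            for (int64_t rh = 0; rh < RH; rh++)
                for (int64_t rw = 0; rw < RW; rw++)
                {
                    float sum = 0.0f;
                    for (int64_t c = 0; c < C; c++)
                        for (int64_t jh = 0; jh < FH; jh++)
                            for (int64_t jw = 0; jw < FW; jw++)
                            {
                                // Virtual start index into the padded image.
                                int64_t ih = rh + jh, iw = rw + jw;
                                // affine.if: load only inside the non-padded region;
                                // padded positions contribute zero.
                                if (ih - padBelowH >= 0 && ih - padBelowH < H &&
                                    iw - padBelowW >= 0 && iw - padBelowW < W)
                                {
                                    sum += images[((n * C + c) * H + (ih - padBelowH)) * W +
                                                  (iw - padBelowW)] *
                                           filters[((k * C + c) * FH + jh) * FW + jw];
                                }
                            }
                    output[((n * K + k) * RH + rh) * RW + rw] = sum;
                }
}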
@@ -446,12 +446,9 @@ bool MLIRSubgraphExtractionPass::is_supported_mlir_op(std::shared_ptr<Node> node
auto data_dilation = conv_node->get_data_dilation_strides();
auto window_dilation = conv_node->get_window_dilation_strides();
auto is_zero = [](size_t s) { return s == 0; };
auto is_one = [](size_t s) { return s == 1; };
return std::all_of(pad_below.begin(), pad_below.end(), is_zero) &&
std::all_of(pad_above.begin(), pad_above.end(), is_zero) &&
std::all_of(data_dilation.begin(), data_dilation.end(), is_one) &&
return std::all_of(data_dilation.begin(), data_dilation.end(), is_one) &&
std::all_of(window_dilation.begin(), window_dilation.end(), is_one);
}
@@ -99,3 +99,40 @@ NGRAPH_TEST(${BACKEND_NAME}, convolution_simple)
handle->call_with_validate({result}, {a, b});
EXPECT_TRUE(test::all_close_f(vector<float>{expected_result}, read_vector<float>(result)));
}
NGRAPH_TEST(${BACKEND_NAME}, convolution_simple_padding)
{
Shape shape_a{1, 1, 2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_b{1, 1, 1, 1};
auto B = make_shared<op::Parameter>(element::f32, shape_b);
Shape shape_r{1, 1, 5, 5};
auto conv1 = make_shared<op::Convolution>(A,
B,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{1, 1},
CoordinateDiff{2, 2},
Strides{1, 1});
auto f = make_shared<Function>(conv1, ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a, vector<float>{1.0f, 2.0f, 3.0f, 4.0f});
auto b = backend->create_tensor(element::f32, shape_b);
copy_data(b, vector<float>{2.0f});
auto result = backend->create_tensor(element::f32, shape_r);
// clang-format off
vector<float> expected_result{0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 2.0f, 4.0f, 0.0f, 0.0f,
0.0f, 6.0f, 8.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
// clang-format on
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
EXPECT_TRUE(test::all_close_f(vector<float>{expected_result}, read_vector<float>(result)));
}
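The expected values can be checked by hand: with unit strides and dilations, each output spatial extent is in + pad_below + pad_above - filter + 1 = 2 + 1 + 2 - 1 + 1 = 5, giving the 5x5 result shape. The 1x1 filter {2.0f} scales the 2x2 input {1, 2; 3, 4} by 2, and pad_below = {1, 1} places the resulting nonzero block {2, 4; 6, 8} at rows and columns 1-2, with zeros elsewhere, matching expected_result above.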