Commit 43a1bf1c authored by Nagy Mostafa, committed by Scott Cyphers

[MLIR] Add padding support to Convolution (#3586)

* Enable Padding

* Small fix to match MLIR API change

* WIP

* Subtract pad below from virtual img index

* Fixed integer set inequalities

* style-apply

* remove unneeded padAbove values

* Remove dump call

* PR feedback

* style apply
parent 1b8cda81
@@ -790,16 +790,6 @@ namespace
auto padBelow = convolOp.padBelow().getValue();
auto padAbove = convolOp.padAbove().getValue();
for (auto value : llvm::zip(padBelow, padAbove))
{
auto padAttr = std::get<0>(value);
NGRAPH_CHECK(padAttr.cast<IntegerAttr>().getInt() == 0,
"No support for padding in convolution op");
padAttr = std::get<1>(value);
NGRAPH_CHECK(padAttr.cast<IntegerAttr>().getInt() == 0,
"No support for padding in convolution op");
}
Type elemTy = images->getType().cast<MemRefType>().getElementType();
// Let Images shape be [N, C_IN, D_1, ... D_f]
@@ -839,11 +829,13 @@ namespace
// Output[n, k, r_1, .. r_f] +=
// Images[n, c, i_1 + j_1, .. i_f + j_f] * Filters[k, c, j_1, .. j_f]
// TODO: With padding, we need to check (using IntegerSets) whether each spatial dim in
// Images lies within the padding.
// If yes, we init the value to zero, else load from the MemRef.
// Q: Can this be done using a map from the padded tensor to the unpadded one? Will we
// load zero if OOB?
// With padding, we check (using IntegerSets) whether each spatial dim in Images lies
// inside the non-padded spatial region. If true, we perform the computation:
//
// for <j_1 .. j_f> : <0 .. 0> -> <F_1 .. F_f>
// if(indices in non-padded region):
// Output[n, k, r_1, .. r_f] +=
// Images[n, c, i_1 + j_1, .. i_f + j_f] * Filters[k, c, j_1, .. j_f]
// Create view to write into result.
MemRefView vRes(result), vImages(images), vFilters(filters);
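The integer-set check described above reduces, per spatial dimension, to a simple interval test. A minimal standalone C++ sketch of that predicate (the function name and scalar form are illustrative and not part of the lowering, which builds the equivalent affine constraints instead):

#include <cstdint>

// A virtual (padded-image) index idx lies in the non-padded region iff it is at or
// past the below-padding and strictly before the above-padding. These are exactly
// the two inequalities the IntegerSet below encodes.
bool inNonPaddedRegion(int64_t idx, int64_t padBelow, int64_t imgLb, int64_t imgUb)
{
    return (idx - padBelow - imgLb >= 0) &&   // expr1: past pad-below
           (padBelow + imgUb - idx - 1 >= 0); // expr2: before pad-above
}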
@@ -868,6 +860,9 @@ namespace
auto resSpatialIndices = makeIndexHandles(spatialRank);
auto resSpatialIndicesPtrs = makeIndexHandlePointers(resSpatialIndices);
SmallVector<int64_t, 4> resSteps, filtersSteps;
SmallVector<int, 4> padBelowIntValues;
bool withPadding = false;
for (auto i = 0; i < spatialRank; i++)
{
// result spatial bounds and steps
@@ -877,6 +872,22 @@ namespace
// image spatial bounds
imgSpatialLbs.push_back(vImages.lb(i + 2));
imgSpatialUbs.push_back(vImages.ub(i + 2));
// Check if we have any padding and collect pad values
IntegerAttr iAttr = padBelow[i].cast<IntegerAttr>();
int padValue = iAttr.getInt();
if (padValue)
{
withPadding = true;
}
padBelowIntValues.push_back(padValue);
iAttr = padAbove[i].cast<IntegerAttr>();
padValue = iAttr.getInt();
if (padValue)
{
withPadding = true;
}
}
NGRAPH_CHECK(vImages.rank() == vFilters.rank(), "Images and Filters have unequal ranks");
@@ -895,6 +906,42 @@ namespace
filtersSteps.push_back(vFilters.step(i + 2));
}
IntegerSet nonPaddedRange;
if (withPadding)
{
// Create affine expressions and IntegerSet
// IntegerSet (d0, d1, .. d_N-1)[LB_0, LB_1, .. LB_N-1, UB_0, UB_1, .. UB_N-1], where
// for each dim:
// (d_dim - padBelow[dim] - LB_dim >= 0),
// (padBelow[dim] + UB_dim - d_dim - 1 >= 0)
SmallVector<AffineExpr, 4> affineExprs;
// Bools indicating whether each expr is an equality (true) or an inequality (false)
SmallVector<bool, 4> isEq;
for (unsigned dim = 0; dim < spatialRank; dim++)
{
// i_dim
auto dimExpr = rewriter.getAffineDimExpr(dim);
auto imgLbExpr = rewriter.getAffineSymbolExpr(dim);
// expr1 : i_dim - padBelow[dim] - imgLB >= 0
auto padBelowExpr = rewriter.getAffineConstantExpr(padBelowIntValues[dim]);
affineExprs.push_back(dimExpr - padBelowExpr - imgLbExpr);
isEq.push_back(false);
// expr2: padBelow[dim] + imgUB - i_dim - 1 >= 0
auto imgUbExpr = rewriter.getAffineSymbolExpr(spatialRank + dim);
auto oneExpr = rewriter.getAffineConstantExpr(1);
affineExprs.push_back(padBelowExpr + imgUbExpr - dimExpr - oneExpr);
isEq.push_back(false);
}
NGRAPH_CHECK(affineExprs.size() == isEq.size() && isEq.size() == 2 * spatialRank,
"Invalid number of expressions in the IntegerSet");
nonPaddedRange =
rewriter.getIntegerSet(spatialRank, 2 * spatialRank, affineExprs, isEq);
}
// Initialize output to zero
{
IndexHandle n, k, c;
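To see concretely what the set admits: for one spatial dimension with imgLb = 0, imgUb = 2, and padBelow = 1 (the setup of the 5x5 test at the bottom of this diff), a quick sketch enumerating the virtual index range 0..4 (standalone and illustrative, not part of the commit):

#include <cstdio>

int main()
{
    const long padBelow = 1, imgLb = 0, imgUb = 2; // extent-2 image, pad {1} below, {2} above
    for (long idx = 0; idx < 5; idx++)             // virtual (padded) index range
    {
        bool inside = (idx - padBelow - imgLb >= 0) && (padBelow + imgUb - idx - 1 >= 0);
        std::printf("idx %ld: %s\n", idx, inside ? "load Images[idx - padBelow]" : "padding -> 0");
    }
    return 0; // prints: only idx 1 and 2 fall in the non-padded region
}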
@@ -949,6 +996,8 @@ namespace
filtersSteps)([&] {
SmallVector<IndexHandle, 4> imgIndices, filtersIndices;
// Image indices
// Here we compute the virtual start index into the padded image.
imgIndices.push_back(n);
imgIndices.push_back(c);
for (auto i = 0; i < spatialRank; i++)
@@ -963,8 +1012,46 @@ namespace
filtersSpatialIndices.begin(),
filtersSpatialIndices.end());
iRes(resIndices) =
iRes(resIndices) + (iImages(imgIndices) * iFilters(filtersIndices));
if (withPadding)
{
// affine.if args: img spatial dims, img spatial lbs, img spatial ubs
SmallVector<IndexHandle, 4>::iterator it = imgIndices.begin();
std::advance(it, 2);
SmallVector<Value*, 4> affineIfArgs(it, imgIndices.end());
affineIfArgs.insert(
affineIfArgs.end(), imgSpatialLbs.begin(), imgSpatialLbs.end());
affineIfArgs.insert(
affineIfArgs.end(), imgSpatialUbs.begin(), imgSpatialUbs.end());
auto affineIfOp =
rewriter.create<AffineIfOp>(rewriter.getUnknownLoc(),
nonPaddedRange,
affineIfArgs,
/*withElseRegion=*/false);
{
auto rewriter = affineIfOp.getThenBodyBuilder();
ScopedContext scope(rewriter, loc);
// We must subtract pad below before img load, since the
// physical image is not padded
SmallVector<IndexHandle, 4> adjustedImgIndices;
adjustedImgIndices.push_back(n);
adjustedImgIndices.push_back(c);
for (auto i = 0; i < spatialRank; i++)
{
adjustedImgIndices.push_back(IndexHandle(
imgIndices[2 + i] -
intrinsics::constant_index(padBelowIntValues[i])));
}
iRes(resIndices) =
iRes(resIndices) +
(iImages(adjustedImgIndices) * iFilters(filtersIndices));
}
}
else
{
iRes(resIndices) = iRes(resIndices) +
(iImages(imgIndices) * iFilters(filtersIndices));
}
});
});
});
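Putting the pieces together, the generated loop nest computes the equivalent of the following plain-C++ reference for the 2-D spatial case (a sketch assuming NCHW/KCHW layouts and the unit strides and dilations this lowering requires; all names are illustrative):

#include <cstdint>
#include <vector>

// Reference for a padded 2-D convolution. Indices ih/iw are virtual (padded-image)
// indices; the actual load subtracts padBelow because the physical image is not padded.
// Output extents are RH = H + padBelowH + padAboveH - FH + 1 (likewise for RW).
void convRef(const std::vector<float>& images,  // [N, C, H, W]
             const std::vector<float>& filters, // [K, C, FH, FW]
             std::vector<float>& output,        // [N, K, RH, RW]
             int64_t N, int64_t C, int64_t H, int64_t W,
             int64_t K, int64_t FH, int64_t FW,
             int64_t padBelowH, int64_t padBelowW,
             int64_t RH, int64_t RW)
{
    for (int64_t n = 0; n < N; n++)
        for (int64_t k = 0; k < K; k++)
            for (int64_t rh = 0; rh < RH; rh++)
                for (int64_t rw = 0; rw < RW; rw++)
                {
                    float sum = 0.0f;
                    for (int64_t c = 0; c < C; c++)
                        for (int64_t jh = 0; jh < FH; jh++)
                            for (int64_t jw = 0; jw < FW; jw++)
                            {
                                // Virtual start index into the padded image.
                                int64_t ih = rh + jh, iw = rw + jw;
                                // affine.if: load only inside the non-padded region;
                                // padded positions contribute zero.
                                if (ih - padBelowH >= 0 && ih - padBelowH < H &&
                                    iw - padBelowW >= 0 && iw - padBelowW < W)
                                {
                                    sum += images[((n * C + c) * H + (ih - padBelowH)) * W +
                                                  (iw - padBelowW)] *
                                           filters[((k * C + c) * FH + jh) * FW + jw];
                                }
                            }
                    output[((n * K + k) * RH + rh) * RW + rw] = sum;
                }
}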
@@ -446,12 +446,9 @@ bool MLIRSubgraphExtractionPass::is_supported_mlir_op(std::shared_ptr<Node> node
auto data_dilation = conv_node->get_data_dilation_strides();
auto window_dilation = conv_node->get_window_dilation_strides();
auto is_zero = [](size_t s) { return s == 0; };
auto is_one = [](size_t s) { return s == 1; };
return std::all_of(pad_below.begin(), pad_below.end(), is_zero) &&
std::all_of(pad_above.begin(), pad_above.end(), is_zero) &&
std::all_of(data_dilation.begin(), data_dilation.end(), is_one) &&
return std::all_of(data_dilation.begin(), data_dilation.end(), is_one) &&
std::all_of(window_dilation.begin(), window_dilation.end(), is_one);
}
@@ -99,3 +99,40 @@ NGRAPH_TEST(${BACKEND_NAME}, convolution_simple)
handle->call_with_validate({result}, {a, b});
EXPECT_TRUE(test::all_close_f(vector<float>{expected_result}, read_vector<float>(result)));
}
NGRAPH_TEST(${BACKEND_NAME}, convolution_simple_padding)
{
Shape shape_a{1, 1, 2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_b{1, 1, 1, 1};
auto B = make_shared<op::Parameter>(element::f32, shape_b);
Shape shape_r{1, 1, 5, 5};
auto conv1 = make_shared<op::Convolution>(A,
B,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{1, 1},
CoordinateDiff{2, 2},
Strides{1, 1});
auto f = make_shared<Function>(conv1, ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a, vector<float>{1.0f, 2.0f, 3.0f, 4.0f});
auto b = backend->create_tensor(element::f32, shape_b);
copy_data(b, vector<float>{2.0f});
auto result = backend->create_tensor(element::f32, shape_r);
// clang-format off
vector<float> expected_result{0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 2.0f, 4.0f, 0.0f, 0.0f,
0.0f, 6.0f, 8.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
// clang-format on
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
EXPECT_TRUE(test::all_close_f(vector<float>{expected_result}, read_vector<float>(result)));
}
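The expected values can be checked by hand: with unit strides and dilations, each output spatial extent is in + pad_below + pad_above - filter + 1 = 2 + 1 + 2 - 1 + 1 = 5, giving the 5x5 result shape. The 1x1 filter {2.0f} scales the 2x2 input {1, 2; 3, 4} by 2, and pad_below = {1, 1} places the resulting nonzero block {2, 4; 6, 8} at rows and columns 1-2, with zeros elsewhere, matching expected_result above.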