Commit 1b8cda81 authored by Nagy Mostafa's avatar Nagy Mostafa Committed by Scott Cyphers

[MLIR] Enable inplace memory for elt-wise operations (#3609)

* Re-use memref

* add mem opt file

* Add missing files

* Add flag

* Fix style

* Small fix

* PR fixes. WIP

* PR fixes

* Fixed conv validation logic. Invoked after ng mem opt pass. Style-apply

* style-apply
parent 15ceedb7
......@@ -23,6 +23,7 @@ set(SRC
memory_manager.cpp
pass/mlir_subgraph_extraction.cpp
pass/mlir_subgraph_extraction.hpp
pass/memory_optimization.cpp
)
add_library(mlir_backend SHARED ${SRC})
......
......@@ -46,6 +46,7 @@
#include "ngraph/op/subtract.hpp"
#include "ngraph/op/util/index_reduction.hpp"
#include "ngraph/type/element_type.hpp"
#include "pass/memory_optimization.hpp"
#include <llvm/ADT/STLExtras.h>
#include <llvm/Analysis/TargetTransformInfo.h>
......@@ -92,6 +93,11 @@ static llvm::cl::opt<bool> clPrintIRAfterAll(
// *** Optimization flags ***
static llvm::cl::opt<bool> clEnableNgInPlaceMemoryOpt(
"ng-inplace-mem-opt",
llvm::cl::init(false),
llvm::cl::desc("Enable ngraph dialect in-place memory optimization pass"));
static llvm::cl::opt<bool>
clEnableAffineLoopFusion("affine-loop-fusion",
llvm::cl::init(false),
......@@ -184,6 +190,7 @@ void MLIRCompiler::init_mlir()
void MLIRCompiler::compile()
{
buildNgDialectModule();
optimizeNgDialect();
lowerNgDialect();
}
......@@ -666,6 +673,18 @@ mlir::Operation* MLIRCompiler::createIndexReduction(const ngraph::Node* ngNode)
op->setAttr("axes", redAxesAttr);
return op;
}
void MLIRCompiler::optimizeNgDialect()
{
mlir::PassManager pm(&m_context);
mlir::applyPassManagerCLOptions(pm);
if (clEnableNgInPlaceMemoryOpt)
{
pm.addPass(mlir::createMemoryOptimizationPass());
}
pm.run(m_module.get());
}
// Binds MLIR function arguments to the proper values. This includes externally allocated tensors
// helpers to be used inside the function.
void MLIRCompiler::bindArguments(std::vector<void*>& externalTensors)
......
......@@ -89,6 +89,7 @@ namespace ngraph
private:
void buildNgDialectModule();
void lowerNgDialect();
void optimizeNgDialect();
void optimize();
void bindArguments(std::vector<void*>& externalTensors);
void execute();
......
......@@ -240,7 +240,7 @@ mlir::LogicalResult verifyOp(NGConvolutionOp* op)
SmallVector<int64_t, 4> stridesVal, padAboveVal, padBelowVal;
// Identical filters and image element types
if (filtersEt != imagesType)
if (filtersEt != imagesEt)
{
return op->emitOpError("Incompatible image and filters types");
}
......@@ -299,13 +299,36 @@ mlir::LogicalResult verifyOp(NGConvolutionOp* op)
unsigned resDim = llvm::divideCeil(padBelowVal[i] + padAboveVal[i] + imagesShape[2 + i] -
filtersShape[2 + i] + 1,
stridesVal[i]);
if (resultShape[i] != resDim)
if (resultShape[2 + i] != resDim)
{
return op->emitOpError("Invalid result spatial shape");
}
}
return mlir::success();
}
// ---- Virtual buffer-id helpers ---------------------------------------------
// The memory optimization pass tags ops with an integer "ng.buffer_id"
// attribute naming a virtual buffer; affine lowering later creates one memref
// per id and re-uses it for every op carrying that id.

// Attribute name under which a buffer id is stored on an op.
static std::string getBufferIdAttrName()
{
    return "ng.buffer_id";
}

// Attaches an existing buffer-id attribute to \p op.
void setBufferId(mlir::Operation* op, mlir::IntegerAttr attr)
{
    op->setAttr(getBufferIdAttrName(), attr);
}

// Builds a 32-bit integer attribute holding \p val, attaches it to \p op, and
// returns it so the caller can propagate the same id to other ops.
mlir::IntegerAttr setBufferId(mlir::Operation* op, unsigned val)
{
    mlir::IntegerAttr idAttr =
        mlir::IntegerAttr::get(IntegerType::get(32, op->getContext()), val);
    setBufferId(op, idAttr);
    return idAttr;
}

// Returns the buffer-id attribute attached to \p op, or a null attribute if
// none has been assigned yet.
mlir::IntegerAttr getBufferId(mlir::Operation* op)
{
    return op->getAttrOfType<mlir::IntegerAttr>(getBufferIdAttrName());
}
namespace mlir
{
#define GET_OP_CLASSES
......
......@@ -32,3 +32,7 @@ namespace mlir
#include "ops.h.inc"
#undef GET_OP_CLASSES
}
void setBufferId(mlir::Operation* op, mlir::IntegerAttr attr);
mlir::IntegerAttr setBufferId(mlir::Operation* op, unsigned val);
mlir::IntegerAttr getBufferId(mlir::Operation* op);
......@@ -67,7 +67,7 @@ namespace
};
// Conversion classes declarations
#define MLIR_OP(OP) \
#define MLIR_OP(OP, INPLACE) \
class OP##Conversion : public NGraphOpLowering \
{ \
public: \
......@@ -198,6 +198,12 @@ namespace
NGraphTypeConverter typeConverter;
// List of temporary memrefs to deallocate at end of function
SmallVector<Value*, 4> memRefsToDealloc;
// Ops maybe assigned mem-refs in previous memory optimization passes.
// Track pre-assigned buffers for each Value and re-use it if one is available.
using IdToMemRefMap = std::unordered_map<unsigned, Value*>;
IdToMemRefMap m_id_to_memref;
ngmlir::MLIRCompiler& compiler;
};
......@@ -236,8 +242,8 @@ namespace
void DialectLoweringPass::populateNGraphToAffineConversionPatterns(
OwningRewritePatternList& patterns)
{
#define MLIR_OP(OP) OP##Conversion,
#define MLIR_LAST_OP(OP) OP##Conversion
#define MLIR_OP(OP, INPLACE) OP##Conversion,
#define MLIR_LAST_OP(OP, INPLACE) OP##Conversion
patterns.insert<
#include "op_lowerers.inc"
>(&getContext(), *this);
......@@ -288,7 +294,30 @@ namespace
else
{
auto tensorType = origResult->getType().cast<NGTensorType>();
auto newResult = createTempTensor(typeConverter.convertType(tensorType), rewriter);
Value* newResult;
Attribute bufferIdAttr = getBufferId(op);
if (!bufferIdAttr)
{
// Allocate new memref
newResult = createTempTensor(typeConverter.convertType(tensorType), rewriter);
}
else
{
unsigned bufferId = bufferIdAttr.cast<IntegerAttr>().getInt();
// Re-use a memref if it exists, else create a new one and update map
IdToMemRefMap::iterator it = m_id_to_memref.find(bufferId);
if (it == m_id_to_memref.end())
{
// create a new memref
newResult =
createTempTensor(typeConverter.convertType(tensorType), rewriter);
m_id_to_memref[bufferId] = newResult;
}
else
{
newResult = it->second;
}
}
newResults.push_back(newResult);
}
}
......
......@@ -20,26 +20,27 @@
#endif
#ifndef MLIR_LAST_OP
#define MLIR_LAST_OP(OP) MLIR_OP(OP)
#define MLIR_LAST_OP(OP, INPLACE) MLIR_OP(OP, INPLACE)
#endif
MLIR_OP(NGAddOp)
MLIR_OP(NGArgMaxRedOp)
MLIR_OP(NGArgMinRedOp)
MLIR_OP(NGConcatOp)
MLIR_OP(NGConvolutionOp)
MLIR_OP(NGDivOp)
MLIR_OP(NGDotOp)
MLIR_OP(NGGatherOp)
MLIR_OP(NGGreaterOp)
MLIR_OP(NGLessOp)
MLIR_OP(NGMulOp)
MLIR_OP(NGMaxOp)
MLIR_OP(NGMinOp)
MLIR_OP(NGNegOp)
MLIR_OP(NGReluOp)
MLIR_OP(NGSubOp)
MLIR_LAST_OP(NGReturnOp)
/* op name */ /* in-place safe ? */
MLIR_OP(NGAddOp , true )
MLIR_OP(NGArgMaxRedOp , false )
MLIR_OP(NGArgMinRedOp , false )
MLIR_OP(NGConcatOp , false )
MLIR_OP(NGConvolutionOp , false )
MLIR_OP(NGDivOp , true )
MLIR_OP(NGDotOp , false )
MLIR_OP(NGGatherOp , false )
MLIR_OP(NGGreaterOp , true )
MLIR_OP(NGLessOp , true )
MLIR_OP(NGMulOp , true )
MLIR_OP(NGMaxOp , true )
MLIR_OP(NGMinOp , true )
MLIR_OP(NGNegOp , true )
MLIR_OP(NGReluOp , true )
MLIR_OP(NGSubOp , true )
MLIR_LAST_OP(NGReturnOp , false )
#undef MLIR_OP
#undef MLIR_LAST_OP
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API.
#include "contrib/mlir/compiler/compiler.hpp"
#include "contrib/mlir/compiler/dialect/ops.hpp"
#include "contrib/mlir/compiler/dialect/type.hpp"
#include "ngraph/assertion.hpp"
#include <llvm/ADT/DenseSet.h>
#include <map>
#include <mlir/EDSC/Builders.h>
#include <mlir/EDSC/Helpers.h>
#include <mlir/EDSC/Intrinsics.h>
#include <mlir/IR/AffineExpr.h>
#include <mlir/IR/IntegerSet.h>
#include <mlir/IR/MLIRContext.h>
#include <mlir/IR/StandardTypes.h>
#include <mlir/Pass/Pass.h>
#include <mlir/Transforms/DialectConversion.h>
// anonymous namespace
// no need to expose any of the following outside of this file
namespace
{
using namespace ngraph::runtime;
using namespace ngraph::runtime::ngmlir;
using namespace mlir;
/// Memory Optimization pass
/// - Tries to perform operations in place where applicable by assigning a virtual buffer ID
/// to values. Those are used later in affine lowering pass to create or re-use memrefs
class MemoryOptimizationPass : public mlir::FunctionPass<MemoryOptimizationPass>
{
public:
MemoryOptimizationPass()
{
// Table of op-name -> "may safely write its result over an input buffer",
// generated from the central op list in op_lowerers.inc so the two stay
// in sync.
m_inplaceOps = {
#define MLIR_OP(OP, INPLACE) {OP::getOperationName().str(), INPLACE},
#include "contrib/mlir/compiler/op_lowerers.inc"
};
}
// Walks the function and tags in-place candidates with virtual buffer ids.
void runOnFunction() override;
private:
// True if this op kind is whitelisted as safe for in-place execution.
bool isSafeInPlace(mlir::Operation* op);
m_inplaceOps;
std::unordered_map<std::string, bool> m_inplaceOps;
// Next virtual buffer id to hand out; static so ids stay unique across
// all functions processed by separate pass instances.
static unsigned bufferId;
};
unsigned MemoryOptimizationPass::bufferId = 0;
void MemoryOptimizationPass::runOnFunction()
{
auto f = getFunction();
f.walk([&](mlir::Operation* op) {
if (!isSafeInPlace(op))
{
return;
}
if (op->getNumResults() > 1)
{
return;
}
auto defVal = op->getResult(0);
// If the defined value is an output of the sub-graph, cannot do it in place
for (auto use = defVal->use_begin(); use != defVal->use_end(); use++)
{
auto useOp = use->getOwner();
if (isa<NGReturnOp>(useOp))
{
return;
}
}
// Check if we can re-use the buffer of any of the inputs. Conjunction of the following:
// - single use value or all uses in the current op
// - not an input argument
// TODO: Check instead if last post-dominating (dataflow-wise) use.
for (auto opnd = op->operand_begin(); opnd != op->operand_end(); opnd++)
{
auto val = *opnd;
// we optimize if the val has one use or if all uses are in the current op
bool optimize;
optimize = val->hasOneUse();
if (!optimize)
{
optimize = true;
// check if all uses are in the current op
for (auto use = val->use_begin(); use != val->use_end(); use++)
{
if (use->getOwner() != op)
{
optimize = false;
}
}
}
if (optimize)
{
// do we have a buffer id attached to this value
auto defOp = val->getDefiningOp();
// If no defining op, then this is a block arg, skip operand
if (!defOp)
{
continue;
}
IntegerAttr attr = getBufferId(defOp);
if (!attr)
{
// attach a new buffer id
attr = setBufferId(defOp, this->bufferId++);
}
// propagate attribute to dst, and we are done
setBufferId(op, attr);
return;
}
}
});
}
// Returns whether \p op may write its result over one of its input buffers.
// Ops absent from the table are conservatively rejected.
bool MemoryOptimizationPass::isSafeInPlace(mlir::Operation* op)
{
    const auto entry = m_inplaceOps.find(op->getName().getStringRef().str());
    return (entry == m_inplaceOps.end()) ? false : entry->second;
}
}
namespace mlir
{
// Factory for the in-place memory optimization pass; invoked from
// MLIRCompiler::optimizeNgDialect() when -ng-inplace-mem-opt is set.
std::unique_ptr<Pass> createMemoryOptimizationPass()
{
return std::make_unique<MemoryOptimizationPass>();
}
} // namespace mlir
// Static registration so the pass is also available to generic MLIR
// pass-pipeline drivers under the same flag name.
static PassRegistration<MemoryOptimizationPass> pass("ng-inplace-mem-opt",
"Performs in-place memory optimizations");
\ No newline at end of file
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API.
#pragma once
#include <mlir/Pass/Pass.h>
namespace mlir
{
// Creates the nGraph-dialect in-place memory optimization pass, which tags
// values with virtual buffer ids so affine lowering can re-use memrefs.
std::unique_ptr<Pass> createMemoryOptimizationPass();
}
......@@ -89,3 +89,31 @@ NGRAPH_TEST(${BACKEND_NAME}, add_overload)
EXPECT_TRUE(test::all_close_f(read_vector<float>(result),
(test::NDArray<float, 2>({{6, 8}, {10, 12}})).get_vector()));
}
NGRAPH_TEST(${BACKEND_NAME}, add_in_place)
{
// Builds a chain of adds where every intermediate (T..T3) has all of its
// uses in a single consumer, making them candidates for in-place buffer
// re-use: T4 = 8 * (A + B).
Shape shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto T = A + B;
auto T2 = T + T;
auto T3 = T2 + T2;
auto T4 = T3 + T3;
auto f = make_shared<Function>(T4, ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
shared_ptr<runtime::Tensor> a = backend->create_tensor(element::f32, shape);
shared_ptr<runtime::Tensor> b = backend->create_tensor(element::f32, shape);
shared_ptr<runtime::Tensor> result = backend->create_tensor(element::f32, shape);
copy_data(a, test::NDArray<float, 2>({{1, 2}, {3, 4}}).get_vector());
copy_data(b, test::NDArray<float, 2>({{5, 6}, {7, 8}}).get_vector());
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
// 8 * (a + b): a + b = {{6,8},{10,12}} -> expected {{48,64},{80,96}}.
EXPECT_TRUE(test::all_close_f(read_vector<float>(result),
(test::NDArray<float, 2>({{48, 64}, {80, 96}})).get_vector()));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment