Commit 6de4893b authored by Nagy Mostafa, committed by Sang Ik Lee

[MLIR] In-place memory optimization for elt-wise and concat ops. (#3832)

* AliasMap WIP

* Added liveness info

* WIP

* WIP: Tests

* WIP: LIT tests

* Added knobs for mem optimization pass. More LIT tests

* Revert affine_lowerer change

* More elaborate comment

* Minor fixes

* style-apply

* Rename liveness

* Switch to Analysis framework

* Fix optimization conditions

* Remove LIT test

* style

* Switch to equivalence relationship impl of non-alias relationship

* refined comment

* Switch non-alias to equivalence relationship

* Fix bad merge

* Adding tests. WIP

* Added buffer size tracking and unit-tests

* Added LIT and unit-tests

* Turn optimization ON

* style

* fix unit-tests

* Fix useCount

* Fix copyright and typo

* Refine few comments, remove new lines

* style fix
Co-authored-by: Scott Cyphers <diyessi@users.noreply.github.com>
Co-authored-by: Sang Ik Lee <sang.ik.lee@intel.com>
parent b3db038e
@@ -25,7 +25,7 @@ add_subdirectory(tools/ngraph-opt)
set(SRC
backend/cpu/cpu_backend.cpp
backend/pass/affine_lowerer.cpp
-backend/pass/memory_optimization.cpp
+backend/analysis/memory_analysis.cpp
core/compiler.cpp
core/ngraph_dialect/dialect.cpp
core/ngraph_dialect/type.cpp
...
//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API.
#include "memory_analysis.hpp"
#include "contrib/mlir/core/compiler.hpp"
#include "contrib/mlir/core/ngraph_dialect/ops.hpp"
#include "contrib/mlir/core/ngraph_dialect/type.hpp"
#include <llvm/ADT/BitVector.h>
#include <llvm/ADT/DenseSet.h>
#include <map>
#include <mlir/EDSC/Builders.h>
#include <mlir/EDSC/Helpers.h>
#include <mlir/EDSC/Intrinsics.h>
#include <mlir/IR/AffineExpr.h>
#include <mlir/IR/IntegerSet.h>
#include <mlir/IR/MLIRContext.h>
#include <mlir/IR/StandardTypes.h>
#include <mlir/Pass/Pass.h>
#include <mlir/Transforms/DialectConversion.h>
static llvm::cl::opt<bool> clEnableNgInPlaceMemory(
"ngraph-memory-opt",
llvm::cl::init(true),
llvm::cl::desc("Enable ngraph dialect in-place memory optimization pass"));
static llvm::cl::opt<bool>
clEnableNgInPlaceConcat("ngraph-memory-opt-concat",
llvm::cl::init(true),
llvm::cl::desc("Enable inplace concat optimization"));
static llvm::cl::opt<bool>
clEnableNgInPlaceEltWise("ngraph-memory-opt-eltwise",
llvm::cl::init(true),
llvm::cl::desc("Enable inplace element wise optimization"));
// anonymous namespace
// no need to expose any of the following outside of this file
namespace
{
using namespace ngraph::runtime;
using namespace ngraph::runtime::ngmlir;
using namespace mlir;
// A helper data-structure to track the cannot-alias relationship between
// tensor syms. If NoAlias[T] contains S, then T and S cannot alias.
// The relationship is an equivalence (transitive, symmetric, reflexive).
// Initially each sym is put in its own equivalence class (set).
// If two syms a and b are found to be non-aliasing, their equivalence
// classes are unioned.
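// Example (illustrative): for syms {a, b, c}, init() creates the singleton
// classes {a}, {b}, {c}. After insertNoAlias(a, b) the classes become
// {a, b} and {c}: canAlias(a, b) is now false, while canAlias(a, c) and
// canAlias(b, c) remain true.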
class AliasRelation
{
public:
/// Initialize the relationship for a number of syms
void init(std::unordered_set<Value*>& symbols);
/// Checks if values a and b can alias
bool canAlias(Value* a, Value* b);
void insertNoAlias(Value* a, Value* b);
private:
using BV = llvm::BitVector;
std::unordered_map<Value*, unsigned> m_valueToIdx;
std::unordered_map<unsigned, Value*> m_idxToValue;
std::unordered_map<Value*, BV*> m_valueToSet;
SmallVector<BV, 10> m_sets;
};
// Simple single basic block liveness analysis
// TODO: Replace with MLIR's liveness analysis
class LivenessAnalysis
{
public:
bool isLive(Value* v);
void setLive(Value* v);
void kill(Value* v);
void getLiveValues(llvm::SmallVectorImpl<Value*>& values);
void reset();
private:
unsigned m_maxIdx = 0;
SmallVector<bool, 10> m_liveness;
std::unordered_map<Value*, unsigned> m_valueToIdx;
};
// Memory Assignment analysis
// Tries to find operations that can be done in place where applicable
// by assigning a virtual buffer ID to values.
// The buffer assignment is used later in affine lowering pass to create
// or re-use memrefs
class MemoryAssignment
{
public:
MemoryAssignment(MemoryAnalysis* memAnalysis)
: m_memAnalysis(memAnalysis)
{
m_inplaceOps = {
#define MLIR_OP(OP, INPLACE) {OP::getOperationName().str(), INPLACE},
#include "contrib/mlir/backend/pass/op_lowerers.inc"
};
m_bufferId = 0;
}
void run(ModuleOp* module);
private:
void processDestructiveInPlace(mlir::Operation* op);
void processConcat(mlir::Operation* op);
bool isSafeInPlace(mlir::Operation* op);
bool isInputOrOutputValue(mlir::Value* value);
LivenessAnalysis m_liveness;
AliasRelation m_aliasRelation;
std::unordered_map<std::string, bool> m_inplaceOps;
int m_bufferId;
MemoryAnalysis* m_memAnalysis;
};
// helpers
// Determines the buffer size a value needs based on its type
// offset (in elements) is where that value starts in the buffer
static unsigned getBufferSizeForOperand(mlir::Value* value, int offset);
// Go backwards over instructions
//
// Re-use buffers if none of the dst/srcs are input/output of the sub-graph
//
// For destructive in-place ops (elt-wise):
// - Pick a src that is at its last use (i.e. dead) and not an input/output.
//   If several srcs are dead, pick the one with the fewest uses.
//   If no such src is found, bail out.
// - If dst has pre-assigned buffer/offset, then copy them to src.
// If not, assign new buffer to both dst and src.
// - Mark all live syms at this point to not alias src
//
// For non-Destructive in-place ops:
// Concat:
// - Reuse buffer if
// - Concat axis is most-significant non-one axis, and
// - all operands can alias dest.
// - If dst has an assignment, copy it over to srcs as long as
// there is no conflicting src pre-assignment
// - If dst has no assignment, and all srcs have no assignment,
// assign new buffer to dst and srcs
//
// Slice: TBD
// Reshape: TBD
//
// Update liveness info
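// Example (illustrative): scanning backwards over
//   %2 = "ng.add"(%0, %1)
// where %0 has no later uses and is not an input/output, %0 is picked as the
// in-place src: %2's {bufferId, offset} (pre-assigned or newly created) is
// copied to %0's defining op, and %0 is marked non-aliasing with every value
// that is live below this point.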
void MemoryAssignment::run(ModuleOp* module)
{
if (!clEnableNgInPlaceMemory)
{
// Optimization disabled
return;
}
SmallVector<FuncOp, 2> funcOps(module->getOps<FuncOp>());
if (funcOps.size() > 1 || funcOps.empty())
{
// single func for now
return;
}
auto f = funcOps.back();
auto& blocks = f.getBlocks();
if (blocks.size() != 1)
{
// single block func for now
return;
}
auto& block = *(blocks.begin());
// count number of syms in the code and initialize alias relationship
std::unordered_set<Value*> syms;
for (auto it = block.begin(); it != block.end(); it++)
{
Operation* op = &(*it);
for (auto it : op->getResults())
{
Value* v = it;
if (syms.find(v) == syms.end())
{
syms.insert(v);
}
}
for (auto it : op->getOperands())
{
Value* v = it;
if (syms.find(v) == syms.end())
{
syms.insert(v);
}
}
}
m_aliasRelation.init(syms);
// scan instructions backwards
for (auto it = block.rbegin(); it != block.rend(); it++)
{
Operation* op = &(*it);
if (isSafeInPlace(op))
{
// TODO: replace with Op Interface check
if (dyn_cast<NGConcatOp>(op))
{
if (clEnableNgInPlaceConcat)
processConcat(op);
}
else
{
if (clEnableNgInPlaceEltWise)
processDestructiveInPlace(op);
}
}
// update liveness info
for (auto dit : op->getResults())
{
m_liveness.kill(dit);
}
for (auto uit : op->getOperands())
{
m_liveness.setLive(uit);
}
}
}
void MemoryAssignment::processConcat(mlir::Operation* op)
{
auto concat = cast<mlir::NGConcatOp>(op);
{
// concat on the highest non-one axis
auto concatAxis = concat.concatenation_axis();
auto result = concat.getResult();
auto shape = (result->getType().cast<NGTensorType>()).getShape();
std::vector<int> opndOffsets;
BufferInfo bufferInfo;
int bufferId = -1, baseOffset = 0;
unsigned bufferSize = 0;
if (isInputOrOutputValue(op->getResult(0)))
{
// dst is output, bail out
return;
};
for (auto i = 0; i < shape.size(); i++)
{
if (i == concatAxis)
{
break;
}
if (shape[i] != 1)
{
return;
}
}
// check that all operands and dst can alias
// and that none is input or output
for (auto opnd : op->getOperands())
{
if (!m_aliasRelation.canAlias(result, opnd) || isInputOrOutputValue(opnd))
{
return;
}
}
// calculate relative offsets in the output buffer
int opndOffset = 0;
for (auto i = 0; i < op->getNumOperands(); i++)
{
if (i == 0)
{
opndOffsets.push_back(0);
}
else
{
auto opnd = op->getOperand(i - 1);
auto tensorType = opnd->getType().cast<NGTensorType>();
opndOffset += tensorType.getNumElements();
opndOffsets.push_back(opndOffset);
}
}
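// e.g. for a concat of (%a : 1x2x2, %b : 1x2x2) along axis 1, opndOffsets is
// {0, 4} (in elements): %b's data starts 4 elements into dst's buffer.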
// check for consistent pre-existing buffer assignments
bufferInfo = m_memAnalysis->getBufferInfo(op);
// if dest has an assignment
if (bufferInfo.isValid())
{
// set buffer ID and base offset to that of dest's
bufferId = bufferInfo.m_bufferId;
baseOffset = bufferInfo.m_offset;
// check if we can re-use it for all src operands
int bufferOffset = 0;
for (auto i = 0; i < op->getNumOperands(); i++)
{
auto opnd = op->getOperand(i);
auto defOp = opnd->getDefiningOp();
NGRAPH_CHECK(defOp != nullptr, "Defining operation expected");
// calculate expected absolute offset in the buffer
bufferOffset = baseOffset + opndOffsets[i];
bufferInfo = m_memAnalysis->getBufferInfo(defOp);
if (bufferInfo.isValid())
{
if (bufferInfo.m_bufferId != bufferId ||
bufferInfo.m_offset != bufferOffset)
{
// buffer ID or offset mismatch, bailout
return;
}
}
}
}
else
{
// dst has no buffer assignment
// TODO:
// We can re-use an existing assignment of a src operand if
// Every other src either:
// a. has a matching pre-assigned buffer ID and offset, or
// b. is unassigned a buffer/offset, and the computed offset is valid
// (non-negative), and no other live tensor aliases the chunk
// of the buffer we want to assign.
// To achieve this, we need to track buffer->{tensor,offset,size} and
// perform the check
//
// Example:
// V1 = Concat S0 (?), S1{0,16}, S2 (?)
// R0 = ...
// R2 = ...
// V2 = Concat R0{0, 0}, S1 {0,16}, R2{0,32}
//
// For the first concat, we could use the assignment of S1 (from second concat)
// to define assignments for S0 and S2, and since R0, R2 are dead, no live tensors
// alias into the buffer, and the assignment is valid.
//
// On the other hand, the following is invalid
// Example:
// R0 = ...
// V1 = Concat S0(?), S1(0,16), S2(?)
// R2 = ...
// V2 = Concat R0, S1{0,16}, R2
// Reusing assignment of S1 in the first concat will cause S0 and R0 to alias.
// And since R0 is alive the write to R0 will overwrite S0.
// For now, assign only if all srcs have no prior assignments
for (auto opnd : op->getOperands())
{
if (m_memAnalysis->getBufferInfo(opnd->getDefiningOp()).isValid())
{
return;
}
}
}
// We didn't find any pre-existing buffer assignment, create a new buffer
if (bufferId == -1)
{
bufferId = m_bufferId++;
baseOffset = 0;
}
// adjust the buffer size based on this instruction
// max size is determined from dst offset and type
bufferSize = getBufferSizeForOperand(op->getResult(0), baseOffset);
m_memAnalysis->setBufferSize(bufferId, bufferSize);
// Update analysis map. No need to check if we are over-writing previous entries
// since they should all match.
m_memAnalysis->setBufferInfo(op, {bufferId, baseOffset});
for (auto i = 0; i < op->getNumOperands(); i++)
{
auto opnd = op->getOperand(i);
auto defOp = opnd->getDefiningOp();
NGRAPH_CHECK(defOp != nullptr, "Defining operation expected");
auto opndOffset = baseOffset + opndOffsets[i];
m_memAnalysis->setBufferInfo(defOp, {bufferId, opndOffset});
}
}
}
void MemoryAssignment::processDestructiveInPlace(mlir::Operation* op)
{
NGRAPH_CHECK(op->getNumResults() == 1, "Destructive in-place with multi-def ?");
Value* use = nullptr;
int useCount = -1;
if (isInputOrOutputValue(op->getResult(0)))
{
// dst is output, bail out
return;
};
// pick a dead operand that is not an input or output, preferring the one with the fewest uses
for (auto opnd : op->getOperands())
{
if (!m_liveness.isLive(opnd) && !isInputOrOutputValue(opnd))
{
int uses = 0;
for (auto& i : opnd->getUses())
{
uses++;
}
if (useCount == -1 || uses < useCount)
{
use = opnd;
useCount = uses;
}
}
}
if (!use)
{
return;
}
// assign new buffer or copy buffer info from dst
auto bufferInfo = m_memAnalysis->getBufferInfo(op);
if (!bufferInfo.isValid())
{
// attach a new buffer id, and 0 offset on both src and result
bufferInfo = {m_bufferId++, 0};
m_memAnalysis->setBufferInfo(op, bufferInfo);
m_memAnalysis->setBufferInfo(use->getDefiningOp(), bufferInfo);
}
else
{
// copy result buffer id and offset to src
m_memAnalysis->setBufferInfo(use->getDefiningOp(), bufferInfo);
}
auto bufferSize = getBufferSizeForOperand(op->getResult(0), bufferInfo.m_offset);
m_memAnalysis->setBufferSize(bufferInfo.m_bufferId, bufferSize);
// update aliasing info
// use value cannot alias any live value
SmallVector<Value*, 10> liveValues;
m_liveness.getLiveValues(liveValues);
for (auto& value : liveValues)
{
m_aliasRelation.insertNoAlias(use, value);
}
}
bool MemoryAssignment::isInputOrOutputValue(mlir::Value* value)
{
auto defOp = value->getDefiningOp();
// If no defining op, then this is a block arg, i.e. a sub-graph input
//
// TODO: This check is assuming single BB function, improve to handle control-flow.
// In which case, we have to track block args to all pred branches that feed them,
// all the way up to the initial def, if any, or entry block arg. This is preferably
// done as a pre-pass to capture all inputs/output values.
if (!defOp)
{
return true;
}
// If the defined value is an output of the sub-graph, cannot do it in place
//
// TODO: Improve to support control flow. Track value use-chain along branches/block-args,
// if we hit a use in a return, it is an output value.
for (auto& use : value->getUses())
{
auto useOp = use.getOwner();
if (isa<NGReturnOp>(useOp))
{
return true;
}
}
return false;
}
// TODO Change this to use interfaces.
bool MemoryAssignment::isSafeInPlace(mlir::Operation* op)
{
auto it = m_inplaceOps.find(op->getName().getStringRef().str());
return it != m_inplaceOps.end() ? it->second : false;
}
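// The flags come from op_lowerers.inc; this commit flips
// MLIR_OP(NGConcatOp, false) to MLIR_OP(NGConcatOp, true), while ops such as
// NGDotOp remain false.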
void AliasRelation::init(std::unordered_set<Value*>& symbols)
{
unsigned numSyms = symbols.size();
m_sets.resize(numSyms);
for (auto& bv : m_sets)
{
bv.resize(numSyms);
}
// populate id->value and value->id maps
unsigned i = 0;
for (auto v : symbols)
{
m_idxToValue[i] = v;
m_valueToIdx[v] = i;
m_valueToSet[v] = &m_sets[i];
// set bit for that value
m_sets[i].set(i);
i++;
}
}
bool AliasRelation::canAlias(Value* a, Value* b)
{
// a and b can alias only if they belong to different equivalence classes
return m_valueToSet[a] != m_valueToSet[b];
}
void AliasRelation::insertNoAlias(Value* a, Value* b)
{
// union the two sets that a and b belong to
// update the maps accordingly
if (!canAlias(a, b))
{
// nothing to do
return;
}
// union the two sets of a and b
BV* aSet = m_valueToSet[a];
BV* bSet = m_valueToSet[b];
BV uSet = (*aSet);
uSet |= (*bSet);
// replace aSet with union
auto pSet = m_valueToSet[a];
*pSet = uSet;
// update value to set maps
for (auto it = pSet->set_bits_begin(); it != pSet->set_bits_end(); it++)
{
unsigned id = *it;
auto value = m_idxToValue[id];
m_valueToSet[value] = pSet;
}
}
void LivenessAnalysis::reset()
{
m_valueToIdx.clear();
m_liveness.clear();
m_maxIdx = 0;
}
void LivenessAnalysis::getLiveValues(llvm::SmallVectorImpl<Value*>& values)
{
for (auto& entry : m_valueToIdx)
{
if (m_liveness[entry.second])
{
values.push_back(entry.first);
}
}
}
bool LivenessAnalysis::isLive(Value* v)
{
auto it = m_valueToIdx.find(v);
if (it == m_valueToIdx.end())
{
return false;
}
return m_liveness[it->second];
}
void LivenessAnalysis::setLive(Value* v)
{
auto it = m_valueToIdx.find(v);
if (it == m_valueToIdx.end())
{
m_valueToIdx[v] = m_maxIdx++;
m_liveness.push_back(true);
NGRAPH_CHECK(m_liveness.size() == m_maxIdx);
}
else
{
m_liveness[it->second] = true;
}
}
void LivenessAnalysis::kill(Value* v)
{
auto it = m_valueToIdx.find(v);
if (it == m_valueToIdx.end())
{
// already dead
return;
}
m_liveness[it->second] = false;
}
// helpers
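// Example (illustrative): a 2x2xf32 value placed at element offset 4 needs
// 4 * 4 + 16 = 32 bytes, which matches the memref<32xi8> allocations in the
// LIT tests of this commit.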
unsigned getBufferSizeForOperand(mlir::Value* value, int offset)
{
auto tensorType = value->getType().dyn_cast<NGTensorType>();
NGRAPH_CHECK(tensorType, "Invalid type to find buffer size for");
// offset is in elements; convert to bytes, rounding the element width up
unsigned bufferSize = offset * ((tensorType.getElementBitWidth() + 7) / 8);
bufferSize += tensorType.getSizeInBytes();
return bufferSize;
}
}
namespace mlir
{
MemoryAnalysis::MemoryAnalysis(Operation* op)
{
MemoryAssignment memoryAssignment(this);
auto moduleOp = dyn_cast<ModuleOp>(op);
NGRAPH_CHECK(moduleOp != nullptr, "Expecting ModuleOp for analysis");
memoryAssignment.run(&moduleOp);
}
} // namespace mlir
@@ -20,8 +20,60 @@
#pragma once
#include <mlir/Pass/Pass.h>
+#include <unordered_map>
+#include "ngraph/check.hpp"
namespace mlir
{
-std::unique_ptr<Pass> createMemoryOptimizationPass();
+// BufferInfo
struct BufferInfo
{
// Buffer Id. If -1 then invalid buffer.
int m_bufferId;
// Offset into the buffer
int m_offset;
bool isValid() const { return m_bufferId != -1; }
};
struct MemoryAnalysis
{
using BufferInfoMap = std::unordered_map<Operation*, BufferInfo>;
using BufferSizeMap = std::unordered_map<unsigned, unsigned>;
// Compute this analysis with the provided operation.
MemoryAnalysis(Operation* op);
BufferInfo getBufferInfo(Operation* op)
{
auto it = m_bufferInfo.find(op);
if (it == m_bufferInfo.end())
{
return {-1, -1};
}
return it->second;
}
void setBufferInfo(Operation* op, BufferInfo bufferInfo) { m_bufferInfo[op] = bufferInfo; }
void setBufferSize(unsigned bufferId, unsigned size)
{
auto it = m_bufferSize.find(bufferId);
if (it != m_bufferSize.end())
{
it->second = (size > it->second) ? size : it->second;
}
else
{
m_bufferSize[bufferId] = size;
}
}
unsigned getBufferSize(unsigned bufferId)
{
auto it = m_bufferSize.find(bufferId);
NGRAPH_CHECK(it != m_bufferSize.end(), "Buffer has no size!");
return it->second;
}
private:
// Records assignment of BufferInfo to each inplace op
BufferInfoMap m_bufferInfo;
// Records buffer size required for each buffer id in bytes
BufferSizeMap m_bufferSize;
};
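// Typical use (sketch): the affine lowering pass obtains this analysis via
// getAnalysis<MemoryAnalysis>(), queries getBufferInfo(op) for each op, and
// for a valid assignment allocates a single i8 buffer of getBufferSize(id)
// bytes and creates memref views into it at the recorded offsets.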
}
@@ -19,7 +19,6 @@
#include "cpu_backend.hpp"
#include "contrib/mlir/backend/pass/affine_lowerer.hpp"
-#include "contrib/mlir/backend/pass/memory_optimization.hpp"
#include "contrib/mlir/utils.hpp"
#include "ngraph/check.hpp"
@@ -160,7 +159,6 @@ void MLIRCPUBackend::init()
void MLIRCPUBackend::codegen()
{
-optimizeNgDialect();
lowerNgDialect();
}
@@ -261,18 +259,3 @@ void MLIRCPUBackend::optimizeAffineDialect()
// Run Std dialect optimizations.
// TODO
}
-void MLIRCPUBackend::optimizeNgDialect()
-{
-mlir::PassManager pm(&m_context);
-mlir::applyPassManagerCLOptions(pm);
-if (clEnableNgInPlaceMemoryOpt)
-{
-pm.addPass(mlir::createMemoryOptimizationPass());
-}
-if (failed(pm.run(m_module.get())))
-{
-NGRAPH_CHECK(false, "MLIR pass manager failed");
-}
-}
@@ -19,11 +19,13 @@
#include "affine_lowerer.hpp"
+#include "contrib/mlir/backend/analysis/memory_analysis.hpp"
#include "contrib/mlir/core/ngraph_dialect/ops.hpp"
#include "contrib/mlir/core/ngraph_dialect/type.hpp"
#include "ngraph/assertion.hpp"
#include <llvm/ADT/DenseSet.h>
+#include <llvm/Support/Debug.h>
#include <mlir/EDSC/Builders.h>
#include <mlir/EDSC/Helpers.h>
#include <mlir/EDSC/Intrinsics.h>
@@ -165,6 +167,8 @@
ValueHandle createZeroConstant(mlir::Type type);
ValueHandle createOneConstant(mlir::Type type);
+bool isInPlaceConcat(mlir::Operation* op, DialectLoweringPass& pass);
/// Conversion from types in the nGraph dialect to the Standard dialect.
class NGraphTypeConverter : public TypeConverter
{
@@ -184,29 +188,25 @@
void runOnModule() override;
SmallVector<Value*, 4> buildOutputDefs(Operation* op, PatternRewriter& rewriter);
-/// Allocates a linear buffer for a temporary tensor
-Value* createTempBuffer(Type type, PatternRewriter& rewriter);
+/// Allocates a linear buffer for a temporary memref that shares its
+/// underlying memory. Used in conjunction with createTempMemref
+Value* createTempBuffer(int bufferId, PatternRewriter& rewriter);
/// Creates an allocation or view of a memref.
/// type MemRef Type
/// buffer Optional buffer value to create view over
/// offset Optional offset into the buffer this view starts at
///
-/// If buffer is null, a new allocation of a memref is created.
-/// Offset is ignored. If buffer is non-null, then we create a temp
-/// view over a pre-allocated buffer (see createTempBuffer)
+/// If buffer is null it allocates a Memref directly and Offset is ignored.
+/// If not, it creates a view over the pre-allocated buffer at the given offset.
Value*
createTempMemref(Type type, Value* buffer, unsigned offset, PatternRewriter& rewriter);
/// Inserts dealloc Ops for each temporary allocated by AllocOp
void insertDeallocs(PatternRewriter& rewriter);
NGraphTypeConverter& getTypeConverter() { return typeConverter; }
+MemoryAnalysis* getMemAnalysis() const { return m_memAnalysis; }
private:
/// Collect a set of patterns to convert from the nGraph dialect to Affine dialect.
void populateNGraphToAffineConversionPatterns(OwningRewritePatternList& patterns);
void findOutputValues();
void insertNoAliasArgAttrs();
@@ -219,7 +219,7 @@
// Track pre-assigned buffers for each Value and re-use it if one is available.
using IdToMemRefMap = std::unordered_map<unsigned, Value*>;
IdToMemRefMap m_id_to_memref;
+MemoryAnalysis* m_memAnalysis;
// TODO: Workaround for findOutputValues and buildOutputDefs. See NGCPU-470.
std::string funcName;
};
@@ -232,6 +232,9 @@
populateNGraphToAffineConversionPatterns(patterns);
+// Get Memory analysis for in-place memory optimizations
+m_memAnalysis = &getAnalysis<MemoryAnalysis>();
// Create target that defines legal ops for nGraph dialect to be lowered to.
ConversionTarget target(getContext());
@@ -336,24 +339,25 @@
// will re-use the same buffer.
auto tensorType = origResult->getType().cast<NGTensorType>();
Value* newResult = nullptr;
-Attribute bufferIdAttr = getBufferId(op);
+auto bufferInfo = m_memAnalysis->getBufferInfo(op);
Type memRefType = typeConverter.convertType(tensorType);
Value* bufferValue = nullptr;
-if (!bufferIdAttr)
+if (!bufferInfo.isValid())
{
// Allocate new memref
newResult = createTempMemref(memRefType, nullptr, 0, rewriter);
}
else
{
-unsigned bufferId = bufferIdAttr.cast<IntegerAttr>().getInt();
+unsigned bufferId = bufferInfo.m_bufferId;
+unsigned offset = bufferInfo.m_offset;
// Re-use a buffer if it exists, else create a new one and update the map
IdToMemRefMap::iterator it = m_id_to_memref.find(bufferId);
if (it == m_id_to_memref.end())
{
// create a new buffer
-bufferValue = createTempBuffer(memRefType, rewriter);
+bufferValue = createTempBuffer(bufferId, rewriter);
m_id_to_memref[bufferId] = bufferValue;
}
else
@@ -361,7 +365,7 @@
bufferValue = it->second;
}
// Create a temp view over the linear buffer
-newResult = createTempMemref(memRefType, bufferValue, 0, rewriter);
+newResult = createTempMemref(memRefType, bufferValue, offset, rewriter);
}
NGRAPH_CHECK(newResult != nullptr, "Temp memref value is not set");
newResults.push_back(newResult);
@@ -370,18 +374,17 @@
return newResults;
}
-Value* DialectLoweringPass::createTempBuffer(Type type, PatternRewriter& rewriter)
+Value* DialectLoweringPass::createTempBuffer(int bufferId, PatternRewriter& rewriter)
{
-MemRefType memRefType = type.cast<MemRefType>();
-NGRAPH_CHECK(memRefType.hasStaticShape(), "Dynamic shapes are not supported");
-// deduce linear buffer shape
-unsigned sizeInBytes = memRefType.getSizeInBits() / 8;
+unsigned sizeInBytes = getMemAnalysis()->getBufferSize(bufferId);
+NGRAPH_CHECK(bufferId >= 0, "Invalid buffer id to allocate");
+NGRAPH_CHECK(sizeInBytes > 0, "Zero buffer allocation?");
+LLVM_DEBUG(llvm::dbgs() << "Allocating buffer of size " << sizeInBytes << " bytes\n");
MemRefType bufferType =
-MemRefType::get({sizeInBytes}, IntegerType::get(8, type.getContext()), {});
+MemRefType::get({sizeInBytes}, IntegerType::get(8, rewriter.getContext()), {});
// TODO: Set alignment
Value* alloc = rewriter.create<mlir::AllocOp>(rewriter.getUnknownLoc(), bufferType);
memRefsToDealloc.push_back(alloc);
@@ -404,7 +407,6 @@
unsigned offset,
PatternRewriter& rewriter)
{
-NGRAPH_CHECK(offset == 0, "Only zero offset is supported");
MemRefType memRefType = type.cast<MemRefType>();
if (buffer)
{
@@ -414,7 +416,7 @@
// linear
// buffer
// This is simply (d0, d1, d2, .. dN-1) --> d0 * S0 + d1 * S1 ... + dN-1 * SN-1
-// Where Si is the stride along the i_th dimension
+// Where Si is the stride along the i_th dimension in elements
auto shape = memRefType.getShape();
SmallVector<int64_t, 4> strides(shape.size(), 0);
strides[shape.size() - 1] = 1;
@@ -1503,6 +1505,71 @@
}
NGRAPH_UNREACHABLE("Unsupported type");
}
// Given a concat op, it will check if dst and operands have
// a valid buffer/offset assignment that will make this op
// valid in-place
bool isInPlaceConcat(mlir::Operation* op, DialectLoweringPass& pass)
{
NGRAPH_CHECK(isa<NGConcatOp>(op), "Expecting concat operation");
auto concat = cast<NGConcatOp>(op);
auto concatAxis = concat.concatenation_axis();
auto result = concat.getResult();
auto shape = (result->getType().cast<NGTensorType>()).getShape();
auto memAnalysis = pass.getMemAnalysis();
BufferInfo bufferInfo = memAnalysis->getBufferInfo(op);
if (!bufferInfo.isValid())
{
// no buffer assignment to dst, nothing to do
return false;
}
auto dstBufferId = bufferInfo.m_bufferId;
auto dstOffset = bufferInfo.m_offset;
LLVM_DEBUG(llvm::dbgs() << ">> Check in-place concat\n");
LLVM_DEBUG(op->dump());
for (auto i = 0; i < shape.size(); i++)
{
if (i == concatAxis)
{
break;
}
if (shape[i] != 1)
{
LLVM_DEBUG(llvm::dbgs() << "Axis FAIL. Skipping instruction\n");
return false;
}
}
LLVM_DEBUG(llvm::dbgs() << "Axis OK\n");
// Check if the buffer id and offsets are consistent with what's expected
LLVM_DEBUG(llvm::dbgs() << "Dst (id, offset) = (" << dstBufferId << ", " << dstOffset
<< ")\n");
// relative offset in the buffer
int opndOffset = 0;
for (auto opnd : op->getOperands())
{
bufferInfo = memAnalysis->getBufferInfo(opnd->getDefiningOp());
auto srcBufferId = bufferInfo.m_bufferId;
auto srcOffset = bufferInfo.m_offset;
LLVM_DEBUG(llvm::dbgs() << "Src (id, offset) = (" << srcBufferId << ", " << srcOffset
<< ")\n");
if (!bufferInfo.isValid() || srcBufferId != dstBufferId ||
srcOffset != (opndOffset + dstOffset))
{
// mismatch in buffer IDs or offsets
LLVM_DEBUG(llvm::dbgs() << "Buffer ID and Offsets FAIL. Skipping instruction\n");
return false;
}
auto tensorType = opnd->getType().cast<NGTensorType>();
opndOffset += tensorType.getNumElements();
}
LLVM_DEBUG(llvm::dbgs() << "Buffer ID and Offsets OK\n");
return true;
}
} // namespace
namespace mlir
...
//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API.
#include "contrib/mlir/core/compiler.hpp"
#include "contrib/mlir/core/ngraph_dialect/ops.hpp"
#include "contrib/mlir/core/ngraph_dialect/type.hpp"
#include "ngraph/assertion.hpp"
#include <llvm/ADT/DenseSet.h>
#include <map>
#include <mlir/EDSC/Builders.h>
#include <mlir/EDSC/Helpers.h>
#include <mlir/EDSC/Intrinsics.h>
#include <mlir/IR/AffineExpr.h>
#include <mlir/IR/IntegerSet.h>
#include <mlir/IR/MLIRContext.h>
#include <mlir/IR/StandardTypes.h>
#include <mlir/Pass/Pass.h>
#include <mlir/Transforms/DialectConversion.h>
// anonymous namespace
// no need to expose any of the following outside of this file
namespace
{
using namespace ngraph::runtime;
using namespace ngraph::runtime::ngmlir;
using namespace mlir;
/// Memory Optimization pass
/// - Tries to perform operations in place where applicable by assigning a virtual buffer ID
/// to values. Those are used later in affine lowering pass to create or re-use memrefs
class MemoryOptimizationPass : public mlir::FunctionPass<MemoryOptimizationPass>
{
public:
MemoryOptimizationPass()
{
m_inplaceOps = {
#define MLIR_OP(OP, INPLACE) {OP::getOperationName().str(), INPLACE},
#include "contrib/mlir/backend/pass/op_lowerers.inc"
};
}
void runOnFunction() override;
private:
bool isSafeInPlace(mlir::Operation* op);
std::unordered_map<std::string, bool> m_inplaceOps;
static unsigned bufferId;
};
unsigned MemoryOptimizationPass::bufferId = 0;
void MemoryOptimizationPass::runOnFunction()
{
auto f = getFunction();
f.walk([&](mlir::Operation* op) {
if (!isSafeInPlace(op))
{
return;
}
if (op->getNumResults() > 1)
{
return;
}
auto defVal = op->getResult(0);
// If the defined value is an output of the sub-graph, cannot do it in place
for (auto use = defVal->use_begin(); use != defVal->use_end(); use++)
{
auto useOp = use->getOwner();
if (isa<NGReturnOp>(useOp))
{
return;
}
}
// Check if we can re-use the buffer of any of the inputs. Conjunction of the following:
// - single use value or all uses in the current op
// - not an input argument
// TODO: Check instead if last post-dominating (dataflow-wise) use.
for (auto opnd = op->operand_begin(); opnd != op->operand_end(); opnd++)
{
auto val = *opnd;
// we optimize if the val has one use or if all uses are in the current op
bool optimize;
optimize = val->hasOneUse();
if (!optimize)
{
optimize = true;
// check if all uses are in the current op
for (auto use = val->use_begin(); use != val->use_end(); use++)
{
if (use->getOwner() != op)
{
optimize = false;
}
}
}
if (optimize)
{
// do we have a buffer id attached to this value
auto defOp = val->getDefiningOp();
// If no defining op, then this is a block arg, skip operand
if (!defOp)
{
continue;
}
IntegerAttr attr = getBufferId(defOp);
if (!attr)
{
// attach a new buffer id
attr = setBufferId(defOp, this->bufferId++);
}
// propagate attribute to dst, and we are done
setBufferId(op, attr);
return;
}
}
});
}
bool MemoryOptimizationPass::isSafeInPlace(mlir::Operation* op)
{
auto it = m_inplaceOps.find(op->getName().getStringRef().str());
return it != m_inplaceOps.end() ? it->second : false;
}
}
namespace mlir
{
std::unique_ptr<Pass> createMemoryOptimizationPass()
{
return std::make_unique<MemoryOptimizationPass>();
}
} // namespace mlir
@@ -27,7 +27,7 @@
MLIR_OP(NGAddOp , true )
MLIR_OP(NGArgMaxRedOp , false )
MLIR_OP(NGArgMinRedOp , false )
-MLIR_OP(NGConcatOp , false )
+MLIR_OP(NGConcatOp , true )
MLIR_OP(NGConvolutionOp , false )
MLIR_OP(NGDivOp , true )
MLIR_OP(NGDotOp , false )
...
@@ -309,28 +309,6 @@ mlir::LogicalResult verifyOp(NGConvolutionOp* op)
return mlir::success();
}
-static std::string getBufferIdAttrName()
-{
-return "ng.buffer_id";
-}
-void setBufferId(mlir::Operation* op, mlir::IntegerAttr attr)
-{
-op->setAttr(getBufferIdAttrName(), attr);
-}
-mlir::IntegerAttr setBufferId(mlir::Operation* op, unsigned val)
-{
-auto attr = mlir::IntegerAttr::get(IntegerType::get(32, op->getContext()), val);
-setBufferId(op, attr);
-return attr;
-}
-mlir::IntegerAttr getBufferId(mlir::Operation* op)
-{
-return op->getAttrOfType<mlir::IntegerAttr>(getBufferIdAttrName());
-}
namespace mlir
{
#include "ops_interfaces.cpp.inc"
...
@@ -41,7 +41,3 @@ namespace mlir
#include "ops.h.inc"
#undef GET_OP_CLASSES
}
-void setBufferId(mlir::Operation* op, mlir::IntegerAttr attr);
-mlir::IntegerAttr setBufferId(mlir::Operation* op, unsigned val);
-mlir::IntegerAttr getBufferId(mlir::Operation* op);
@@ -349,6 +349,143 @@ NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_propagate_2d_tensor)
(vector<float>{3, 7, 2}), read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_tree_1)
{
Shape shape{1, 2, 2};
Shape shape_r{1, 4, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto add1 = make_shared<op::Add>(A, B);
auto add2 = make_shared<op::Add>(A, B);
auto concat = make_shared<op::Concat>(NodeVector{add1, add2}, 1);
auto f = make_shared<Function>(make_shared<op::Add>(concat, concat), ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{1, 1, 1, 1});
auto b = backend->create_tensor(element::f32, shape);
copy_data(b, vector<float>{1, 1, 1, 1});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
vector<float> expected;
expected.resize(8, 4);
EXPECT_TRUE(test::all_close_f(expected, read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_tree_2)
{
Shape shape{1, 2, 2};
Shape shape_r{1, 8, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto add1 = make_shared<op::Add>(A, B);
auto add2 = make_shared<op::Add>(A, B);
auto concat1 = make_shared<op::Concat>(NodeVector{add1, add2}, 1);
auto concat2 = make_shared<op::Concat>(NodeVector{add1, add2}, 1);
auto concat12 = make_shared<op::Concat>(NodeVector{concat1, concat2}, 1);
auto f = make_shared<Function>(make_shared<op::Add>(concat12, concat12), ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{1, 1, 1, 1});
auto b = backend->create_tensor(element::f32, shape);
copy_data(b, vector<float>{1, 1, 1, 1});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
vector<float> expected;
expected.resize(16, 4);
EXPECT_TRUE(test::all_close_f(expected, read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_tree_3)
{
Shape shape{1, 2, 2};
Shape shape_r{1, 16, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto concat1 = make_shared<op::Concat>(NodeVector{A, B}, 1);
auto concat2 = make_shared<op::Concat>(NodeVector{A, B}, 1);
auto concat3 = make_shared<op::Concat>(NodeVector{A, B}, 1);
auto concat4 = make_shared<op::Concat>(NodeVector{A, B}, 1);
auto concat12 = make_shared<op::Concat>(NodeVector{concat1, concat2}, 1);
auto concat34 = make_shared<op::Concat>(NodeVector{concat3, concat4}, 1);
auto concat14 = make_shared<op::Concat>(NodeVector{concat12, concat34}, 1);
auto f = make_shared<Function>(make_shared<op::Add>(concat14, concat14), ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{1, 1, 1, 1});
auto b = backend->create_tensor(element::f32, shape);
copy_data(b, vector<float>{1, 1, 1, 1});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
vector<float> expected;
expected.resize(32, 2);
EXPECT_TRUE(test::all_close_f(expected, read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_add_concat)
{
Shape shape{2, 2};
Shape shape_r{4, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto add1 = make_shared<op::Add>(A, B);
auto add2 = make_shared<op::Add>(add1, add1);
auto concat = make_shared<op::Concat>(NodeVector{add1, add2}, 0);
auto add3 = make_shared<op::Add>(concat, concat);
auto f = make_shared<Function>(add3, ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{1, 1, 1, 1});
auto b = backend->create_tensor(element::f32, shape);
copy_data(b, vector<float>{1, 1, 1, 1});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
vector<float> expected = {4, 4, 4, 4, 8, 8, 8, 8};
EXPECT_TRUE(test::all_close_f(expected, read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_add_concat_2)
{
Shape shape{1, 2, 2};
Shape shape_r{1, 6, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto add1 = make_shared<op::Add>(A, B);
auto add2 = make_shared<op::Add>(A, B);
auto add3 = make_shared<op::Add>(A, B);
auto add4 = make_shared<op::Add>(A, B);
auto add5 = make_shared<op::Add>(A, B);
auto concat1 = make_shared<op::Concat>(NodeVector{add1, add2, add3}, 1);
auto concat2 = make_shared<op::Concat>(NodeVector{add4, add2, add5}, 1);
auto add6 = make_shared<op::Add>(concat1, concat2);
auto f = make_shared<Function>(add6, ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{1, 1, 1, 1});
auto b = backend->create_tensor(element::f32, shape);
copy_data(b, vector<float>{1, 1, 1, 1});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
vector<float> expected = {4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4};
EXPECT_TRUE(test::all_close_f(expected, read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
// from numpy import *
// a=linspace(1,2*3*4*3*2,2*3*4*3*2)
// b=linspace(1000+1,1000+2*3*3*3*2,2*3*3*3*2)
...
// RUN: ngraph-opt %s --split-input-file --ngraph-memory-opt --ngraph-memory-opt-concat --ngraph-memory-opt-eltwise -convert-ngraph-to-affine | FileCheck %s
// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 * 2 + d1)
// CHECK-LABEL: test0
// CHECK: %[[B:.*]] = alloc() : memref<16xi8>
// CHECK: std.view %[[B]][][] : memref<16xi8> to memref<2x2xf32, #[[MAP0]]>
// CHECK: std.view %[[B]][][] : memref<16xi8> to memref<2x2xf32, #[[MAP0]]>
// CHECK: std.view %[[B]][][] : memref<16xi8> to memref<2x2xf32, #[[MAP0]]>
// CHECK: dealloc %[[B]] : memref<16xi8>
func @test0(%arg0: !ng.tensor<2x2xf32>, %arg1: !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> {
%0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32>
%1 = "ng.add"(%0, %0) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32>
%2 = "ng.add"(%1, %1) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32>
%3 = "ng.add"(%2, %2) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32>
"ng.return"(%3) : (!ng.tensor<2x2xf32>) -> ()
}
// -----
// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 * 2 + d1)
// CHECK-DAG: #[[MAP1:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 * 2 + d1 + 4)
// CHECK-LABEL: test1
// CHECK: %[[B:.*]] = alloc() : memref<32xi8>
// CHECK: std.view %[[B]][][] : memref<32xi8> to memref<2x2xf32, #[[MAP0]]>
// CHECK: std.view %[[B]][][] : memref<32xi8> to memref<2x2xf32, #[[MAP1]]>
// CHECK: std.view %[[B]][][] : memref<32xi8> to memref<4x2xf32, #[[MAP0]]>
// CHECK: dealloc %[[B]] : memref<32xi8>
func @test1(%arg0: !ng.tensor<2x2xf32>, %arg1: !ng.tensor<2x2xf32>) -> !ng.tensor<4x2xf32> {
%0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32>
%1 = "ng.add"(%0, %0) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32>
%2 = "ng.concat"(%0, %1) {concatenation_axis = 0} : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<4x2xf32>
%3 = "ng.add"(%2, %2) : (!ng.tensor<4x2xf32>, !ng.tensor<4x2xf32>) -> !ng.tensor<4x2xf32>
"ng.return"(%3) : (!ng.tensor<4x2xf32>) -> ()
}
// -----
// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2)
// CHECK-DAG: #[[MAP1:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2 + 4)
// CHECK-DAG: #[[MAP2:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2)
// CHECK-DAG: #[[MAP3:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 16 + d1 * 2 + d2)
// CHECK-LABEL: test2
// CHECK: %[[B1:.*]] = alloc() : memref<32xi8>
// CHECK: std.view %[[B1]][][] : memref<32xi8> to memref<1x2x2xf32, #[[MAP0]]>
// CHECK: std.view %[[B1]][][] : memref<32xi8> to memref<1x2x2xf32, #[[MAP1]]>
// CHECK: std.view %[[B1]][][] : memref<32xi8> to memref<1x4x2xf32, #[[MAP2]]>
// CHECK: %[[B2:.*]] = alloc() : memref<64xi8>
// CHECK: std.view %[[B2]][][] : memref<64xi8> to memref<1x8x2xf32, #[[MAP3]]>
// CHECK: std.view %[[B2]][][] : memref<64xi8> to memref<1x8x2xf32, #[[MAP3]]>
func @test2(%arg0: !ng.tensor<1x2x2xf32>, %arg1: !ng.tensor<1x2x2xf32>) -> (!ng.tensor<1x4x2xf32>, !ng.tensor<1x8x2xf32>){
%0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
%1 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
// inplace
%2 = "ng.concat"(%0, %1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32>
// cannot be done inplace, %3 and %2 cannot alias
%3 = "ng.concat"(%0, %1, %2) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x8x2xf32>
// inplace destructive. %3 and %2 cannot alias
%4 = "ng.add"(%3, %3) : (!ng.tensor<1x8x2xf32>, !ng.tensor<1x8x2xf32>) -> !ng.tensor<1x8x2xf32>
// no inplace, result is output
%5 = "ng.add"(%2, %2) : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x4x2xf32>
// no inplace, result is output
%6 = "ng.add"(%4, %4) : (!ng.tensor<1x8x2xf32>, !ng.tensor<1x8x2xf32>) -> !ng.tensor<1x8x2xf32>
"ng.return"(%5, %6) : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x8x2xf32>) -> ()
}
// -----
// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2)
// CHECK-DAG: #[[MAP8:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2 + 8)
// CHECK-DAG: #[[MAP9:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2 + 16)
// CHECK-DAG: #[[MAP10:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2 + 24)
// CHECK-DAG: #[[MAP11:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 16 + d1 * 2 + d2)
// CHECK-DAG: #[[MAP12:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 16 + d1 * 2 + d2 + 16)
// CHECK-DAG: #[[MAP13:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 32 + d1 * 2 + d2)
// CHECK-LABEL: test3
// CHECK: %[[B:.*]] = alloc() : memref<128xi8>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP0]]>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP8]]>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP9]]>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP10]]>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x8x2xf32, #[[MAP11]]>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x8x2xf32, #[[MAP12]]>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x16x2xf32, #[[MAP13]]>
// CHECK: dealloc %[[B]] : memref<128xi8>
func @test3(%arg0: !ng.tensor<1x2x2xf32>, %arg1: !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x16x2xf32> {
%0 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32>
%1 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32>
%2 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32>
%3 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32>
%4 = "ng.concat"(%0, %1) {concatenation_axis = 1} : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x8x2xf32>
%5 = "ng.concat"(%2, %3) {concatenation_axis = 1} : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x8x2xf32>
%6 = "ng.concat"(%4, %5) {concatenation_axis = 1} : (!ng.tensor<1x8x2xf32>, !ng.tensor<1x8x2xf32>) -> !ng.tensor<1x16x2xf32>
%7 = "ng.add"(%6, %6) : (!ng.tensor<1x16x2xf32>, !ng.tensor<1x16x2xf32>) -> !ng.tensor<1x16x2xf32>
"ng.return"(%7) : (!ng.tensor<1x16x2xf32>) -> ()
}
// -----
// CHECK-DAG: #[[MAP4:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2 + 4)
// CHECK-DAG: #[[MAP5:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2)
// CHECK-DAG: #[[MAP6:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2 + 8)
// CHECK-DAG: #[[MAP12:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 12 + d1 * 2 + d2)
// CHECK-LABEL: test4
// CHECK: %[[B1:.*]] = alloc() : memref<1x2x2xf32>
// CHECK: %[[B2:.*]] = alloc() : memref<48xi8>
// CHECK: std.view %[[B2]][][] : memref<48xi8> to memref<1x2x2xf32, #[[MAP4]]>
// CHECK: %[[B3:.*]] = alloc() : memref<1x2x2xf32>
// CHECK: std.view %[[B2]][][] : memref<48xi8> to memref<1x2x2xf32, #[[MAP5]]>
// CHECK: std.view %[[B2]][][] : memref<48xi8> to memref<1x2x2xf32, #[[MAP6]]>
// CHECK: %[[B4:.*]] = alloc() : memref<1x6x2xf32>
// CHECK: std.view %[[B2]][][] : memref<48xi8> to memref<1x6x2xf32, #[[MAP12]]>
// CHECK: dealloc %[[B1]] : memref<1x2x2xf32>
// CHECK: dealloc %[[B2]] : memref<48xi8>
// CHECK: dealloc %[[B3]] : memref<1x2x2xf32>
// CHECK: dealloc %[[B4]] : memref<1x6x2xf32>
func @test4(%arg0: !ng.tensor<1x2x2xf32>, %arg1: !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x6x2xf32> {
%S0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
%S1 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
%S2 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
%R0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
%R2 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
// pre-existing assignment of S1 in %D2 prevents assignment for %D1 concat
%D1 = "ng.concat"(%S0, %S1, %S2) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x6x2xf32>
%D2 = "ng.concat"(%R0, %S1, %R2) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x6x2xf32>
%D3 = "ng.add"(%D1, %D2) : (!ng.tensor<1x6x2xf32>, !ng.tensor<1x6x2xf32>) -> !ng.tensor<1x6x2xf32>
"ng.return"(%D3) : (!ng.tensor<1x6x2xf32>) -> ()
}