Commit 6de4893b authored by Nagy Mostafa, committed by Sang Ik Lee

[MLIR] In-place memory optimization for elt-wise and concat ops. (#3832)

* AliasMap WIP

* Added liveness info

* WIP

* WIP: Tests

* WIP: LIT tests

* Added knobs for mem optimization pass. More LIT tests

* Revert affine_lowerer change

* More elaborate comment

* Minor fixes

* style-apply

* Rename liveness

* Switch to Analysis framework

* Fix optimization conditions

* Remove LIT test

* style

* Switch to equivalence relationship impl of non-alias relationship

* refined comment

* Switch non-alias to equivalence relationship

* Fix bad merge

* Adding tests. WIP

* Added buffer size tracking and unit-tests

* Added LIT and unit-tests

* Turn optimization ON

* style

* fix unit-tests

* Fix useCount

* Fix copyright and typo

* Refine few comments, remove new lines

* style fix
Co-authored-by: Scott Cyphers <diyessi@users.noreply.github.com>
Co-authored-by: Sang Ik Lee <sang.ik.lee@intel.com>
parent b3db038e
@@ -25,7 +25,7 @@ add_subdirectory(tools/ngraph-opt)
set(SRC
backend/cpu/cpu_backend.cpp
backend/pass/affine_lowerer.cpp
backend/pass/memory_optimization.cpp
backend/analysis/memory_analysis.cpp
core/compiler.cpp
core/ngraph_dialect/dialect.cpp
core/ngraph_dialect/type.cpp
......
This diff is collapsed.
@@ -20,8 +20,60 @@
#pragma once
#include <mlir/Pass/Pass.h>
#include <unordered_map>
#include "ngraph/check.hpp"
namespace mlir
{
std::unique_ptr<Pass> createMemoryOptimizationPass();
// BufferInfo
struct BufferInfo
{
// Buffer Id. If -1 then invalid buffer.
int m_bufferId;
// Offset into the buffer
int m_offset;
bool isValid() const { return m_bufferId != -1; }
};
struct MemoryAnalysis
{
using BufferInfoMap = std::unordered_map<Operation*, BufferInfo>;
using BufferSizeMap = std::unordered_map<unsigned, unsigned>;
// Compute this analysis with the provided operation.
MemoryAnalysis(Operation* op);
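// Returns the buffer assignment for op, or an invalid BufferInfo if none was recorded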
BufferInfo getBufferInfo(Operation* op)
{
auto it = m_bufferInfo.find(op);
if (it == m_bufferInfo.end())
{
return {-1, -1};
}
return it->second;
}
void setBufferInfo(Operation* op, BufferInfo bufferInfo) { m_bufferInfo[op] = bufferInfo; }
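// Records the size in bytes needed for bufferId; if a size was already recorded, keeps the maximum of the two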
void setBufferSize(unsigned bufferId, unsigned size)
{
auto it = m_bufferSize.find(bufferId);
if (it != m_bufferSize.end())
{
it->second = (size > it->second) ? size : it->second;
}
else
{
m_bufferSize[bufferId] = size;
}
}
unsigned getBufferSize(unsigned bufferId)
{
auto it = m_bufferSize.find(bufferId);
NGRAPH_CHECK(it != m_bufferSize.end(), "Buffer has no size!");
return it->second;
}
private:
// Records assignment of BufferInfo to each inplace op
BufferInfoMap m_bufferInfo;
// Records buffer size required for each buffer id in bytes
BufferSizeMap m_bufferSize;
};
}
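For context, here is a minimal sketch (not part of this commit) of how a client can query the analysis. The helper name assignBuffers and its body are illustrative only; MemoryAnalysis, BufferInfo, getBufferInfo and getBufferSize are the APIs declared above, and the header itself is assumed to be included.

#include <mlir/IR/Function.h>

// Illustrative only: walk a function and inspect the buffer assignments
// produced by MemoryAnalysis.
void assignBuffers(mlir::FuncOp f, mlir::MemoryAnalysis& analysis)
{
    f.walk([&](mlir::Operation* op) {
        mlir::BufferInfo info = analysis.getBufferInfo(op);
        if (!info.isValid())
        {
            // No in-place assignment; this op gets its own allocation.
            return;
        }
        // Ops sharing info.m_bufferId are lowered as views over one linear
        // i8 buffer of at least getBufferSize(m_bufferId) bytes, each view
        // starting at its own element offset info.m_offset.
        unsigned bytes = analysis.getBufferSize(info.m_bufferId);
        (void)bytes; // a real client would create/size the shared buffer here
    });
}

In the commit itself, DialectLoweringPass obtains the analysis with getAnalysis<MemoryAnalysis>() and consumes it in buildOutputDefs and createTempBuffer, as shown in the affine_lowerer.cpp diff below.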
@@ -19,7 +19,6 @@
#include "cpu_backend.hpp"
#include "contrib/mlir/backend/pass/affine_lowerer.hpp"
#include "contrib/mlir/backend/pass/memory_optimization.hpp"
#include "contrib/mlir/utils.hpp"
#include "ngraph/check.hpp"
@@ -160,7 +159,6 @@ void MLIRCPUBackend::init()
void MLIRCPUBackend::codegen()
{
optimizeNgDialect();
lowerNgDialect();
}
@@ -261,18 +259,3 @@ void MLIRCPUBackend::optimizeAffineDialect()
// Run Std dialect optimizations.
// TODO
}
void MLIRCPUBackend::optimizeNgDialect()
{
mlir::PassManager pm(&m_context);
mlir::applyPassManagerCLOptions(pm);
if (clEnableNgInPlaceMemoryOpt)
{
pm.addPass(mlir::createMemoryOptimizationPass());
}
if (failed(pm.run(m_module.get())))
{
NGRAPH_CHECK(false, "MLIR pass manager failed");
}
}
@@ -19,11 +19,13 @@
#include "affine_lowerer.hpp"
#include "contrib/mlir/backend/analysis/memory_analysis.hpp"
#include "contrib/mlir/core/ngraph_dialect/ops.hpp"
#include "contrib/mlir/core/ngraph_dialect/type.hpp"
#include "ngraph/assertion.hpp"
#include <llvm/ADT/DenseSet.h>
#include <llvm/Support/Debug.h>
#include <mlir/EDSC/Builders.h>
#include <mlir/EDSC/Helpers.h>
#include <mlir/EDSC/Intrinsics.h>
@@ -165,6 +167,8 @@ namespace
ValueHandle createZeroConstant(mlir::Type type);
ValueHandle createOneConstant(mlir::Type type);
bool isInPlaceConcat(mlir::Operation* op, DialectLoweringPass& pass);
/// Conversion from types in the nGraph dialect to the Standard dialect.
class NGraphTypeConverter : public TypeConverter
{
@@ -184,29 +188,25 @@ namespace
void runOnModule() override;
SmallVector<Value*, 4> buildOutputDefs(Operation* op, PatternRewriter& rewriter);
/// Allocates a linear buffer for a temporary tensor
Value* createTempBuffer(Type type, PatternRewriter& rewriter);
/// Allocates a linear byte buffer whose underlying memory can be shared by
/// multiple temporary memrefs. Used in conjunction with createTempMemref
Value* createTempBuffer(int bufferId, PatternRewriter& rewriter);
/// Creates an allocation or view of a memref.
/// type MemRef Type
/// buffer Optional buffer value to create view over
/// offset Optional offset into the buffer this view starts at
///
/// If buffer is null, a new allocation of a memref is created.
/// Offset is ignored. If buffer is non-null, then we create a temp
/// view over a pre-allocated buffer (see createTempBuffer)
/// If buffer is null, it allocates a memref directly and offset is ignored.
/// Otherwise, it creates a view over the pre-allocated buffer at the given offset.
Value*
createTempMemref(Type type, Value* buffer, unsigned offset, PatternRewriter& rewriter);
/// Inserts dealloc Ops for each temporary allocated by AllocOp
void insertDeallocs(PatternRewriter& rewriter);
NGraphTypeConverter& getTypeConverter() { return typeConverter; }
MemoryAnalysis* getMemAnalysis() const { return m_memAnalysis; }
private:
/// Collect a set of patterns to convert from the nGraph dialect to Affine dialect.
void populateNGraphToAffineConversionPatterns(OwningRewritePatternList& patterns);
void findOutputValues();
void insertNoAliasArgAttrs();
@@ -219,7 +219,7 @@ namespace
// Track pre-assigned buffers for each Value and re-use it if one is available.
using IdToMemRefMap = std::unordered_map<unsigned, Value*>;
IdToMemRefMap m_id_to_memref;
MemoryAnalysis* m_memAnalysis;
// TODO: Workaround for findOutputValues and buildOutputDefs. See NGCPU-470.
std::string funcName;
};
@@ -232,6 +232,9 @@ namespace
populateNGraphToAffineConversionPatterns(patterns);
// Get Memory analysis for in-place memory optimizations
m_memAnalysis = &getAnalysis<MemoryAnalysis>();
// Create target that defines legal ops for nGraph dialect to be lowered to.
ConversionTarget target(getContext());
@@ -336,24 +339,25 @@ namespace
// will re-use the same buffer.
auto tensorType = origResult->getType().cast<NGTensorType>();
Value* newResult = nullptr;
Attribute bufferIdAttr = getBufferId(op);
auto bufferInfo = m_memAnalysis->getBufferInfo(op);
Type memRefType = typeConverter.convertType(tensorType);
Value* bufferValue = nullptr;
if (!bufferIdAttr)
if (!bufferInfo.isValid())
{
// Allocate new memref
newResult = createTempMemref(memRefType, nullptr, 0, rewriter);
}
else
{
unsigned bufferId = bufferIdAttr.cast<IntegerAttr>().getInt();
unsigned bufferId = bufferInfo.m_bufferId;
unsigned offset = bufferInfo.m_offset;
// Re-use a buffer if it exists, else create a new one and update the map
IdToMemRefMap::iterator it = m_id_to_memref.find(bufferId);
if (it == m_id_to_memref.end())
{
// create a new buffer
bufferValue = createTempBuffer(memRefType, rewriter);
bufferValue = createTempBuffer(bufferId, rewriter);
m_id_to_memref[bufferId] = bufferValue;
}
else
@@ -361,7 +365,7 @@ namespace
bufferValue = it->second;
}
// Create a temp view over the linear buffer
newResult = createTempMemref(memRefType, bufferValue, 0, rewriter);
newResult = createTempMemref(memRefType, bufferValue, offset, rewriter);
}
NGRAPH_CHECK(newResult != nullptr, "Temp memref value is not set");
newResults.push_back(newResult);
@@ -370,18 +374,17 @@ namespace
return newResults;
}
Value* DialectLoweringPass::createTempBuffer(Type type, PatternRewriter& rewriter)
Value* DialectLoweringPass::createTempBuffer(int bufferId, PatternRewriter& rewriter)
{
MemRefType memRefType = type.cast<MemRefType>();
NGRAPH_CHECK(memRefType.hasStaticShape(), "Dynamic shapes are not supported");
// deduce linear buffer shape
unsigned sizeInBytes = memRefType.getSizeInBits() / 8;
unsigned sizeInBytes = getMemAnalysis()->getBufferSize(bufferId);
NGRAPH_CHECK(bufferId >= 0, "Invalid buffer id to allocate");
NGRAPH_CHECK(sizeInBytes > 0, "Zero buffer allocation?");
LLVM_DEBUG(llvm::dbgs() << "Allocating buffer of size " << sizeInBytes << " bytes\n");
MemRefType bufferType =
MemRefType::get({sizeInBytes}, IntegerType::get(8, type.getContext()), {});
MemRefType::get({sizeInBytes}, IntegerType::get(8, rewriter.getContext()), {});
// TODO: Set alignment
Value* alloc = rewriter.create<mlir::AllocOp>(rewriter.getUnknownLoc(), bufferType);
memRefsToDealloc.push_back(alloc);
@@ -404,7 +407,6 @@ namespace
unsigned offset,
PatternRewriter& rewriter)
{
NGRAPH_CHECK(offset == 0, "Only zero offset is supported");
MemRefType memRefType = type.cast<MemRefType>();
if (buffer)
{
@@ -414,7 +416,7 @@ namespace
// linear
// buffer
// This is simply (d0, d1, d2, .. dN-1) --> d0 * S0 + d1 * S1 ... + dN-1 * SN-1
// Where Si is the stride along the i_th dimension
// Where Si is the stride along the i_th dimension in elements
auto shape = memRefType.getShape();
SmallVector<int64_t, 4> strides(shape.size(), 0);
strides[shape.size() - 1] = 1;
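To make this map concrete (an illustrative instance, matching the LIT checks at the end of this diff): a memref<2x2xf32> view placed at element offset 4 in the shared buffer, i.e. 16 bytes in, right after another 2x2xf32, gets the index map (d0, d1) -> (d0 * 2 + d1 + 4), while the view at offset 0 keeps (d0, d1) -> (d0 * 2 + d1); these are exactly the #MAP1 and #MAP0 patterns checked in the memory-optimization LIT test below.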
@@ -1503,6 +1505,71 @@ namespace
}
NGRAPH_UNREACHABLE("Unsupported type");
}
// Given a concat op, check whether the destination and the operands have
// a valid buffer/offset assignment that makes this op a valid
// in-place concat
bool isInPlaceConcat(mlir::Operation* op, DialectLoweringPass& pass)
{
NGRAPH_CHECK(isa<NGConcatOp>(op), "Expecting concat operation");
auto concat = cast<NGConcatOp>(op);
auto concatAxis = concat.concatenation_axis();
auto result = concat.getResult();
auto shape = (result->getType().cast<NGTensorType>()).getShape();
auto memAnalysis = pass.getMemAnalysis();
BufferInfo bufferInfo = memAnalysis->getBufferInfo(op);
if (!bufferInfo.isValid())
{
// no buffer assignment to dst, nothing to do
return false;
}
auto dstBufferId = bufferInfo.m_bufferId;
auto dstOffset = bufferInfo.m_offset;
LLVM_DEBUG(llvm::dbgs() << ">> Check in-place concat\n");
LLVM_DEBUG(op->dump());
for (auto i = 0; i < shape.size(); i++)
{
if (i == concatAxis)
{
break;
}
if (shape[i] != 1)
{
LLVM_DEBUG(llvm::dbgs() << "Axis FAIL. Skipping instruction\n");
return false;
}
}
LLVM_DEBUG(llvm::dbgs() << "Axis OK\n");
// Check if the buffer id and offsets are consistent with what's expected
LLVM_DEBUG(llvm::dbgs() << "Dst (id, offset) = (" << dstBufferId << ", " << dstOffset
<< ")\n");
// relative offset in the buffer
int opndOffset = 0;
for (auto opnd : op->getOperands())
{
bufferInfo = memAnalysis->getBufferInfo(opnd->getDefiningOp());
auto srcBufferId = bufferInfo.m_bufferId;
auto srcOffset = bufferInfo.m_offset;
LLVM_DEBUG(llvm::dbgs() << "Src (id, offset) = (" << srcBufferId << ", " << srcOffset
<< ")\n");
if (!bufferInfo.isValid() || srcBufferId != dstBufferId ||
srcOffset != (opndOffset + dstOffset))
{
// mismatch in buffer IDs or offsets
LLVM_DEBUG(llvm::dbgs() << "Buffer ID and Offsets FAIL. Skipping instruction\n");
return false;
}
auto tensorType = opnd->getType().cast<NGTensorType>();
opndOffset += tensorType.getNumElements();
}
LLVM_DEBUG(llvm::dbgs() << "Buffer ID and Offsets OK\n");
return true;
}
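A worked instance of this check (illustrative numbers): concatenating two 1x2x2xf32 operands along axis 1 yields a 1x4x2 destination. If the destination is assigned (bufferId, offset) = (b, 0), the check passes only when the first operand is assigned (b, 0) and the second (b, 4), since each operand contributes getNumElements() = 4 to opndOffset. Any other assignment, or a missing one, means the concat is not done in place and the operands are copied into the destination instead. This is the situation exercised by test2 in the LIT file at the end of this diff.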
} // namespace
namespace mlir
......
//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API.
#include "contrib/mlir/core/compiler.hpp"
#include "contrib/mlir/core/ngraph_dialect/ops.hpp"
#include "contrib/mlir/core/ngraph_dialect/type.hpp"
#include "ngraph/assertion.hpp"
#include <llvm/ADT/DenseSet.h>
#include <map>
#include <mlir/EDSC/Builders.h>
#include <mlir/EDSC/Helpers.h>
#include <mlir/EDSC/Intrinsics.h>
#include <mlir/IR/AffineExpr.h>
#include <mlir/IR/IntegerSet.h>
#include <mlir/IR/MLIRContext.h>
#include <mlir/IR/StandardTypes.h>
#include <mlir/Pass/Pass.h>
#include <mlir/Transforms/DialectConversion.h>
// anonymous namespace
// no need to expose any of the following outside of this file
namespace
{
using namespace ngraph::runtime;
using namespace ngraph::runtime::ngmlir;
using namespace mlir;
/// Memory Optimization pass
/// - Tries to perform operations in place where applicable by assigning a virtual buffer ID
/// to values. Those are used later in affine lowering pass to create or re-use memrefs
class MemoryOptimizationPass : public mlir::FunctionPass<MemoryOptimizationPass>
{
public:
MemoryOptimizationPass()
{
m_inplaceOps = {
#define MLIR_OP(OP, INPLACE) {OP::getOperationName().str(), INPLACE},
#include "contrib/mlir/backend/pass/op_lowerers.inc"
};
}
void runOnFunction() override;
private:
bool isSafeInPlace(mlir::Operation* op);
std::unordered_map<std::string, bool> m_inplaceOps;
static unsigned bufferId;
};
unsigned MemoryOptimizationPass::bufferId = 0;
void MemoryOptimizationPass::runOnFunction()
{
auto f = getFunction();
f.walk([&](mlir::Operation* op) {
if (!isSafeInPlace(op))
{
return;
}
if (op->getNumResults() > 1)
{
return;
}
auto defVal = op->getResult(0);
// If the defined value is an output of the sub-graph, cannot do it in place
for (auto use = defVal->use_begin(); use != defVal->use_end(); use++)
{
auto useOp = use->getOwner();
if (isa<NGReturnOp>(useOp))
{
return;
}
}
// Check if we can re-use the buffer of any of the inputs. Conjunction of the following:
// - single use value or all uses in the current op
// - not an input argument
// TODO: Check instead if last post-dominating (dataflow-wise) use.
for (auto opnd = op->operand_begin(); opnd != op->operand_end(); opnd++)
{
auto val = *opnd;
// we optimize if the val has one use or if all uses are in the current op
bool optimize;
optimize = val->hasOneUse();
if (!optimize)
{
optimize = true;
// check if all uses are in the current op
for (auto use = val->use_begin(); use != val->use_end(); use++)
{
if (use->getOwner() != op)
{
optimize = false;
}
}
}
if (optimize)
{
// do we have a buffer id attached to this value
auto defOp = val->getDefiningOp();
// If no defining op, then this is a block arg, skip operand
if (!defOp)
{
continue;
}
IntegerAttr attr = getBufferId(defOp);
if (!attr)
{
// attach a new buffer id
attr = setBufferId(defOp, this->bufferId++);
}
// propagate attribute to dst, and we are done
setBufferId(op, attr);
return;
}
}
});
}
bool MemoryOptimizationPass::isSafeInPlace(mlir::Operation* op)
{
auto it = m_inplaceOps.find(op->getName().getStringRef().str());
return it != m_inplaceOps.end() ? it->second : false;
}
}
namespace mlir
{
std::unique_ptr<Pass> createMemoryOptimizationPass()
{
return std::make_unique<MemoryOptimizationPass>();
}
} // namespace mlir
@@ -27,7 +27,7 @@
MLIR_OP(NGAddOp , true )
MLIR_OP(NGArgMaxRedOp , false )
MLIR_OP(NGArgMinRedOp , false )
MLIR_OP(NGConcatOp , false )
MLIR_OP(NGConcatOp , true )
MLIR_OP(NGConvolutionOp , false )
MLIR_OP(NGDivOp , true )
MLIR_OP(NGDotOp , false )
......
@@ -309,28 +309,6 @@ mlir::LogicalResult verifyOp(NGConvolutionOp* op)
return mlir::success();
}
static std::string getBufferIdAttrName()
{
return "ng.buffer_id";
}
void setBufferId(mlir::Operation* op, mlir::IntegerAttr attr)
{
op->setAttr(getBufferIdAttrName(), attr);
}
mlir::IntegerAttr setBufferId(mlir::Operation* op, unsigned val)
{
auto attr = mlir::IntegerAttr::get(IntegerType::get(32, op->getContext()), val);
setBufferId(op, attr);
return attr;
}
mlir::IntegerAttr getBufferId(mlir::Operation* op)
{
return op->getAttrOfType<mlir::IntegerAttr>(getBufferIdAttrName());
}
namespace mlir
{
#include "ops_interfaces.cpp.inc"
......
@@ -41,7 +41,3 @@ namespace mlir
#include "ops.h.inc"
#undef GET_OP_CLASSES
}
void setBufferId(mlir::Operation* op, mlir::IntegerAttr attr);
mlir::IntegerAttr setBufferId(mlir::Operation* op, unsigned val);
mlir::IntegerAttr getBufferId(mlir::Operation* op);
@@ -349,6 +349,143 @@ NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_propagate_2d_tensor)
(vector<float>{3, 7, 2}), read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_tree_1)
{
Shape shape{1, 2, 2};
Shape shape_r{1, 4, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto add1 = make_shared<op::Add>(A, B);
auto add2 = make_shared<op::Add>(A, B);
auto concat = make_shared<op::Concat>(NodeVector{add1, add2}, 1);
auto f = make_shared<Function>(make_shared<op::Add>(concat, concat), ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{1, 1, 1, 1});
auto b = backend->create_tensor(element::f32, shape);
copy_data(b, vector<float>{1, 1, 1, 1});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
vector<float> expected;
expected.resize(8, 4);
EXPECT_TRUE(test::all_close_f(expected, read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_tree_2)
{
Shape shape{1, 2, 2};
Shape shape_r{1, 8, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto add1 = make_shared<op::Add>(A, B);
auto add2 = make_shared<op::Add>(A, B);
auto concat1 = make_shared<op::Concat>(NodeVector{add1, add2}, 1);
auto concat2 = make_shared<op::Concat>(NodeVector{add1, add2}, 1);
auto concat12 = make_shared<op::Concat>(NodeVector{concat1, concat2}, 1);
auto f = make_shared<Function>(make_shared<op::Add>(concat12, concat12), ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{1, 1, 1, 1});
auto b = backend->create_tensor(element::f32, shape);
copy_data(b, vector<float>{1, 1, 1, 1});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
vector<float> expected;
expected.resize(16, 4);
EXPECT_TRUE(test::all_close_f(expected, read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_tree_3)
{
Shape shape{1, 2, 2};
Shape shape_r{1, 16, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto concat1 = make_shared<op::Concat>(NodeVector{A, B}, 1);
auto concat2 = make_shared<op::Concat>(NodeVector{A, B}, 1);
auto concat3 = make_shared<op::Concat>(NodeVector{A, B}, 1);
auto concat4 = make_shared<op::Concat>(NodeVector{A, B}, 1);
auto concat12 = make_shared<op::Concat>(NodeVector{concat1, concat2}, 1);
auto concat34 = make_shared<op::Concat>(NodeVector{concat3, concat4}, 1);
auto concat14 = make_shared<op::Concat>(NodeVector{concat12, concat34}, 1);
auto f = make_shared<Function>(make_shared<op::Add>(concat14, concat14), ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{1, 1, 1, 1});
auto b = backend->create_tensor(element::f32, shape);
copy_data(b, vector<float>{1, 1, 1, 1});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
vector<float> expected;
expected.resize(32, 2);
EXPECT_TRUE(test::all_close_f(expected, read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_add_concat)
{
Shape shape{2, 2};
Shape shape_r{4, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto add1 = make_shared<op::Add>(A, B);
auto add2 = make_shared<op::Add>(add1, add1);
auto concat = make_shared<op::Concat>(NodeVector{add1, add2}, 0);
auto add3 = make_shared<op::Add>(concat, concat);
auto f = make_shared<Function>(add3, ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{1, 1, 1, 1});
auto b = backend->create_tensor(element::f32, shape);
copy_data(b, vector<float>{1, 1, 1, 1});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
vector<float> expected = {4, 4, 4, 4, 8, 8, 8, 8};
EXPECT_TRUE(test::all_close_f(expected, read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_add_concat_2)
{
Shape shape{1, 2, 2};
Shape shape_r{1, 6, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto add1 = make_shared<op::Add>(A, B);
auto add2 = make_shared<op::Add>(A, B);
auto add3 = make_shared<op::Add>(A, B);
auto add4 = make_shared<op::Add>(A, B);
auto add5 = make_shared<op::Add>(A, B);
auto concat1 = make_shared<op::Concat>(NodeVector{add1, add2, add3}, 1);
auto concat2 = make_shared<op::Concat>(NodeVector{add4, add2, add5}, 1);
auto add6 = make_shared<op::Add>(concat1, concat2);
auto f = make_shared<Function>(add6, ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{1, 1, 1, 1});
auto b = backend->create_tensor(element::f32, shape);
copy_data(b, vector<float>{1, 1, 1, 1});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
vector<float> expected = {4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4};
EXPECT_TRUE(test::all_close_f(expected, read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
// from numpy import *
// a=linspace(1,2*3*4*3*2,2*3*4*3*2)
// b=linspace(1000+1,1000+2*3*3*3*2,2*3*3*3*2)
......
// RUN: ngraph-opt %s --split-input-file --ngraph-memory-opt --ngraph-memory-opt-concat --ngraph-memory-opt-eltwise -convert-ngraph-to-affine | FileCheck %s
// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 * 2 + d1)
// CHECK-LABEL: test0
// CHECK: %[[B:.*]] = alloc() : memref<16xi8>
// CHECK: std.view %[[B]][][] : memref<16xi8> to memref<2x2xf32, #[[MAP0]]>
// CHECK: std.view %[[B]][][] : memref<16xi8> to memref<2x2xf32, #[[MAP0]]>
// CHECK: std.view %[[B]][][] : memref<16xi8> to memref<2x2xf32, #[[MAP0]]>
// CHECK: dealloc %[[B]] : memref<16xi8>
func @test0(%arg0: !ng.tensor<2x2xf32>, %arg1: !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> {
%0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32>
%1 = "ng.add"(%0, %0) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32>
%2 = "ng.add"(%1, %1) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32>
%3 = "ng.add"(%2, %2) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32>
"ng.return"(%3) : (!ng.tensor<2x2xf32>) -> ()
}
// -----
// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 * 2 + d1)
// CHECK-DAG: #[[MAP1:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 * 2 + d1 + 4)
// CHECK-LABEL: test1
// CHECK: %[[B:.*]] = alloc() : memref<32xi8>
// CHECK: std.view %[[B]][][] : memref<32xi8> to memref<2x2xf32, #[[MAP0]]>
// CHECK: std.view %[[B]][][] : memref<32xi8> to memref<2x2xf32, #[[MAP1]]>
// CHECK: std.view %[[B]][][] : memref<32xi8> to memref<4x2xf32, #[[MAP0]]>
// CHECK: dealloc %[[B]] : memref<32xi8>
func @test1(%arg0: !ng.tensor<2x2xf32>, %arg1: !ng.tensor<2x2xf32>) -> !ng.tensor<4x2xf32> {
%0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32>
%1 = "ng.add"(%0, %0) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32>
%2 = "ng.concat"(%0, %1) {concatenation_axis = 0} : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<4x2xf32>
%3 = "ng.add"(%2, %2) : (!ng.tensor<4x2xf32>, !ng.tensor<4x2xf32>) -> !ng.tensor<4x2xf32>
"ng.return"(%3) : (!ng.tensor<4x2xf32>) -> ()
}
// -----
// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2)
// CHECK-DAG: #[[MAP1:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2 + 4)
// CHECK-DAG: #[[MAP2:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2)
// CHECK-DAG: #[[MAP3:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 16 + d1 * 2 + d2)
// CHECK-LABEL: test2
// CHECK: %[[B1:.*]] = alloc() : memref<32xi8>
// CHECK: std.view %[[B1]][][] : memref<32xi8> to memref<1x2x2xf32, #[[MAP0]]>
// CHECK: std.view %[[B1]][][] : memref<32xi8> to memref<1x2x2xf32, #[[MAP1]]>
// CHECK: std.view %[[B1]][][] : memref<32xi8> to memref<1x4x2xf32, #[[MAP2]]>
// CHECK: %[[B2:.*]] = alloc() : memref<64xi8>
// CHECK: std.view %[[B2]][][] : memref<64xi8> to memref<1x8x2xf32, #[[MAP3]]>
// CHECK: std.view %[[B2]][][] : memref<64xi8> to memref<1x8x2xf32, #[[MAP3]]>
func @test2(%arg0: !ng.tensor<1x2x2xf32>, %arg1: !ng.tensor<1x2x2xf32>) -> (!ng.tensor<1x4x2xf32>, !ng.tensor<1x8x2xf32>){
%0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
%1 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
// inplace
%2 = "ng.concat"(%0, %1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32>
// cannot be done inplace, %3 and %2 cannot alias
%3 = "ng.concat"(%0, %1, %2) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x8x2xf32>
// inplace destructive. %3 and %2 cannot alias
%4 = "ng.add"(%3, %3) : (!ng.tensor<1x8x2xf32>, !ng.tensor<1x8x2xf32>) -> !ng.tensor<1x8x2xf32>
// no inplace, result is output
%5 = "ng.add"(%2, %2) : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x4x2xf32>
// no inplace, result is output
%6 = "ng.add"(%4, %4) : (!ng.tensor<1x8x2xf32>, !ng.tensor<1x8x2xf32>) -> !ng.tensor<1x8x2xf32>
"ng.return"(%5, %6) : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x8x2xf32>) -> ()
}
// -----
// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2)
// CHECK-DAG: #[[MAP8:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2 + 8)
// CHECK-DAG: #[[MAP9:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2 + 16)
// CHECK-DAG: #[[MAP10:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2 + 24)
// CHECK-DAG: #[[MAP11:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 16 + d1 * 2 + d2)
// CHECK-DAG: #[[MAP12:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 16 + d1 * 2 + d2 + 16)
// CHECK-DAG: #[[MAP13:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 32 + d1 * 2 + d2)
// CHECK-LABEL: test3
// CHECK: %[[B:.*]] = alloc() : memref<128xi8>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP0]]>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP8]]>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP9]]>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP10]]>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x8x2xf32, #[[MAP11]]>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x8x2xf32, #[[MAP12]]>
// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x16x2xf32, #[[MAP13]]>
// CHECK: dealloc %[[B]] : memref<128xi8>
func @test3(%arg0: !ng.tensor<1x2x2xf32>, %arg1: !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x16x2xf32> {
%0 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32>
%1 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32>
%2 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32>
%3 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32>
%4 = "ng.concat"(%0, %1) {concatenation_axis = 1} : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x8x2xf32>
%5 = "ng.concat"(%2, %3) {concatenation_axis = 1} : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x8x2xf32>
%6 = "ng.concat"(%4, %5) {concatenation_axis = 1} : (!ng.tensor<1x8x2xf32>, !ng.tensor<1x8x2xf32>) -> !ng.tensor<1x16x2xf32>
%7 = "ng.add"(%6, %6) : (!ng.tensor<1x16x2xf32>, !ng.tensor<1x16x2xf32>) -> !ng.tensor<1x16x2xf32>
"ng.return"(%7) : (!ng.tensor<1x16x2xf32>) -> ()
}
// -----
//CHECK-DAG: #[[MAP4:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2 + 4)
//CHECK-DAG: #[[MAP5:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2)
//CHECK-DAG: #[[MAP6:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2 + 8)
//CHECK-DAG: #[[MAP12:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 12 + d1 * 2 + d2)
// CHECK-LABEL: test4
//CHECK: %[[B1:.*]] = alloc() : memref<1x2x2xf32>
//CHECK: %[[B2:.*]] = alloc() : memref<48xi8>
//CHECK: std.view %[[B2]][][] : memref<48xi8> to memref<1x2x2xf32, #[[MAP4]]>
//CHECK: %[[B3:.*]] = alloc() : memref<1x2x2xf32>
//CHECK: std.view %[[B2]][][] : memref<48xi8> to memref<1x2x2xf32, #[[MAP5]]>
//CHECK: std.view %[[B2]][][] : memref<48xi8> to memref<1x2x2xf32, #[[MAP6]]>
//CHECK: %[[B4:.*]] = alloc() : memref<1x6x2xf32>
//CHECK: std.view %1[][] : memref<48xi8> to memref<1x6x2xf32, #[[MAP12]]>
//CHECK: dealloc %[[B1]] : memref<1x2x2xf32>
//CHECK: dealloc %[[B2]] : memref<48xi8>
//CHECK: dealloc %[[B3]] : memref<1x2x2xf32>
//CHECK: dealloc %[[B4]] : memref<1x6x2xf32>
func @test4(%arg0: !ng.tensor<1x2x2xf32>, %arg1: !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x6x2xf32> {
%S0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
%S1 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
%S2 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
%R0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
%R2 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32>
// pre-existing assignment of S1 in %D2 prevents assignment for %D1 concat
%D1 = "ng.concat"(%S0, %S1, %S2) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x6x2xf32>
%D2 = "ng.concat"(%R0, %S1, %R2) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x6x2xf32>
%D3 = "ng.add"(%D1, %D2) : (!ng.tensor<1x6x2xf32>, !ng.tensor<1x6x2xf32>) -> !ng.tensor<1x6x2xf32>
"ng.return"(%D3) : (!ng.tensor<1x6x2xf32>) -> ()
}