Commit c737a573 authored by Amy Zhuang, committed by Scott Cyphers

[MLIR] Use call back for MatMul. (#3838)

* [MLIR] Use call back for MatMul.

* Use callback for Gemm.

* Use mkldnn callback for Softmax.

* Address PR feedback.

* Fix merge errors.

* Change to tail allocation struct.

* Use mkldnn callback for AvgPool.

* Add callbacks for AvgPoolBackprop, MaxPool, and MaxPoolBackprop.

* Fix merge errors.

* Use UnrankedMemRefType for callbacks.

* Address PR feedback.

* Cleanup.

* Address PR feedback.

* Fix a bug.

* Use global variable to hold attributes.

* Convert layout if needed for pooling.

* Address PR feedback.

* Add header.

* Address PR feedback.

* Update Copyright to 2017-2020.

* Address PR feedback.
Co-authored-by: Scott Cyphers <diyessi@users.noreply.github.com>
parent e8c0282c
......@@ -36,6 +36,7 @@ set(SRC
core/pass/ng_dialect_builder.hpp
runtime/cpu/memory_manager.cpp
runtime/cpu/cpu_runtime.cpp
runtime/cpu/cpu_callbacks.cpp
utils.cpp
)
......@@ -90,7 +91,8 @@ target_link_libraries(
)
# Link ngraph
target_link_libraries(mlir_backend PUBLIC ngraph)
target_link_libraries(mlir_backend PUBLIC ngraph libmkl libmkldnn)
target_include_directories(mlir_backend SYSTEM PUBLIC libmkldnn)
# table-gen dialect ops
# include table-gen helpers
......
......@@ -33,6 +33,7 @@
#include <mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h>
#include <mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h>
#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
#include <mlir/IR/StandardTypes.h>
#include <mlir/Pass/PassManager.h>
#include <mlir/Target/LLVMIR.h>
#include <mlir/Transforms/DialectConversion.h>
......
......@@ -22,6 +22,8 @@
#include "contrib/mlir/backend/analysis/memory_analysis.hpp"
#include "contrib/mlir/core/ngraph_dialect/ops.hpp"
#include "contrib/mlir/core/ngraph_dialect/type.hpp"
#include "contrib/mlir/runtime/cpu/callback_utils.hpp"
#include "contrib/mlir/utils.hpp"
#include "ngraph/assertion.hpp"
#include <llvm/ADT/DenseSet.h>
......@@ -30,6 +32,7 @@
#include <mlir/EDSC/Helpers.h>
#include <mlir/EDSC/Intrinsics.h>
#include <mlir/IR/AffineExpr.h>
#include <mlir/IR/Function.h>
#include <mlir/IR/IntegerSet.h>
#include <mlir/IR/MLIRContext.h>
#include <mlir/IR/StandardTypes.h>
......@@ -40,6 +43,8 @@
#define PASS_NAME "convert-ngraph-to-affine"
#define DEBUG_TYPE PASS_NAME
std::vector<ngraph::runtime::ngmlir::opAttrs> opAttrsVec;
// anonymous namespace
// no need to expose any of the following outside of this file
namespace
......@@ -164,6 +169,12 @@ namespace
PatternRewriter& rewriter,
DialectLoweringPass& pass);
template <typename OP>
void lowerPooling(Operation* op,
ArrayRef<Value*> operands,
PatternRewriter& rewriter,
DialectLoweringPass& pass);
ValueHandle createZeroConstant(mlir::Type type);
ValueHandle createOneConstant(mlir::Type type);
......@@ -203,6 +214,13 @@ namespace
/// Inserts dealloc Ops for each temporary allocated by AllocOp
void insertDeallocs(PatternRewriter& rewriter);
NGraphTypeConverter& getTypeConverter() { return typeConverter; }
FuncOp getCallDecl(StringRef name,
ArrayRef<Type> args,
ArrayRef<Type> output,
PatternRewriter& rewriter);
inline size_t insertAttrs(opAttrs attrs);
MemoryAnalysis* getMemAnalysis() const { return m_memAnalysis; }
private:
/// Collect a set of patterns to convert from the nGraph dialect to Affine dialect.
......@@ -222,6 +240,9 @@ namespace
MemoryAnalysis* m_memAnalysis;
// TODO: Workaround for findOutputValues and buildOutputDefs. See NGCPU-470.
std::string funcName;
// Store the attributes needed by the callbacks
std::vector<opAttrs> m_attrsVec;
};
void DialectLoweringPass::runOnModule()
......@@ -271,6 +292,8 @@ namespace
// separate rewrite pattern. Retrieve new function after signature conversion.
insertNoAliasArgAttrs();
}
opAttrsVec = m_attrsVec;
}
void DialectLoweringPass::populateNGraphToAffineConversionPatterns(
......@@ -467,6 +490,33 @@ namespace
}
}
mlir::FuncOp DialectLoweringPass::getCallDecl(StringRef name,
ArrayRef<Type> args,
ArrayRef<Type> output,
PatternRewriter& rewriter)
{
auto module = getModule();
auto* context = getModule().getContext();
auto callBackFunc = module.lookupSymbol<mlir::FuncOp>(name);
if (!callBackFunc)
{
// Create a function declaration and insert to the module.
auto callBackType = rewriter.getFunctionType(args, output);
PatternRewriter::InsertionGuard insertGuard(rewriter);
rewriter.setInsertionPointToStart(module.getBody());
SmallVector<NamedAttribute, 4> attributes;
rewriter.create<mlir::FuncOp>(rewriter.getUnknownLoc(), name, callBackType, attributes);
callBackFunc = module.lookupSymbol<mlir::FuncOp>(name);
}
return callBackFunc;
}
inline size_t DialectLoweringPass::insertAttrs(opAttrs attrs)
{
m_attrsVec.push_back(attrs);
return m_attrsVec.size() - 1;
}
// NGDialect converters
Type NGraphTypeConverter::convertType(Type type)
{
......@@ -1198,6 +1248,368 @@ namespace
return matchSuccess();
}
// Use callback: Pooling, MatMul, Gemm, Softmax
static void castMemRef(SmallVector<mlir::Value*, 4> inputs,
SmallVector<mlir::Value*, 4>& outputs,
PatternRewriter& rewriter,
UnrankedMemRefType type)
{
for (auto in : inputs)
{
auto out = rewriter.create<mlir::MemRefCastOp>(rewriter.getUnknownLoc(), in, type);
outputs.push_back(out);
}
}
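// Shared recipe used by every callback-based rewriter below (condensed summary;
// the exact code for each op follows):
//   1. buildOutputDefs() materializes the result memref.
//   2. castMemRef() casts ranked operands and results to UnrankedMemRefType so a
//      single callback signature covers all shapes and ranks.
//   3. pass.insertAttrs(attrs) stores the op's attributes in the pass and returns
//      their index; the index and an OpType tag are materialized as i64 constants.
//   4. pass.getCallDecl("__mlir_callback_N_inputs", ...) declares the callback once
//      per module, a CallOp is emitted, and replaceOp() finishes the rewrite.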
REWRITER(NGAvgPoolOp)
{
lowerPooling<mlir::NGAvgPoolOp>(op, operands, rewriter, pass);
return matchSuccess();
}
REWRITER(NGAvgPoolBackpropOp)
{
lowerPooling<mlir::NGAvgPoolBackpropOp>(op, operands, rewriter, pass);
return matchSuccess();
}
REWRITER(NGMaxPoolOp)
{
lowerPooling<mlir::NGMaxPoolOp>(op, operands, rewriter, pass);
return matchSuccess();
}
REWRITER(NGMaxPoolBackpropOp)
{
auto pooling = cast<NGMaxPoolBackpropOp>(op);
auto loc = pooling.getLoc();
// Retrieve/generate Values for operands and result.
ScopedContext scope(rewriter, loc);
Value* src = operands[0];
Value* delta = operands[1];
ArrayRef<Attribute> windowShape = pooling.windowShape().getValue();
ArrayRef<Attribute> windowStrides = pooling.windowMovementStrides().getValue();
ArrayRef<Attribute> padBelow = pooling.padBelow().getValue();
ArrayRef<Attribute> padAbove = pooling.padAbove().getValue();
Value* result = pass.buildOutputDefs(op, rewriter)[0];
NGRAPH_CHECK(src && delta && result, "Unexpected null values in MaxPoolBackprop Op");
auto resultTy = result->getType().dyn_cast<MemRefType>();
auto resultShape = resultTy.getShape();
auto srcTy = src->getType().dyn_cast<MemRefType>();
auto srcShape = srcTy.getShape();
auto deltaTy = delta->getType().dyn_cast<MemRefType>();
auto deltaShape = deltaTy.getShape();
NGRAPH_CHECK(resultTy, "Unexpected non-memref result type");
NGRAPH_CHECK(srcTy, "Unexpected non-memref src type");
NGRAPH_CHECK(deltaTy, "Unexpected non-memref delta type");
Type elemTy = resultTy.getElementType();
NGRAPH_CHECK(elemTy == srcTy.getElementType() && elemTy == deltaTy.getElementType(),
"Types mismatch in MaxPoolBackprop");
NGRAPH_CHECK((srcShape.size() == 4 && resultShape.size() == 4) ||
(srcShape.size() == 5 && resultShape.size() == 5),
"MKLDNN pooling operation is only supported for 3D and 5D tensors");
auto int64Ty = rewriter.getIntegerType(64);
auto unrankedMemrefTy = UnrankedMemRefType::get(elemTy, 0);
SmallVector<mlir::Value*, 4> inputs = {src, delta, result};
SmallVector<mlir::Value*, 4> outputs;
castMemRef(inputs, outputs, rewriter, unrankedMemrefTy);
FuncOp callBackFunc = pass.getCallDecl(
"__mlir_callback_2_inputs",
{unrankedMemrefTy, unrankedMemrefTy, unrankedMemrefTy, int64Ty, int64Ty},
{},
rewriter);
opAttrs attrs;
if (srcShape.size() == 4)
{
attrs.poolAttrs2d.includePaddingInAvgComputation = false;
for (auto i = 0; i < 2; i++)
{
attrs.poolAttrs2d.windowShape[i] = windowShape[i].cast<IntegerAttr>().getInt();
attrs.poolAttrs2d.windowStrides[i] = windowStrides[i].cast<IntegerAttr>().getInt();
attrs.poolAttrs2d.padBelow[i] = padBelow[i].cast<IntegerAttr>().getInt();
attrs.poolAttrs2d.padAbove[i] = padAbove[i].cast<IntegerAttr>().getInt();
}
}
else if (srcShape.size() == 5)
{
attrs.poolAttrs3d.includePaddingInAvgComputation = false;
for (auto i = 0; i < 3; i++)
{
attrs.poolAttrs3d.windowShape[i] = windowShape[i].cast<IntegerAttr>().getInt();
attrs.poolAttrs3d.windowStrides[i] = windowStrides[i].cast<IntegerAttr>().getInt();
attrs.poolAttrs3d.padBelow[i] = padBelow[i].cast<IntegerAttr>().getInt();
attrs.poolAttrs3d.padAbove[i] = padAbove[i].cast<IntegerAttr>().getInt();
}
}
auto index = pass.insertAttrs(attrs);
auto attrsIndexArg =
rewriter.create<mlir::ConstantIntOp>(rewriter.getUnknownLoc(), index, 64);
auto opTypeArg = rewriter.create<mlir::ConstantIntOp>(
rewriter.getUnknownLoc(), static_cast<int64_t>(OpType::MAXPOOLBACKPROP), 64);
SmallVector<mlir::Value*, 4> args = {
outputs[0], outputs[1], outputs[2], attrsIndexArg, opTypeArg};
rewriter.create<mlir::CallOp>(rewriter.getUnknownLoc(), callBackFunc, args);
rewriter.replaceOp(op, result);
return matchSuccess();
}
REWRITER(NGMatMulOp)
{
auto matmul = cast<NGMatMulOp>(op);
auto loc = matmul.getLoc();
// Retrieve/generate Values for operands and result.
ScopedContext scope(rewriter, loc);
Value* lhs = operands[0];
Value* rhs = operands[1];
Value* result = pass.buildOutputDefs(op, rewriter)[0];
NGRAPH_CHECK(lhs && rhs && result, "Unexpected null values in MatMulOp");
auto resultTy = result->getType().dyn_cast<MemRefType>();
auto resultShape = resultTy.getShape();
auto lhsTy = lhs->getType().dyn_cast<MemRefType>();
auto lhsShape = lhsTy.getShape();
auto rhsTy = rhs->getType().dyn_cast<MemRefType>();
auto rhsShape = rhsTy.getShape();
NGRAPH_CHECK(resultTy, "Unexpected non-memref result type");
NGRAPH_CHECK(lhsTy, "Unexpected non-memref LHS type");
NGRAPH_CHECK(rhsTy, "Unexpected non-memref RHS type");
Type elemTy = resultTy.getElementType();
NGRAPH_CHECK(elemTy == lhsTy.getElementType() && elemTy == rhsTy.getElementType(),
"Types mismatch in MatMulOp");
NGRAPH_CHECK(lhsShape.size() == 2 && rhsShape.size() == 2 && resultShape.size() == 2,
"MatMul operation is only supported for 2D tensors");
opAttrs attrs;
attrs.gemmAttrs2d.transposeA = matmul.transposeA();
attrs.gemmAttrs2d.transposeB = matmul.transposeB();
attrs.gemmAttrs2d.m = lhsShape[0];
attrs.gemmAttrs2d.k = lhsShape[1];
attrs.gemmAttrs2d.n = rhsShape[1];
attrs.gemmAttrs2d.lda = lhsShape[1];
attrs.gemmAttrs2d.ldb = rhsShape[1];
if (matmul.transposeA())
{
attrs.gemmAttrs2d.m = lhsShape[1];
attrs.gemmAttrs2d.k = lhsShape[0];
}
if (matmul.transposeB())
{
attrs.gemmAttrs2d.n = rhsShape[0];
}
attrs.gemmAttrs2d.ldc = attrs.gemmAttrs2d.n;
auto int64Ty = rewriter.getIntegerType(64);
auto unrankedMemrefTy = UnrankedMemRefType::get(elemTy, 0);
auto callBackFunc = pass.getCallDecl(
"__mlir_callback_2_inputs",
{unrankedMemrefTy, unrankedMemrefTy, unrankedMemrefTy, int64Ty, int64Ty},
{},
rewriter);
auto index = pass.insertAttrs(attrs);
auto attrsIndexArg =
rewriter.create<mlir::ConstantIntOp>(rewriter.getUnknownLoc(), index, 64);
auto opTypeArg = rewriter.create<mlir::ConstantIntOp>(
rewriter.getUnknownLoc(), static_cast<int64_t>(OpType::MATMUL), 64);
SmallVector<mlir::Value*, 4> inputs = {lhs, rhs, result};
SmallVector<mlir::Value*, 4> outputs;
castMemRef(inputs, outputs, rewriter, unrankedMemrefTy);
SmallVector<mlir::Value*, 4> args = {
outputs[0], outputs[1], outputs[2], attrsIndexArg, opTypeArg};
rewriter.create<mlir::CallOp>(rewriter.getUnknownLoc(), callBackFunc, args);
rewriter.replaceOp(op, result);
return matchSuccess();
}
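// Quick numeric check of the attribute arithmetic above (illustrative only): for a
// 4x8 LHS and an 8x16 RHS with no transposes, m = 4, k = 8, n = 16, lda = 8,
// ldb = 16 and ldc = n = 16. With transposeA and the LHS stored as 8x4, m and k are
// re-read from the swapped dimensions (m = 4, k = 8) while lda = lhsShape[1] = 4,
// i.e. the stored row length.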
REWRITER(NGGemmOp)
{
auto gemm = cast<NGGemmOp>(op);
auto loc = gemm.getLoc();
// Retrieve/generate Values for operands and result.
ScopedContext scope(rewriter, loc);
Value* lhs = operands[0];
Value* rhs = operands[1];
Value* bias = operands[2];
Value* result = pass.buildOutputDefs(op, rewriter)[0];
NGRAPH_CHECK(lhs && rhs && bias && result, "Unexpected null values in GemmOp");
auto resultTy = result->getType().dyn_cast<MemRefType>();
auto lhsTy = lhs->getType().dyn_cast<MemRefType>();
auto lhsShape = lhsTy.getShape();
auto rhsTy = rhs->getType().dyn_cast<MemRefType>();
auto rhsShape = rhsTy.getShape();
auto biasTy = bias->getType().dyn_cast<MemRefType>();
auto biasShape = biasTy.getShape();
NGRAPH_CHECK(resultTy, "Unexpected non-memref result type");
NGRAPH_CHECK(lhsTy, "Unexpected non-memref LHS type");
NGRAPH_CHECK(rhsTy, "Unexpected non-memref RHS type");
NGRAPH_CHECK(biasTy, "Unexpected non-memref bias type");
Type elemTy = resultTy.getElementType();
NGRAPH_CHECK(elemTy == lhsTy.getElementType() && elemTy == rhsTy.getElementType() &&
elemTy == biasTy.getElementType(),
"Types mismatch in GemmOp");
MemRefView vRes(result), vLhs(lhs), vRhs(rhs), vBias(bias);
NGRAPH_CHECK(vLhs.rank() == 2 && vRhs.rank() == 2 && vRes.rank() == 2 && vBias.rank() <= 2,
"Gemm operation is only supported for 2D tensors");
opAttrs attrs;
attrs.gemmAttrs2d.transposeA = gemm.transA();
attrs.gemmAttrs2d.transposeB = gemm.transB();
attrs.gemmAttrs2d.alpha = gemm.alpha().convertToFloat();
attrs.gemmAttrs2d.beta = gemm.beta().convertToFloat();
attrs.gemmAttrs2d.m = lhsShape[0];
attrs.gemmAttrs2d.k = lhsShape[1];
attrs.gemmAttrs2d.n = rhsShape[1];
attrs.gemmAttrs2d.lda = lhsShape[1];
attrs.gemmAttrs2d.ldb = rhsShape[1];
if (gemm.transA())
{
attrs.gemmAttrs2d.m = lhsShape[1];
attrs.gemmAttrs2d.k = lhsShape[0];
}
if (gemm.transB())
{
attrs.gemmAttrs2d.n = rhsShape[0];
}
attrs.gemmAttrs2d.ldc = attrs.gemmAttrs2d.n;
int broadcastHint = -1;
if (vBias.rank() == 0)
{
// Scalar
broadcastHint = 2;
}
else if (vBias.rank() == 2)
{
if (biasShape[0] == attrs.gemmAttrs2d.m && biasShape[1] == 1)
{
broadcastHint = 1;
}
else if (biasShape[0] == 1 && biasShape[1] == attrs.gemmAttrs2d.n)
{
broadcastHint = 0;
}
else
{
broadcastHint = -1;
}
}
else
{
if (biasShape[0] == attrs.gemmAttrs2d.m)
{
broadcastHint = 1;
}
else if (biasShape[0] == attrs.gemmAttrs2d.n)
{
broadcastHint = 0;
}
}
attrs.gemmAttrs2d.broadcastHint = broadcastHint;
auto int64Ty = rewriter.getIntegerType(64);
auto unrankedMemrefTy = UnrankedMemRefType::get(elemTy, 0);
auto callBackFunc = pass.getCallDecl("__mlir_callback_3_inputs",
{unrankedMemrefTy,
unrankedMemrefTy,
unrankedMemrefTy,
unrankedMemrefTy,
int64Ty,
int64Ty},
{},
rewriter);
auto index = pass.insertAttrs(attrs);
auto attrsIndexArg =
rewriter.create<mlir::ConstantIntOp>(rewriter.getUnknownLoc(), index, 64);
auto opTypeArg = rewriter.create<mlir::ConstantIntOp>(
rewriter.getUnknownLoc(), static_cast<int64_t>(OpType::GEMM), 64);
SmallVector<mlir::Value*, 4> inputs = {lhs, rhs, bias, result};
SmallVector<mlir::Value*, 4> outputs;
castMemRef(inputs, outputs, rewriter, unrankedMemrefTy);
SmallVector<mlir::Value*, 4> args = {
outputs[0], outputs[1], outputs[2], outputs[3], attrsIndexArg, opTypeArg};
rewriter.create<mlir::CallOp>(rewriter.getUnknownLoc(), callBackFunc, args);
rewriter.replaceOp(op, result);
return matchSuccess();
}
REWRITER(NGSoftMaxOp)
{
auto softmax = cast<NGSoftMaxOp>(op);
auto loc = softmax.getLoc();
// Retrieve/generate Values for operands and result.
ScopedContext scope(rewriter, loc);
Value* lhs = operands[0];
Value* result = pass.buildOutputDefs(op, rewriter)[0];
NGRAPH_CHECK(lhs && result, "Unexpected null values in SoftmaxOp");
auto resultTy = result->getType().dyn_cast<MemRefType>();
auto resultShape = resultTy.getShape();
auto lhsTy = lhs->getType().dyn_cast<MemRefType>();
auto lhsShape = lhsTy.getShape();
NGRAPH_CHECK(resultTy, "Unexpected non-memref result type");
NGRAPH_CHECK(lhsTy, "Unexpected non-memref LHS type");
Type elemTy = resultTy.getElementType();
NGRAPH_CHECK(elemTy == lhsTy.getElementType(), "Types mismatch in SoftmaxOp");
NGRAPH_CHECK((lhsShape.size() == 2 && resultShape.size() == 2) ||
(lhsShape.size() == 4 && resultShape.size() == 4),
"MKLDNN Softmax operation is only supported for 2D and 4D tensors");
auto int64Ty = rewriter.getIntegerType(64);
auto unrankedMemrefTy = UnrankedMemRefType::get(elemTy, 0);
auto axes = softmax.axes().getValue();
opAttrs attrs;
attrs.intAttr = axes[0].cast<IntegerAttr>().getInt();
auto index = pass.insertAttrs(attrs);
auto attrsIndexArg =
rewriter.create<mlir::ConstantIntOp>(rewriter.getUnknownLoc(), index, 64);
auto opTypeArg = rewriter.create<mlir::ConstantIntOp>(
rewriter.getUnknownLoc(), static_cast<int64_t>(OpType::SOFTMAX), 64);
FuncOp callBackFunc =
pass.getCallDecl("__mlir_callback_1_input",
{unrankedMemrefTy, unrankedMemrefTy, int64Ty, int64Ty},
{},
rewriter);
SmallVector<mlir::Value*, 4> inputs = {lhs, result};
SmallVector<mlir::Value*, 4> outputs;
castMemRef(inputs, outputs, rewriter, unrankedMemrefTy);
SmallVector<mlir::Value*, 4> args = {outputs[0], outputs[1], attrsIndexArg, opTypeArg};
rewriter.create<mlir::CallOp>(rewriter.getUnknownLoc(), callBackFunc, args);
rewriter.replaceOp(op, result);
return matchSuccess();
}
#undef REWRITER
/// End of pattern matchers
template <typename OP>
......@@ -1458,6 +1870,103 @@ namespace
rewriter.replaceOp(op, result);
}
template <typename OP>
void lowerPooling(Operation* op,
ArrayRef<Value*> operands,
PatternRewriter& rewriter,
DialectLoweringPass& pass)
{
auto pooling = cast<OP>(op);
auto loc = pooling.getLoc();
// Retrieve/generate Values for operands and result.
ScopedContext scope(rewriter, loc);
Value* lhs = operands[0];
ArrayRef<Attribute> windowShape = pooling.windowShape().getValue();
ArrayRef<Attribute> windowStrides = pooling.windowMovementStrides().getValue();
ArrayRef<Attribute> padBelow = pooling.padBelow().getValue();
ArrayRef<Attribute> padAbove = pooling.padAbove().getValue();
Value* result = pass.buildOutputDefs(op, rewriter)[0];
NGRAPH_CHECK(lhs && result, "Unexpected null values in Pooling Op");
auto resultTy = result->getType().dyn_cast<MemRefType>();
auto resultShape = resultTy.getShape();
auto lhsTy = lhs->getType().dyn_cast<MemRefType>();
auto lhsShape = lhsTy.getShape();
NGRAPH_CHECK(resultTy, "Unexpected non-memref result type");
NGRAPH_CHECK(lhsTy, "Unexpected non-memref LHS type");
Type elemTy = resultTy.getElementType();
NGRAPH_CHECK(elemTy == lhsTy.getElementType(), "Types mismatch in Pooling");
NGRAPH_CHECK((lhsShape.size() == 4 && resultShape.size() == 4) ||
(lhsShape.size() == 5 && resultShape.size() == 5),
"MKLDNN pooling operation is only supported for 3D and 5D tensors");
auto int64Ty = rewriter.getIntegerType(64);
OpType ty;
bool includePadding = false;
if (auto avgPool = dyn_cast<NGAvgPoolOp>(op))
{
ty = OpType::AVGPOOL;
includePadding = avgPool.includePadding();
}
else if (auto avgPoolBprop = dyn_cast<NGAvgPoolBackpropOp>(op))
{
ty = OpType::AVGPOOLBACKPROP;
includePadding = avgPoolBprop.includePadding();
}
else if (isa<NGMaxPoolOp>(op))
{
ty = OpType::MAXPOOL;
}
auto unrankedMemrefTy = UnrankedMemRefType::get(elemTy, 0);
SmallVector<mlir::Value*, 4> inputs = {lhs, result};
SmallVector<mlir::Value*, 4> outputs;
castMemRef(inputs, outputs, rewriter, unrankedMemrefTy);
FuncOp callBackFunc =
pass.getCallDecl("__mlir_callback_1_input",
{unrankedMemrefTy, unrankedMemrefTy, int64Ty, int64Ty},
{},
rewriter);
opAttrs attrs;
if (lhsShape.size() == 4)
{
attrs.poolAttrs2d.includePaddingInAvgComputation = includePadding;
for (auto i = 0; i < 2; i++)
{
attrs.poolAttrs2d.windowShape[i] = windowShape[i].cast<IntegerAttr>().getInt();
attrs.poolAttrs2d.windowStrides[i] = windowStrides[i].cast<IntegerAttr>().getInt();
attrs.poolAttrs2d.padBelow[i] = padBelow[i].cast<IntegerAttr>().getInt();
attrs.poolAttrs2d.padAbove[i] = padAbove[i].cast<IntegerAttr>().getInt();
}
}
else if (lhsShape.size() == 5)
{
attrs.poolAttrs3d.includePaddingInAvgComputation = includePadding;
for (auto i = 0; i < 3; i++)
{
attrs.poolAttrs3d.windowShape[i] = windowShape[i].cast<IntegerAttr>().getInt();
attrs.poolAttrs3d.windowStrides[i] = windowStrides[i].cast<IntegerAttr>().getInt();
attrs.poolAttrs3d.padBelow[i] = padBelow[i].cast<IntegerAttr>().getInt();
attrs.poolAttrs3d.padAbove[i] = padAbove[i].cast<IntegerAttr>().getInt();
}
}
auto index = pass.insertAttrs(attrs);
auto attrsIndexArg =
rewriter.create<mlir::ConstantIntOp>(rewriter.getUnknownLoc(), index, 64);
auto opTypeArg = rewriter.create<mlir::ConstantIntOp>(
rewriter.getUnknownLoc(), static_cast<int64_t>(ty), 64);
SmallVector<mlir::Value*, 4> args = {outputs[0], outputs[1], attrsIndexArg, opTypeArg};
rewriter.create<mlir::CallOp>(rewriter.getUnknownLoc(), callBackFunc, args);
rewriter.replaceOp(op, result);
}
ValueHandle createZeroConstant(mlir::Type type)
{
if (auto floatTy = type.dyn_cast<FloatType>())
......
......@@ -27,22 +27,29 @@
MLIR_OP(NGAddOp , true )
MLIR_OP(NGArgMaxRedOp , false )
MLIR_OP(NGArgMinRedOp , false )
MLIR_OP(NGAvgPoolOp , false )
MLIR_OP(NGAvgPoolBackpropOp , false )
MLIR_OP(NGConcatOp , true )
MLIR_OP(NGConvolutionOp , false )
MLIR_OP(NGDivOp , true )
MLIR_OP(NGDotOp , false )
MLIR_OP(NGGatherOp , false )
MLIR_OP(NGGemmOp , false )
MLIR_OP(NGGreaterOp , true )
MLIR_OP(NGLessOp , true )
MLIR_OP(NGGreaterEqOp , true )
MLIR_OP(NGLessEqOp , true )
MLIR_OP(NGEqOp , true )
MLIR_OP(NGNotEqOp , true )
MLIR_OP(NGMatMulOp , false )
MLIR_OP(NGMulOp , true )
MLIR_OP(NGMaxOp , true )
MLIR_OP(NGMaxPoolOp , false )
MLIR_OP(NGMaxPoolBackpropOp , false )
MLIR_OP(NGMinOp , true )
MLIR_OP(NGNegOp , true )
MLIR_OP(NGReluOp , true )
MLIR_OP(NGSoftMaxOp , false )
MLIR_OP(NGSubOp , true )
MLIR_LAST_OP(NGReturnOp , false )
......
......@@ -28,24 +28,7 @@
#include "ngraph/descriptor/tensor.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/argmax.hpp"
#include "ngraph/op/argmin.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/experimental/compiled_kernel.hpp"
#include "ngraph/op/gather.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/less.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/negative.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/op/util/index_reduction.hpp"
#include "ngraph/ops.hpp"
#include "ngraph/type/element_type.hpp"
#include "contrib/mlir/utils.hpp"
......
......@@ -282,7 +282,7 @@ def NGMVN :
}
// MatMul Op
def NGMatMul :
def NGMatMulOp :
NG_OneResult_Op<"matmul", [NoSideEffect, DeclareOpInterfaceMethods<FusedOp>]>,
Arguments<(ins NG_TensorType:$A, NG_TensorType:$B,
DefaultValuedAttr<BoolAttr, "false">:$transposeA,
......
......@@ -309,6 +309,55 @@ mlir::LogicalResult verifyOp(NGConvolutionOp* op)
return mlir::success();
}
template <>
mlir::LogicalResult verifyOp(NGMatMulOp* op)
{
// TODO(ayzhuang): Improve verification: proper shapes, etc.
return mlir::success();
}
template <>
mlir::LogicalResult verifyOp(NGGemmOp* op)
{
// TODO(ayzhuang): Improve verification: proper shapes, etc.
return mlir::success();
}
template <>
mlir::LogicalResult verifyOp(NGSoftMaxOp* op)
{
// TODO(ayzhuang): Improve verification: proper shapes, etc.
return mlir::success();
}
template <>
mlir::LogicalResult verifyOp(NGAvgPoolOp* op)
{
// TODO(ayzhuang): Improve verification: proper shapes, etc.
return mlir::success();
}
template <>
mlir::LogicalResult verifyOp(NGAvgPoolBackpropOp* op)
{
// TODO(ayzhuang): Improve verification: proper shapes, etc.
return mlir::success();
}
template <>
mlir::LogicalResult verifyOp(NGMaxPoolOp* op)
{
// TODO(ayzhuang): Improve verification: proper shapes, etc.
return mlir::success();
}
template <>
mlir::LogicalResult verifyOp(NGMaxPoolBackpropOp* op)
{
// TODO(ayzhuang): Improve verification: proper shapes, etc.
return mlir::success();
}
namespace mlir
{
#include "ops_interfaces.cpp.inc"
......@@ -401,7 +450,7 @@ void mlir::NGLSTMCellOp::decompose()
void mlir::NGLSTMSequenceOp::decompose()
{
}
void mlir::NGMatMul::decompose()
void mlir::NGMatMulOp::decompose()
{
}
void mlir::NGLayerNormOp::decompose()
......
......@@ -252,8 +252,8 @@ def NGAvgPoolOp :
}
// AvgPool for back prop
def NGAvgPoolBackPropOp :
NG_OneResult_Op<"avgPoolBackProp", [NoSideEffect, OpVersion0]>,
def NGAvgPoolBackpropOp :
NG_OneResult_Op<"avgPoolBackprop", [NoSideEffect, OpVersion0]>,
Arguments<(ins I64ArrayAttr :$forwardArgShape,
NG_TensorType :$delta,
I64ArrayAttr :$windowShape,
......@@ -455,11 +455,10 @@ def NGMaxPoolOp :
}
// MaxPool for back prop
def NGMaxPoolBackPropOp :
NG_OneResult_Op<"maxPoolBackProp", [NoSideEffect, OpVersion0]>,
def NGMaxPoolBackpropOp :
NG_OneResult_Op<"maxPoolBackprop", [NoSideEffect, OpVersion0]>,
Arguments<(ins NG_TensorType :$argForward,
NG_TensorType :$delta,
NG_TensorType :$resultForward,
I64ArrayAttr :$windowShape,
I64ArrayAttr :$windowMovementStrides,
I64ArrayAttr :$padBelow,
......@@ -473,24 +472,7 @@ def NGMaxPoolBackPropOp :
let parser = [{ NGRAPH_CHECK(false, "No parser support"); return mlir::failure(); }];
let verifier = [{ return verifyOp(this); }];
let builders = [
// Builder without resultForward
OpBuilder<
"Builder *builder, OperationState &tblgen_state, Type res, "
"Value *argForward, Value *delta, "
"ArrayAttr windowShape, ArrayAttr windowMovementStrides, "
"ArrayAttr padBelow, ArrayAttr padAbove", [{
tblgen_state.addOperands(argForward);
tblgen_state.addOperands(delta);
tblgen_state.addOperands(nullptr);
tblgen_state.addAttribute("windowShape", windowShape);
tblgen_state.addAttribute("windowMovementStrides", windowMovementStrides);
tblgen_state.addAttribute("padBelow", padBelow);
tblgen_state.addAttribute("padAbove", padAbove);
tblgen_state.addTypes(res);
}]>
];
let extraClassDeclaration = [{
void setWindowShape(const ArrayAttr& arrayAttr) { this->setAttr("windowShape", arrayAttr); }
void setWindowMovementStrides(const ArrayAttr& arrayAttr) { this->setAttr("windowMovementStrides", arrayAttr);}
......
......@@ -6,23 +6,31 @@
MLIR_OP(Add)
MLIR_OP(ArgMin)
MLIR_OP(ArgMax)
MLIR_OP(AvgPool)
MLIR_OP(AvgPoolBackprop)
MLIR_OP(Divide)
MLIR_OP(Dot)
MLIR_OP(Concat)
MLIR_OP(Convolution)
MLIR_OP(Gather)
MLIR_OP(Gemm)
MLIR_OP(Greater)
MLIR_OP(Less)
MLIR_OP(GreaterEq)
MLIR_OP(LessEq)
MLIR_OP(Equal)
MLIR_OP(NotEqual)
MLIR_OP(MatMul)
MLIR_OP(Maximum)
MLIR_OP(MaxPool)
MLIR_OP(MaxPoolBackprop)
MLIR_OP(Minimum)
MLIR_OP(Multiply)
MLIR_OP(Negative)
MLIR_OP(Softmax)
MLIR_OP(Subtract)
MLIR_OP(Relu)
// Add new supported ops here
#undef MLIR_OP
......@@ -21,28 +21,7 @@
#include "ngraph/assertion.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/argmax.hpp"
#include "ngraph/op/argmin.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/experimental/compiled_kernel.hpp"
#include "ngraph/op/gather.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp"
#include "ngraph/op/less.hpp"
#include "ngraph/op/less_eq.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/negative.hpp"
#include "ngraph/op/not_equal.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/ops.hpp"
using namespace ngraph::descriptor;
using namespace ngraph::op;
......@@ -498,6 +477,104 @@ bool MLIRSubgraphExtractionPass::is_supported_mlir_op(std::shared_ptr<Node> node
std::all_of(window_dilation.begin(), window_dilation.end(), is_one);
}
// MKLDNN only supports softmax across a single axis
if (TI(ngraph::op::Softmax) == TI(*node))
{
// Softmax is only supported through callback
if (std::getenv("NGRAPH_MLIR_CALLBACK") == nullptr)
{
return false;
}
auto softmax = static_cast<ngraph::op::Softmax*>(node.get());
auto arg0_shape = node->get_input_shape(0);
auto arg0_rank = arg0_shape.size();
return (arg0_rank == 4 || arg0_rank == 2) &&
node->get_input_element_type(0) == element::f32 && softmax->get_axes().size() == 1;
}
if (TI(ngraph::op::AvgPool) == TI(*node))
{
// AvgPool is only supported through callback
if (std::getenv("NGRAPH_MLIR_CALLBACK") == nullptr)
{
return false;
}
auto avg_pool = static_cast<ngraph::op::AvgPool*>(node.get());
auto arg0_shape = node->get_input_shape(0);
auto arg0_rank = arg0_shape.size();
return ((arg0_rank == 4 && avg_pool->get_window_shape().size() == 2) ||
(arg0_rank == 5 && avg_pool->get_window_shape().size() == 3)) &&
node->get_input_element_type(0) == element::f32;
}
if (TI(ngraph::op::AvgPoolBackprop) == TI(*node))
{
// AvgPoolBackprop is only supported through callback
if (std::getenv("NGRAPH_MLIR_CALLBACK") == nullptr)
{
return false;
}
auto avg_pool_backprop = static_cast<ngraph::op::AvgPoolBackprop*>(node.get());
auto arg0_shape = node->get_input_shape(0);
auto arg0_rank = arg0_shape.size();
return ((arg0_rank == 4 && avg_pool_backprop->get_window_shape().size() == 2) ||
(arg0_rank == 5 && avg_pool_backprop->get_window_shape().size() == 3)) &&
node->get_input_element_type(0) == element::f32;
}
if (TI(ngraph::op::MaxPoolBackprop) == TI(*node))
{
// MaxPoolBackprop is only supported through callback
if (std::getenv("NGRAPH_MLIR_CALLBACK") == nullptr)
{
return false;
}
auto max_pool_backprop = static_cast<ngraph::op::MaxPoolBackprop*>(node.get());
auto arg0_shape = node->get_input_shape(0);
auto arg0_rank = arg0_shape.size();
return ((arg0_rank == 4 && max_pool_backprop->get_window_shape().size() == 2) ||
(arg0_rank == 5 && max_pool_backprop->get_window_shape().size() == 3)) &&
node->get_input_element_type(0) == element::f32;
}
if (TI(ngraph::op::MaxPool) == TI(*node))
{
// MaxPool is only supported through callback
if (std::getenv("NGRAPH_MLIR_CALLBACK") == nullptr)
{
return false;
}
auto max_pool = static_cast<ngraph::op::MaxPool*>(node.get());
auto arg0_shape = node->get_input_shape(0);
auto arg0_rank = arg0_shape.size();
return ((arg0_rank == 4 && max_pool->get_window_shape().size() == 2) ||
(arg0_rank == 5 && max_pool->get_window_shape().size() == 3)) &&
node->get_input_element_type(0) == element::f32;
}
if (TI(ngraph::op::MatMul) == TI(*node))
{
// MatMul is only supported through callback
if (std::getenv("NGRAPH_MLIR_CALLBACK") == nullptr)
{
return false;
}
}
if (TI(ngraph::op::Gemm) == TI(*node))
{
// Gemm is only supported through callback
if (std::getenv("NGRAPH_MLIR_CALLBACK") == nullptr)
{
return false;
}
}
return true;
}
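// The new checks above all share one gate; a minimal sketch of it factored into a
// helper (the helper name is hypothetical, the variable name comes from this change):
#include <cstdlib>
// Hypothetical helper: callback-only ops are admitted into an MLIR subgraph
// only when NGRAPH_MLIR_CALLBACK is set in the environment.
static bool mlir_callbacks_enabled()
{
    return std::getenv("NGRAPH_MLIR_CALLBACK") != nullptr;
}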
......
......@@ -26,28 +26,7 @@
#include "ngraph/descriptor/tensor.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/argmax.hpp"
#include "ngraph/op/argmin.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/experimental/compiled_kernel.hpp"
#include "ngraph/op/gather.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp"
#include "ngraph/op/less.hpp"
#include "ngraph/op/less_eq.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/negative.hpp"
#include "ngraph/op/not_equal.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/op/util/index_reduction.hpp"
#include "ngraph/ops.hpp"
#include "ngraph/type/element_type.hpp"
// Defines a new LLVM debug type for this file to be used by LLVM_DEBUG macro.
......@@ -117,8 +96,9 @@ namespace
// Generic op lowerer to ng dialect.
// Simply maps ngraph tensors to values and generate an OP. No op-specific logic.
// Use inNum when the mlir OP needs fewer inputs than its corresponding ngraph OP.
template <typename Op>
mlir::Operation* createGenericOp(const ngraph::Node* ngNode);
mlir::Operation* createGenericOp(const ngraph::Node* ngNode, int inNum = -1);
template <typename RedOp>
mlir::Operation* createIndexReduction(const ngraph::Node* ngNode);
......@@ -133,6 +113,9 @@ namespace
template <typename T>
mlir::ArrayAttr getShapeAsAttr(T ngShape);
/// Return the real input node corresponding to the fake node
ngraph::Node* getOriginArg(ngraph::Node* node) const;
private:
// Sub-graph to be compiled and executed with MLIR.
const ngraph::op::CompiledKernel* m_compiledKernel;
......@@ -220,6 +203,14 @@ mlir::ArrayAttr NgDialectConversionPass::getShapeAsAttr(T ngShape)
return m_builder.getI64ArrayAttr(mlirShape);
}
ngraph::Node* NgDialectConversionPass::getOriginArg(ngraph::Node* node) const
{
auto inputMap = m_compiledKernel->get_input_map();
auto it = inputMap.find(node->shared_from_this());
NGRAPH_CHECK(it != inputMap.end(), "Parameter not in CK input map");
return m_compiledKernel->input_values().at(it->second).get_node();
}
// Converts an nGraph Tensor into an MLIR tensor type, including the conversion of the Tensor's
// element type.
mlir::Type NgDialectConversionPass::getMlirType(const descriptor::Tensor* tensor)
......@@ -464,17 +455,157 @@ mlir::Operation* NgDialectConversionPass::COMPILE_OP_DECL(ngraph::op::Convolutio
return op;
}
template <>
mlir::Operation* NgDialectConversionPass::COMPILE_OP_DECL(ngraph::op::AvgPool)
{
mlir::Operation* op = NgDialectObj.createGenericOp<mlir::NGAvgPoolOp>(ngNode);
auto avgPoolNode = static_cast<const ngraph::op::AvgPool*>(ngNode);
auto avgPoolOp = llvm::cast<mlir::NGAvgPoolOp>(op);
mlir::BoolAttr boolAttr =
NgDialectObj.m_builder.getBoolAttr(avgPoolNode->get_include_padding_in_avg_computation());
avgPoolOp.setIncludePadding(boolAttr);
mlir::ArrayAttr attr = NgDialectObj.getShapeAsAttr(avgPoolNode->get_window_shape());
avgPoolOp.setWindowShape(attr);
attr = NgDialectObj.getShapeAsAttr(avgPoolNode->get_window_movement_strides());
avgPoolOp.setWindowMovementStrides(attr);
attr = NgDialectObj.getShapeAsAttr(avgPoolNode->get_padding_below());
avgPoolOp.setPadBelow(attr);
attr = NgDialectObj.getShapeAsAttr(avgPoolNode->get_padding_above());
avgPoolOp.setPadAbove(attr);
return op;
}
template <>
mlir::Operation* NgDialectConversionPass::COMPILE_OP_DECL(ngraph::op::AvgPoolBackprop)
{
mlir::Operation* op = NgDialectObj.createGenericOp<mlir::NGAvgPoolBackpropOp>(ngNode);
auto avgPoolBackpropNode = static_cast<const ngraph::op::AvgPoolBackprop*>(ngNode);
auto avgPoolBackpropOp = llvm::cast<mlir::NGAvgPoolBackpropOp>(op);
mlir::BoolAttr boolAttr = NgDialectObj.m_builder.getBoolAttr(
avgPoolBackpropNode->get_include_padding_in_avg_computation());
avgPoolBackpropOp.setIncludePadding(boolAttr);
mlir::ArrayAttr attr = NgDialectObj.getShapeAsAttr(avgPoolBackpropNode->get_window_shape());
avgPoolBackpropOp.setWindowShape(attr);
attr = NgDialectObj.getShapeAsAttr(avgPoolBackpropNode->get_window_movement_strides());
avgPoolBackpropOp.setWindowMovementStrides(attr);
attr = NgDialectObj.getShapeAsAttr(avgPoolBackpropNode->get_padding_below());
avgPoolBackpropOp.setPadBelow(attr);
attr = NgDialectObj.getShapeAsAttr(avgPoolBackpropNode->get_padding_above());
avgPoolBackpropOp.setPadAbove(attr);
attr = NgDialectObj.getShapeAsAttr(avgPoolBackpropNode->get_forward_arg_shape());
avgPoolBackpropOp.setForwardArgShape(attr);
return op;
}
template <>
mlir::Operation* NgDialectConversionPass::COMPILE_OP_DECL(ngraph::op::MaxPool)
{
mlir::Operation* op = NgDialectObj.createGenericOp<mlir::NGMaxPoolOp>(ngNode);
auto maxPoolNode = static_cast<const ngraph::op::MaxPool*>(ngNode);
auto maxPoolOp = llvm::cast<mlir::NGMaxPoolOp>(op);
mlir::ArrayAttr attr = NgDialectObj.getShapeAsAttr(maxPoolNode->get_window_shape());
maxPoolOp.setWindowShape(attr);
attr = NgDialectObj.getShapeAsAttr(maxPoolNode->get_window_movement_strides());
maxPoolOp.setWindowMovementStrides(attr);
attr = NgDialectObj.getShapeAsAttr(maxPoolNode->get_padding_below());
maxPoolOp.setPadBelow(attr);
attr = NgDialectObj.getShapeAsAttr(maxPoolNode->get_padding_above());
maxPoolOp.setPadAbove(attr);
return op;
}
template <>
mlir::Operation* NgDialectConversionPass::COMPILE_OP_DECL(ngraph::op::MaxPoolBackprop)
{
mlir::Operation* op = NgDialectObj.createGenericOp<mlir::NGMaxPoolBackpropOp>(ngNode, 2);
auto maxPoolBackpropNode = static_cast<const ngraph::op::MaxPoolBackprop*>(ngNode);
auto maxPoolBackpropOp = llvm::cast<mlir::NGMaxPoolBackpropOp>(op);
mlir::ArrayAttr attr = NgDialectObj.getShapeAsAttr(maxPoolBackpropNode->get_window_shape());
maxPoolBackpropOp.setWindowShape(attr);
attr = NgDialectObj.getShapeAsAttr(maxPoolBackpropNode->get_window_movement_strides());
maxPoolBackpropOp.setWindowMovementStrides(attr);
attr = NgDialectObj.getShapeAsAttr(maxPoolBackpropNode->get_padding_below());
maxPoolBackpropOp.setPadBelow(attr);
attr = NgDialectObj.getShapeAsAttr(maxPoolBackpropNode->get_padding_above());
maxPoolBackpropOp.setPadAbove(attr);
return op;
}
template <>
mlir::Operation* NgDialectConversionPass::COMPILE_OP_DECL(ngraph::op::MatMul)
{
auto matmulNode = static_cast<const ngraph::op::MatMul*>(ngNode);
auto op = NgDialectObj.createGenericOp<mlir::NGMatMulOp>(ngNode);
auto matmulOp = llvm::cast<mlir::NGMatMulOp>(op);
matmulOp.setTransposeA(NgDialectObj.m_builder.getBoolAttr(matmulNode->get_transpose_a()));
matmulOp.setTransposeB(NgDialectObj.m_builder.getBoolAttr(matmulNode->get_transpose_b()));
return op;
}
template <>
mlir::Operation* NgDialectConversionPass::COMPILE_OP_DECL(ngraph::op::Gemm)
{
auto gemmNode = static_cast<const ngraph::op::Gemm*>(ngNode);
auto op = NgDialectObj.createGenericOp<mlir::NGGemmOp>(ngNode);
auto gemmOp = llvm::cast<mlir::NGGemmOp>(op);
gemmOp.setTransA(NgDialectObj.m_builder.getBoolAttr(gemmNode->get_transA()));
gemmOp.setTransB(NgDialectObj.m_builder.getBoolAttr(gemmNode->get_transB()));
gemmOp.setAlpha(NgDialectObj.m_builder.getF32FloatAttr(gemmNode->get_alpha()));
gemmOp.setBeta(NgDialectObj.m_builder.getF32FloatAttr(gemmNode->get_beta()));
return op;
}
template <>
mlir::Operation* NgDialectConversionPass::COMPILE_OP_DECL(ngraph::op::Softmax)
{
mlir::Operation* op = NgDialectObj.createGenericOp<mlir::NGSoftMaxOp>(ngNode, 1);
auto softmaxNode = static_cast<const ngraph::op::Softmax*>(ngNode);
auto softmaxOp = llvm::cast<mlir::NGSoftMaxOp>(op);
auto originArg = NgDialectObj.getOriginArg(ngNode->input_value(1).get_node());
auto const_op = static_cast<ngraph::op::Constant*>(originArg);
AxisSet axes = const_op->get_axis_set_val();
mlir::ArrayAttr attr = NgDialectObj.getShapeAsAttr(axes);
softmaxOp.setAxes(attr);
return op;
}
template <typename Op>
mlir::Operation* NgDialectConversionPass::createGenericOp(const ngraph::Node* ngNode)
mlir::Operation* NgDialectConversionPass::createGenericOp(const ngraph::Node* ngNode, int inNum)
{
std::vector<mlir::Value*> argValues;
std::vector<mlir::Type> resTypes;
auto inputMap = m_compiledKernel->get_input_map();
std::shared_ptr<descriptor::Tensor> argTensor;
int i = 0;
for (auto& argOutput : ngNode->input_values())
{
if (inNum != -1 && i == inNum)
{
break;
}
auto argOutputNode = argOutput.get_node();
if (as_type<op::Parameter>(argOutputNode))
if (is_type<op::Parameter>(argOutputNode))
{
auto it = inputMap.find(argOutputNode->shared_from_this());
NGRAPH_CHECK(it != inputMap.end(), "Parameter not in CK input map");
......@@ -488,6 +619,7 @@ mlir::Operation* NgDialectConversionPass::createGenericOp(const ngraph::Node* ng
auto argV = getTensorValue(argTensor.get()).m_value;
argValues.push_back(argV);
i++;
}
for (auto& output : ngNode->outputs())
......
//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cstdint>
namespace ngraph
{
namespace runtime
{
namespace ngmlir
{
// The OpType enum class is used for callbacks.
// We pass an OpType to the generic callback functions,
// which call the real implementation based on it.
// TODO: remove the entries that are not needed once all callbacks are implemented.
enum class OpType
{
ADD = 0,
AVGPOOL,
AVGPOOLBACKPROP,
BATCHNORM3ARGS,
BATCHNORM5ARGS,
BATCHNORMBACKPROP,
BOUNDEDRELU,
CONCAT,
CONVERTLAYOUT,
CONVOLUTION,
CONVOLUTIONRELU,
CONVOLUTIONADD,
CONVOLUTIONBIAS,
CONVOLUTIONBIASADD,
CONVOLUTIONBACKPROPDATA,
CONVOLUTIONBACKPROPWEIGHTS,
CONVOLUTIONBACKPROPWEIGHTSBIAS,
GELU,
GELUBACKPROP,
GEMM,
GROUPCONVOLUTION,
GROUPCONVOLUTIONBIAS,
DECONVOLUTIONBIAS,
LEAKYRELU,
LRN,
LSTM,
MATMUL,
MAXPOOL,
MAXPOOLBACKPROP,
MAXPOOLBACKPROPFORWARD,
MAXPOOLBACKPROPBACKWARD,
MAXPOOLWITHINDICES,
MAXPOOLWITHINDICESBACKPROP,
QUANTIZE,
DEQUANTIZE,
QUANTIZEDAVGPOOL,
QUANTIZEDMAXPOOL,
QUANTIZEDCONCAT,
QUANTIZEDDOTBIAS,
QUANTIZEDMATMUL,
QUANTIZEDCONVOLUTION,
QUANTIZEDCONVOLUTIONBIAS,
QUANTIZEDCONVOLUTIONBIASADD,
QUANTIZEDCONVOLUTIONBIASSIGNEDADD,
QUANTIZEDCONVOLUTIONRELU,
RELU,
RELUBACKPROP,
RNN,
SIGMOID,
SIGMOIDBACKPROP,
SLICE,
SOFTMAX
};
// These structs and union are used to pass attributes to callbacks.
template <int N>
struct poolAttrs
{
bool includePaddingInAvgComputation;
int64_t windowShape[N];
int64_t windowStrides[N];
int64_t padBelow[N];
int64_t padAbove[N];
};
struct gemmAttrs
{
bool transposeA;
bool transposeB;
int64_t m;
int64_t n;
int64_t k;
int64_t lda;
int64_t ldb;
int64_t ldc;
float alpha;
float beta;
int64_t broadcastHint;
};
union opAttrs {
int intAttr;
poolAttrs<2> poolAttrs2d;
poolAttrs<3> poolAttrs3d;
gemmAttrs gemmAttrs2d;
};
} // namespace ngmlir
} // namespace runtime
} // namespace ngraph
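// A minimal sketch of how the pieces above are consumed at runtime, assuming the
// global attribute vector populated by the lowering pass (the dispatcher below is
// hypothetical; the real generic callbacks are defined in cpu_callbacks.cpp):
#include <vector>
extern std::vector<ngraph::runtime::ngmlir::opAttrs> opAttrsVec;
static void dispatch_sketch(size_t attrsIndex, ngraph::runtime::ngmlir::OpType type)
{
    using namespace ngraph::runtime::ngmlir;
    const opAttrs& attrs = opAttrsVec[attrsIndex];
    switch (type)
    {
    case OpType::SOFTMAX: /* attrs.intAttr is the softmax axis */ break;
    case OpType::MATMUL:
    case OpType::GEMM: /* attrs.gemmAttrs2d carries m, n, k, ld*, alpha, beta */ break;
    case OpType::AVGPOOL:
    case OpType::MAXPOOL: /* attrs.poolAttrs2d or poolAttrs3d carries window, strides, pads */ break;
    default: break;
    }
}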
//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style.
// Follows nGraph naming convention for public APIs only, else MLIR naming convention.
#include "callback_utils.hpp"
#include "contrib/mlir/backend/cpu/cpu_backend.hpp"
#include "cpu_runtime.hpp"
#include "ngraph/check.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace ngraph;
using namespace ngraph::runtime::ngmlir;
extern std::vector<opAttrs> opAttrsVec;
static inline opAttrs getAttrs(size_t index)
{
return opAttrsVec[index];
}
static inline bool compare_mkldnn_dims(mkldnn_dims_t& arr1, mkldnn_dims_t& arr2, size_t size)
{
for (auto i = 0; i < size; i++)
{
if (arr1[i] != arr2[i])
{
return false;
}
}
return true;
}
static bool
compare_mkldnn_strides_order(mkldnn_dims_t& strides1, mkldnn_dims_t& strides2, size_t size)
{
std::vector<size_t> indices1(size, 0), indices2(size, 0);
for (size_t i = 0; i < size; i++)
{
indices1[i] = i;
indices2[i] = i;
}
std::sort(indices1.begin(), indices1.end(), [&](const size_t& n1, const size_t& n2) {
return strides1[n1] < strides1[n2];
});
std::sort(indices2.begin(), indices2.end(), [&](const size_t& n1, const size_t& n2) {
return strides2[n1] < strides2[n2];
});
for (auto i = 0; i < size; i++)
{
if (indices1[i] != indices2[i])
{
return false;
}
}
return true;
}
static bool compare_mkldnn_md_formats(const mkldnn::memory::desc& lhs,
const mkldnn::memory::desc& rhs)
{
mkldnn_memory_desc_t md1 = lhs.data, md2 = rhs.data;
if (md1.format_kind != md2.format_kind)
{
return false;
}
if (md1.format_kind != static_cast<mkldnn_format_kind_t>(mkldnn::memory::format_kind::blocked))
{
// mkldnn not implemented yet
return false;
}
if (md1.ndims != md2.ndims)
{
return false;
}
auto blk1 = md1.format_desc.blocking;
auto blk2 = md2.format_desc.blocking;
if (blk1.inner_nblks != blk2.inner_nblks ||
!compare_mkldnn_dims(blk1.inner_blks, blk2.inner_blks, blk1.inner_nblks) ||
!compare_mkldnn_dims(blk1.inner_idxs, blk2.inner_idxs, blk1.inner_nblks))
{
return false;
}
return compare_mkldnn_strides_order(blk1.strides, blk2.strides, md1.ndims);
}
static mkldnn::memory convert_layout_if_diff(const mkldnn::memory::desc& lhs,
const mkldnn::memory::desc& rhs,
void* ptr,
mkldnn::engine cpu_engine)
{
if (!compare_mkldnn_md_formats(lhs, rhs))
{
mkldnn::memory reorder_in = {lhs, cpu_engine, ptr};
mkldnn::memory reorder_out = {rhs, cpu_engine};
mkldnn::reorder convert(reorder_in, reorder_out);
std::unordered_map<int, mkldnn::memory> exec_args = {{MKLDNN_ARG_SRC, reorder_in},
{MKLDNN_ARG_DST, reorder_out}};
mkldnn::stream s(cpu_engine);
try
{
convert.execute(s, exec_args);
s.wait();
}
catch (const mkldnn::error& e)
{
throw ngraph_error("Could not run mkdnn primitive " + std::string(e.message));
}
return reorder_out;
}
else
{
return mkldnn::memory{lhs, cpu_engine, ptr};
}
}
static void convert_output_layout(const mkldnn::memory::desc& lhs,
const mkldnn::memory::desc& rhs,
void* ptr,
mkldnn::engine cpu_engine)
{
mkldnn::memory reorder_in = {rhs, cpu_engine};
mkldnn::memory reorder_out = {lhs, cpu_engine, ptr};
mkldnn::reorder convert(reorder_in, reorder_out);
std::unordered_map<int, mkldnn::memory> exec_args = {{MKLDNN_ARG_SRC, reorder_in},
{MKLDNN_ARG_DST, reorder_out}};
mkldnn::stream s(cpu_engine);
try
{
convert.execute(s, exec_args);
s.wait();
}
catch (const mkldnn::error& e)
{
throw ngraph_error("Could not run mkdnn primitive " + std::string(e.message));
}
}
/// Callback for MaxPoolBackprop
static void __mlir_mkldnn_maxpoolbackprop(size_t rank,
StaticMemRef* memRefSrc,
StaticMemRef* memRefDelta,
StaticMemRef* memRefOutput,
size_t index)
{
mkldnn::memory::dims srcDims(rank);
mkldnn::memory::dims srcStrides(rank);
mkldnn::memory::dims deltaDims(rank);
mkldnn::memory::dims deltaStrides(rank);
mkldnn::memory::dims outDims(rank);
mkldnn::memory::dims outStrides(rank);
for (auto i = 0; i < rank; i++)
{
srcDims[i] = memRefSrc->shapeAndStrides[i];
srcStrides[i] = memRefSrc->shapeAndStrides[rank + i];
deltaDims[i] = memRefDelta->shapeAndStrides[i];
deltaStrides[i] = memRefDelta->shapeAndStrides[rank + i];
outDims[i] = memRefOutput->shapeAndStrides[i];
outStrides[i] = memRefOutput->shapeAndStrides[rank + i];
}
// build mkldnn primitive and execute
auto required_format = rank == 4 ? mkldnn::memory::FORMAT::nchw : mkldnn::memory::FORMAT::ncdhw;
mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32;
auto diff_dst_desc = mkldnn::memory::desc(deltaDims, dtype, required_format);
auto diff_src_desc = mkldnn::memory::desc(outDims, dtype, required_format);
auto src_desc_origin = mkldnn::memory::desc(srcDims, dtype, srcStrides);
auto diff_dst_desc_origin = mkldnn::memory::desc(deltaDims, dtype, deltaStrides);
auto diff_src_desc_origin = mkldnn::memory::desc(outDims, dtype, outStrides);
mkldnn::primitive_attr attr;
mkldnn::engine cpu_engine(mkldnn::engine::kind::cpu, 0);
mkldnn::pooling_forward::primitive_desc maxpool_pd_f;
mkldnn::pooling_backward::primitive_desc maxpool_pd_b;
if (rank == 4)
{
poolAttrs<2> pAttrs = getAttrs(index).poolAttrs2d;
auto maxpool_desc_f = mkldnn::pooling_forward::desc(
mkldnn::prop_kind::forward_training,
mkldnn::algorithm::pooling_max,
diff_src_desc,
diff_dst_desc,
mkldnn::memory::dims{pAttrs.windowStrides[0], pAttrs.windowStrides[1]},
mkldnn::memory::dims{pAttrs.windowShape[0], pAttrs.windowShape[1]},
mkldnn::memory::dims{pAttrs.padBelow[0], pAttrs.padBelow[1]},
mkldnn::memory::dims{pAttrs.padAbove[0], pAttrs.padAbove[1]});
auto maxpool_desc_b = mkldnn::pooling_backward::desc(
mkldnn::algorithm::pooling_max,
diff_src_desc,
diff_dst_desc,
mkldnn::memory::dims{pAttrs.windowStrides[0], pAttrs.windowStrides[1]},
mkldnn::memory::dims{pAttrs.windowShape[0], pAttrs.windowShape[1]},
mkldnn::memory::dims{pAttrs.padBelow[0], pAttrs.padBelow[1]},
mkldnn::memory::dims{pAttrs.padAbove[0], pAttrs.padAbove[1]});
maxpool_pd_f = mkldnn::pooling_forward::primitive_desc(maxpool_desc_f, attr, cpu_engine);
maxpool_pd_b = mkldnn::pooling_backward::primitive_desc(
maxpool_desc_b, attr, cpu_engine, maxpool_pd_f);
}
else if (rank == 5)
{
poolAttrs<3> pAttrs = getAttrs(index).poolAttrs3d;
auto maxpool_desc_f = mkldnn::pooling_forward::desc(
mkldnn::prop_kind::forward_training,
mkldnn::algorithm::pooling_max,
diff_src_desc,
diff_dst_desc,
mkldnn::memory::dims{
pAttrs.windowStrides[0], pAttrs.windowStrides[1], pAttrs.windowStrides[2]},
mkldnn::memory::dims{
pAttrs.windowShape[0], pAttrs.windowShape[1], pAttrs.windowShape[2]},
mkldnn::memory::dims{pAttrs.padBelow[0], pAttrs.padBelow[1], pAttrs.padBelow[2]},
mkldnn::memory::dims{pAttrs.padAbove[0], pAttrs.padAbove[1], pAttrs.padAbove[2]});
auto maxpool_desc_b = mkldnn::pooling_backward::desc(
mkldnn::algorithm::pooling_max,
diff_src_desc,
diff_dst_desc,
mkldnn::memory::dims{
pAttrs.windowStrides[0], pAttrs.windowStrides[1], pAttrs.windowStrides[2]},
mkldnn::memory::dims{
pAttrs.windowShape[0], pAttrs.windowShape[1], pAttrs.windowShape[2]},
mkldnn::memory::dims{pAttrs.padBelow[0], pAttrs.padBelow[1], pAttrs.padBelow[2]},
mkldnn::memory::dims{pAttrs.padAbove[0], pAttrs.padAbove[1], pAttrs.padAbove[2]});
maxpool_pd_f = mkldnn::pooling_forward::primitive_desc(maxpool_desc_f, attr, cpu_engine);
maxpool_pd_b = mkldnn::pooling_backward::primitive_desc(
maxpool_desc_b, attr, cpu_engine, maxpool_pd_f);
}
mkldnn::pooling_forward maxpool_f(maxpool_pd_f);
mkldnn::memory src_mem = convert_layout_if_diff(
src_desc_origin, maxpool_pd_b.diff_src_desc(), memRefSrc->allocatedPtr, cpu_engine);
mkldnn::memory dst_mem{maxpool_pd_b.diff_dst_desc(), cpu_engine};
mkldnn::memory workspace{maxpool_pd_f.workspace_desc(), cpu_engine};
mkldnn::pooling_backward maxpool_b(maxpool_pd_b);
mkldnn::memory diff_dst = convert_layout_if_diff(
diff_dst_desc_origin, maxpool_pd_b.diff_dst_desc(), memRefDelta->allocatedPtr, cpu_engine);
mkldnn::memory diff_src;
bool need_convert = false;
if (!compare_mkldnn_md_formats(diff_src_desc_origin, maxpool_pd_b.diff_src_desc()))
{
diff_src = mkldnn::memory(maxpool_pd_b.diff_src_desc(), cpu_engine);
need_convert = true;
}
else
{
diff_src =
mkldnn::memory(maxpool_pd_b.diff_src_desc(), cpu_engine, memRefOutput->allocatedPtr);
}
std::unordered_map<int, mkldnn::memory> exec_args_f = {
{MKLDNN_ARG_SRC, src_mem}, {MKLDNN_ARG_WORKSPACE, workspace}, {MKLDNN_ARG_DST, dst_mem}};
std::unordered_map<int, mkldnn::memory> exec_args_b = {{MKLDNN_ARG_DIFF_DST, diff_dst},
{MKLDNN_ARG_WORKSPACE, workspace},
{MKLDNN_ARG_DIFF_SRC, diff_src}};
mkldnn::stream s(cpu_engine);
try
{
maxpool_f.execute(s, exec_args_f);
s.wait();
maxpool_b.execute(s, exec_args_b);
s.wait();
}
catch (const mkldnn::error& e)
{
throw ngraph_error("Could not run mkldnn primitive " + std::string(e.message));
}
if (need_convert)
{
convert_output_layout(diff_dst_desc_origin,
maxpool_pd_b.diff_dst_desc(),
memRefOutput->allocatedPtr,
cpu_engine);
}
}
/// Callback for AvgPoolBackprop
static void __mlir_mkldnn_avgpoolbackprop(size_t rank,
StaticMemRef* memRefInput,
StaticMemRef* memRefOutput,
size_t index)
{
mkldnn::memory::dims dims(rank);
mkldnn::memory::dims strides(rank);
mkldnn::memory::dims outDims(rank);
mkldnn::memory::dims outStrides(rank);
for (auto i = 0; i < rank; i++)
{
dims[i] = memRefInput->shapeAndStrides[i];
strides[i] = memRefInput->shapeAndStrides[rank + i];
outDims[i] = memRefOutput->shapeAndStrides[i];
outStrides[i] = memRefOutput->shapeAndStrides[rank + i];
}
// build mkldnn primitive and execute
auto required_format = rank == 4 ? mkldnn::memory::FORMAT::nchw : mkldnn::memory::FORMAT::ncdhw;
mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32;
auto diff_dst_desc = mkldnn::memory::desc(dims, dtype, required_format);
auto diff_src_desc = mkldnn::memory::desc(outDims, dtype, required_format);
auto diff_dst_desc_origin = mkldnn::memory::desc(dims, dtype, strides);
auto diff_src_desc_origin = mkldnn::memory::desc(outDims, dtype, outStrides);
mkldnn::primitive_attr attr;
mkldnn::engine cpu_engine(mkldnn::engine::kind::cpu, 0);
mkldnn::pooling_backward::primitive_desc avgpool_pd_b;
if (rank == 4)
{
poolAttrs<2> pAttrs = getAttrs(index).poolAttrs2d;
auto avgpool_desc_f = mkldnn::pooling_forward::desc(
mkldnn::prop_kind::forward_training,
(pAttrs.includePaddingInAvgComputation
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding),
diff_src_desc,
diff_dst_desc,
mkldnn::memory::dims{pAttrs.windowStrides[0], pAttrs.windowStrides[1]},
mkldnn::memory::dims{pAttrs.windowShape[0], pAttrs.windowShape[1]},
mkldnn::memory::dims{pAttrs.padBelow[0], pAttrs.padBelow[1]},
mkldnn::memory::dims{pAttrs.padAbove[0], pAttrs.padAbove[1]});
auto avgpool_desc_b = mkldnn::pooling_backward::desc(
(pAttrs.includePaddingInAvgComputation
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding),
diff_src_desc,
diff_dst_desc,
mkldnn::memory::dims{pAttrs.windowStrides[0], pAttrs.windowStrides[1]},
mkldnn::memory::dims{pAttrs.windowShape[0], pAttrs.windowShape[1]},
mkldnn::memory::dims{pAttrs.padBelow[0], pAttrs.padBelow[1]},
mkldnn::memory::dims{pAttrs.padAbove[0], pAttrs.padAbove[1]});
auto avgpool_pd_f =
mkldnn::pooling_forward::primitive_desc(avgpool_desc_f, attr, cpu_engine);
avgpool_pd_b = mkldnn::pooling_backward::primitive_desc(
avgpool_desc_b, attr, cpu_engine, avgpool_pd_f);
}
else if (rank == 5)
{
poolAttrs<3> pAttrs = getAttrs(index).poolAttrs3d;
auto avgpool_desc_f = mkldnn::pooling_forward::desc(
mkldnn::prop_kind::forward_training,
(pAttrs.includePaddingInAvgComputation
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding),
diff_src_desc,
diff_dst_desc,
mkldnn::memory::dims{
pAttrs.windowStrides[0], pAttrs.windowStrides[1], pAttrs.windowStrides[2]},
mkldnn::memory::dims{
pAttrs.windowShape[0], pAttrs.windowShape[1], pAttrs.windowShape[2]},
mkldnn::memory::dims{pAttrs.padBelow[0], pAttrs.padBelow[1], pAttrs.padBelow[2]},
mkldnn::memory::dims{pAttrs.padAbove[0], pAttrs.padAbove[1], pAttrs.padAbove[2]});
auto avgpool_desc_b = mkldnn::pooling_backward::desc(
(pAttrs.includePaddingInAvgComputation
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding),
diff_src_desc,
diff_dst_desc,
mkldnn::memory::dims{
pAttrs.windowStrides[0], pAttrs.windowStrides[1], pAttrs.windowStrides[2]},
mkldnn::memory::dims{
pAttrs.windowShape[0], pAttrs.windowShape[1], pAttrs.windowShape[2]},
mkldnn::memory::dims{pAttrs.padBelow[0], pAttrs.padBelow[1], pAttrs.padBelow[2]},
mkldnn::memory::dims{pAttrs.padAbove[0], pAttrs.padAbove[1], pAttrs.padAbove[2]});
auto avgpool_pd_f =
mkldnn::pooling_forward::primitive_desc(avgpool_desc_f, attr, cpu_engine);
avgpool_pd_b = mkldnn::pooling_backward::primitive_desc(
avgpool_desc_b, attr, cpu_engine, avgpool_pd_f);
}
mkldnn::pooling_backward avgpool(avgpool_pd_b);
mkldnn::memory in = convert_layout_if_diff(
diff_dst_desc_origin, avgpool_pd_b.diff_dst_desc(), memRefInput->allocatedPtr, cpu_engine);
mkldnn::memory out;
bool need_convert = false;
if (!compare_mkldnn_md_formats(diff_src_desc_origin, avgpool_pd_b.diff_src_desc()))
{
out = mkldnn::memory(avgpool_pd_b.diff_src_desc(), cpu_engine);
need_convert = true;
}
else
{
out = mkldnn::memory(avgpool_pd_b.diff_src_desc(), cpu_engine, memRefOutput->allocatedPtr);
}
std::unordered_map<int, mkldnn::memory> exec_args = {{MKLDNN_ARG_DIFF_DST, in},
{MKLDNN_ARG_DIFF_SRC, out}};
mkldnn::stream s(cpu_engine);
try
{
avgpool.execute(s, exec_args);
s.wait();
}
catch (const mkldnn::error& e)
{
throw ngraph_error("Could not run mkldnn primitive " + std::string(e.message));
}
if (need_convert)
{
convert_output_layout(diff_dst_desc_origin,
avgpool_pd_b.diff_dst_desc(),
memRefOutput->allocatedPtr,
cpu_engine);
}
}
/// Callback for AvgPool and MaxPool
static void __mlir_mkldnn_pooling(
size_t rank, StaticMemRef* memRefInput, StaticMemRef* memRefOutput, size_t index, OpType type)
{
mkldnn::memory::dims dims(rank);
mkldnn::memory::dims strides(rank);
mkldnn::memory::dims outDims(rank);
mkldnn::memory::dims outStrides(rank);
for (auto i = 0; i < rank; i++)
{
dims[i] = memRefInput->shapeAndStrides[i];
strides[i] = memRefInput->shapeAndStrides[rank + i];
outDims[i] = memRefOutput->shapeAndStrides[i];
outStrides[i] = memRefOutput->shapeAndStrides[rank + i];
}
// build mkldnn primitive and execute
auto required_format = rank == 4 ? mkldnn::memory::FORMAT::nchw : mkldnn::memory::FORMAT::ncdhw;
mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32;
auto input_desc = mkldnn::memory::desc(dims, dtype, required_format);
auto result_desc = mkldnn::memory::desc(outDims, dtype, required_format);
auto input_desc_origin = mkldnn::memory::desc(dims, dtype, strides);
auto result_desc_origin = mkldnn::memory::desc(outDims, dtype, outStrides);
mkldnn::primitive_attr attr;
mkldnn::engine cpu_engine(mkldnn::engine::kind::cpu, 0);
mkldnn::pooling_forward::primitive_desc pool_pd;
if (rank == 4)
{
poolAttrs<2> pAttrs = getAttrs(index).poolAttrs2d;
mkldnn::algorithm alg = type == OpType::MAXPOOL
? mkldnn::algorithm::pooling_max
: (pAttrs.includePaddingInAvgComputation
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding);
auto pool_desc = mkldnn::pooling_forward::desc(
mkldnn::prop_kind::forward_inference,
alg,
input_desc,
result_desc,
mkldnn::memory::dims{pAttrs.windowStrides[0], pAttrs.windowStrides[1]},
mkldnn::memory::dims{pAttrs.windowShape[0], pAttrs.windowShape[1]},
mkldnn::memory::dims{pAttrs.padBelow[0], pAttrs.padBelow[1]},
mkldnn::memory::dims{pAttrs.padAbove[0], pAttrs.padAbove[1]});
pool_pd = mkldnn::pooling_forward::primitive_desc(pool_desc, attr, cpu_engine);
}
else if (rank == 5)
{
poolAttrs<3> pAttrs = getAttrs(index).poolAttrs3d;
mkldnn::algorithm alg = type == OpType::MAXPOOL
? mkldnn::algorithm::pooling_max
: (pAttrs.includePaddingInAvgComputation
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding);
auto pool_desc = mkldnn::pooling_forward::desc(
mkldnn::prop_kind::forward_inference,
alg,
input_desc,
result_desc,
mkldnn::memory::dims{
pAttrs.windowStrides[0], pAttrs.windowStrides[1], pAttrs.windowStrides[2]},
mkldnn::memory::dims{
pAttrs.windowShape[0], pAttrs.windowShape[1], pAttrs.windowShape[2]},
mkldnn::memory::dims{pAttrs.padBelow[0], pAttrs.padBelow[1], pAttrs.padBelow[2]},
mkldnn::memory::dims{pAttrs.padAbove[0], pAttrs.padAbove[1], pAttrs.padAbove[2]});
pool_pd = mkldnn::pooling_forward::primitive_desc(pool_desc, attr, cpu_engine);
}
mkldnn::pooling_forward pool(pool_pd);
mkldnn::memory in = convert_layout_if_diff(
input_desc_origin, pool_pd.src_desc(), memRefInput->allocatedPtr, cpu_engine);
mkldnn::memory out;
bool need_convert = false;
if (!compare_mkldnn_md_formats(result_desc_origin, pool_pd.dst_desc()))
{
out = mkldnn::memory(pool_pd.dst_desc(), cpu_engine);
need_convert = true;
}
else
{
out = mkldnn::memory(pool_pd.dst_desc(), cpu_engine, memRefOutput->allocatedPtr);
}
std::unordered_map<int, mkldnn::memory> exec_args = {{MKLDNN_ARG_SRC, in},
{MKLDNN_ARG_DST, out}};
mkldnn::stream s(cpu_engine);
try
{
pool.execute(s, exec_args);
s.wait();
}
catch (const mkldnn::error& e)
{
throw ngraph_error("Could not run mkldnn primitive " + std::string(e.message));
}
if (need_convert)
{
convert_output_layout(
result_desc_origin, pool_pd.dst_desc(), memRefOutput->allocatedPtr, cpu_engine);
}
}
/// Callback for Softmax
static void __mlir_mkldnn_softmax(size_t rank,
StaticMemRef* memRefInput,
StaticMemRef* memRefOutput,
int index)
{
mkldnn::memory::dims dims(rank);
mkldnn::memory::dims strides(rank);
for (auto i = 0; i < rank; i++)
{
dims[i] = memRefInput->shapeAndStrides[i];
strides[i] = memRefInput->shapeAndStrides[rank + i];
}
auto softmax_axis = getAttrs(index).intAttr;
// build mkldnn primitive and execute
mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32;
auto input_desc = mkldnn::memory::desc(dims, dtype, strides);
auto softmax_desc =
mkldnn::softmax_forward::desc(mkldnn::prop_kind::forward_scoring, input_desc, softmax_axis);
mkldnn::primitive_attr attr;
mkldnn::engine cpu_engine(mkldnn::engine::kind::cpu, 0);
auto softmax_pd = mkldnn::softmax_forward::primitive_desc(softmax_desc, attr, cpu_engine);
mkldnn::softmax_forward softmax(softmax_pd);
mkldnn::memory in{softmax_pd.src_desc(), cpu_engine, memRefInput->allocatedPtr};
mkldnn::memory out{softmax_pd.dst_desc(), cpu_engine, memRefOutput->allocatedPtr};
std::unordered_map<int, mkldnn::memory> exec_args = {{MKLDNN_ARG_SRC, in},
{MKLDNN_ARG_DST, out}};
mkldnn::stream s(cpu_engine);
try
{
softmax.execute(s, exec_args);
s.wait();
}
catch (const mkldnn::error& e)
{
throw ngraph_error("Could not run mkldnn primitive " + std::string(e.message));
}
}
/// Callback for MatMul
static void __mlir_cblas_sgemm(StaticMemRef* memRefmatA,
StaticMemRef* memRefmatB,
StaticMemRef* memRefmatC,
size_t index)
{
gemmAttrs gAttrs = getAttrs(index).gemmAttrs2d;
cblas::cblas_sgemm(cblas::Layout::RowMajor,
gAttrs.transposeA ? cblas::Transpose::Transpose : cblas::Transpose::None,
gAttrs.transposeB ? cblas::Transpose::Transpose : cblas::Transpose::None,
gAttrs.m,
gAttrs.n,
gAttrs.k,
1.0f,
reinterpret_cast<float*>(memRefmatA->allocatedPtr),
std::max<size_t>(1, gAttrs.lda),
reinterpret_cast<float*>(memRefmatB->allocatedPtr),
std::max<size_t>(1, gAttrs.ldb),
0.0f,
reinterpret_cast<float*>(memRefmatC->allocatedPtr),
std::max<size_t>(1, gAttrs.ldc));
}
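For orientation: with a row-major layout, the CBLAS leading dimensions lda, ldb, and ldc are the row lengths (column counts) of A, B, and C, and the std::max<size_t>(1, ...) guards keep them legal when a dimension is zero. A minimal sketch with hypothetical data (sgemm_rowmajor_example is illustrative and not part of the callback):

static void sgemm_rowmajor_example()
{
    // A is 2x3, B is 3x4 (identity-like), C is 2x4; leading dims equal the row lengths.
    float a[2 * 3] = {1, 2, 3, 4, 5, 6};
    float b[3 * 4] = {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0};
    float c[2 * 4] = {0};
    cblas::cblas_sgemm(cblas::Layout::RowMajor,
                       cblas::Transpose::None,
                       cblas::Transpose::None,
                       /*m=*/2,
                       /*n=*/4,
                       /*k=*/3,
                       1.0f,
                       a,
                       /*lda=*/3,
                       b,
                       /*ldb=*/4,
                       0.0f,
                       c,
                       /*ldc=*/4);
    // c now holds {1, 2, 3, 0, 4, 5, 6, 0}.
}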
/// Callback for Gemm
static void __mlir_cblas_sgemm_with_bias(StaticMemRef* memRefmatA,
StaticMemRef* memRefmatB,
StaticMemRef* memRefmatC,
StaticMemRef* memRefmatOut,
size_t index)
{
gemmAttrs gAttrs = getAttrs(index).gemmAttrs2d;
auto transposeA = gAttrs.transposeA;
auto transposeB = gAttrs.transposeB;
auto m = gAttrs.m;
auto n = gAttrs.n;
auto k = gAttrs.k;
auto lda = gAttrs.lda;
auto ldb = gAttrs.ldb;
auto ldc = gAttrs.ldc;
auto alpha = gAttrs.alpha;
auto beta = gAttrs.beta;
auto broadcastHint = gAttrs.broadcastHint;
auto matA = reinterpret_cast<float*>(memRefmatA->allocatedPtr);
auto matB = reinterpret_cast<float*>(memRefmatB->allocatedPtr);
auto matC = reinterpret_cast<float*>(memRefmatC->allocatedPtr);
auto matOut = reinterpret_cast<float*>(memRefmatOut->allocatedPtr);
cblas::cblas_sgemm(cblas::Layout::RowMajor,
transposeA ? cblas::Transpose::Transpose : cblas::Transpose::None,
transposeB ? cblas::Transpose::Transpose : cblas::Transpose::None,
m,
n,
k,
alpha,
matA,
std::max<size_t>(1, lda),
matB,
std::max<size_t>(1, ldb),
0.0f,
matOut,
std::max<size_t>(1, ldc));
if (broadcastHint == 0)
{
std::vector<float> ones(m, 1.0f);
cblas::cblas_sgemm(cblas::Layout::RowMajor,
cblas::Transpose::None,
cblas::Transpose::None,
m,
n,
1,
beta,
ones.data(),
1,
matC,
std::max<size_t>(1, n),
1.0f,
matOut,
std::max<size_t>(1, ldc));
}
else if (broadcastHint == 1)
{
std::vector<float> ones(n, 1.0f);
cblas::cblas_sgemm(cblas::Layout::RowMajor,
cblas::Transpose::None,
cblas::Transpose::None,
m,
n,
1,
beta,
matC,
1,
ones.data(),
std::max<size_t>(1, n),
1.0f,
matOut,
std::max<size_t>(1, ldc));
}
else if (broadcastHint == 2)
{
std::vector<float> ones(m, 1.0f);
std::vector<float> bias(n, *matC);
cblas::cblas_sgemm(cblas::Layout::RowMajor,
cblas::Transpose::None,
cblas::Transpose::None,
m,
n,
1,
beta,
ones.data(),
1,
bias.data(),
std::max<size_t>(1, n),
1.0f,
matOut,
std::max<size_t>(1, ldc));
}
else
{
std::vector<float> identity(n * n, 0.0f);
for (auto i = 0; i < n * n; i += n + 1)
{
identity[i] = 1.0;
}
cblas::cblas_sgemm(cblas::Layout::RowMajor,
cblas::Transpose::None,
cblas::Transpose::None,
m,
n,
n,
beta,
matC,
std::max<size_t>(1, n),
identity.data(),
std::max<size_t>(1, n),
1.0f,
matOut,
std::max<size_t>(1, ldc));
}
}
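The extra GEMMs against a vector of ones (or an identity matrix) realize the bias broadcast as a rank-1 or identity update on top of alpha * A * B. As an illustrative reference for the broadcastHint == 0 case (bias C of shape {1, n} broadcast across rows), the hypothetical helper below spells out the same semantics with plain loops:

static void add_broadcast_bias_rows_sketch(
    float* matOut, const float* matC, float beta, size_t m, size_t n, size_t ldc)
{
    // matOut[i][j] += beta * C[0][j] for every row i: exactly what the ones-vector GEMM
    // computes when broadcastHint == 0.
    for (size_t i = 0; i < m; i++)
    {
        for (size_t j = 0; j < n; j++)
        {
            matOut[i * ldc + j] += beta * matC[j];
        }
    }
}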
extern "C" void __mlir_callback_1_input(void* input, void* output, size_t index, OpType type)
{
auto unrankedMemRefInput = reinterpret_cast<UnrankedMemRef*>(input);
auto unrankedMemRefOutput = reinterpret_cast<UnrankedMemRef*>(output);
if (type == OpType::SOFTMAX)
{
__mlir_mkldnn_softmax(unrankedMemRefInput->rank,
unrankedMemRefInput->memRefDescPtr,
unrankedMemRefOutput->memRefDescPtr,
index);
}
else if (type == OpType::AVGPOOL || type == OpType::MAXPOOL)
{
__mlir_mkldnn_pooling(unrankedMemRefInput->rank,
unrankedMemRefInput->memRefDescPtr,
unrankedMemRefOutput->memRefDescPtr,
index,
type);
}
else if (type == OpType::AVGPOOLBACKPROP)
{
__mlir_mkldnn_avgpoolbackprop(unrankedMemRefInput->rank,
unrankedMemRefInput->memRefDescPtr,
unrankedMemRefOutput->memRefDescPtr,
index);
}
else
{
NGRAPH_UNREACHABLE("Unsupported type");
}
}
extern "C" void
__mlir_callback_2_inputs(void* input0, void* input1, void* output, size_t index, OpType type)
{
auto unrankedMemRefInput0 = reinterpret_cast<UnrankedMemRef*>(input0);
auto unrankedMemRefInput1 = reinterpret_cast<UnrankedMemRef*>(input1);
auto unrankedMemRefOutput = reinterpret_cast<UnrankedMemRef*>(output);
if (type == OpType::MAXPOOLBACKPROP)
{
__mlir_mkldnn_maxpoolbackprop(unrankedMemRefInput0->rank,
unrankedMemRefInput0->memRefDescPtr,
unrankedMemRefInput1->memRefDescPtr,
unrankedMemRefOutput->memRefDescPtr,
index);
}
else if (type == OpType::MATMUL)
{
__mlir_cblas_sgemm(unrankedMemRefInput0->memRefDescPtr,
unrankedMemRefInput1->memRefDescPtr,
unrankedMemRefOutput->memRefDescPtr,
index);
}
else
{
NGRAPH_UNREACHABLE("Unsupported type");
}
}
extern "C" void __mlir_callback_3_inputs(
void* input0, void* input1, void* input2, void* output, size_t index, OpType type)
{
auto unrankedMemRefInput0 = reinterpret_cast<UnrankedMemRef*>(input0);
auto unrankedMemRefInput1 = reinterpret_cast<UnrankedMemRef*>(input1);
auto unrankedMemRefInput2 = reinterpret_cast<UnrankedMemRef*>(input2);
auto unrankedMemRefOutput = reinterpret_cast<UnrankedMemRef*>(output);
if (type == OpType::GEMM)
{
__mlir_cblas_sgemm_with_bias(unrankedMemRefInput0->memRefDescPtr,
unrankedMemRefInput1->memRefDescPtr,
unrankedMemRefInput2->memRefDescPtr,
unrankedMemRefOutput->memRefDescPtr,
index);
}
else
{
NGRAPH_UNREACHABLE("Unsupported type");
}
}
......@@ -53,16 +53,18 @@ static llvm::cl::opt<std::string>
clObjectFilename("ngraph-mlir-object-filename",
llvm::cl::desc("Dump MLIR JITted-compiled object to file jitted_mlir.o"));
void MLIRCPURuntime::run(void* args)
void MLIRCPURuntime::run(const std::vector<MemRefArg>& args)
{
run_internal(*reinterpret_cast<std::vector<void*>*>(args));
run_internal(args);
}
void MLIRCPURuntime::run_internal(std::vector<void*>& externalTensors)
void MLIRCPURuntime::run_internal(const std::vector<MemRefArg>& args)
{
// Create an MLIR execution engine. We use a null MLIR pass manager for now to make sure we
// don't run MLIR passes that were already run. We also pass a default transformer created with
// the default or user-provided optimization level.
auto llvmTransformer = mlir::makeOptimizingTransformer(
MLIRCPUBackend::mlirOptLevel, /*sizeLevel=*/0, MLIRCPUBackend::targetMachine.get());
auto maybeEngine = mlir::ExecutionEngine::create(
......@@ -70,14 +72,14 @@ void MLIRCPURuntime::run_internal(std::vector<void*>& externalTensors)
NGRAPH_CHECK(maybeEngine, "failed to construct an execution engine");
m_engine = std::move(maybeEngine.get());
bindArguments(externalTensors);
bindArguments(args);
execute();
cleanup();
}
// Binds MLIR function arguments to the proper values. This includes externally allocated
// tensors and helpers to be used inside the function.
void MLIRCPURuntime::bindArguments(std::vector<void*>& externalTensors)
void MLIRCPURuntime::bindArguments(const std::vector<MemRefArg>& args)
{
NGRAPH_CHECK(m_module, "MLIR module is not ready.");
......@@ -85,13 +87,17 @@ void MLIRCPURuntime::bindArguments(std::vector<void*>& externalTensors)
NGRAPH_CHECK(func && !func.getBlocks().empty(), "Function not found");
// Set external arguments
m_externalTensors = &externalTensors;
m_externalTensors = &args;
// Create a list with a type-erased double pointer for each invocation argument.
// We currently use 'allocateMemrefArgs', which creates the arguments list per call ABI (see
// comment below).
// StaticMemRef is just a struct with the actual pointer to the data.
for (auto i = 0; i < m_externalTensors->size(); i++)
{
m_ranks.push_back((*m_externalTensors)[i].m_shape.size());
}
auto expectedArguments = allocateMemrefArgs();
NGRAPH_CHECK(expectedArguments.size(), "Arguments can't be created");
m_invokeArgs = std::move(expectedArguments);
......@@ -103,8 +109,14 @@ void MLIRCPURuntime::bindArguments(std::vector<void*>& externalTensors)
for (size_t i = 0, numArgs = m_invokeArgs.size(); i < numArgs; ++i)
{
auto* memRefArg = *(reinterpret_cast<StaticMemRef**>(m_invokeArgs[i]));
memRefArg->allocatedPtr = (*m_externalTensors)[i];
memRefArg->alignedPtr = (*m_externalTensors)[i];
memRefArg->allocatedPtr = (*m_externalTensors)[i].m_tensor;
memRefArg->alignedPtr = (*m_externalTensors)[i].m_tensor;
auto rank = m_ranks[i];
for (auto j = 0; j < rank; j++)
{
memRefArg->shapeAndStrides[j] = (*m_externalTensors)[i].m_shape[j];
memRefArg->shapeAndStrides[rank + j] = (*m_externalTensors)[i].m_strides[j];
}
}
}
......@@ -128,6 +140,7 @@ void MLIRCPURuntime::execute()
void MLIRCPURuntime::cleanup()
{
// Free void double pointer arguments without freeing external tensor data.
int i = 0;
for (auto* arg : m_invokeArgs)
{
auto* memRefArg = *(reinterpret_cast<StaticMemRef**>(arg));
......@@ -148,7 +161,7 @@ SmallVector<void*, 8> MLIRCPURuntime::allocateMemrefArgs()
SmallVector<void*, 8> args;
for (auto i = 0; i < m_externalTensors->size(); i++)
{
auto descriptor = allocateMemrefDescriptor();
auto descriptor = allocateMemrefDescriptor(m_ranks[i]);
StaticMemRef** arg = reinterpret_cast<StaticMemRef**>(malloc(sizeof(StaticMemRef*)));
*arg = descriptor;
args.push_back(arg);
......@@ -156,13 +169,17 @@ SmallVector<void*, 8> MLIRCPURuntime::allocateMemrefArgs()
return args;
}
StaticMemRef* MLIRCPURuntime::allocateMemrefDescriptor()
StaticMemRef* MLIRCPURuntime::allocateMemrefDescriptor(size_t rank)
{
// We only use StaticMemRef because that's what MLIR currently offers.
// We should expand this with different types and dynamic MemRefs
auto* descriptor = reinterpret_cast<StaticMemRef*>(malloc(sizeof(StaticMemRef)));
// We allocate 2 * rank * sizeof(int64_t) for the last element "int64_t shapeAndStrides[]"
// in StaticMemRef because the shape and the strides each need rank * sizeof(int64_t).
auto* descriptor =
reinterpret_cast<StaticMemRef*>(malloc(sizeof(StaticMemRef) + 2 * rank * sizeof(int64_t)));
NGRAPH_CHECK(descriptor != nullptr, "NULL MemRef descriptor");
descriptor->allocatedPtr = nullptr;
descriptor->alignedPtr = nullptr;
descriptor->offset = 0;
return descriptor;
}
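StaticMemRef thus ends in a C-style flexible array member: the fixed fields are followed directly by 2 * rank int64_t slots, shape first and strides second. A minimal sketch for an assumed rank of 2 (make_rank2_descriptor_sketch is hypothetical and for illustration only):

static StaticMemRef* make_rank2_descriptor_sketch()
{
    // Fixed fields plus shape[2] and strides[2] trailing slots.
    auto* d = reinterpret_cast<StaticMemRef*>(
        malloc(sizeof(StaticMemRef) + 2 * 2 * sizeof(int64_t)));
    d->allocatedPtr = nullptr;
    d->alignedPtr = nullptr;
    d->offset = 0;
    d->shapeAndStrides[0] = 3; // shape[0]
    d->shapeAndStrides[1] = 4; // shape[1]
    d->shapeAndStrides[2] = 4; // stride[0] (row-major contiguous: equals shape[1])
    d->shapeAndStrides[3] = 1; // stride[1]
    return d;
}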
......@@ -37,7 +37,16 @@ namespace ngraph
{
void* allocatedPtr;
void* alignedPtr;
int64_t offset;
int64_t shapeAndStrides[];
};
struct UnrankedMemRef
{
int64_t rank;
StaticMemRef* memRefDescPtr;
};
/// A CPU Runtime is an MLIR runtime that owns an MLIR context and a module
/// The module should be in LLVM dialect and ready to be lowered via an MLIR
/// ExecutionEngine. The runtime owns the context and must out-live any MLIR
......@@ -46,12 +55,12 @@ namespace ngraph
{
public:
/// Executes a pre-compiled subgraph
void run(void* args) override;
void run(const std::vector<MemRefArg>& args) override;
private:
void run_internal(std::vector<void*>& externalTensors);
void run_internal(const std::vector<MemRefArg>& args);
// Bind external tensors to MLIR module entry point
void bindArguments(std::vector<void*>& externalTensors);
void bindArguments(const std::vector<MemRefArg>& args);
// Invokes an MLIR module entry point with bound arguments
void execute();
// Cleans up allocated args
......@@ -61,14 +70,15 @@ namespace ngraph
llvm::SmallVector<void*, 8> allocateMemrefArgs();
/// Helper to allocate a mem ref object. Handles static shapes only for now.
StaticMemRef* allocateMemrefDescriptor();
StaticMemRef* allocateMemrefDescriptor(size_t);
private:
// Pointers to externally allocated memory for sub-graph's input and output tensors.
std::vector<void*>* m_externalTensors;
const std::vector<MemRefArg>* m_externalTensors;
// Arguments for the MLIR function generated for the nGraph sub-graph.
llvm::SmallVector<void*, 8> m_invokeArgs;
std::unique_ptr<mlir::ExecutionEngine> m_engine;
std::vector<size_t> m_ranks;
};
}
}
......
......@@ -33,6 +33,13 @@ namespace ngraph
{
namespace ngmlir
{
struct MemRefArg
{
void* m_tensor;
std::vector<size_t> m_shape;
std::vector<size_t> m_strides;
};
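For illustration, a MemRefArg describing a row-major 2x3 float tensor carries the raw data pointer plus per-dimension shape and strides; the sketch below is hypothetical and assumes strides are expressed in elements, as in the rest of this patch:

inline MemRefArg make_example_memref_arg(float* data)
{
    MemRefArg arg;
    arg.m_tensor = data;    // raw pointer to the tensor buffer
    arg.m_shape = {2, 3};   // a 2x3 tensor
    arg.m_strides = {3, 1}; // row-major strides, in elements
    return arg;
}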
/// Base class for an MLIR runtime. An MLIR runtime owns the MLIR Context and owns
/// the final compiled module. It supports invoking the module with specific arguments
class MLIRRuntime
......@@ -43,7 +50,7 @@ namespace ngraph
/// Overload with module op
void set_module(mlir::ModuleOp& module) { m_module = module; }
/// Executes a pre-compiled subgraph
virtual void run(void* args) = 0;
virtual void run(const std::vector<MemRefArg>& args) = 0;
/// Get the MLIR module that this runtime owns
mlir::OwningModuleRef& get_module() { return m_module; }
......@@ -54,4 +61,4 @@ namespace ngraph
};
}
}
}
\ No newline at end of file
}
......@@ -19,6 +19,7 @@
#include "ngraph/op/add.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/fused/matmul.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/util/broadcasting.hpp"
......
......@@ -43,30 +43,69 @@ namespace ngraph
// Tensors haven't been allocated yet so we have to keep a pointer to the pointer
// that will hold the future memory address.
std::vector<size_t> buffer_indices;
std::vector<std::vector<size_t>> shape_vec;
std::vector<std::vector<size_t>> strides_vec;
for (const TensorViewWrapper& arg : args)
{
auto buffer_index = external_function->get_buffer_index(arg.get_name());
buffer_indices.push_back(buffer_index);
// Get shape and strides
auto tensor_shape = arg.get_shape();
std::vector<size_t> shape(tensor_shape.size());
for (auto i = 0; i < tensor_shape.size(); i++)
{
shape[i] = tensor_shape[i];
}
shape_vec.push_back(shape);
auto tensor_strides = arg.get_strides();
std::vector<size_t> strides(tensor_strides.size());
for (auto i = 0; i < tensor_strides.size(); i++)
{
strides[i] = tensor_strides[i];
}
strides_vec.push_back(strides);
}
for (const TensorViewWrapper& result : out)
{
auto buffer_index = external_function->get_buffer_index(result.get_name());
buffer_indices.push_back(buffer_index);
// Get shape and strides
auto tensor_shape = result.get_shape();
std::vector<size_t> shape(tensor_shape.size());
for (auto i = 0; i < tensor_shape.size(); i++)
{
shape[i] = tensor_shape[i];
}
shape_vec.push_back(shape);
auto tensor_strides = result.get_strides();
std::vector<size_t> strides(tensor_strides.size());
for (auto i = 0; i < tensor_strides.size(); i++)
{
strides[i] = tensor_strides[i];
}
strides_vec.push_back(strides);
}
// Create functor that will be executed to compile and run this CompiledKernel.
// Note that 'buffer_indices', 'shape_vec', and 'strides_vec' must be captured by value
// since they are local vars.
auto functor = [node, buffer_indices](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
auto functor = [node, buffer_indices, shape_vec, strides_vec](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
// MLIR requires a list of type-erased pointers to arguments. Tensors must have
// been allocated at this point so we can get rid of the extra reference.
std::vector<void*> ptr_args;
std::vector<MemRefArg> mem_ref_arg_vec;
int i = 0;
for (auto& buffer_index : buffer_indices)
{
ptr_args.push_back(ctx->buffer_data[buffer_index]);
MemRefArg mem_ref_arg;
mem_ref_arg.m_tensor = ctx->buffer_data[buffer_index];
mem_ref_arg.m_shape = shape_vec[i];
mem_ref_arg.m_strides = strides_vec[i];
mem_ref_arg_vec.push_back(mem_ref_arg);
i++;
}
// Compile nodes within the CompiledKernel op.
CompiledKernel* compiled_kernel =
static_cast<CompiledKernel*>(const_cast<Node*>(node));
......@@ -97,13 +136,13 @@ namespace ngraph
mlir_backend.codegen();
// Store module into runtime, and invoke.
mlir_runtime.set_module(mlir_backend.get_module());
mlir_runtime.run(&ptr_args);
mlir_runtime.run(mem_ref_arg_vec);
}
else
{
// We have found a cached runtime, just invoke.
MLIRCPURuntime& mlir_runtime = it->second;
mlir_runtime.run(&ptr_args);
mlir_runtime.run(mem_ref_arg_vec);
}
};
......
......@@ -87,8 +87,10 @@
#include "ngraph/op/floor.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/fused/gelu.hpp"
#include "ngraph/op/fused/gemm.hpp"
#include "ngraph/op/fused/group_conv.hpp"
#include "ngraph/op/fused/lstm_cell.hpp"
#include "ngraph/op/fused/matmul.hpp"
#include "ngraph/op/fused/softmax_crossentropy.hpp"
#include "ngraph/op/gather.hpp"
#include "ngraph/op/gather_nd.hpp"
......@@ -1187,7 +1189,22 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(
auto dex = is_direct_execution();
auto is_supported = [dex](const Node& node) {
#ifdef NGRAPH_MLIR_ENABLE
if (std::getenv("NGRAPH_MLIR") != nullptr && std::getenv("NGRAPH_MLIR_CALLBACK") != nullptr)
{
if (typeid(ngraph::op::MatMul) == typeid(node) &&
node.get_input_element_type(0) == element::f32)
{
return true;
}
if (typeid(ngraph::op::Gemm) == typeid(node) &&
node.get_input_element_type(0) == element::f32)
{
return true;
}
}
#endif
// This check averts the decomposition of LSTMCell;
// we will map LSTMCell to the LSTM CPU op in a later
// graph pass.
......
......@@ -321,6 +321,7 @@ set(MULTI_TEST_SRC
backend/logical_or.in.cpp
backend/logical_xor.in.cpp
backend/lrn.in.cpp
backend/matmul.in.cpp
backend/max.in.cpp
backend/maximum.in.cpp
backend/min.in.cpp
......
......@@ -1024,6 +1024,26 @@ NGRAPH_TEST(${BACKEND_NAME}, gemm)
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, gemm_C)
{
auto A = make_shared<op::Parameter>(element::f32, Shape{3, 6});
auto B = make_shared<op::Parameter>(element::f32, Shape{6, 4});
auto C = make_shared<op::Parameter>(element::f32, Shape{3, 4});
auto gemm_func = make_shared<op::Gemm>(A, B, C);
auto function = make_shared<Function>(NodeVector{gemm_func}, ParameterVector{A, B, C});
auto test_case = test::NgraphTestCase(function, "${BACKEND_NAME}");
// A
test_case.add_input<float>(vector<float>(18, 1));
// B
test_case.add_input<float>(vector<float>(24, 2));
// C
test_case.add_input<float>(vector<float>(12, 1));
// output
test_case.add_expected_output<float>(Shape{3, 4}, vector<float>(12, 13));
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, gemm_broadcast_input_C)
{
auto A = make_shared<op::Parameter>(element::f32, Shape{3, 6});
......@@ -1041,6 +1061,48 @@ NGRAPH_TEST(${BACKEND_NAME}, gemm_broadcast_input_C)
test_case.add_input<float>(vector<float>{1});
// output
test_case.add_expected_output<float>(Shape{3, 4}, vector<float>(12, 7));
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, gemm_broadcast_axes_0_input_C)
{
auto A = make_shared<op::Parameter>(element::f32, Shape{3, 6});
auto B = make_shared<op::Parameter>(element::f32, Shape{6, 4});
auto C = make_shared<op::Parameter>(element::f32, Shape{1, 4});
auto gemm_func = make_shared<op::Gemm>(A, B, C, 0.5);
auto function = make_shared<Function>(NodeVector{gemm_func}, ParameterVector{A, B, C});
auto test_case = test::NgraphTestCase(function, "${BACKEND_NAME}");
// A
test_case.add_input<float>(vector<float>(18, 1));
// B
test_case.add_input<float>(vector<float>(24, 2));
// C
test_case.add_input<float>(vector<float>{1, 2, 3, 4});
// output
test_case.add_expected_output<float>(Shape{3, 4},
vector<float>{7, 8, 9, 10, 7, 8, 9, 10, 7, 8, 9, 10});
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, gemm_broadcast_axes_1_input_C)
{
auto A = make_shared<op::Parameter>(element::f32, Shape{3, 6});
auto B = make_shared<op::Parameter>(element::f32, Shape{6, 4});
auto C = make_shared<op::Parameter>(element::f32, Shape{3, 1});
auto gemm_func = make_shared<op::Gemm>(A, B, C, 0.5);
auto function = make_shared<Function>(NodeVector{gemm_func}, ParameterVector{A, B, C});
auto test_case = test::NgraphTestCase(function, "${BACKEND_NAME}");
// A
test_case.add_input<float>(vector<float>(18, 1));
// B
test_case.add_input<float>(vector<float>(24, 2));
// C
test_case.add_input<float>(vector<float>(3, 1));
// output
test_case.add_expected_output<float>(Shape{3, 4}, vector<float>(12, 7));
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, fused_clamp)
......
//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <cstdlib>
#include <random>
#include <string>
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "util/all_close.hpp"
#include "util/all_close_f.hpp"
#include "util/ndarray.hpp"
#include "util/test_control.hpp"
#include "util/test_tools.hpp"
using namespace std;
using namespace ngraph;
static string s_manifest = "${MANIFEST}";
NGRAPH_TEST(${BACKEND_NAME}, matmul_2x0_0x2)
{
Shape shape_a{2, 0};
Shape shape_b{0, 2};
Shape shape_r{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto B = make_shared<op::Parameter>(element::f32, shape_b);
auto f = make_shared<Function>(make_shared<op::MatMul>(A, B), ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a, vector<float>{});
auto b = backend->create_tensor(element::f32, shape_b);
copy_data(b, vector<float>{});
auto result = backend->create_tensor(element::f32, shape_r);
// Overwrite the initial result vector to make sure we're not just coincidentally getting the
// right value.
copy_data(result, vector<float>{2112, 2112, 2112, 2112});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
EXPECT_TRUE(test::all_close_f((vector<float>{0, 0, 0, 0}), read_vector<float>(result)));
}
NGRAPH_TEST(${BACKEND_NAME}, matmul_0x2_2x0)
{
Shape shape_a{0, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_b{2, 0};
auto B = make_shared<op::Parameter>(element::f32, shape_b);
Shape shape_r{0, 0};
auto f = make_shared<Function>(make_shared<op::MatMul>(A, B), ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a, vector<float>{});
auto b = backend->create_tensor(element::f32, shape_b);
copy_data(b, vector<float>{});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
EXPECT_TRUE(test::all_close_f((vector<float>{}), read_vector<float>(result)));
}
NGRAPH_TEST(${BACKEND_NAME}, matmul_3x2_2x0)
{
Shape shape_a{3, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_b{2, 0};
auto B = make_shared<op::Parameter>(element::f32, shape_b);
Shape shape_r{3, 0};
auto f = make_shared<Function>(make_shared<op::MatMul>(A, B), ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape_a);
copy_data(a, vector<float>{1, 2, 3, 4, 5, 6});
auto b = backend->create_tensor(element::f32, shape_b);
copy_data(b, vector<float>{});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
EXPECT_TRUE(test::all_close_f((vector<float>{}), read_vector<float>(result)));
}
NGRAPH_TEST(${BACKEND_NAME}, matmul_2x2_2x2)
{
Shape shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
Shape shape_r{2, 2};
auto f = make_shared<Function>(make_shared<op::MatMul>(A, B), ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{1, 2, 3, 4});
auto b = backend->create_tensor(element::f32, shape);
copy_data(b, vector<float>{5, 6, 7, 8});
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
EXPECT_TRUE(test::all_close_f((vector<float>{19, 22, 43, 50}), read_vector<float>(result)));
}
NGRAPH_TEST(${BACKEND_NAME}, matmul_2x3_3x3)
{
Shape shape_in1{2, 3};
Shape shape_in2{3, 3};
Shape shape_out{2, 3};
auto A = make_shared<op::Parameter>(element::f32, shape_in1);
auto B = make_shared<op::Parameter>(element::f32, shape_in2);
auto matmul = make_shared<op::MatMul>(A, B, false, false);
auto f = make_shared<Function>(matmul, ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
shared_ptr<runtime::Tensor> a = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> b = backend->create_tensor(element::f32, shape_in2);
shared_ptr<runtime::Tensor> result = backend->create_tensor(element::f32, shape_out);
copy_data(a, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
copy_data(b, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
EXPECT_TRUE(test::all_close_f(read_vector<float>(result),
vector<float>{30.f, 36.f, 42.f, 66.f, 81.f, 96.f}));
}
NGRAPH_TEST(${BACKEND_NAME}, matmul_2x3_3x3_int64)
{
Shape shape_in1{2, 3};
Shape shape_in2{3, 3};
Shape shape_out{2, 3};
auto A = make_shared<op::Parameter>(element::i64, shape_in1);
auto B = make_shared<op::Parameter>(element::i64, shape_in2);
auto matmul = make_shared<op::MatMul>(A, B, false, false);
auto f = make_shared<Function>(matmul, ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
shared_ptr<runtime::Tensor> a = backend->create_tensor(element::i64, shape_in1);
shared_ptr<runtime::Tensor> b = backend->create_tensor(element::i64, shape_in2);
shared_ptr<runtime::Tensor> result = backend->create_tensor(element::i64, shape_out);
copy_data(a, vector<int64_t>{1, 2, 3, 4, 5, 6});
copy_data(b, vector<int64_t>{1, 2, 3, 4, 5, 6, 7, 8, 9});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
EXPECT_TRUE(
test::all_close(read_vector<int64_t>(result), vector<int64_t>{30, 36, 42, 66, 81, 96}));
}
NGRAPH_TEST(${BACKEND_NAME}, matmul_3x2_3x3_transpose)
{
Shape shape_in1{3, 2};
Shape shape_in2{3, 3};
Shape shape_out{2, 3};
auto A = make_shared<op::Parameter>(element::f32, shape_in1);
auto B = make_shared<op::Parameter>(element::f32, shape_in2);
auto matmul = make_shared<op::MatMul>(A, B, true, false);
auto f = make_shared<Function>(matmul, ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
shared_ptr<runtime::Tensor> a = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> b = backend->create_tensor(element::f32, shape_in2);
shared_ptr<runtime::Tensor> result = backend->create_tensor(element::f32, shape_out);
copy_data(a, vector<float>{1.f, 4.f, 2.f, 5.f, 3.f, 6.f});
copy_data(b, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
EXPECT_TRUE(test::all_close_f(read_vector<float>(result),
vector<float>{30.f, 36.f, 42.f, 66.f, 81.f, 96.f}));
}
NGRAPH_TEST(${BACKEND_NAME}, matmul_3x2_2x3_transpose)
{
Shape shape_in1{3, 2};
Shape shape_in2{2, 3};
Shape shape_out{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_in1);
auto B = make_shared<op::Parameter>(element::f32, shape_in2);
auto matmul = make_shared<op::MatMul>(A, B, true, true);
auto f = make_shared<Function>(matmul, ParameterVector{A, B});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
shared_ptr<runtime::Tensor> a = backend->create_tensor(element::f32, shape_in1);
shared_ptr<runtime::Tensor> b = backend->create_tensor(element::f32, shape_in2);
shared_ptr<runtime::Tensor> result = backend->create_tensor(element::f32, shape_out);
copy_data(a, vector<float>{1.f, 4.f, 2.f, 5.f, 3.f, 6.f});
copy_data(b, vector<float>{1.f, 3.f, 5.f, 2.f, 4.f, 6.f});
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
EXPECT_TRUE(
test::all_close_f(read_vector<float>(result), vector<float>{22.f, 28.f, 49.f, 64.f}));
}
// RUN: ngraph-opt %s -convert-ngraph-to-affine -split-input-file | FileCheck %s
// Verify that operations using callbacks are properly converted to standard calls.
// -----
// Softmax Op
// CHECK-LABEL: func @simple_softmax
// CHECK: %[[C1:.*]] = constant 0 : i64
// CHECK: %[[C2:.*]] = constant {{[0-9]+}} : i64
// CHECK: %0 = memref_cast %arg0 : memref<2x3xf32> to memref<*xf32>
// CHECK: %1 = memref_cast %arg2 : memref<2x3xf32> to memref<*xf32>
// CHECK: call @__mlir_callback_1_input(%0, %1, %[[C1]], %[[C2]]) : (memref<*xf32>, memref<*xf32>, i64, i64) -> ()
func @simple_softmax(%arg0: !ng.tensor<2x3xf32>, %arg1: !ng.tensor<1x!ng.i64>) -> !ng.tensor<2x3xf32> {
%0 = "ng.softmax"(%arg0) {axes = [0]} : (!ng.tensor<2x3xf32>) -> !ng.tensor<2x3xf32>
"ng.return"(%0) : (!ng.tensor<2x3xf32>) -> ()
}
// -----
// Gemm Op
// CHECK-LABEL: func @simple_gemm
// CHECK: %[[C1:.*]] = constant 0 : i64
// CHECK: %[[C2:.*]] = constant {{[0-9]+}} : i64
// CHECK: %0 = memref_cast %arg0 : memref<3x6xf32> to memref<*xf32>
// CHECK: %1 = memref_cast %arg1 : memref<6x4xf32> to memref<*xf32>
// CHECK: %2 = memref_cast %arg2 : memref<3x4xf32> to memref<*xf32>
// CHECK: %3 = memref_cast %arg3 : memref<3x4xf32> to memref<*xf32>
// CHECK: call @__mlir_callback_3_inputs(%0, %1, %2, %3, %[[C1]], %[[C2]]) : (memref<*xf32>, memref<*xf32>, memref<*xf32>, memref<*xf32>, i64, i64) -> ()
func @simple_gemm(%arg0: !ng.tensor<3x6xf32>, %arg1: !ng.tensor<6x4xf32>, %arg2: !ng.tensor<3x4xf32>) -> !ng.tensor<3x4xf32> {
%0 = "ng.gemm"(%arg0, %arg1, %arg2) {alpha = 1.000000e+00 : f32, beta = 1.000000e+00 : f32, transA = false, transB = false} : (!ng.tensor<3x6xf32>, !ng.tensor<6x4xf32>, !ng.tensor<3x4xf32>) -> !ng.tensor<3x4xf32>
"ng.return"(%0) : (!ng.tensor<3x4xf32>) -> ()
}
// -----
// MatMul Op
// CHECK-LABEL: func @simple_matmul
// CHECK: %[[C1:.*]] = constant 0 : i64
// CHECK: %[[C2:.*]] = constant {{[0-9]+}} : i64
// CHECK: %0 = memref_cast %arg0 : memref<3x2xf32> to memref<*xf32>
// CHECK: %1 = memref_cast %arg1 : memref<2x3xf32> to memref<*xf32>
// CHECK: %2 = memref_cast %arg2 : memref<2x2xf32> to memref<*xf32>
// CHECK: call @__mlir_callback_2_inputs(%0, %1, %2, %[[C1]], %[[C2]]) : (memref<*xf32>, memref<*xf32>, memref<*xf32>, i64, i64) -> ()
func @simple_matmul(%arg0: !ng.tensor<3x2xf32>, %arg1: !ng.tensor<2x3xf32>) -> !ng.tensor<2x2xf32> {
%0 = "ng.matmul"(%arg0, %arg1) {transposeA = true, transposeB = true} : (!ng.tensor<3x2xf32>, !ng.tensor<2x3xf32>) -> !ng.tensor<2x2xf32>
"ng.return"(%0) : (!ng.tensor<2x2xf32>) -> ()
}
// -----
// AvgPool Op
// CHECK-LABEL: func @simple_avgpool
// CHECK: %0 = memref_cast %arg0 : memref<2x1x3x3xf32> to memref<*xf32>
// CHECK: %1 = memref_cast %arg1 : memref<2x1x3x3xf32> to memref<*xf32>
// CHECK: %[[C1:.*]] = constant 0 : i64
// CHECK: %[[C2:.*]] = constant {{[0-9]+}} : i64
// CHECK: call @__mlir_callback_1_input(%0, %1, %[[C1]], %[[C2]]) : (memref<*xf32>, memref<*xf32>, i64, i64) -> ()
func @simple_avgpool(%arg0: !ng.tensor<2x1x3x3xf32>) -> !ng.tensor<2x1x3x3xf32> {
%0 = "ng.avgPool"(%arg0) {includePadding = true, padAbove = [1, 1], padBelow = [0, 0], windowMovementStrides = [1, 1], windowShape = [2, 2]} : (!ng.tensor<2x1x3x3xf32>) -> !ng.tensor<2x1x3x3xf32>
"ng.return"(%0) : (!ng.tensor<2x1x3x3xf32>) -> ()
}
// -----
// AvgPoolBackprop Op
// CHECK-LABEL: func @simple_avgpoolbackprop
// CHECK: %0 = memref_cast %arg0 : memref<2x2x2x2xf32> to memref<*xf32>
// CHECK: %1 = memref_cast %arg1 : memref<2x2x3x3xf32> to memref<*xf32>
// CHECK: %[[C1:.*]] = constant 0 : i64
// CHECK: %[[C2:.*]] = constant {{[0-9]+}} : i64
// CHECK: call @__mlir_callback_1_input(%0, %1, %[[C1]], %[[C2]]) : (memref<*xf32>, memref<*xf32>, i64, i64) -> ()
func @simple_avgpoolbackprop(%arg0: !ng.tensor<2x2x2x2xf32>) -> !ng.tensor<2x2x3x3xf32> {
%0 = "ng.avgPoolBackprop"(%arg0) {forwardArgShape = [2, 2, 3, 3], includePadding = false, padAbove = [0, 0], padBelow = [0, 0], windowMovementStrides = [1, 1], windowShape = [2, 2]} : (!ng.tensor<2x2x2x2xf32>) -> !ng.tensor<2x2x3x3xf32>
"ng.return"(%0) : (!ng.tensor<2x2x3x3xf32>) -> ()
}
// -----
// MaxPool Op
// CHECK-LABEL: func @simple_maxpool
// CHECK: %0 = memref_cast %arg0 : memref<64x3x7x8x10xf32> to memref<*xf32>
// CHECK: %1 = memref_cast %arg1 : memref<64x3x9x6x5xf32> to memref<*xf32>
// CHECK: %[[C1:.*]] = constant 0 : i64
// CHECK: %[[C2:.*]] = constant {{[0-9]+}} : i64
// CHECK: call @__mlir_callback_1_input(%0, %1, %[[C1]], %[[C2]]) : (memref<*xf32>, memref<*xf32>, i64, i64) -> ()
func @simple_maxpool(%arg0: !ng.tensor<64x3x7x8x10xf32>) -> !ng.tensor<64x3x9x6x5xf32> {
%0 = "ng.maxPool"(%arg0) {padAbove = [6, 4, 5], padBelow = [5, 6, 4], windowMovementStrides = [2, 3, 4], windowShape = [2, 3, 2]} : (!ng.tensor<64x3x7x8x10xf32>) -> !ng.tensor<64x3x9x6x5xf32>
"ng.return"(%0) : (!ng.tensor<64x3x9x6x5xf32>) -> ()
}
// -----
// MaxPoolBackprop Op
// CHECK-LABEL: func @simple_maxpoolbackprop
// CHECK: %0 = memref_cast %arg0 : memref<2x2x5x5xf32> to memref<*xf32>
// CHECK: %1 = memref_cast %arg1 : memref<2x2x4x3xf32> to memref<*xf32>
// CHECK: %2 = memref_cast %arg2 : memref<2x2x5x5xf32> to memref<*xf32>
// CHECK: %[[C1:.*]] = constant 0 : i64
// CHECK: %[[C2:.*]] = constant {{[0-9]+}} : i64
// CHECK: call @__mlir_callback_2_inputs(%0, %1, %2, %[[C1]], %[[C2]]) : (memref<*xf32>, memref<*xf32>, memref<*xf32>, i64, i64) -> ()
func @simple_maxpoolbackprop(%arg0: !ng.tensor<2x2x5x5xf32>, %arg1: !ng.tensor<2x2x4x3xf32>) -> !ng.tensor<2x2x5x5xf32> {
%0 = "ng.maxPoolBackprop"(%arg0, %arg1) {padAbove = [0, 0], padBelow = [0, 0], windowMovementStrides = [1, 1], windowShape = [2, 3]} : (!ng.tensor<2x2x5x5xf32>, !ng.tensor<2x2x4x3xf32>) -> !ng.tensor<2x2x5x5xf32>
"ng.return"(%0) : (!ng.tensor<2x2x5x5xf32>) -> ()
}