Merge branch 'master' into gwenger/deprecate_copy_from

98bd7b8c · Sang Ik Lee · GitHub · 1cfa3d66 · c59ea84c · 98bd7b8c
Unverified Commit 98bd7b8c authored Nov 18, 2019 by Sang Ik Lee Committed by GitHub Nov 18, 2019
26 changed files
--- a/cmake/external_mlir.cmake
+++ b/cmake/external_mlir.cmake
@@ -20,8 +20,8 @@ set(MLIR_LLVM_REPO_URL https://github.com/llvm/llvm-project.git)
 set(MLIR_REPO_URL https://github.com/tensorflow/mlir.git)

 # Change these commit IDs to move to latest stable versions
-set(MLIR_LLVM_COMMIT_ID 0845ac7331e)
-set(MLIR_COMMIT_ID 1f7893e0)
+set(MLIR_LLVM_COMMIT_ID e0f1d9d8729)
+set(MLIR_COMMIT_ID c61db4bb)

 # MLIR environment variables. Some of them are used by LIT tool.
 set(MLIR_PROJECT_ROOT ${CMAKE_CURRENT_BINARY_DIR}/mlir_project)

--- a/src/contrib/mlir/CMakeLists.txt
+++ b/src/contrib/mlir/CMakeLists.txt
@@ -61,9 +61,9 @@ target_link_libraries(
 # some libs need whole archive linkage because of Globals static initialization
 function(whole_archive_link target)
    if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
-        set(link_flags "-Llib -Wl,-all_load ")
+        set(link_flags "-L${LLVM_BUILD_LIBRARY_DIR} -Wl,-force_load ")
        FOREACH(LIB ${ARGN})
-            string(CONCAT link_flags ${link_flags} "${LIB}")
+            string(CONCAT link_flags ${link_flags} "${LIB} ")
        ENDFOREACH(LIB)
    else()
        set(link_flags "-Llib -Wl,--whole-archive,")
@@ -77,10 +77,11 @@ function(whole_archive_link target)
    set_target_properties(${target} PROPERTIES LINK_FLAGS ${link_flags})
 endfunction(whole_archive_link)

-whole_archive_link(mlir_backend
+set(LIBS
    ${LLVM_BUILD_LIBRARY_DIR}/libMLIRAffineOps.a
    ${LLVM_BUILD_LIBRARY_DIR}/libMLIRStandardOps.a
 )
+whole_archive_link(mlir_backend ${LIBS})
 # Link LLVM libs
 target_link_libraries(
    mlir_backend PRIVATE 

--- a/src/contrib/mlir/backend/pass/affine_lowerer.cpp
+++ b/src/contrib/mlir/backend/pass/affine_lowerer.cpp
@@ -521,7 +521,7 @@ namespace
        NGRAPH_CHECK(lhs->getType().isa<MemRefType>());
        Type elemTy = lhs->getType().dyn_cast<MemRefType>().getElementType();

-        LoopNestBuilder(pivs, lbs, ubs, steps)([&] {
+        AffineLoopNestBuilder(pivs, lbs, ubs, steps)([&] {
            ValueHandle val = iLHS(ivs);
            ValueHandle zero = createZeroConstant(elemTy);
            iRes(ivs) = intrinsics::select(val > zero, val, zero);
@@ -591,12 +591,14 @@ namespace

        {
            IndexHandle n, k;
-            LoopBuilder(&n, nLb, nUb, nStep)(
-                [&] { LoopBuilder(&k, kLb, kUb, kStep)([&] { iRes(n, k) = zeroInit; }); });
+            LoopBuilder::makeAffine(&n, nLb, nUb, nStep)([&] {
+                LoopBuilder::makeAffine(&k, kLb, kUb, kStep)([&] { iRes(n, k) = zeroInit; });
+            });
        }
-        LoopBuilder(&n, nLb, nUb, nStep)([&] {
-            LoopBuilder(&m, mLb, mUb, mStep)([&] {
-                LoopBuilder(&k, kLb, kUb, kStep)([&] { iRes(n, k) += iLhs(n, m) * iRhs(m, k); });
+        LoopBuilder::makeAffine(&n, nLb, nUb, nStep)([&] {
+            LoopBuilder::makeAffine(&m, mLb, mUb, mStep)([&] {
+                LoopBuilder::makeAffine(&k, kLb, kUb, kStep)(
+                    [&] { iRes(n, k) += iLhs(n, m) * iRhs(m, k); });
            });
        });

@@ -658,7 +660,7 @@ namespace
                indexVarSteps.push_back(vOperand.step(i));
            }

-            LoopNestBuilder(indexVarPtrs, indexVarLbs, indexVarUbs, indexVarSteps)([&] {
+            AffineLoopNestBuilder(indexVarPtrs, indexVarLbs, indexVarUbs, indexVarSteps)([&] {
                IndexedValue ivRes(result);
                IndexedValue ivOperand(operand);

@@ -758,12 +760,12 @@ namespace
        //                   params[P_0, P_1, .. P_(A-1), indices[I_0, .., I_(M-1)],
        //                          P_(A+1), ... P_(N-1)];

-        LoopNestBuilder(indicesIVPtrs, indicesLbs, indicesUbs, indicesSteps)([&] {
+        AffineLoopNestBuilder(indicesIVPtrs, indicesLbs, indicesUbs, indicesSteps)([&] {
            // Load axis value from indices array and cast it to Index Type
            ValueHandle axisIdx = ValueHandle::create<IndexCastOp>(
                (ValueHandle)iIndices(indicesIVs), rewriter.getIndexType());

-            LoopNestBuilder(paramsIVPtrs, paramsLbs, paramsUbs, paramsSteps)([&] {
+            AffineLoopNestBuilder(paramsIVPtrs, paramsLbs, paramsUbs, paramsSteps)([&] {
                // construct indices for param
                // [P_0, P_1, .. P_axis-1, Indices[I0, I1, .. I_k-1], P_axis+1, P_axis+2, .. P_n-1]
                for (auto i = 0, j = 0; i < vParams.rank(); i++)
@@ -965,8 +967,7 @@ namespace

            NGRAPH_CHECK(affineExprs.size() == isEq.size() && isEq.size() == 2 * spatialRank,
                         "Invalid number of expressions in the IntegerSet");
-            nonPaddedRange =
-                rewriter.getIntegerSet(spatialRank, 2 * spatialRank, affineExprs, isEq);
+            nonPaddedRange = IntegerSet::get(spatialRank, 2 * spatialRank, affineExprs, isEq);
        }

        // Initialize output to zero
@@ -975,9 +976,9 @@ namespace
            auto resSpatialIndices = makeIndexHandles(spatialRank);
            auto resSpatialIndicesPtrs = makeIndexHandlePointers(resSpatialIndices);

-            LoopBuilder(&n, batchLb, batchUb, 1)([&] {
-                LoopBuilder(&k, numFiltersLb, numFiltersUb, 1)([&] {
-                    LoopNestBuilder(
+            LoopBuilder::makeAffine(&n, batchLb, batchUb, 1)([&] {
+                LoopBuilder::makeAffine(&k, numFiltersLb, numFiltersUb, 1)([&] {
+                    AffineLoopNestBuilder(
                        resSpatialIndicesPtrs, resSpatialLbs, resSpatialUbs, resSteps)([&] {
                        SmallVector<IndexHandle, 4> resIndices;
                        // Result indices
@@ -994,13 +995,13 @@ namespace

        IndexHandle n, k, c;
        // Convolution loop
-        LoopBuilder(&n, batchLb, batchUb, 1)([&] {
+        LoopBuilder::makeAffine(&n, batchLb, batchUb, 1)([&] {
            // Number of filters loop
-            LoopBuilder(&k, numFiltersLb, numFiltersUb, 1)([&] {
+            LoopBuilder::makeAffine(&k, numFiltersLb, numFiltersUb, 1)([&] {
                // Channels loop
-                LoopBuilder(&c, numChannelsLb, numChannelsUb, 1)([&] {
+                LoopBuilder::makeAffine(&c, numChannelsLb, numChannelsUb, 1)([&] {
                    // Results loop
-                    LoopNestBuilder(
+                    AffineLoopNestBuilder(
                        resSpatialIndicesPtrs, resSpatialLbs, resSpatialUbs, resSteps)([&] {
                        // Compute image start indices
                        SmallVector<IndexHandle, 4> imgStartIndices;
@@ -1017,10 +1018,10 @@ namespace
                        resIndices.insert(
                            resIndices.end(), resSpatialIndices.begin(), resSpatialIndices.end());
                        // Filters spatial loop
-                        LoopNestBuilder(filtersSpatialIndicesPtrs,
-                                        filtersSpatialLbs,
-                                        filtersSpatialUbs,
-                                        filtersSteps)([&] {
+                        AffineLoopNestBuilder(filtersSpatialIndicesPtrs,
+                                              filtersSpatialLbs,
+                                              filtersSpatialUbs,
+                                              filtersSteps)([&] {
                            SmallVector<IndexHandle, 4> imgIndices, filtersIndices;
                            // Image indices
                            // Here we compute the virtual start index into the padded image.
@@ -1131,7 +1132,7 @@ namespace
        NGRAPH_CHECK(lhs->getType().isa<MemRefType>());
        Type elemTy = lhs->getType().cast<MemRefType>().getElementType();

-        LoopNestBuilder(pivs, lbs, ubs, steps)([&] {
+        AffineLoopNestBuilder(pivs, lbs, ubs, steps)([&] {
            ValueHandle val = iLHS(ivs);
            if (isa<NGNegOp>(op))
            {
@@ -1173,7 +1174,7 @@ namespace
        auto pivs = makeIndexHandlePointers(ivs);
        // Steps
        auto steps = vLHS.getSteps();
-        LoopNestBuilder(pivs, lbs, ubs, steps)(
+        AffineLoopNestBuilder(pivs, lbs, ubs, steps)(
            // single stmt body
            [&] {
                if (isa<NGAddOp>(op))
@@ -1266,7 +1267,7 @@ namespace
            auto pivs = makeIndexHandlePointers(ivs);
            auto steps = vRes.getSteps();
            auto initVal = vArg.lb(axis);
-            LoopNestBuilder(pivs, resLbs, resUbs, steps)(
+            AffineLoopNestBuilder(pivs, resLbs, resUbs, steps)(
                [&] { iRes(ivs) = ValueHandle::create<IndexCastOp>(initVal, resTy); });
        }

@@ -1282,7 +1283,7 @@ namespace
                         "Expected integer result type in index reduction");

            // iterate over all argument dimensions
-            LoopNestBuilder(pAllIVs, argLbs, argUbs, steps)([&] {
+            AffineLoopNestBuilder(pAllIVs, argLbs, argUbs, steps)([&] {
                // build a list of non-reduction IVs
                for (auto i = 0; i < vArg.rank(); i++)
                {

--- a/src/contrib/mlir/core/compiler.cpp
+++ b/src/contrib/mlir/core/compiler.cpp
@@ -64,7 +64,6 @@
 #include <mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h>
 #include <mlir/Dialect/LLVMIR/LLVMDialect.h>
 #include <mlir/ExecutionEngine/ExecutionEngine.h>
-#include <mlir/ExecutionEngine/MemRefUtils.h>
 #include <mlir/ExecutionEngine/OptUtils.h>
 #include <mlir/Pass/PassManager.h>
 #include <mlir/Target/LLVMIR.h>

--- a/src/contrib/mlir/core/compiler.hpp
+++ b/src/contrib/mlir/core/compiler.hpp
@@ -24,7 +24,6 @@
 #include "ngraph/descriptor/tensor.hpp"
 #include "ngraph/node.hpp"

-#include <mlir/ExecutionEngine/MemRefUtils.h>
 #include <mlir/IR/Builders.h>
 #include <mlir/IR/Module.h>
 #include <mlir/IR/Types.h>

--- a/src/contrib/mlir/core/ngraph_dialect/dialect.cpp
+++ b/src/contrib/mlir/core/ngraph_dialect/dialect.cpp
@@ -18,12 +18,12 @@
 // not expose public API to the rest of nGraph codebase and heavily depends on MLIR API.

 #include "dialect.hpp"
+#include <mlir/IR/DialectImplementation.h>
+#include <mlir/Parser.h>
 #include "ngraph/check.hpp"
 #include "ops.hpp"
 #include "type.hpp"

-#include <mlir/Parser.h>
-
 using namespace mlir;

 NGraphOpsDialect::NGraphOpsDialect(mlir::MLIRContext* ctx)
@@ -39,63 +39,64 @@ NGraphOpsDialect::NGraphOpsDialect(mlir::MLIRContext* ctx)
        >();
 }

-mlir::Type NGraphOpsDialect::parseType(llvm::StringRef tyData, mlir::Location loc) const
+// Parses a type of the nGraph dialect from its textual form.
+//
+// A spec that starts with the keyword `tensor` is parsed as
+// `tensor<` dimension-list element-type `>` and yields an NGTensorType; any other spec is
+// forwarded to parseEltType(). Returns a null Type whenever a parsing step fails.
+mlir::Type NGraphOpsDialect::parseType(mlir::DialectAsmParser& parser) const
 {
-    StringRef origTypeStr = tyData;
    MLIRContext* context = getContext();

    // Process nGraph tensor type.
-    if (tyData.consume_front("tensor"))
+    // A ParseResult is true on failure, so `!` means the keyword was found and consumed.
+    if (!parser.parseOptionalKeyword("tensor"))
    {
-        if (!tyData.consume_front("<") || !tyData.consume_back(">"))
+        llvm::SMLoc typeLoc = parser.getCurrentLocation();
+        if (parser.parseLess())
        {
-            return (emitError(loc, "expected '<' and '>' enclosing the tensor shape: " + tyData),
-                    Type());
+            parser.emitError(typeLoc, "expected '<' and '>' enclosing the tensor shape");
+            return Type();
        }

-        // Get x-separated sub-strings.
-        SmallVector<StringRef, 8> subStrings;
-        tyData.split(subStrings, "x");
-
        // Parse shape dimensions.
        SmallVector<int64_t, 4> shape;
-        for (unsigned i = 0, end = subStrings.size() - 1; i < end; ++i)
-        {
-            StringRef dimStr = subStrings[i];
-            int64_t dim = -1;
-            // NOTE: `consumeInteger` returns false if an integer was parsed successfully.
-            if (dimStr.consumeInteger(/*Radix=*/10, dim) || !dimStr.empty())
-            {
-                return (
-                    emitError(loc, "expected a list of '[0-9]+x' dimension specifiers: " + tyData),
-                    Type());
-            }
-
-            shape.push_back(dim);
-        }
+        if (parser.parseDimensionList(shape))
+        {
+            // The parser has already reported what was wrong with the dimension list.
+            return Type();
+        }
+
+        // Parse the current element type.
+        Type eltType;

-        // Parse nGraph element type.
-        auto elem_ty = mlir::parseType(subStrings.back(), context);
-        if (!elem_ty)
+        if (parser.parseType(eltType) || !eltType)
        {
-            return (emitError(loc, "Unexpected element type in tensor type: " + tyData), Type());
+            typeLoc = parser.getCurrentLocation();
+            parser.emitError(typeLoc, "Invalid tensor element type");
+            // Never build a tensor type around a null element type.
+            return Type();
        }
-
-        return NGTensorType::get(context, elem_ty, shape);
+        if (parser.parseGreater())
+        {
+            // Missing closing '>': the parser has already reported it.
+            return Type();
+        }
+        return NGTensorType::get(context, eltType, shape);
    }
+    else
+    {
+        // parse nGraph scalar type
+        return parseEltType(parser);
+    }
+}

+mlir::Type NGraphOpsDialect::parseEltType(mlir::DialectAsmParser& parser) const
+{
    // Process nGraph integer element types.
+    MLIRContext* context = getContext();
+    int width = 0;
+    bool isSigned = false;
+    llvm::SMLoc loc = parser.getCurrentLocation();
+
+    StringRef tyData = parser.getFullSymbolSpec();
+    StringRef origTypeStr = tyData;
+
    if (tyData.startswith("i") || tyData.startswith("u"))
    {
-        bool isSigned = tyData.consume_front("i");
-        bool isUnsigned = tyData.consume_front("u");
-        NGRAPH_CHECK(isSigned != isUnsigned, "nGraph integer cannot be signed and unsigned");
-
+        isSigned = tyData.consume_front("i");
+        tyData.consume_front("u");
        unsigned width = 0;
        // NOTE: `consumeInteger` returns false if an integer was parsed successfully.
        if (tyData.consumeInteger(/*Radix=*/10, width) || width == 0 || !tyData.empty())
        {
-            return (emitError(loc, "Unexpected nGraph integer type: " + origTypeStr), Type());
+            parser.emitError(loc, "Unexpected nGraph integer type: " + origTypeStr);
        }

        switch (width)
@@ -108,9 +109,7 @@ mlir::Type NGraphOpsDialect::parseType(llvm::StringRef tyData, mlir::Location lo
            return isSigned ? NGIntegerType::getInt32(context) : NGIntegerType::getUInt32(context);
        case 64:
            return isSigned ? NGIntegerType::getInt64(context) : NGIntegerType::getUInt64(context);
-        default:
-            return (emitError(loc, "Unexpected width for nGraph integer type: " + origTypeStr),
-                    Type());
+        default: parser.emitError(loc, "Unexpected width for nGraph integer type: " + origTypeStr);
        }
    }

@@ -119,43 +118,49 @@ mlir::Type NGraphOpsDialect::parseType(llvm::StringRef tyData, mlir::Location lo
                 "Floating point types should be processed by standard parser");

    // NOTE: We may hit this error if the nGraph type is not yet supported in parser.
-    return (emitError(loc, "Unknown nGraph type: " + origTypeStr), Type());
+    parser.emitError(loc, "Unknown nGraph type: " + origTypeStr);
+
+    return Type();
 }

-void NGraphOpsDialect::printType(mlir::Type type, raw_ostream& os) const
+void NGraphOpsDialect::printType(mlir::Type type, mlir::DialectAsmPrinter& printer) const
 {
    switch (type.getKind())
    {
    case NG_TENSOR_TYPE_ID:
    {
-        os << "tensor<";
+        printer << "tensor<";
        auto tensorTy = type.cast<NGTensorType>();
        for (auto dim : tensorTy.getShape())
        {
-            os << dim << 'x';
+            printer << dim << 'x';
        }
-        os << tensorTy.getElementType() << '>';
+        printer << tensorTy.getElementType() << '>';
        return;
    }
    case NG_I8_TYPE_ID:
    case NG_I16_TYPE_ID:
    case NG_I32_TYPE_ID:
    case NG_I64_TYPE_ID:
+    {
+        auto intTy = type.cast<NGIntegerType>();
+        printer << "i" << intTy.getWidth();
+        return;
+    }
    case NG_U8_TYPE_ID:
    case NG_U16_TYPE_ID:
    case NG_U32_TYPE_ID:
    case NG_U64_TYPE_ID:
    {
        auto intTy = type.cast<NGIntegerType>();
-        os << "i" << intTy.getWidth();
+        printer << "u" << intTy.getWidth();
        return;
    }
    case NG_BOOL_TYPE_ID:
    {
-        os << "bool";
+        printer << "bool";
        return;
    }
-    default: { NGRAPH_CHECK(false, "Incorrect type to print?");
-    }
+    default: NGRAPH_UNREACHABLE("Incorrect type to print?");
    }
 }
--- a/src/contrib/mlir/core/ngraph_dialect/dialect.hpp
+++ b/src/contrib/mlir/core/ngraph_dialect/dialect.hpp
@@ -34,9 +34,12 @@ namespace mlir
    {
    public:
        explicit NGraphOpsDialect(mlir::MLIRContext* ctx);
-        mlir::Type parseType(llvm::StringRef tyData, mlir::Location loc) const override;
-        void printType(mlir::Type type, llvm::raw_ostream& os) const override;
+
+        mlir::Type parseType(mlir::DialectAsmParser& parser) const override;
+        void printType(mlir::Type type, mlir::DialectAsmPrinter& printer) const override;

        static StringRef getDialectNamespace() { return "ng"; }
+    private:
+        mlir::Type parseEltType(mlir::DialectAsmParser& parser) const;
    };
 }
--- a/src/contrib/mlir/core/pass/ng_dialect_builder.cpp
+++ b/src/contrib/mlir/core/pass/ng_dialect_builder.cpp
@@ -46,32 +46,6 @@
 #include "ngraph/op/util/index_reduction.hpp"
 #include "ngraph/type/element_type.hpp"

-#include "contrib/mlir/utils.hpp"
-
-#include <llvm/ADT/STLExtras.h>
-#include <llvm/Analysis/TargetTransformInfo.h>
-#include <llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h>
-#include <llvm/IR/Module.h>
-#include <llvm/Support/ErrorOr.h>
-#include <llvm/Support/MemoryBuffer.h>
-#include <llvm/Support/SourceMgr.h>
-#include <llvm/Support/TargetSelect.h>
-#include <llvm/Target/TargetMachine.h>
-#include <mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h>
-#include <mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h>
-#include <mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h>
-#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
-#include <mlir/ExecutionEngine/ExecutionEngine.h>
-#include <mlir/ExecutionEngine/MemRefUtils.h>
-#include <mlir/ExecutionEngine/OptUtils.h>
-#include <mlir/Pass/PassManager.h>
-#include <mlir/Target/LLVMIR.h>
-#include <mlir/Transforms/DialectConversion.h>
-#include <mlir/Transforms/Passes.h>
-
-#include <memory>
-#include <mutex>
-
 // Defines a new LLVM debug type for this file to be used by LLVM_DEBUG macro.
 #define DEBUG_TYPE "mlir-compiler"


--- a/src/contrib/mlir/core/pass/ng_dialect_builder.hpp
+++ b/src/contrib/mlir/core/pass/ng_dialect_builder.hpp
@@ -20,21 +20,13 @@
 #pragma once

 #include "contrib/mlir/core/compiler.hpp"
-#include "contrib/mlir/runtime/cpu/memory_manager.hpp"
+
 #include "ngraph/check.hpp"
 #include "ngraph/descriptor/tensor.hpp"
 #include "ngraph/node.hpp"

-#include <mlir/ExecutionEngine/MemRefUtils.h>
-#include <mlir/IR/Builders.h>
-#include <mlir/IR/Module.h>
-#include <mlir/IR/Types.h>
 #include <mlir/Pass/Pass.h>

-#include <typeindex>
-#include <unordered_map>
-#include <vector>
-
 using namespace ngraph::runtime::ngmlir;

 namespace ngraph

--- a/src/contrib/mlir/runtime/cpu/cpu_runtime.cpp
+++ b/src/contrib/mlir/runtime/cpu/cpu_runtime.cpp
@@ -30,8 +30,8 @@
 #include <llvm/Support/SourceMgr.h>
 #include <llvm/Support/TargetSelect.h>
 #include <llvm/Target/TargetMachine.h>
+#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
 #include <mlir/ExecutionEngine/ExecutionEngine.h>
-#include <mlir/ExecutionEngine/MemRefUtils.h>
 #include <mlir/ExecutionEngine/OptUtils.h>
 #include <mlir/IR/Function.h>

@@ -81,7 +81,7 @@ void MLIRCPURuntime::bindArguments(std::vector<void*>& externalTensors)
 {
    NGRAPH_CHECK(m_module, "MLIR module is not ready.");

-    mlir::FuncOp func = m_module->lookupSymbol<mlir::FuncOp>("main");
+    auto func = m_module->lookupSymbol<mlir::LLVM::LLVMFuncOp>("main");
    NGRAPH_CHECK(func && !func.getBlocks().empty(), "Function not found");

    // Set external arguments
@@ -90,7 +90,7 @@ void MLIRCPURuntime::bindArguments(std::vector<void*>& externalTensors)
    // Create list with a type-erased double pointer for each invocation arguments.
    // We currently use 'allocateMemrefArgs', which creates the arguments list per call ABI (see
    // comment below).
-    // StaticFloatMemref is just a struct with the actual pointer to the data.
+    // StaticMemRef is just a struct with the actual pointer to the data.

    auto expectedArguments = allocateMemrefArgs();
    NGRAPH_CHECK(expectedArguments.size(), "Arguments can't be created");
@@ -102,7 +102,7 @@ void MLIRCPURuntime::bindArguments(std::vector<void*>& externalTensors)
    // Assign external tensor pointers to invocation arguments.
    for (size_t i = 0, numArgs = m_invokeArgs.size(); i < numArgs; ++i)
    {
-        auto* memRefArg = *(reinterpret_cast<mlir::StaticFloatMemRef**>(m_invokeArgs[i]));
+        auto* memRefArg = *(reinterpret_cast<StaticMemRef**>(m_invokeArgs[i]));
        memRefArg->data = reinterpret_cast<float*>((*m_externalTensors)[i]);
    }
 }
@@ -129,18 +129,18 @@ void MLIRCPURuntime::cleanup()
    // Free void double pointer arguments without freeing external tensor data.
    for (auto* arg : m_invokeArgs)
    {
-        auto* memRefArg = *(reinterpret_cast<mlir::StaticFloatMemRef**>(arg));
+        auto* memRefArg = *(reinterpret_cast<StaticMemRef**>(arg));
        free(memRefArg);
        free(arg);
    }
 }

 // The current call ABI takes a single arg pointer (argPtr) pointing to a list of args.
-// Each arg is a  pointer to a StaticFloatMemRef which contains a data pointer
+// Each arg is a pointer to a StaticMemRef which contains a data pointer
 //
 // The args are laid out as follows
-// argPtr-> arg[0]-> StaticFloatMemRef -> <data>
-//          arg[1]-> StaticFloatMemRef -> <data>
+// argPtr-> arg[0]-> StaticMemRef -> <data>
+//          arg[1]-> StaticMemRef -> <data>
 //          ...
 SmallVector<void*, 8> MLIRCPURuntime::allocateMemrefArgs()
 {
@@ -148,20 +148,18 @@ SmallVector<void*, 8> MLIRCPURuntime::allocateMemrefArgs()
    for (auto i = 0; i < m_externalTensors->size(); i++)
    {
        auto descriptor = allocateMemrefDescriptor();
-        mlir::StaticFloatMemRef** arg =
-            reinterpret_cast<mlir::StaticFloatMemRef**>(malloc(sizeof(mlir::StaticFloatMemRef*)));
+        StaticMemRef** arg = reinterpret_cast<StaticMemRef**>(malloc(sizeof(StaticMemRef*)));
        *arg = descriptor;
        args.push_back(arg);
    }
    return args;
 }

-mlir::StaticFloatMemRef* MLIRCPURuntime::allocateMemrefDescriptor()
+StaticMemRef* MLIRCPURuntime::allocateMemrefDescriptor()
 {
-    // We only use StaticFloatMemRef because that's what MLIR currently offers.
+    // We only use StaticMemRef, a minimal descriptor that just holds the data pointer.
    // We should expand this with different types and dynamic MemRefs
-    auto* descriptor =
-        reinterpret_cast<mlir::StaticFloatMemRef*>(malloc(sizeof(mlir::StaticFloatMemRef)));
+    auto* descriptor = reinterpret_cast<StaticMemRef*>(malloc(sizeof(StaticMemRef)));
    NGRAPH_CHECK(descriptor != nullptr, "NULL MemRef descriptor");
    descriptor->data = nullptr;
    return descriptor;

--- a/src/contrib/mlir/runtime/cpu/cpu_runtime.hpp
+++ b/src/contrib/mlir/runtime/cpu/cpu_runtime.hpp
@@ -21,7 +21,6 @@

 #include <memory>
 #include <mlir/ExecutionEngine/ExecutionEngine.h>
-#include <mlir/ExecutionEngine/MemRefUtils.h>
 #include <mlir/IR/Builders.h>
 #include <mlir/IR/Module.h>
 #include <mlir/IR/Types.h>
@@ -34,6 +33,10 @@ namespace ngraph
    {
        namespace ngmlir
        {
+            struct StaticMemRef
+            {
+                void* data;
+            };
            /// A CPU Runtime is an MLIR runtime that owns an MLIR context and a module
            /// The module should be in LLVM dialect and ready to be lowered via an MLIR
            /// ExecutionEngine. The runtime owns the context and must out-live any MLIR
@@ -57,7 +60,7 @@ namespace ngraph
                llvm::SmallVector<void*, 8> allocateMemrefArgs();

                /// Helper to allocate a mem ref object. Handles static shapes only for now.
-                mlir::StaticFloatMemRef* allocateMemrefDescriptor();
+                StaticMemRef* allocateMemrefDescriptor();

            private:
                // Pointers to externally allocated memory for sub-graph's input and output tensors.

--- a/src/contrib/mlir/runtime/runtime.hpp
+++ b/src/contrib/mlir/runtime/runtime.hpp
@@ -23,7 +23,6 @@

 #include <memory>
 #include <mlir/ExecutionEngine/ExecutionEngine.h>
-#include <mlir/ExecutionEngine/MemRefUtils.h>
 #include <mlir/IR/Builders.h>
 #include <mlir/IR/Module.h>
 #include <mlir/IR/Types.h>

--- a/src/ngraph/CMakeLists.txt
+++ b/src/ngraph/CMakeLists.txt
@@ -213,6 +213,8 @@ set (SRC
    op/strided_slice.hpp
    op/floor.cpp
    op/floor.hpp
+    op/floor_mod.cpp
+    op/floor_mod.hpp
    op/gather.cpp
    op/gather.hpp
    op/gather_nd.cpp
@@ -373,6 +375,8 @@ set (SRC
    op/fused/partial_slice.hpp
    op/fused/prelu.cpp
    op/fused/prelu.hpp
+    op/fused/reciprocal.cpp
+    op/fused/reciprocal.hpp
    op/fused/rnn_cell.cpp
    op/fused/rnn_cell.hpp
    op/fused/scale_shift.cpp

--- a/src/ngraph/frontend/onnx_import/op/reciprocal.cpp
+++ b/src/ngraph/frontend/onnx_import/op/reciprocal.cpp
@@ -17,8 +17,7 @@
 #include <memory>
 #include <vector>

-#include "ngraph/op/constant.hpp"
-#include "ngraph/op/divide.hpp"
+#include "ngraph/op/fused/reciprocal.hpp"
 #include "ngraph/op/util/broadcasting.hpp"
 #include "ngraph/shape.hpp"

@@ -36,11 +35,7 @@ namespace ngraph
                {
                    auto data = node.get_ng_inputs().at(0);

-                    std::shared_ptr<ngraph::Node> one_node = std::make_shared<ngraph::op::Constant>(
-                        data->get_element_type(), Shape{}, std::vector<double>{1});
-                    one_node = ngraph::op::make_broadcast_node(one_node, data->get_shape());
-
-                    return {one_node / data};
+                    return {std::make_shared<ngraph::op::Reciprocal>(data)};
                }

            } // namespace set_1

--- a/src/ngraph/ngraph.hpp
+++ b/src/ngraph/ngraph.hpp
@@ -130,6 +130,7 @@ namespace ngraph
 #include "ngraph/op/experimental/tile.hpp"
 #include "ngraph/op/experimental/transpose.hpp"
 #include "ngraph/op/floor.hpp"
+#include "ngraph/op/floor_mod.hpp"
 #include "ngraph/op/fused/clamp.hpp"
 #include "ngraph/op/fused/conv_fused.hpp"
 #include "ngraph/op/fused/depth_to_space.hpp"
@@ -151,6 +152,7 @@ namespace ngraph
 #include "ngraph/op/fused/normalize_l2.hpp"
 #include "ngraph/op/fused/partial_slice.hpp"
 #include "ngraph/op/fused/prelu.hpp"
+#include "ngraph/op/fused/reciprocal.hpp"
 #include "ngraph/op/fused/rnn_cell.hpp"
 #include "ngraph/op/fused/scale_shift.hpp"
 #include "ngraph/op/fused/selu.hpp"

--- a/src/ngraph/op/floor_mod.cpp
+++ b/src/ngraph/op/floor_mod.cpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include "ngraph/op/floor_mod.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+constexpr NodeTypeInfo op::v1::FloorMod::type_info;
+
+// Builds a FloorMod node from two inputs and an auto-broadcast spec. All three are passed to
+// the BinaryElementwiseArithmetic base, then constructor_validate_and_infer_types() is called.
+op::v1::FloorMod::FloorMod(const Output<Node>& arg0,
+                           const Output<Node>& arg1,
+                           const AutoBroadcastSpec& auto_broadcast)
+    : BinaryElementwiseArithmetic(arg0, arg1, auto_broadcast)
+{
+    this->constructor_validate_and_infer_types();
+}
+
+// Returns a fresh FloorMod wired to the first two entries of new_args, reusing this node's
+// auto-broadcast spec. check_new_args_count() is called on the arguments first.
+shared_ptr<Node> op::v1::FloorMod::copy_with_new_args(const NodeVector& new_args) const
+{
+    check_new_args_count(this, new_args);
+
+    const auto& lhs = new_args.at(0);
+    const auto& rhs = new_args.at(1);
+    return make_shared<FloorMod>(lhs, rhs, get_autob());
+}
--- a/src/ngraph/op/floor_mod.hpp
+++ b/src/ngraph/op/floor_mod.hpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#pragma once
+
+#include <memory>
+
+#include "ngraph/op/util/binary_elementwise_arithmetic.hpp"
+
+namespace ngraph
+{
+    namespace op
+    {
+        namespace v1
+        {
+            /// \brief Elementwise FloorMod operation.
+            /// Derives from util::BinaryElementwiseArithmetic and takes two inputs.
+            class FloorMod : public util::BinaryElementwiseArithmetic
+            {
+            public:
+                NGRAPH_API
+                static constexpr NodeTypeInfo type_info{"FloorMod", 1};
+                const NodeTypeInfo& get_type_info() const override { return type_info; }
+                /// \brief Constructs an uninitialized FloorMod operation
+                FloorMod() = default;
+
+                /// \brief Constructs a FloorMod operation.
+                ///
+                /// \param arg0 Output that produces the first input tensor.<br>
+                /// `[d0, ...]`
+                /// \param arg1 Output that produces the second input tensor.<br>
+                /// `[d0, ...]`
+                /// \param auto_broadcast Auto broadcast specification (defaults to NUMPY)
+                ///
+                /// Output `[d0, ...]`
+                ///
+                FloorMod(const Output<Node>& arg0,
+                         const Output<Node>& arg1,
+                         const AutoBroadcastSpec& auto_broadcast = AutoBroadcastType::NUMPY);
+                /// \brief Clones this op onto \p new_args, keeping the auto-broadcast spec.
+                std::shared_ptr<Node> copy_with_new_args(const NodeVector& new_args) const override;
+            };
+        } // namespace v1
+
+        using v1::FloorMod;
+    } // namespace op
+} // namespace ngraph
--- a/src/ngraph/op/fused/reciprocal.cpp
+++ b/src/ngraph/op/fused/reciprocal.cpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#include "ngraph/op/fused/reciprocal.hpp"
+
+#include "ngraph/op/constant.hpp"
+#include "ngraph/op/divide.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+constexpr NodeTypeInfo op::Reciprocal::type_info;
+
+// Builds a Reciprocal node with `data` as its only input (forwarded to the FusedOp base),
+// then calls constructor_validate_and_infer_types().
+op::Reciprocal::Reciprocal(const Output<Node>& data)
+    : FusedOp({data})
+{
+    this->constructor_validate_and_infer_types();
+}
+
+// Expands the fused op into primitive nodes: a Constant created from the value 1 with the
+// input's element type and shape, divided by the input through op::v1::Divide.
+NodeVector op::Reciprocal::decompose_op() const
+{
+    const auto input = input_value(0);
+    const auto& element_type = input.get_element_type();
+    const auto& shape = input.get_shape();
+
+    const auto ones = op::Constant::create(element_type, shape, {1});
+    return {make_shared<op::v1::Divide>(ones, input)};
+}
+
+// Returns a fresh Reciprocal whose input is the first entry of new_args.
+// check_new_args_count() is called on the arguments first.
+shared_ptr<Node> op::Reciprocal::copy_with_new_args(const NodeVector& new_args) const
+{
+    check_new_args_count(this, new_args);
+
+    const auto& replacement_input = new_args.at(0);
+    return make_shared<Reciprocal>(replacement_input);
+}
--- a/src/ngraph/op/fused/reciprocal.hpp
+++ b/src/ngraph/op/fused/reciprocal.hpp
+//*****************************************************************************
+// Copyright 2017-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#pragma once
+
+#include "ngraph/node.hpp"
+#include "ngraph/op/op.hpp"
+#include "ngraph/op/util/fused_op.hpp"
+
+namespace ngraph
+{
+    namespace op
+    {
+        /// \brief Reciprocal operation, implemented as a fused op
+        /// f(x) = 1 / x
+        class Reciprocal : public ngraph::op::util::FusedOp
+        {
+        public:
+            NGRAPH_API
+            static constexpr NodeTypeInfo type_info{"Reciprocal", 0};
+            const NodeTypeInfo& get_type_info() const override { return type_info; }
+            Reciprocal() = default;
+            /// \brief Constructs a Reciprocal operation.
+            ///
+            /// \param data Input tensor
+            Reciprocal(const Output<Node>& data);
+            /// \brief Decomposes into a Divide of a constant 1 (input's type and shape) by the input.
+            virtual NodeVector decompose_op() const override;
+            /// \brief Creates a new Reciprocal from the single node in \p new_args.
+            virtual std::shared_ptr<Node>
+                copy_with_new_args(const NodeVector& new_args) const override;
+        };
+    } // namespace op
+} // namespace ngraph
--- a/src/ngraph/op/fused_op_tbl.hpp
+++ b/src/ngraph/op/fused_op_tbl.hpp
@@ -48,6 +48,7 @@ NGRAPH_OP(NormalizeL2, ngraph::op)
 NGRAPH_OP(PartialSlice, ngraph::op)
 NGRAPH_OP(PartialSliceBackprop, ngraph::op)
 NGRAPH_OP(PRelu, ngraph::op)
+NGRAPH_OP(Reciprocal, ngraph::op)
 NGRAPH_OP(RNNCell, ngraph::op)
 NGRAPH_OP(ScaleShift, ngraph::op)
 NGRAPH_OP(Selu, ngraph::op)

--- a/src/ngraph/op/op_tbl.hpp
+++ b/src/ngraph/op/op_tbl.hpp
@@ -94,6 +94,7 @@ NGRAPH_OP(Equal, ngraph::op)
 NGRAPH_OP(Erf, ngraph::op)
 NGRAPH_OP(Exp, ngraph::op)
 NGRAPH_OP(Floor, ngraph::op)
+NGRAPH_OP(FloorMod, ngraph::op)
 NGRAPH_OP(Gather, ngraph::op)
 NGRAPH_OP(GatherND, ngraph::op)
 NGRAPH_OP(GenerateMask, ngraph::op)

--- a/src/ngraph/runtime/interpreter/int_executable.hpp
+++ b/src/ngraph/runtime/interpreter/int_executable.hpp
@@ -1867,6 +1867,7 @@ private:
        case OP_TYPEID::DynPad:
        case OP_TYPEID::Tile:
        case OP_TYPEID::DynReplaceSlice:
+        case OP_TYPEID::FloorMod:
            throw unsupported_op("Unsupported op '" + node.description() + "'");
 #if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop

--- a/src/ngraph/serializer.cpp
+++ b/src/ngraph/serializer.cpp
@@ -70,6 +70,7 @@
 #include "ngraph/op/experimental/tile.hpp"
 #include "ngraph/op/experimental/transpose.hpp"
 #include "ngraph/op/floor.hpp"
+#include "ngraph/op/floor_mod.hpp"
 #include "ngraph/op/fused/clamp.hpp"
 #include "ngraph/op/fused/conv_fused.hpp"
 #include "ngraph/op/fused/depth_to_space.hpp"
@@ -91,6 +92,7 @@
 #include "ngraph/op/fused/normalize_l2.hpp"
 #include "ngraph/op/fused/partial_slice.hpp"
 #include "ngraph/op/fused/prelu.hpp"
+#include "ngraph/op/fused/reciprocal.hpp"
 #include "ngraph/op/fused/rnn_cell.hpp"
 #include "ngraph/op/fused/scale_shift.hpp"
 #include "ngraph/op/fused/selu.hpp"
@@ -1543,6 +1545,12 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
            node = make_shared<op::Floor>(args[0]);
            break;
        }
+        case OP_TYPEID::FloorMod:
+        {
+            node = make_shared<op::FloorMod>(
+                args[0], args[1], read_auto_broadcast(node_js, "auto_broadcast"));
+            break;
+        }
        case OP_TYPEID::Gather:
        {
            if (op_version == 0)
@@ -2379,6 +2387,11 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
            node = make_shared<op::Range>(args[0], args[1], args[2]);
            break;
        }
+        case OP_TYPEID::Reciprocal:
+        {
+            node = make_shared<op::Reciprocal>(args[0]);
+            break;
+        }
        case OP_TYPEID::Relu:
        {
            node = make_shared<op::Relu>(args[0]);
@@ -3383,6 +3396,15 @@ json JSONSerializer::serialize_node(const Node& n)
    }
    case OP_TYPEID::Floor: { break;
    }
+    case OP_TYPEID::FloorMod:
+    {
+        auto tmp = static_cast<const op::FloorMod*>(&n);
+        if (tmp->get_autob().m_type != op::AutoBroadcastType::NONE)
+        {
+            node["auto_broadcast"] = write_auto_broadcast(tmp->get_autob());
+        }
+        break;
+    }
    case OP_TYPEID::Gather:
    {
        if (op_version == 0)
@@ -3945,6 +3967,8 @@ json JSONSerializer::serialize_node(const Node& n)
    }
    case OP_TYPEID::Range: { break;
    }
+    case OP_TYPEID::Reciprocal: { break;
+    }
    case OP_TYPEID::Relu: { break;
    }
    case OP_TYPEID::ReluBackprop: { break;

--- a/test/backend/fused_op.in.cpp
+++ b/test/backend/fused_op.in.cpp
@@ -95,6 +95,20 @@ NGRAPH_TEST(${BACKEND_NAME}, prelu)
    EXPECT_EQ(expected, read_vector<float>(result0));
 }

+NGRAPH_TEST(${BACKEND_NAME}, reciprocal)
+{
+    Shape data_shape{3, 2};
+    auto data = make_shared<op::Parameter>(element::f32, data_shape);
+    auto recip = make_shared<op::Reciprocal>(data);
+    auto fn = make_shared<Function>(NodeVector{recip}, ParameterVector{data});
+    // Inputs 1..6 must map to their element-wise reciprocals, same 3x2 shape.
+    auto test_case = test::NgraphTestCase(fn, "${BACKEND_NAME}");
+    test_case.add_input(vector<float>{1, 2, 3, 4, 5, 6});
+    test_case.add_expected_output(
+        data_shape, vector<float>{1.0f, 1 / 2.0f, 1 / 3.0f, 1 / 4.0f, 1 / 5.0f, 1 / 6.0f});
+    test_case.run();
+}
+
 NGRAPH_TEST(${BACKEND_NAME}, hardsigmoid)
 {
    Shape shape{2, 7};

--- a/test/mlir/ngraph_dialect/types.mlir
+++ b/test/mlir/ngraph_dialect/types.mlir
@@ -55,7 +55,7 @@ func @i64(%arg0: !ng.i64) {
 // -----

 // CHECK-LABEL: func @u8
-// CHECK-SAME: (%{{.*}}: !ng.i8)
+// CHECK-SAME: (%{{.*}}: !ng.u8)
 func @u8(%arg0: !ng.u8) {
  "ng.return"() : () -> ()
 }
@@ -63,7 +63,7 @@ func @u8(%arg0: !ng.u8) {
 // -----

 // CHECK-LABEL: func @u16
-// CHECK-SAME: (%{{.*}}: !ng.i16)
+// CHECK-SAME: (%{{.*}}: !ng.u16)
 func @u16(%arg0: !ng.u16) {
  "ng.return"() : () -> ()
 }
@@ -71,7 +71,7 @@ func @u16(%arg0: !ng.u16) {
 // -----

 // CHECK-LABEL: func @u32
-// CHECK-SAME: (%{{.*}}: !ng.i32)
+// CHECK-SAME: (%{{.*}}: !ng.u32)
 func @u32(%arg0: !ng.u32) {
  "ng.return"() : () -> ()
 }
@@ -83,3 +83,83 @@ func @u32(%arg0: !ng.u32) {
 func @u64(%arg0: !ng.u64) {
  "ng.return"() : () -> ()
 }
+
+// -----
+
+// CHECK-LABEL: func @tensor_i8
+// CHECK-SAME: (%{{.*}}: !ng.tensor<2x2x!ng.i8>)
+func @tensor_i8(%arg0: !ng.tensor<2x2x!ng.i8>) {
+  "ng.return"() : () -> ()
+}
+
+// -----
+
+// CHECK-LABEL: func @tensor_i16
+// CHECK-SAME: (%{{.*}}: !ng.tensor<2x2x!ng.i16>)
+func @tensor_i16(%arg0: !ng.tensor<2x2x!ng.i16>) {
+  "ng.return"() : () -> ()
+}
+
+// -----
+
+// CHECK-LABEL: func @tensor_i32
+// CHECK-SAME: (%{{.*}}: !ng.tensor<2x2x!ng.i32>)
+func @tensor_i32(%arg0: !ng.tensor<2x2x!ng.i32>) {
+  "ng.return"() : () -> ()
+}
+
+// -----
+
+// CHECK-LABEL: func @tensor_i64
+// CHECK-SAME: (%{{.*}}: !ng.tensor<2x2x!ng.i64>)
+func @tensor_i64(%arg0: !ng.tensor<2x2x!ng.i64>) {
+  "ng.return"() : () -> ()
+}
+
+// -----
+
+// CHECK-LABEL: func @tensor_u8
+// CHECK-SAME: (%{{.*}}: !ng.tensor<2x2x!ng.u8>)
+func @tensor_u8(%arg0: !ng.tensor<2x2x!ng.u8>) {
+  "ng.return"() : () -> ()
+}
+
+// -----
+
+// CHECK-LABEL: func @tensor_u16
+// CHECK-SAME: (%{{.*}}: !ng.tensor<2x2x!ng.u16>)
+func @tensor_u16(%arg0: !ng.tensor<2x2x!ng.u16>) {
+  "ng.return"() : () -> ()
+}
+
+// -----
+
+// CHECK-LABEL: func @tensor_u32
+// CHECK-SAME: (%{{.*}}: !ng.tensor<2x2x!ng.u32>)
+func @tensor_u32(%arg0: !ng.tensor<2x2x!ng.u32>) {
+  "ng.return"() : () -> ()
+}
+
+// -----
+
+// CHECK-LABEL: func @tensor_u64
+// CHECK-SAME: (%{{.*}}: !ng.tensor<2x2x!ng.u64>)
+func @tensor_u64(%arg0: !ng.tensor<2x2x!ng.u64>) {
+  "ng.return"() : () -> ()
+}
+
+// -----
+
+// CHECK-LABEL: func @tensor_f32
+// CHECK-SAME: (%{{.*}}: !ng.tensor<2x2xf32>)
+func @tensor_f32(%arg0: !ng.tensor<2x2xf32>) {
+  "ng.return"() : () -> ()
+}
+
+// -----
+
+// CHECK-LABEL: func @tensor_f64
+// CHECK-SAME: (%{{.*}}: !ng.tensor<2x2xf64>)
+func @tensor_f64(%arg0: !ng.tensor<2x2xf64>) {
+  "ng.return"() : () -> ()
+}
--- a/test/type_prop/unary_elementwise.cpp
+++ b/test/type_prop/unary_elementwise.cpp
@@ -40,3 +40,11 @@ TEST(type_prop, unary_arithmetic_bad_argument_element_types)
        FAIL() << "Deduced type check failed for unexpected reason";
    }
 }
+
+TEST(type_prop, reciprocal)
+{
+    auto data = make_shared<op::Parameter>(element::f32, Shape{2, 3, 4});
+    auto reciprocal = make_shared<op::Reciprocal>(data);
+    EXPECT_EQ(reciprocal->get_element_type(), element::f32); // element type carried over
+    EXPECT_EQ(reciprocal->get_shape(), (Shape{2, 3, 4}));    // shape carried over
+}