Unverified Commit 4abb06df authored by Scott Cyphers's avatar Scott Cyphers Committed by GitHub

Merge pull request #3013 from NervanaSystems/mlir

MLIR Integration into CPU backend
parents e7d0117e a2de30f3
...@@ -162,6 +162,7 @@ ngraph_var(NGRAPH_USE_PREBUILT_LLVM DEFAULT "FALSE") ...@@ -162,6 +162,7 @@ ngraph_var(NGRAPH_USE_PREBUILT_LLVM DEFAULT "FALSE")
option(NGRAPH_UNIT_TEST_ENABLE "Control the building of unit tests" TRUE) option(NGRAPH_UNIT_TEST_ENABLE "Control the building of unit tests" TRUE)
option(NGRAPH_TOOLS_ENABLE "Control the building of tool" TRUE) option(NGRAPH_TOOLS_ENABLE "Control the building of tool" TRUE)
option(NGRAPH_CPU_ENABLE "Control the building of the CPU backend" TRUE) option(NGRAPH_CPU_ENABLE "Control the building of the CPU backend" TRUE)
option(NGRAPH_MLIR_ENABLE "Control the building of MLIR backend" FALSE)
option(NGRAPH_INTELGPU_ENABLE "Control the building of the Intel GPU backend with clDNN" FALSE) option(NGRAPH_INTELGPU_ENABLE "Control the building of the Intel GPU backend with clDNN" FALSE)
option(NGRAPH_GPU_ENABLE "Control the building of the GPU backend" FALSE) option(NGRAPH_GPU_ENABLE "Control the building of the GPU backend" FALSE)
option(NGRAPH_INTERPRETER_ENABLE "Control the building of the INTERPRETER backend" TRUE) option(NGRAPH_INTERPRETER_ENABLE "Control the building of the INTERPRETER backend" TRUE)
...@@ -190,6 +191,12 @@ if (NGRAPH_CPU_ENABLE ...@@ -190,6 +191,12 @@ if (NGRAPH_CPU_ENABLE
set(NGRAPH_INTEL_CPU_ONLY_ENABLE ON) set(NGRAPH_INTEL_CPU_ONLY_ENABLE ON)
endif() endif()
if (NGRAPH_MLIR_ENABLE AND (NOT NGRAPH_DEX_ONLY))
#disable code-gen due to sym collision with LLVM
message(FATAL_ERROR "Cannot build with both MLIR and code-gen ON.\n"
"Use -DNGRAPH_DEX_ONLY=ON and try again. \n")
endif()
if (NGRAPH_DISTRIBUTED_ENABLE) if (NGRAPH_DISTRIBUTED_ENABLE)
if ("${NGRAPH_DISTRIBUTED_ENABLE}" STREQUAL "MLSL") if ("${NGRAPH_DISTRIBUTED_ENABLE}" STREQUAL "MLSL")
if (NGRAPH_INTEL_CPU_ONLY_ENABLE) if (NGRAPH_INTEL_CPU_ONLY_ENABLE)
...@@ -232,6 +239,7 @@ endmacro() ...@@ -232,6 +239,7 @@ endmacro()
NORMALIZE_BOOL(NGRAPH_UNIT_TEST_ENABLE) NORMALIZE_BOOL(NGRAPH_UNIT_TEST_ENABLE)
NORMALIZE_BOOL(NGRAPH_TOOLS_ENABLE) NORMALIZE_BOOL(NGRAPH_TOOLS_ENABLE)
NORMALIZE_BOOL(NGRAPH_CPU_ENABLE) NORMALIZE_BOOL(NGRAPH_CPU_ENABLE)
NORMALIZE_BOOL(NGRAPH_MLIR_ENABLE)
NORMALIZE_BOOL(NGRAPH_INTELGPU_ENABLE) NORMALIZE_BOOL(NGRAPH_INTELGPU_ENABLE)
NORMALIZE_BOOL(NGRAPH_GPU_ENABLE) NORMALIZE_BOOL(NGRAPH_GPU_ENABLE)
NORMALIZE_BOOL(NGRAPH_INTERPRETER_ENABLE) NORMALIZE_BOOL(NGRAPH_INTERPRETER_ENABLE)
...@@ -253,6 +261,7 @@ NORMALIZE_BOOL(NGRAPH_JSON_ENABLE) ...@@ -253,6 +261,7 @@ NORMALIZE_BOOL(NGRAPH_JSON_ENABLE)
message(STATUS "NGRAPH_UNIT_TEST_ENABLE: ${NGRAPH_UNIT_TEST_ENABLE}") message(STATUS "NGRAPH_UNIT_TEST_ENABLE: ${NGRAPH_UNIT_TEST_ENABLE}")
message(STATUS "NGRAPH_TOOLS_ENABLE: ${NGRAPH_TOOLS_ENABLE}") message(STATUS "NGRAPH_TOOLS_ENABLE: ${NGRAPH_TOOLS_ENABLE}")
message(STATUS "NGRAPH_CPU_ENABLE: ${NGRAPH_CPU_ENABLE}") message(STATUS "NGRAPH_CPU_ENABLE: ${NGRAPH_CPU_ENABLE}")
message(STATUS "NGRAPH_MLIR_ENABLE: ${NGRAPH_MLIR_ENABLE}")
message(STATUS "NGRAPH_INTELGPU_ENABLE: ${NGRAPH_INTELGPU_ENABLE}") message(STATUS "NGRAPH_INTELGPU_ENABLE: ${NGRAPH_INTELGPU_ENABLE}")
message(STATUS "NGRAPH_GPU_ENABLE: ${NGRAPH_GPU_ENABLE}") message(STATUS "NGRAPH_GPU_ENABLE: ${NGRAPH_GPU_ENABLE}")
message(STATUS "NGRAPH_INTERPRETER_ENABLE: ${NGRAPH_INTERPRETER_ENABLE}") message(STATUS "NGRAPH_INTERPRETER_ENABLE: ${NGRAPH_INTERPRETER_ENABLE}")
...@@ -375,6 +384,11 @@ if (NGRAPH_CPU_ENABLE) ...@@ -375,6 +384,11 @@ if (NGRAPH_CPU_ENABLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNGRAPH_CPU_ENABLE") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNGRAPH_CPU_ENABLE")
endif() endif()
if (NGRAPH_MLIR_ENABLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNGRAPH_MLIR_ENABLE")
set(NGRAPH_MLIR_SOURCE_DIR ${CMAKE_SOURCE_DIR}/src/contrib/mlir)
endif()
if (NGRAPH_PLAIDML_ENABLE) if (NGRAPH_PLAIDML_ENABLE)
find_package(PlaidML CONFIG) find_package(PlaidML CONFIG)
if (NOT PLAIDML_FOUND) if (NOT PLAIDML_FOUND)
...@@ -493,6 +507,9 @@ endif() ...@@ -493,6 +507,9 @@ endif()
if(NGRAPH_CPU_ENABLE) if(NGRAPH_CPU_ENABLE)
include(cmake/external_mkldnn.cmake) include(cmake/external_mkldnn.cmake)
endif() endif()
if (NGRAPH_MLIR_ENABLE)
include(cmake/external_mlir.cmake)
endif()
if(NGRAPH_INTELGPU_ENABLE) if(NGRAPH_INTELGPU_ENABLE)
include(cmake/external_cldnn.cmake) include(cmake/external_cldnn.cmake)
endif() endif()
......
...@@ -48,7 +48,8 @@ project/doc-contributor-README.rst @indie ...@@ -48,7 +48,8 @@ project/doc-contributor-README.rst @indie
/src/ngraph/pattern/ @jbobba @aprocter /src/ngraph/pattern/ @jbobba @aprocter
/src/ngraph/runtime/ @rkimballn1 @jbobba /src/ngraph/runtime/ @rkimballn1 @jbobba
/src/ngraph/runtime/cpu/ @jbobba /src/ngraph/runtime/cpu/ @jbobba
/src/ngraph/runtime/cpu/builder/allreduce.*pp @wenzhe-nrv @jbobba /src/contrib/mlir/ @nmostafa @dcaballe
/src/ngraph/runtime/cpu/builder/allreduce.*pp @wenzhe-nrv @jbobba @avijit-nervana
/src/ngraph/runtime/dynamic/ @aprocter /src/ngraph/runtime/dynamic/ @aprocter
/src/ngraph/runtime/gpu/ @rkimballn1 /src/ngraph/runtime/gpu/ @rkimballn1
/src/ngraph/runtime/hybrid/ @rkimballn1 /src/ngraph/runtime/hybrid/ @rkimballn1
......
# ******************************************************************************
# Copyright 2017-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
include(ExternalProject)

# Upstream repositories that provide LLVM and MLIR.
set(MLIR_LLVM_REPO_URL https://github.com/llvm/llvm-project.git)
set(MLIR_REPO_URL https://github.com/tensorflow/mlir.git)

# Change these commit IDs to move to latest stable versions
set(MLIR_LLVM_COMMIT_ID bb2b527)
set(MLIR_COMMIT_ID 49f7efc)

# Directory layout used by the helper project that fetches and builds MLIR.
# MLIR is checked out inside the LLVM tree (llvm/projects/mlir) and built
# together with LLVM in a single build directory.
set(MLIR_PROJECT_ROOT ${CMAKE_CURRENT_BINARY_DIR}/mlir_project)
set(MLIR_LLVM_ROOT ${MLIR_PROJECT_ROOT}/llvm-projects)
set(MLIR_SOURCE_DIR ${MLIR_LLVM_ROOT}/llvm/projects/mlir)
set(MLIR_BUILD_DIR ${MLIR_LLVM_ROOT}/build)

# MLIR has to be pre-built before ngraph build starts
# this will clone and build MLIR during cmake config instead
configure_file(${CMAKE_SOURCE_DIR}/cmake/mlir_fetch.cmake.in ${MLIR_PROJECT_ROOT}/CMakeLists.txt)

# Every step below runs at configure time. Each exit status is checked so that a
# failed clone or build stops the configuration here instead of surfacing later
# as a confusing find_package or link error.
execute_process(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
                WORKING_DIRECTORY "${MLIR_PROJECT_ROOT}"
                RESULT_VARIABLE MLIR_STEP_RESULT)
if (NOT "${MLIR_STEP_RESULT}" STREQUAL "0")
    message(FATAL_ERROR "Failed to configure the MLIR fetch project in "
                        "${MLIR_PROJECT_ROOT} (result: ${MLIR_STEP_RESULT})")
endif()

# clone and build llvm
execute_process(COMMAND "${CMAKE_COMMAND}" --build . --target ext_mlir_llvm
                WORKING_DIRECTORY "${MLIR_PROJECT_ROOT}"
                RESULT_VARIABLE MLIR_STEP_RESULT)
if (NOT "${MLIR_STEP_RESULT}" STREQUAL "0")
    message(FATAL_ERROR "Target ext_mlir_llvm failed in "
                        "${MLIR_PROJECT_ROOT} (result: ${MLIR_STEP_RESULT})")
endif()

# clone and build mlir
execute_process(COMMAND "${CMAKE_COMMAND}" --build . --target ext_mlir
                WORKING_DIRECTORY "${MLIR_PROJECT_ROOT}"
                RESULT_VARIABLE MLIR_STEP_RESULT)
if (NOT "${MLIR_STEP_RESULT}" STREQUAL "0")
    message(FATAL_ERROR "Target ext_mlir failed in "
                        "${MLIR_PROJECT_ROOT} (result: ${MLIR_STEP_RESULT})")
endif()

# point find_package to the pre-built libs
set(LLVM_DIR ${MLIR_LLVM_ROOT}/build/lib/cmake/llvm)

# Header locations: hand-written headers from the MLIR checkout plus the
# generated headers from the build tree.
set(MLIR_SRC_INCLUDE_PATH ${MLIR_SOURCE_DIR}/include)
set(MLIR_BIN_INCLUDE_PATH ${MLIR_BUILD_DIR}/projects/mlir/include)
set(MLIR_INCLUDE_PATHS ${MLIR_SRC_INCLUDE_PATH};${MLIR_BIN_INCLUDE_PATH})
# ******************************************************************************
# Copyright 2017-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
cmake_minimum_required(VERSION 3.1)
include(ExternalProject)
project(mlir-fetch NONE)
# Fetch-only external project: checks out the LLVM repository at the pinned
# commit into the LLVM root directory. The configure, build, install and update
# commands are all empty, so this target compiles nothing by itself.
# EXCLUDE_FROM_ALL means it only runs when requested explicitly by target name.
ExternalProject_Add(
ext_mlir_llvm
PREFIX mlir_llvm
GIT_REPOSITORY ${MLIR_LLVM_REPO_URL}
GIT_TAG ${MLIR_LLVM_COMMIT_ID}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
UPDATE_COMMAND ""
SOURCE_DIR ${MLIR_LLVM_ROOT}
DOWNLOAD_NO_PROGRESS TRUE
EXCLUDE_FROM_ALL TRUE
)
# Checks out MLIR at the pinned commit into its source directory (which sits
# inside the LLVM tree) and then configures and builds LLVM together with MLIR.
#
# The build command runs in BINARY_DIR, so "../llvm" is the LLVM source
# directory of the checkout made by ext_mlir_llvm. That checkout must exist
# first, hence the explicit dependency on ext_mlir_llvm; extra dependencies may
# still be supplied through the MLIR_DEPENDS variable.
#
# Generator values are quoted so that names containing spaces (for example
# "Unix Makefiles") stay a single argument.
# NOTE(review): "-- -j8" passes a fixed job count straight to the native build
# tool; confirm this suits every generator in use.
ExternalProject_Add(
    ext_mlir
    PREFIX mlir
    DEPENDS ext_mlir_llvm ${MLIR_DEPENDS}
    GIT_REPOSITORY ${MLIR_REPO_URL}
    GIT_TAG ${MLIR_COMMIT_ID}
    CONFIGURE_COMMAND ""
    CMAKE_GENERATOR "${CMAKE_GENERATOR}"
    CMAKE_GENERATOR_PLATFORM "${CMAKE_GENERATOR_PLATFORM}"
    CMAKE_GENERATOR_TOOLSET "${CMAKE_GENERATOR_TOOLSET}"
    BUILD_COMMAND ${CMAKE_COMMAND} ../llvm -DLLVM_BUILD_EXAMPLES=OFF -DLLVM_ENABLE_CXX1Y=Y -DLLVM_TARGETS_TO_BUILD=host -DLLVM_ENABLE_RTTI=ON -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
    COMMAND ${CMAKE_COMMAND} --build . --target check-mlir -- -j8
    INSTALL_COMMAND ""
    UPDATE_COMMAND ""
    SOURCE_DIR ${MLIR_SOURCE_DIR}
    BINARY_DIR ${MLIR_BUILD_DIR}
    STAMP_DIR "${MLIR_PROJECT_ROOT}/mlir/stamp"
    DOWNLOAD_NO_PROGRESS TRUE
    EXCLUDE_FROM_ALL TRUE
)
==============================================================================
LLVM Release License
==============================================================================
University of Illinois/NCSA
Open Source License
Copyright (c) 2003-2018 University of Illinois at Urbana-Champaign.
All rights reserved.
Developed by:
LLVM Team
University of Illinois at Urbana-Champaign
http://llvm.org
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal with
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimers.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimers in the
documentation and/or other materials provided with the distribution.
* Neither the names of the LLVM Team, University of Illinois at
Urbana-Champaign, nor the names of its contributors may be used to
endorse or promote products derived from this Software without specific
prior written permission.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.
...@@ -20,6 +20,11 @@ include_directories(ngraph) ...@@ -20,6 +20,11 @@ include_directories(ngraph)
add_subdirectory(resource) add_subdirectory(resource)
# This must be added before any backend that uses MLIR
if (NGRAPH_MLIR_ENABLE)
add_subdirectory(${NGRAPH_MLIR_SOURCE_DIR})
endif()
add_subdirectory(ngraph) add_subdirectory(ngraph)
if (NGRAPH_TOOLS_ENABLE) if (NGRAPH_TOOLS_ENABLE)
......
# ******************************************************************************
# Copyright 2017-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
# Sources of the mlir_backend library: the nGraph MLIR dialect (types and ops),
# the sub-graph compiler, the dialect lowerer, the memory manager and the
# sub-graph extraction pass.
set(SRC
dialect/dialect.cpp
dialect/type.cpp
dialect/ops.cpp
compiler.cpp
lowerer.cpp
memory_manager.cpp
pass/mlir_subgraph_extraction.cpp
pass/mlir_subgraph_extraction.hpp
)
# The library is only defined when MLIR support is enabled.
if (NGRAPH_MLIR_ENABLE)
add_library(mlir_backend SHARED ${SRC})
message(STATUS "LLVM Directory: ${LLVM_DIR}")
# Link LLVM and MLIR
# LLVM is located through its CMake package config (found via LLVM_DIR).
find_package(LLVM REQUIRED CONFIG)
set(MLIR_LLVM_INCLUDEPATH ${LLVM_INCLUDE_DIRS})
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
message(STATUS "LLVM RTTI is ${LLVM_ENABLE_RTTI}")
# NOTE(review): add_definitions is directory-scoped, so the LLVM definitions
# apply to every target defined in this directory, not only mlir_backend.
add_definitions(${LLVM_DEFINITIONS})
target_include_directories(mlir_backend PRIVATE ${LLVM_INCLUDE_DIRS})
message(STATUS "MLIR Headers at : ${MLIR_INCLUDE_PATHS}")
message(STATUS "LLVM Headers at : ${MLIR_LLVM_INCLUDEPATH}")
target_include_directories(mlir_backend PRIVATE ${MLIR_INCLUDE_PATHS})
# Resolve the LLVM components used by the backend into library names (llvm_libs).
llvm_map_components_to_libnames(llvm_libs support core irreader)
# Link MLIR libs
target_link_libraries(
mlir_backend PRIVATE
MLIRAnalysis
MLIREDSC
MLIRExecutionEngine
MLIRIR
MLIRLLVMIR
MLIRParser
MLIRPass
MLIRTargetLLVMIR
MLIRTransforms
MLIRSupport
)
# some libs need whole archive linkage because of Globals static initialization
#
# whole_archive_link(<target> <archive>...)
#   Sets the LINK_FLAGS property of <target> so that the given static archives
#   are linked in their entirety.
#   - Darwin: "-Wl,-all_load" followed by the archives, separated by spaces.
#   - Elsewhere: one "-Wl,--whole-archive,<a>,<b>,--no-whole-archive" option.
# Example:
#   whole_archive_link(mlir_backend /path/libA.a /path/libB.a)
function(whole_archive_link target)
    if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
        set(link_flags "-Llib -Wl,-all_load")
        foreach(lib ${ARGN})
            # Each archive is its own linker argument, so it needs a leading
            # space; without it, multiple paths fuse into one invalid path.
            string(CONCAT link_flags "${link_flags}" " ${lib}")
        endforeach()
    else()
        set(link_flags "-Llib -Wl,--whole-archive,")
        foreach(lib ${ARGN})
            # Archives stay inside the single -Wl option, comma-separated.
            string(CONCAT link_flags "${link_flags}" "${lib},")
        endforeach()
        string(CONCAT link_flags "${link_flags}" "--no-whole-archive")
    endif()
    message(STATUS "MLIR Ops link flag: ${link_flags}" )
    # Quoted so the property always receives exactly one value.
    set_target_properties(${target} PROPERTIES LINK_FLAGS "${link_flags}")
endfunction()
# Force whole-archive linkage of the MLIR op libraries (see the comment on
# whole_archive_link for the reason).
whole_archive_link(mlir_backend
${LLVM_BUILD_LIBRARY_DIR}/libMLIRAffineOps.a
${LLVM_BUILD_LIBRARY_DIR}/libMLIRStandardOps.a
)
# Link LLVM libs
target_link_libraries(
mlir_backend PRIVATE
${llvm_libs}
)
# Link ngraph
target_link_libraries(mlir_backend PUBLIC ngraph)
# table-gen dialect ops
# include table-gen helpers
include(${LLVM_DIR}/TableGen.cmake)
# ngraph_tablegen(<output-file> <tablegen-args>...)
#   Forwards all arguments to LLVM's tablegen() helper for the MLIR project,
#   adding the MLIR source and build include directories, and appends the
#   generated file to TABLEGEN_OUTPUT in the caller's scope.
function(ngraph_tablegen ofn)
tablegen(MLIR ${ARGV} "-I${MLIR_SRC_INCLUDE_PATH}" "-I${MLIR_BIN_INCLUDE_PATH}")
set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} ${CMAKE_CURRENT_BINARY_DIR}/${ofn} PARENT_SCOPE)
endfunction()
# NOTE(review): presumably resolved as the mlir-tblgen target or an executable
# on PATH by the LLVM tablegen helper - confirm.
set(MLIR_TABLEGEN_EXE mlir-tblgen)
# Input .td file consumed by the tablegen invocations below.
set(LLVM_TARGET_DEFINITIONS dialect/ops.td)
ngraph_tablegen(ops.h.inc -gen-op-decls)
ngraph_tablegen(ops.cpp.inc -gen-op-defs)
add_public_tablegen_target(ngraph_ops_gen)
# Generated op headers must exist before mlir_backend sources are compiled.
add_dependencies(mlir_backend ngraph_ops_gen)
# The generated .inc files are written to the current binary directory.
target_include_directories(mlir_backend PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
endif()
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "memory_manager.hpp"
#include "ngraph/node.hpp"
#include <mlir/ExecutionEngine/ExecutionEngine.h>
#include <mlir/ExecutionEngine/MemRefUtils.h>
#include <mlir/IR/Builders.h>
#include <mlir/IR/Module.h>
#include <mlir/IR/Types.h>
#include <typeindex>
#include <unordered_map>
#include <vector>
namespace ngraph
{
namespace descriptor
{
class Tensor;
}
namespace element
{
class Type;
}
namespace op
{
class CompiledKernel;
}
namespace runtime
{
namespace ngmlir
{
/// This class is the entry point to MLIR from nGraph. It drives the conversion of
/// nGraph sub-graphs, represented with CompiledKernel nodes, to MLIR nGraph dialect
/// and its lowering, optimization and execution using LLVM-based MLIR execution engine.
class MLIRCompiler
{
public:
/// Initializes MLIR environment. It must be called only once per execution.
static void init_mlir();
public:
/// List of raw pointers to nGraph tensor descriptors.
using TensorList = std::vector<descriptor::Tensor*>;
/// List of MLIR types.
using TypeList = llvm::SmallVector<mlir::Type, 4>;
/// Creates a compiler for a single sub-graph.
/// \param compiled_kernel   Sub-graph to compile, held as a non-owning pointer.
/// \param external_tensors  Externally allocated buffers for the sub-graph's inputs and
///                          outputs. The class keeps a reference member of this type, so
///                          the vector presumably must outlive this object - confirm
///                          against the constructor definition.
MLIRCompiler(const ngraph::op::CompiledKernel* compiled_kernel,
const std::vector<void*>& external_tensors);
/// Compiles and runs a subgraph in MLIR.
void compile_and_run();
/// Returns the memory manager used by this sub-graph compiler.
MLIRMemMgr& get_mem_mgr() { return m_mem_mgr; }
/// Returns memory manager pointer argument ID in call interface.
/// This is the index of the last argument of \p func.
/// NOTE(review): the result wraps around if \p func has no arguments.
unsigned get_mem_mgr_arg_id(mlir::Function* func)
{
return func->getNumArguments() - 1;
}
private:
struct TensorInfo
{
// MLIR values this tensor maps to.
mlir::Value* m_value;
};
private:
// Compilation pipeline stages. Their definitions are not in this header.
void build_ng_dialect_module();
void lower_ng_dialect();
void optimize();
void bind_arguments();
void execute();
void cleanup();
// Conversions from nGraph tensor / element types to MLIR types.
mlir::Type get_mlir_type(const descriptor::Tensor* tensor);
mlir::Type get_mlir_type(const element::Type& type);
// Accessors for the tensor -> MLIR value association.
TensorInfo get_tensor_value(descriptor::Tensor* tensor);
void update_tensor_value(descriptor::Tensor* tensor, mlir::Value* value);
void build_ng_dialect();
// Generic fallback used for any op type without a dedicated specialization:
// always throws std::runtime_error naming the unsupported op.
template <typename OP>
static mlir::Value* create_op(MLIRCompiler& compiler, const ngraph::Node* ng_node)
{
throw std::runtime_error("Unimplemented op '" + ng_node->description() +
"' in MLIR Compiler");
}
template <typename BinOp>
mlir::Value* create_binary_op(const ngraph::Node* ng_node);
void create_return();
/// Helper to create memref arguments for MLIR function signature
llvm::SmallVector<void*, 8> allocate_memref_args(mlir::Function* func);
/// Helper to allocate a mem ref object. Handles static shapes only for now.
mlir::StaticFloatMemRef* allocate_memref_descriptor(mlir::Type type);
/// Helper to dump MLIR module into llvm::dbgs prepended by the message \p msg.
void dump_mlir_module(const std::string msg);
private:
// Sub-graph to be compiled and executed with MLIR.
const ngraph::op::CompiledKernel* m_compiled_kernel;
// Pointers to externally allocated memory for sub-graph's input and output tensors.
const std::vector<void*>& m_external_tensors;
// Arguments for the MLIR function generated for the nGraph sub-graph.
llvm::SmallVector<void*, 8> m_invoke_args;
// MLIR context that holds all the MLIR information related to the sub-graph
// compilation.
mlir::MLIRContext m_context;
// Owned MLIR module, function builder and execution engine.
// Members are destroyed in reverse declaration order, so these are released
// before m_context.
std::unique_ptr<mlir::Module> m_module;
std::unique_ptr<mlir::FuncBuilder> m_builder;
std::unique_ptr<mlir::ExecutionEngine> m_engine;
using TensorToInfo = std::pair<descriptor::Tensor*, TensorInfo>;
using TensorToInfoMap = std::unordered_map<descriptor::Tensor*, TensorInfo>;
// Signature of a per-op handler that produces the MLIR value for an nGraph node.
using MLIRCompOpFunction =
std::function<mlir::Value*(MLIRCompiler& compiler, const ngraph::Node*)>;
// Handlers keyed by the C++ type of the nGraph op.
using MLIRCompOpMap = std::unordered_map<std::type_index, MLIRCompOpFunction>;
// Maps tensor to the value it represents in the IR
// use for MLIR dialect gen
TensorToInfoMap m_tensor_to_value_map;
// Table of per-op handlers shared by all compiler instances.
static const MLIRCompOpMap op_dispatcher;
// Memory manager for temp allocations inside JIT'ed code
MLIRMemMgr m_mem_mgr;
};
}
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "dialect.hpp"
#include "ngraph/check.hpp"
#include "ops.hpp"
#include "type.hpp"
using namespace mlir;
// Registers the nGraph dialect under the "ng" namespace in the given context,
// together with its tensor, integer and bool types and all of its operations.
NGDialect::NGDialect(mlir::MLIRContext* ctx)
: mlir::Dialect("ng", ctx)
{
addTypes<NGTensorType>();
addTypes<NGIntegerType>();
addTypes<NGBoolType>();
// With GET_OP_LIST defined, the generated ops.cpp.inc expands to the list of
// op classes, which becomes the template argument list of addOperations.
addOperations<
#define GET_OP_LIST
#include "ops.cpp.inc"
>();
}
// Writes the textual form of an nGraph dialect type to `os`:
//   - tensor types as "tensor<" + each dimension followed by 'x' + element type + '>'
//   - every integer type (signed or unsigned) as "i<width>"
//   - the bool type as "bool"
// Any other type kind triggers a failed NGRAPH_CHECK.
void NGDialect::printType(mlir::Type type, raw_ostream& os) const
{
    const auto kind = type.getKind();
    switch (kind)
    {
    case NG_TENSOR_TYPE_ID:
    {
        os << "tensor<";
        auto tensorType = type.cast<NGTensorType>();
        auto shape = tensorType.getShape();
        for (auto extent : shape)
        {
            // Every dimension, including the last, is followed by the 'x' separator.
            os << extent << 'x';
        }
        os << tensorType.getElementType() << '>';
        break;
    }
    case NG_I8_TYPE_ID:
    case NG_I16_TYPE_ID:
    case NG_I32_TYPE_ID:
    case NG_I64_TYPE_ID:
    case NG_U8_TYPE_ID:
    case NG_U16_TYPE_ID:
    case NG_U32_TYPE_ID:
    case NG_U64_TYPE_ID:
    {
        // Signed and unsigned integers share the same "i<width>" spelling.
        auto integerType = type.cast<NGIntegerType>();
        os << "i" << integerType.getWidth();
        break;
    }
    case NG_BOOL_TYPE_ID:
    {
        os << "bool";
        break;
    }
    default:
    {
        NGRAPH_CHECK(false, "Incorrect type to print?");
    }
    }
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "mlir/IR/Dialect.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/IR/TypeSupport.h"
#include "mlir/IR/Types.h"
#include "ngraph/check.hpp"
namespace mlir
{
/// MLIR dialect for nGraph. Type printing is implemented in the dialect's
/// source file; type parsing is not supported.
class NGDialect : public mlir::Dialect
{
public:
/// Constructs the dialect for the MLIR context \p ctx.
explicit NGDialect(mlir::MLIRContext* ctx);
/// Type parsing is not supported: this always fails an NGRAPH_CHECK. The
/// trailing return of a default-constructed type is only reached if the
/// failed check returns control - TODO confirm NGRAPH_CHECK semantics.
mlir::Type parseType(llvm::StringRef tyData, mlir::Location loc) const override
{
NGRAPH_CHECK(false, "Unsupported type parsing.");
return mlir::Type();
}
/// Prints the textual form of the dialect type \p type to \p os.
void printType(mlir::Type type, llvm::raw_ostream& os) const override;
};
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ops.hpp"
#include "assertion.hpp"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include "type.hpp"
using llvm::ArrayRef;
using llvm::raw_ostream;
using llvm::raw_string_ostream;
using llvm::SmallVector;
using llvm::StringRef;
using llvm::Twine;
using namespace mlir;
// TODO:
// - Move verifiers and other OP helpers (e.g. getSomeAttribute()) to separate files
//
// - Op helpers: Since it is not possible to add arbitrary code (and would complicate the .td file)
// to Ops classes, we will add helper classes with static methods for each Op that needs it
// Additional verification methods
// Tensor type checks are already verified by the caller of these methods
/// Checks if all operands and results are of compatible shapes
///
/// Every operand after the first is compared against the type of operand 0 and,
/// when \p checkResult is true, every result is compared against it as well.
/// All operand and result types are expected to be NGTensorType (verified by the
/// caller), and the op is expected to have at least one operand.
///
/// \param op           Operation wrapper to verify.
/// \param checkResult  Whether result types must also be compatible with operand 0.
/// \returns mlir::success() if everything is compatible; otherwise the failure
///          produced by emitting an op error.
template <typename T>
static mlir::LogicalResult verifyCompatibleOperandsAndResults(T* op, bool checkResult = true)
{
    Operation* opr = op->getOperation();
    mlir::Type t0 = opr->getOperand(0)->getType();
    mlir::NGTensorType opType0 = t0.cast<NGTensorType>();

    // Operand 0 is the reference type, so it is skipped. The flag is cleared
    // before `continue`; otherwise every later operand would be skipped too and
    // nothing would ever be compared.
    bool isFirstOperand = true;
    for (auto operand : opr->getOperands())
    {
        if (isFirstOperand)
        {
            isFirstOperand = false;
            continue;
        }
        mlir::Type t = operand->getType();
        mlir::NGTensorType opType = t.cast<NGTensorType>();
        if (!opType.isCompatible(opType0))
            return op->emitOpError("Incompatible operand shape");
    }

    if (checkResult)
    {
        for (auto result : opr->getResults())
        {
            mlir::Type t = result->getType();
            mlir::NGTensorType resType = t.cast<NGTensorType>();
            if (!resType.isCompatible(opType0))
                return op->emitOpError("Incompatible result shape");
        }
    }
    return mlir::success();
}
template <typename T>
static mlir::LogicalResult verifyUnaryArithOp(T* op)
{
return verifyCompatibleOperandsAndResults(op);
}
template <typename T>
static mlir::LogicalResult verifyBinaryArithOp(T* op)
{
return verifyCompatibleOperandsAndResults(op);
}
template <typename T>
static mlir::LogicalResult verifyAxisReductionOp(T* op)
{
return mlir::failure();
}
template <typename T>
static mlir::LogicalResult verifyLogicalReductionOp(T* op)
{
// TODO: verifyAxisReductionOp(op) + input and return element type.
return mlir::failure();
}
template <typename T>
static mlir::LogicalResult verifyIndexReductionOp(T* op)
{
// TODO: verifyAxisReductionOp(op) + return element type + single axis.
return mlir::failure();
}
template <typename T>
static mlir::LogicalResult verifyOp(T* op)
{
return op->emitOpError("Unsupported verifier for this operation");
}
template <>
mlir::LogicalResult verifyOp(NGDotOp* op)
{
// TODO(dcab): Improve verification: proper shapes, etc.
return mlir::success();
}
// Verifier for the select op. Operand 0 is the condition; operands 1 and 2 are
// the two value tensors. All operand and result types are expected to be
// NGTensorType. The checks are, in order:
//   1. operands 1 and 2 are compatible with each other,
//   2. operand 0 has a shape compatible with operand 1 and a bool element type,
//   3. the result is compatible with operand 1.
template <>
mlir::LogicalResult verifyOp(NGSelectOp* op)
{
    Operation* selectOp = op->getOperation();
    mlir::Type condTy = selectOp->getOperand(0)->getType();
    mlir::Type lhsTy = selectOp->getOperand(1)->getType();
    mlir::Type rhsTy = selectOp->getOperand(2)->getType();
    mlir::Type outTy = selectOp->getResult(0)->getType();

    NGTensorType condTensor = condTy.cast<NGTensorType>();
    NGTensorType lhsTensor = lhsTy.cast<NGTensorType>();
    NGTensorType rhsTensor = rhsTy.cast<NGTensorType>();
    NGTensorType outTensor = outTy.cast<NGTensorType>();

    // arg1 arg2 of same shape and elt type
    if (!lhsTensor.isCompatible(rhsTensor))
    {
        return op->emitOpError("Incompatible operand shapes or types for select op");
    }

    // arg0 of same shape and elt type is bool
    if (!condTensor.isCompatibleShape(lhsTensor) ||
        !condTensor.getElementType().isa<NGBoolType>())
    {
        return op->emitOpError("Incompatible shape for arg0 of select op");
    }

    // result is of same shape and elt type as arg1/2
    if (!outTensor.isCompatible(lhsTensor))
    {
        return op->emitOpError("Incompatible result shape or type for select op");
    }

    return mlir::success();
}
// Verifier for comparison ops. First runs the operand compatibility check
// (results excluded), then requires the single result to have a shape
// compatible with operand 0 and a bool element type.
template <typename T>
static mlir::LogicalResult verifyCmpOp(T* op)
{
    // Operands only: the result legitimately has a different element type.
    mlir::LogicalResult operandCheck =
        verifyCompatibleOperandsAndResults(op, false /*checkResult*/);
    if (failed(operandCheck))
    {
        return operandCheck;
    }

    mlir::Type inputTy = op->getOperation()->getOperand(0)->getType();
    mlir::NGTensorType inputTensor = inputTy.cast<NGTensorType>();
    mlir::Type outputTy = op->getOperation()->getResult(0)->getType();
    NGTensorType outputTensor = outputTy.cast<NGTensorType>();

    // result of same shape as input and has bool type
    if (!outputTensor.isCompatibleShape(inputTensor) ||
        !outputTensor.getElementType().isa<NGBoolType>())
    {
        return op->emitOpError("Incompatible result shape or type for comparison op");
    }
    return mlir::success();
}
namespace mlir
{
#define GET_OP_CLASSES
#include "ops.cpp.inc"
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cstdarg>
#include "mlir/IR/Builders.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/OperationSupport.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/Support/STLExtras.h"
namespace mlir
{
#define GET_OP_CLASSES
#include "ops.h.inc"
#undef GET_OP_CLASSES
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
//
// This is the nGraph Dialect operation definition file.
//
//===----------------------------------------------------------------------===//
include "mlir/IR/OpBase.td"
// nGraph Dialect operations definitions
//
// This file declares the nGraph operations that table-gen uses to create C++ code.
// For more information about tablegen, see https://llvm.org/docs/TableGen/index.html
//
// The output files are ops.h.inc and ops.cpp.inc and are generated at build time
// The file declares base classes to ease opcode definitions and hoist common parts out.
// Each class fixes a set of attributes. For example:
// class NG_Unary_Arith_Op defines a base class for all unary arithmetic ops without side-effects
//
// An opcode is a record definition of the form
// def AbsOp : NG_Unary_Arith_Op<"abs">;
//
// Each def corresponds to a C++ class
def NG_Dialect : Dialect {
let name = "ng";
// TODO: Have the dialect under its own mlir::ngraph namespace
// At mlir top-level for now
let cppNamespace = "";
}
// nGraph Types
// This defines records equivalent to nGraph types. It doesn't generate code.
// This is used as a type in the DAG input/outputs.
// Constraints (CPred) are used to type-check args/results of that type during op verification
def NG_TensorType : Type<CPred<"$_self.isa<mlir::NGTensorType>()">,
"nGraph Tensor Type">;
// A generic un-typed MemRef. Used for Fake instructions inserted during dialect lowering
def NG_MemRefType : Type<IsMemRefTypePred, "MemRef Type">;
// nGraph operation base class.
// Prepends "ng." to operation name
class NG_Op<string mnemonic, list<OpTrait> traits = []> :
Op<NG_Dialect, mnemonic, traits> {}
// Operations producing single result.
// Will set OneResult trait based on Results out dag.
class NG_OneResult_Op<string mnemonic, list<OpTrait> traits = []> :
NG_Op<mnemonic, traits>, Results<(outs NG_TensorType:$res)> {}
// Base for fake instructions defining MemRef values
class NG_MemRefDef_Op<string mnemonic, list<OpTrait> traits = []> :
NG_Op<mnemonic, traits>, Results<(outs NG_MemRefType:$res)> {}
// Operations producing no results
class NG_ZeroResult_Op<string mnemonic, list<OpTrait> traits = []> :
NG_Op<mnemonic, traits>, Results<(outs)> {}
// Base class for arithmetic unary operations without side effects.
class NG_Unary_Arith_Op<string mnemonic, list<OpTrait> traits = []>
    : NG_OneResult_Op<mnemonic, !listconcat([NoSideEffect], traits)>,
      Arguments<(ins NG_TensorType:$arg)> {
  // Verification is delegated to the shared unary-arithmetic helper.
  let verifier = [{ return verifyUnaryArithOp(this); }];

  // TODO: Implement. Until then the generated parser always reports failure.
  let parser = [{ NGRAPH_CHECK(false, "No parser support"); return mlir::failure(); }];
}
// Base class for arithmetic binary operations without side effects.
class NG_Binary_Op<string mnemonic, list<OpTrait> traits = []>
    : NG_OneResult_Op<mnemonic, !listconcat([NoSideEffect], traits)>,
      Arguments<(ins NG_TensorType:$lhs, NG_TensorType:$rhs)> {
  // No verifier is set here; ops deriving from this class provide their own.
  // TODO: Implement. Until then the generated parser always reports failure.
  let parser = [{ NGRAPH_CHECK(false, "No parser support"); return mlir::failure(); }];
}
// Base class for arithmetic binary operations with verifier.
class NG_Binary_Arith_Op<string mnemonic, list<OpTrait> traits = []>
    : NG_OneResult_Op<mnemonic, traits>,
      Arguments<(ins NG_TensorType:$lhs, NG_TensorType:$rhs)> {
  // Verification is delegated to the shared binary-arithmetic helper.
  let verifier = [{ return verifyBinaryArithOp(this); }];

  // TODO: Implement. Until then the generated parser always reports failure.
  let parser = [{ NGRAPH_CHECK(false, "No parser support"); return mlir::failure(); }];
}
// Base class for comparison operations with verifier.
class NG_Cmp_Op<string mnemonic, list<OpTrait> traits = []>
    : NG_OneResult_Op<mnemonic, traits>,
      Arguments<(ins NG_TensorType:$lhs, NG_TensorType:$rhs)> {
  // Verification is delegated to the shared comparison helper.
  let verifier = [{ return verifyCmpOp(this); }];

  // TODO: Implement. Until then the generated parser always reports failure.
  let parser = [{ NGRAPH_CHECK(false, "No parser support"); return mlir::failure(); }];
}
// Base class for ternary operations without side effects.
class NG_Ternary_Op<string mnemonic, list<OpTrait> traits = []>
    : NG_OneResult_Op<mnemonic, !listconcat([NoSideEffect], traits)>,
      Arguments<(ins NG_TensorType:$op0, NG_TensorType:$op1, NG_TensorType:$op2)> {
  // No verifier is set here; ops deriving from this class provide their own.
  // TODO: Implement. Until then the generated parser always reports failure.
  let parser = [{ NGRAPH_CHECK(false, "No parser support"); return mlir::failure(); }];
}
// Base class for terminator operations.
// Appends the Terminator trait to the caller-supplied traits, takes a variadic list of
// tensor operands ($args) and produces no results.
class NG_Terminator_Op<string mnemonic, list<OpTrait> traits = []> :
NG_Op<mnemonic, !listconcat(traits, [Terminator])>,
Arguments<(ins Variadic<NG_TensorType>:$args)>, Results<(outs)> {}
// Unary Operations
// Each def instantiates NG_Unary_Arith_Op with the given mnemonic, so all of them share the
// single $arg operand, the NoSideEffect trait and the verifyUnaryArithOp verifier.
def NGAbsOp : NG_Unary_Arith_Op<"abs">;
def NGACosOp : NG_Unary_Arith_Op<"acos">;
def NGASinOp : NG_Unary_Arith_Op<"asin">;
def NGATanOp : NG_Unary_Arith_Op<"atan">;
def NGCeilOp : NG_Unary_Arith_Op<"ceil">;
// NOTE(review): the mnemonic is "conv" although the def is named NGConvertOp - confirm it
// cannot be mistaken for a convolution op.
def NGConvertOp : NG_Unary_Arith_Op<"conv">;
def NGCosOp : NG_Unary_Arith_Op<"cos">;
def NGCoshOp : NG_Unary_Arith_Op<"cosh">;
def NGExpOp : NG_Unary_Arith_Op<"exp">;
def NGFloorOp : NG_Unary_Arith_Op<"floor">;
def NGLogOp : NG_Unary_Arith_Op<"log">;
def NGNegOp : NG_Unary_Arith_Op<"neg">;
def NGNotOp : NG_Unary_Arith_Op<"not">;
def NGSignOp : NG_Unary_Arith_Op<"sign">;
def NGSinOp : NG_Unary_Arith_Op<"sin">;
def NGSinhOp : NG_Unary_Arith_Op<"sinh">;
def NGTanOp : NG_Unary_Arith_Op<"tan">;
def NGTanhOp : NG_Unary_Arith_Op<"tanh">;
def NGSqrtOp : NG_Unary_Arith_Op<"sqrt">;
// Binary Operations
// Binary arithmetic ops share NG_Binary_Arith_Op ($lhs, $rhs, verifyBinaryArithOp).
// Only add, and, max, min and mul carry the Commutative trait.
def NGAddOp : NG_Binary_Arith_Op<"add", [Commutative]>;
def NGAndOp : NG_Binary_Arith_Op<"and", [Commutative]>;
def NGSubOp : NG_Binary_Arith_Op<"sub">;
def NGDivOp : NG_Binary_Arith_Op<"div">;
def NGMaxOp : NG_Binary_Arith_Op<"max", [Commutative]>;
def NGMinOp : NG_Binary_Arith_Op<"min", [Commutative]>;
def NGMulOp : NG_Binary_Arith_Op<"mul", [Commutative]>;
def NGPowOp : NG_Binary_Arith_Op<"pow">;
// Comparison
// Comparison ops share NG_Cmp_Op ($lhs, $rhs, verifyCmpOp); none is marked Commutative.
def NGEqOp : NG_Cmp_Op<"equal">;
def NGGreaterOp : NG_Cmp_Op<"greater">;
def NGGreaterEqOp : NG_Cmp_Op<"greater.eq">;
def NGLessOp : NG_Cmp_Op<"less">;
def NGLessEqOp : NG_Cmp_Op<"less.eq">;
def NGNotEqOp : NG_Cmp_Op<"not.equal">;
// Other
// Ternary select op; operands are $op0, $op1, $op2 as declared by NG_Ternary_Op.
def NGSelectOp : NG_Ternary_Op<"select">
{
// NG_Ternary_Op sets no verifier, so the op supplies its own via verifyOp.
let verifier = [{ return verifyOp(this); }];
}
// Dot Product
def NGDotOp : NG_Binary_Op<"dot">
{
// TODO: Add reduction axis attribute when needed.
// NG_Binary_Op sets no verifier, so the op supplies its own via verifyOp.
let verifier = [{ return verifyOp(this); }];
}
// Base class for ops reducing one tensor ($operand) across the axes listed in the $axes
// attribute. Adds NoSideEffect to the caller-supplied traits and produces a single result.
class NG_Axis_Reduction_Op<string mnemonic, list<OpTrait> traits = []> :
NG_OneResult_Op<mnemonic, !listconcat([NoSideEffect], traits)>,
Arguments<(ins NG_TensorType:$operand, I64ArrayAttr:$axes)>
{
let summary = "Base class for reduction operations that perform a reduction "
"across the axes of a single tensor.";
let description = "Axes are represented as an array of I64 attributes.";
// Parsing is not supported: the generated parser always reports failure.
let parser = [{ NGRAPH_CHECK(false, "No parser support"); return mlir::failure(); }];
// TODO
// Default verifier; derived defs may override it.
let verifier = [{ return verifyAxisReductionOp(this); }];
}
// Axis reduction operations.
// The four value reductions below use the verifier inherited from NG_Axis_Reduction_Op
// (verifyAxisReductionOp); only the summary differs per op.
def NGSumRedOp : NG_Axis_Reduction_Op<"sum.red"> {
  let summary = "Axis sum reduction of a tensor.";
}

def NGProdRedOp : NG_Axis_Reduction_Op<"prod.red"> {
  let summary = "Axis product reduction of a tensor.";
}

def NGMinRedOp : NG_Axis_Reduction_Op<"min.red"> {
  let summary = "Axis minimum reduction of a tensor.";
}

def NGMaxRedOp : NG_Axis_Reduction_Op<"max.red"> {
  let summary = "Axis maximum reduction of a tensor.";
}
// Index reductions replace the base-class verifier with verifyIndexReductionOp.
def NGArgMinRedOp : NG_Axis_Reduction_Op<"argmin.red">
{
let summary = "Axis minimum index reduction of a tensor.";
let verifier = [{ return verifyIndexReductionOp(this); }];
}
def NGArgMaxRedOp : NG_Axis_Reduction_Op<"argmax.red">
{
let summary = "Axis maximum index reduction of a tensor.";
let verifier = [{ return verifyIndexReductionOp(this); }];
}
// Logical reductions replace the base-class verifier with verifyLogicalReductionOp.
def NGAllRedOp : NG_Axis_Reduction_Op<"all.red">
{
let summary = "Axis logical AND reduction of a boolean tensor.";
let verifier = [{ return verifyLogicalReductionOp(this); }];
}
def NGAnyRedOp : NG_Axis_Reduction_Op<"any.red">
{
let summary = "Axis logical OR reduction of a boolean tensor.";
let verifier = [{ return verifyLogicalReductionOp(this); }];
}
// Terminator Ops
// Terminator taking a variadic list of tensors and producing no result (see NG_Terminator_Op).
def NGReturnOp : NG_Terminator_Op<"return">;
// Fake ops
// Produces a single NG_MemRefType result and takes no operands; marked NoSideEffect.
def NGFakeInputOp : NG_MemRefDef_Op<"fake.input", [NoSideEffect]>;
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "type.hpp"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/Support/STLExtras.h"
#include "ngraph/assertion.hpp"
using llvm::ArrayRef;
using llvm::raw_ostream;
using llvm::raw_string_ostream;
using llvm::SmallVector;
using llvm::StringRef;
using llvm::Twine;
using namespace mlir;
/// Returns the bit width (8, 16, 32 or 64) encoded by this integer type's kind.
/// Signed and unsigned kinds of the same size share a width. Any other kind
/// fails the NGRAPH_CHECK below.
unsigned NGIntegerType::getWidth() const
{
    const auto kind = getKind();
    if (kind == NG_I8_TYPE_ID || kind == NG_U8_TYPE_ID)
    {
        return 8;
    }
    if (kind == NG_I16_TYPE_ID || kind == NG_U16_TYPE_ID)
    {
        return 16;
    }
    if (kind == NG_I32_TYPE_ID || kind == NG_U32_TYPE_ID)
    {
        return 32;
    }
    if (kind == NG_I64_TYPE_ID || kind == NG_U64_TYPE_ID)
    {
        return 64;
    }
    NGRAPH_CHECK(false, "Invalid type ID");
    return 0;
}
/// Returns true for the NG_I* kinds and false for the NG_U* kinds. Any other
/// kind fails the NGRAPH_CHECK below.
bool NGIntegerType::isSigned() const
{
    const auto kind = getKind();
    const bool is_signed_kind = kind == NG_I8_TYPE_ID || kind == NG_I16_TYPE_ID ||
                                kind == NG_I32_TYPE_ID || kind == NG_I64_TYPE_ID;
    if (is_signed_kind)
    {
        return true;
    }
    const bool is_unsigned_kind = kind == NG_U8_TYPE_ID || kind == NG_U16_TYPE_ID ||
                                  kind == NG_U32_TYPE_ID || kind == NG_U64_TYPE_ID;
    if (!is_unsigned_kind)
    {
        NGRAPH_CHECK(false, "Invalid type ID");
    }
    return false;
}
/// Creates TensorType objects. They all point to the same storage if
/// element type and shape are the same.
// Forwards to Base::get with the NG_TENSOR_TYPE_ID kind; eltType and shape form the
// storage key (see NGTensorTypeStorage::KeyTy).
NGTensorType NGTensorType::get(MLIRContext* context, EltType eltType, Shape shape)
{
return Base::get(context, NGTypeKind::NG_TENSOR_TYPE_ID, eltType, shape);
}
/// Checks whether this tensor type is compatible with \p other.
///
/// Two tensor types are compatible when they have exactly the same element
/// type and compatible shapes (see isCompatibleShape).
///
/// \param other Tensor type to compare against.
/// \return true if the types are compatible, false otherwise.
bool NGTensorType::isCompatible(NGTensorType& other) const
{
    // Exact same tensor type. NGTensorType is a value handle, so identity must be
    // checked on the handles themselves (mlir::Type::operator==), not on the
    // addresses of the two handle objects.
    if (*this == other)
        return true;
    // different tensors, check if of same element type and compatible shapes
    if (getElementType() != other.getElementType())
        return false;
    // TODO: Handle dynamic ranks
    // MLIR MemRefType doesn't seem to support it at the moment.
    return isCompatibleShape(other);
}
/// Checks whether the shape of this tensor type is compatible with that of \p other.
///
/// Shapes are compatible when they have the same rank and every pair of
/// dimensions is either equal or has at least one dynamic (-1) dimension.
/// Dimensions below -1 are rejected through NGRAPH_CHECK.
///
/// \param other Tensor type whose shape is compared.
/// \return true if the shapes are compatible, false otherwise.
bool NGTensorType::isCompatibleShape(NGTensorType& other) const
{
    auto shape = getShape();
    auto otherShape = other.getShape();
    if (shape.size() != otherShape.size())
        return false;

    // size_t index: avoids the signed/unsigned comparison against shape.size().
    for (size_t i = 0; i < shape.size(); i++)
    {
        // The trailing ": " keeps the message and the offending value apart.
        NGRAPH_CHECK(shape[i] >= -1, "Invalid tensor shape: ", shape[i]);
        NGRAPH_CHECK(otherShape[i] >= -1, "Invalid tensor shape: ", otherShape[i]);

        if (shape[i] == -1 || otherShape[i] == -1 || shape[i] == otherShape[i])
            continue;
        return false;
    }
    return true;
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "mlir/IR/Dialect.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/IR/TypeSupport.h"
#include "mlir/IR/Types.h"
#include "ngraph/check.hpp"
namespace mlir
{
using llvm::raw_ostream;
// Kind identifiers for the types added by this dialect.
// The integer kinds are laid out contiguously between NG_FIRST_INT_TYPE_ID and
// NG_LAST_INT_TYPE_ID; NGIntegerType::kindof relies on that range.
enum NGTypeKind
{
// The enum starts at the range reserved for this dialect.
// These values are pre-defined in MLIR lib and not configurable from here.
NG_TYPE = mlir::Type::Kind::FIRST_PRIVATE_EXPERIMENTAL_0_TYPE,
// Element types that are added by the dialect.
// Other types are just re-use of std dialect types.
NG_FIRST_INT_TYPE_ID,
// Alias of the first integer kind.
NG_I8_TYPE_ID = NG_FIRST_INT_TYPE_ID,
NG_I16_TYPE_ID,
NG_I32_TYPE_ID,
NG_I64_TYPE_ID,
NG_U8_TYPE_ID,
NG_U16_TYPE_ID,
NG_U32_TYPE_ID,
NG_U64_TYPE_ID,
// Alias of the last integer kind.
NG_LAST_INT_TYPE_ID = NG_U64_TYPE_ID,
NG_BOOL_TYPE_ID,
// Tensor type
NG_TENSOR_TYPE_ID
};
// reuse std float types as-is
using NGFloatType = mlir::FloatType;
/// Integer type. It represents an integer of width 8,16,32,64. Signed or not.
// The concrete integer type is identified solely by its NGTypeKind; the class declares no
// data members of its own.
class NGIntegerType : public mlir::Type::TypeBase<NGIntegerType, mlir::Type>
{
public:
using Base::Base;
/// Returns the integer type of the given kind.
/// The kind must lie in the integer kind range (see kindof), otherwise NGRAPH_CHECK fails.
static NGIntegerType get(NGTypeKind kind, mlir::MLIRContext* context)
{
NGRAPH_CHECK(kindof(kind), "Not an integer kind.");
return Base::get(context, kind);
}
/// Create signed Int8
static NGIntegerType getInt8(mlir::MLIRContext* ctx)
{
return get(NGTypeKind::NG_I8_TYPE_ID, ctx);
}
/// Create signed Int16
static NGIntegerType getInt16(mlir::MLIRContext* ctx)
{
return get(NGTypeKind::NG_I16_TYPE_ID, ctx);
}
/// Create signed Int32
static NGIntegerType getInt32(mlir::MLIRContext* ctx)
{
return get(NGTypeKind::NG_I32_TYPE_ID, ctx);
}
/// Create signed Int64
static NGIntegerType getInt64(mlir::MLIRContext* ctx)
{
return get(NGTypeKind::NG_I64_TYPE_ID, ctx);
}
/// Create unsigned Int8
static NGIntegerType getUInt8(mlir::MLIRContext* ctx)
{
return get(NGTypeKind::NG_U8_TYPE_ID, ctx);
}
/// Create unsigned Int16
static NGIntegerType getUInt16(mlir::MLIRContext* ctx)
{
return get(NGTypeKind::NG_U16_TYPE_ID, ctx);
}
/// Create unsigned Int32
static NGIntegerType getUInt32(mlir::MLIRContext* ctx)
{
return get(NGTypeKind::NG_U32_TYPE_ID, ctx);
}
/// Create unsigned Int64
static NGIntegerType getUInt64(mlir::MLIRContext* ctx)
{
return get(NGTypeKind::NG_U64_TYPE_ID, ctx);
}
/// RTTI support. So we can do obj->isa<NGIntegerType>()
/// True for every kind in [NG_FIRST_INT_TYPE_ID, NG_LAST_INT_TYPE_ID].
static bool kindof(unsigned kind)
{
return kind >= NGTypeKind::NG_FIRST_INT_TYPE_ID &&
kind <= NGTypeKind::NG_LAST_INT_TYPE_ID;
}
/// Return the bitwidth of this integer type.
unsigned getWidth() const;
/// Check if signed type
bool isSigned() const;
/// Check if Int8
bool isInt8() const { return getKind() == NG_I8_TYPE_ID; }
/// Check if UInt8
bool isUInt8() const { return getKind() == NG_U8_TYPE_ID; }
/// Check if Int16
bool isInt16() const { return getKind() == NG_I16_TYPE_ID; }
/// Check if UInt16
bool isUInt16() const { return getKind() == NG_U16_TYPE_ID; }
/// Check if Int32
bool isInt32() const { return getKind() == NG_I32_TYPE_ID; }
/// Check if UInt32
bool isUInt32() const { return getKind() == NG_U32_TYPE_ID; }
/// Check if Int64
bool isInt64() const { return getKind() == NG_I64_TYPE_ID; }
/// Check if UInt64
bool isUInt64() const { return getKind() == NG_U64_TYPE_ID; }
// Delete convenience methods inherited from MLIR Type class.
// This would avoid confusion if we do something like this and get false.
//
// if (type->cast<NGIntegerType>()->isInteger(32)) {}
//
// Those helpers use type id, and since we have our own Integer type id, they
// don't apply.
// Use getWidth() / isSigned() / isInt*() on this class instead.
bool isInteger(unsigned width) const = delete;
unsigned getIntOrFloatBitWidth() const = delete;
bool isIntOrIndex() const = delete;
bool isIntOrIndexOrFloat() const = delete;
bool isIntOrFloat() const = delete;
};
/// Boolean Type.
class NGBoolType : public mlir::Type::TypeBase<NGBoolType, mlir::Type>
{
public:
    using Base::Base;

    /// RTTI support: NG_BOOL_TYPE_ID is the only kind that identifies this type.
    static bool kindof(unsigned kind) { return NGTypeKind::NG_BOOL_TYPE_ID == kind; }

    /// Returns the bool type for an explicitly given kind.
    /// The kind must satisfy kindof(), otherwise the check below fails.
    static NGBoolType get(NGTypeKind kind, mlir::MLIRContext* context)
    {
        NGRAPH_CHECK(kindof(kind), "Not a bool type.");
        return Base::get(context, kind);
    }

    /// Convenience overload that supplies NG_BOOL_TYPE_ID itself.
    static NGBoolType get(mlir::MLIRContext* ctx)
    {
        return get(NG_BOOL_TYPE_ID, ctx);
    }
};
// Note that dialect types don't add new data members, so always possible
// to use NG or std types here
// Element type of a tensor: any mlir::Type handle.
using EltType = mlir::Type;
// TODO: Can we use ngraph::shape here (given the hashing requirements)
// Shape is an llvm::ArrayRef, i.e. it refers to dimension data stored elsewhere.
// NGTensorTypeStorage::construct copies that data through the type storage allocator.
using Shape = llvm::ArrayRef<int64_t>;
/// Tensor Type storage. There is a unique instance per type attributes.
/// Tensor Type is combination of the element type and shape. Each different
/// shape is a unique type.
struct NGTensorTypeStorage : public mlir::TypeStorage
{
// Tensor key is its type and shape.
// This is called when the user requests a specific tensor type
using KeyTy = std::tuple<EltType, Shape>;
static unsigned hashKey(const KeyTy& key)
{
return llvm::hash_combine(std::get<0>(key), std::get<1>(key));
}
bool operator==(const KeyTy& key) const
{
return key == KeyTy(getElementType(), getShape());
}
static NGTensorTypeStorage* construct(mlir::TypeStorageAllocator& allocator,
const KeyTy& key)
{
// Deep copy the type shape over to MLIR context
EltType eltType = std::get<0>(key);
Shape shape = allocator.copyInto(std::get<1>(key));
auto* storage = allocator.allocate<NGTensorTypeStorage>();
return new (storage) NGTensorTypeStorage(eltType, shape);
}
Shape getShape() const { return m_shape; }
EltType getElementType() const { return m_eltType; }
private:
NGTensorTypeStorage(EltType eltType, Shape shape)
: m_eltType(eltType)
, m_shape(shape)
{
}
private:
EltType m_eltType;
Shape m_shape;
};
/// NGraph Tensor Type
class NGTensorType : public mlir::Type::TypeBase<NGTensorType, mlir::Type, NGTensorTypeStorage>
{
public:
    using Base::Base;

    /// Element type held by the type storage.
    EltType getElementType() const { return getImpl()->getElementType(); }

    /// Shape held by the type storage. A dimension of -1 denotes a dynamic dimension.
    Shape getShape() const { return getImpl()->getShape(); }

    /// Tensor Rank. Static shape only for now
    int getRank() const { return getShape().size(); }

    /// Computes tensor size in bytes.
    ///
    /// \return Number of elements multiplied by the element size rounded up to
    ///         whole bytes, or static_cast<size_t>(-1) when any dimension is dynamic.
    size_t getSizeInBytes() const
    {
        size_t s = 1;
        for (int64_t dim : getShape())
        {
            // no dynamic dims
            if (dim == -1)
                return static_cast<size_t>(-1);
            s *= dim;
        }
        // Multiply times element size
        return s * llvm::divideCeil(getElementBitWidth(), 8);
    }

    /// Checks if two tensors are compatible. Compatible means:
    /// Exactly same element types
    /// Compatible shapes: see isCompatibleShape.
    bool isCompatible(NGTensorType& other) const;

    /// Check if Shapes are of same rank and matching dimensions unless one of them is dynamic.
    bool isCompatibleShape(NGTensorType& other) const;

    /// create a unique tensor type based on element type and shape.
    static NGTensorType get(mlir::MLIRContext* context, EltType eltType, Shape shape);

    /// for llvm RTTI
    static bool kindof(unsigned kind) { return kind == NGTypeKind::NG_TENSOR_TYPE_ID; }

private:
    /// Bit width of the element type.
    ///
    /// NGIntegerType uses dialect-specific kinds, so the std helper
    /// getIntOrFloatBitWidth() does not apply to it (NGIntegerType deletes that
    /// helper for this reason); its own getWidth() is used instead. Every other
    /// element type keeps going through getIntOrFloatBitWidth().
    // TODO(review): NGBoolType still takes the std path; decide on its width.
    unsigned getElementBitWidth() const
    {
        EltType eltType = getElementType();
        if (auto intType = eltType.dyn_cast<NGIntegerType>())
            return intType.getWidth();
        return eltType.getIntOrFloatBitWidth();
    }
};
}
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "contrib/mlir/compiler.hpp"
#include <mlir/Pass/Pass.h>
// Forward declaration of the MLIR compiler class used in the pass factory signature below.
// NOTE(review): "contrib/mlir/compiler.hpp" is already included above and presumably
// declares this class - confirm whether both the include and this declaration are needed.
namespace ngraph
{
namespace runtime
{
namespace ngmlir
{
class MLIRCompiler;
}
}
}
namespace mlir
{
// Creates the dialect lowering pass for the given compiler instance.
// NOTE(review): a raw pointer is returned; ownership is not visible from this header -
// confirm who deletes the pass.
mlir::Pass* createDialectLoweringPass(ngraph::runtime::ngmlir::MLIRCompiler* compiler);
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "memory_manager.hpp"
#include <memory>
#include "ngraph/ngraph_visibility.hpp"
using namespace ngraph::runtime::ngmlir;
/// Call back to allocate memory for temps from JIT'ed code
// C-linkage entry point that forwards to MLIRMemMgr::allocate on the given manager.
// mem_mgr is dereferenced without a null check.
extern "C" NGRAPH_API void* __mlir_allocate(MLIRMemMgr* mem_mgr, size_t size)
{
return mem_mgr->allocate(size);
}
/// Allocates \p size bytes with malloc and records the pointer for freeAll().
/// The malloc result is returned unchecked, so it may be null.
void* MLIRMemMgr::allocate(size_t size)
{
    ptrList.push_back(malloc(size));
    return ptrList.back();
}
/// Frees every pointer handed out by allocate() and forgets them.
/// Safe to call repeatedly: after the call the manager tracks no pointers.
void MLIRMemMgr::freeAll()
{
    for (void* p : ptrList)
    {
        free(p);
    }
    // Drop the now-dangling pointers; without this a second freeAll() would
    // free the same blocks again.
    ptrList.clear();
}
...@@ -16,35 +16,32 @@ ...@@ -16,35 +16,32 @@
#pragma once #pragma once
#include "ngraph/op/op.hpp" #include <stdint.h>
#include "ngraph/util.hpp" #include <stdlib.h>
#include <vector>
namespace ngraph namespace ngraph
{ {
namespace runtime namespace runtime
{ {
namespace cpu namespace ngmlir
{ {
namespace op /// Memory manager for temporaries in MLIR compiled sub-graph
/// It handles call-backs from the code and returns pointer to allocated memory
/// Also, handles freeing up memory
class MLIRMemMgr
{ {
/// \brief LoopKernel represents graphs consisting public:
/// of arithmetic operations that can be executed in the same loop /// Allocates data for temporary tensor. Currently, it is called for each
class LoopKernel : public ngraph::op::Op /// temp tensor definition. Keeps track of each pointer and free them during cleanup.
{ // TODO: Use pre-allocation from framework memory manager
public: void* allocate(size_t size);
LoopKernel(const NodeVector& node_list,
const NodeVector& outputs, /// Frees all allocated pointers
const NodeVector& args); void freeAll();
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
const NodeVector& get_node_list() const { return m_node_list; } private:
const NodeVector& get_kernel_outputs() const { return m_output_nodes; } std::vector<void*> ptrList;
private: };
NodeVector m_node_list;
NodeVector m_output_nodes;
};
}
} }
} }
} }
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// Add new dialect ops lowerers to this file
// DECL_OP_CONV(OP) declares a class named <OP>Conversion deriving from
// mlir::DialectConversionPattern. Its constructor registers the pattern under
// mlir::OP::getOperationName() with the literal argument 1 and stores a reference to the
// owning DialectLoweringPass in the public member m_pass. Only rewrite() is declared here;
// its definition must be provided elsewhere.
#define DECL_OP_CONV(OP) \
class OP##Conversion : public mlir::DialectConversionPattern \
{\
public:\
explicit OP##Conversion(mlir::MLIRContext *context, DialectLoweringPass& pass)\
: mlir::DialectConversionPattern(mlir::OP::getOperationName(), 1, context),\
m_pass(pass)\
{} \
void rewrite(Operation *op, ArrayRef<Value *> operands, PatternRewriter &rewriter) const override; \
DialectLoweringPass& m_pass;\
};
// One conversion class per lowered dialect op.
DECL_OP_CONV(NGAddOp)
DECL_OP_CONV(NGDotOp)
DECL_OP_CONV(NGReturnOp)
// The helper macro is local to this file.
#undef DECL_OP_CONV
// List of all ops supported by MLIR backend end-to-end
// Includers are expected to define MLIR_OP(OP) before including this file.
// The fallback is function-like so that, when no definition was supplied, every
// entry below expands to nothing instead of leaving a stray "(Add)" token sequence.
#ifndef MLIR_OP
#define MLIR_OP(OP)
#endif
MLIR_OP(Add)
MLIR_OP(Dot)
// Add new supported ops here
// The macro is consumed by this file; includers must redefine it for the next include.
#undef MLIR_OP
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "mlir_subgraph_extraction.hpp"
#include "ngraph/assertion.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/experimental/compiled_kernel.hpp"
#include "ngraph/op/get_output_element.hpp"
using namespace ngraph::descriptor;
using namespace ngraph::op;
using namespace ngraph::pass;
#define TI(x) std::type_index(typeid(x))
/// Wraps the whole function body into a single CompiledKernel op.
///
/// The pass gives up (returns false, graph untouched) when any op is not
/// supported by the MLIR backend or when the sub-graph does not have exactly
/// one output node.
///
/// \param func Function to transform.
/// \return true if a CompiledKernel was created and wired in, false otherwise.
bool MLIRSubgraphExtractionPass::run_on_function(std::shared_ptr<Function> func)
{
    // Create a CompiledKernel for all the ops in the function, except Parameters and Results.
    NodeVector ck_ops;
    // Iterate by const reference: no shared_ptr copy is needed per visited op.
    for (const auto& op : func->get_ordered_ops())
    {
        // All ops must be supported by MLIR compiler
        if (!is_supported_mlir_op(op))
        {
            return false;
        }

        if (TI(Parameter) != TI(*op) && TI(Result) != TI(*op))
        {
            ck_ops.push_back(op);
        }
    }

    NodeVector ck_args;
    for (auto& param : func->get_parameters())
    {
        ck_args.push_back(param);
    }

    // No std::move here: the call already yields a temporary, and wrapping it in
    // std::move would only prevent copy elision.
    NodeVector ck_outputs = get_subgraph_outputs(ck_ops, {} /*exclusions*/);
    if (ck_outputs.size() != 1)
    {
        return false;
    }

    auto ck = std::make_shared<CompiledKernel>(ck_ops, ck_outputs, ck_args);

    // Connect CompiledKernel to output nodes by replacing the output descriptors of the output
    // nodes.
    for (size_t i = 0, end = ck_outputs.size(); i < end; ++i)
    {
        auto& output_descs = ck_outputs[i]->get_outputs();
        NGRAPH_CHECK(output_descs.size() == 1, "Unexpected multiple output descriptors");
        auto& out_desc = output_descs[0];

        // 'replace_output' invalidates iterator of the original container. Use a copy instead.
        const std::set<descriptor::Input*> input_descs = out_desc.get_inputs();

        for (descriptor::Input* in_desc : input_descs)
        {
            in_desc->replace_output(ck, i);
        }
    }

    return true;
}
#define TI(x) std::type_index(typeid(x))
/// Tells whether \p node can be handled by the MLIR backend.
///
/// Parameter and Result nodes are always accepted. Any other node must be
/// listed in m_supported_ops and satisfy the backend invariants checked below.
bool MLIRSubgraphExtractionPass::is_supported_mlir_op(std::shared_ptr<Node> node)
{
    const std::type_index node_type = TI(*node);

    if (node_type == TI(Parameter) || node_type == TI(Result))
    {
        return true;
    }

    // supported by backend ?
    if (m_supported_ops.count(node_type) == 0)
    {
        return false;
    }

    // check on invariants expected by MLIR backend
    if (node_type == TI(ngraph::op::Dot))
    {
        // Dot is 2D only: both inputs must have rank 2.
        return node->get_input_shape(0).size() == 2 && node->get_input_shape(1).size() == 2;
    }

    return true;
}
// Set of type indices of the nGraph ops supported end-to-end by the MLIR backend.
// It is populated by expanding each MLIR_OP(OP) entry of ops_supported.inc to
// TI(ngraph::op::OP), so new ops are added in that file, not here.
const std::set<std::type_index> MLIRSubgraphExtractionPass::m_supported_ops{
#define MLIR_OP(OP) TI(ngraph::op::OP),
#include "contrib/mlir/ops_supported.inc"
};
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/pass/pass.hpp"
namespace ngraph
{
namespace pass
{
/// This pass creates CompiledKernel ops enclosing sub-graphs that will be compiled and
/// executed by MLIR.
// TODO: WIP. Currently we only create a single CompiledKernel op for the whole function
// body.
class MLIRSubgraphExtractionPass : public ngraph::pass::FunctionPass
{
public:
MLIRSubgraphExtractionPass() {}
/// Runs the extraction on \p func. Overrides the FunctionPass entry point.
bool run_on_function(std::shared_ptr<Function> func) override;
/// Checks if an ngraph node is supported by MLIR backend
bool is_supported_mlir_op(std::shared_ptr<Node> node);
private:
// Type indices of the supported ops; shared by all instances of the pass.
static const std::set<std::type_index> m_supported_ops;
};
}
}
...@@ -170,6 +170,8 @@ set (SRC ...@@ -170,6 +170,8 @@ set (SRC
op/experimental/quantized_dot.hpp op/experimental/quantized_dot.hpp
op/experimental/quantized_dot_bias.cpp op/experimental/quantized_dot_bias.cpp
op/experimental/quantized_dot_bias.hpp op/experimental/quantized_dot_bias.hpp
op/experimental/compiled_kernel.cpp
op/experimental/compiled_kernel.hpp
op/experimental/transpose.cpp op/experimental/transpose.cpp
op/experimental/transpose.hpp op/experimental/transpose.hpp
op/experimental/layers/ctc_greedy_decoder.cpp op/experimental/layers/ctc_greedy_decoder.cpp
......
...@@ -14,15 +14,16 @@ ...@@ -14,15 +14,16 @@
// limitations under the License. // limitations under the License.
//***************************************************************************** //*****************************************************************************
#include "ngraph/runtime/cpu/op/loop_kernel.hpp" #include "ngraph/op/experimental/compiled_kernel.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp" #include "ngraph/log.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
shared_ptr<Node> shared_ptr<Node> ngraph::op::CompiledKernel::copy_with_new_args(const NodeVector& new_args) const
ngraph::runtime::cpu::op::LoopKernel::copy_with_new_args(const NodeVector& new_args) const
{ {
auto args = get_arguments(); auto args = get_arguments();
if (new_args.size() != args.size()) if (new_args.size() != args.size())
...@@ -56,13 +57,13 @@ shared_ptr<Node> ...@@ -56,13 +57,13 @@ shared_ptr<Node>
new_outputs.push_back(nm.at(o.get())); new_outputs.push_back(nm.at(o.get()));
} }
return std::make_shared<LoopKernel>(new_node_list, new_outputs, new_args); return std::make_shared<CompiledKernel>(new_node_list, new_outputs, new_args);
} }
ngraph::runtime::cpu::op::LoopKernel::LoopKernel(const NodeVector& node_list, ngraph::op::CompiledKernel::CompiledKernel(const NodeVector& node_list,
const NodeVector& outputs, const NodeVector& outputs,
const NodeVector& args) const NodeVector& args)
: Op("LoopKernel", check_single_output_args({args})) : Op("CompiledKernel", check_single_output_args({args}))
, m_node_list(node_list) , m_node_list(node_list)
, m_output_nodes(outputs) , m_output_nodes(outputs)
{ {
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/op/op.hpp"
#include "ngraph/util.hpp"
namespace ngraph
{
namespace op
{
/// \brief CompiledKernel represents a sub-graph that can be compiled and executed
/// independently.
///
/// This op can be used to delimit sub-graphs with special compilation requirements
/// within a function. For example, we currently use it to delimit sub-graphs that will be
/// independently compiled and executed by MLIR backend.
class CompiledKernel : public ngraph::op::Op
{
public:
/// \param node_list Nodes enclosed by the kernel (returned by get_node_list()).
/// \param outputs Output nodes of the kernel (returned by get_kernel_outputs()).
/// \param args Arguments of the op.
CompiledKernel(const NodeVector& node_list,
const NodeVector& outputs,
const NodeVector& args);
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
/// Nodes enclosed by this kernel.
const NodeVector& get_node_list() const { return m_node_list; }
/// Output nodes of this kernel.
const NodeVector& get_kernel_outputs() const { return m_output_nodes; }
private:
NodeVector m_node_list;
NodeVector m_output_nodes;
};
}
}
...@@ -104,7 +104,6 @@ set(SRC ...@@ -104,7 +104,6 @@ set(SRC
op/group_conv_bias.cpp op/group_conv_bias.cpp
op/halide_op.cpp op/halide_op.cpp
op/leaky_relu.cpp op/leaky_relu.cpp
op/loop_kernel.cpp
op/lstm.cpp op/lstm.cpp
op/matmul_bias.cpp op/matmul_bias.cpp
op/max_pool_with_indices.cpp op/max_pool_with_indices.cpp
...@@ -114,10 +113,10 @@ set(SRC ...@@ -114,10 +113,10 @@ set(SRC
op/update_slice.cpp op/update_slice.cpp
pass/cpu_assignment.cpp pass/cpu_assignment.cpp
pass/cpu_collapse_dims.cpp pass/cpu_collapse_dims.cpp
pass/cpu_compiled_kernel_fusion.cpp
pass/cpu_fusion.cpp pass/cpu_fusion.cpp
pass/cpu_horizontal_fusion.cpp pass/cpu_horizontal_fusion.cpp
pass/cpu_layout.cpp pass/cpu_layout.cpp
pass/cpu_loop_kernel_fusion.cpp
pass/cpu_mat_fusion.cpp pass/cpu_mat_fusion.cpp
pass/cpu_memory_assignment.cpp pass/cpu_memory_assignment.cpp
pass/cpu_memory_optimization.cpp pass/cpu_memory_optimization.cpp
...@@ -140,13 +139,20 @@ endif() ...@@ -140,13 +139,20 @@ endif()
if (NGRAPH_HALIDE) if (NGRAPH_HALIDE)
set(SRC set(SRC
${SRC} ${SRC}
builder/compiled_kernel.cpp
builder/halide_op.cpp builder/halide_op.cpp
builder/loop_kernel.cpp
builder/halide_generators.cpp builder/halide_generators.cpp
pass/halide_subgraph_extraction.cpp pass/halide_subgraph_extraction.cpp
) )
endif() endif()
if (NGRAPH_MLIR_ENABLE)
set(SRC
${SRC}
builder/mlir_cpu_compiled_kernel.cpp
)
endif()
if (NGRAPH_CPU_ENABLE) if (NGRAPH_CPU_ENABLE)
set(NGRAPH_CPU_DEBUGINFO_ENABLE 0 CACHE STRING "Enable debuginfo in the CPU backend") set(NGRAPH_CPU_DEBUGINFO_ENABLE 0 CACHE STRING "Enable debuginfo in the CPU backend")
...@@ -203,6 +209,7 @@ if (NGRAPH_CPU_ENABLE) ...@@ -203,6 +209,7 @@ if (NGRAPH_CPU_ENABLE)
target_link_libraries(cpu_backend PUBLIC codegen) target_link_libraries(cpu_backend PUBLIC codegen)
endif() endif()
target_include_directories(cpu_backend SYSTEM PUBLIC libmkldnn) target_include_directories(cpu_backend SYSTEM PUBLIC libmkldnn)
if (NOT APPLE AND NOT MSVS) if (NOT APPLE AND NOT MSVS)
# CPU backend uses third-party libraries like Eigen that might be linked in and # CPU backend uses third-party libraries like Eigen that might be linked in and
# exported by other DSOs as well. In the absence of versioning, this could lead to the # exported by other DSOs as well. In the absence of versioning, this could lead to the
...@@ -212,5 +219,23 @@ if (NGRAPH_CPU_ENABLE) ...@@ -212,5 +219,23 @@ if (NGRAPH_CPU_ENABLE)
set_property(TARGET cpu_backend APPEND PROPERTY LINK_FLAGS "-Wl,-Bsymbolic-functions -Wl,--exclude-libs=ALL") set_property(TARGET cpu_backend APPEND PROPERTY LINK_FLAGS "-Wl,-Bsymbolic-functions -Wl,--exclude-libs=ALL")
endif() endif()
if (NGRAPH_MLIR_ENABLE)
# TODO: can we get away without LLVM/MLIR include path.
# Currently mlir backend compiler.hpp include LLVM/MLIR files
get_directory_property(MLIR_LLVM_INCLUDEPATH
DIRECTORY ${NGRAPH_MLIR_SOURCE_DIR}
DEFINITION MLIR_LLVM_INCLUDEPATH)
message(STATUS "Building CPU backend with MLIR")
message(STATUS "MLIR INCLUDE DIRS: ${MLIR_INCLUDE_PATHS}")
message(STATUS "LLVM INCLUDE DIRS: ${MLIR_LLVM_INCLUDEPATH}")
add_dependencies(cpu_backend mlir_backend)
target_include_directories(cpu_backend PUBLIC ${MLIR_INCLUDE_PATHS} ${MLIR_LLVM_INCLUDEPATH})
target_link_libraries(cpu_backend PUBLIC mlir_backend)
# TODO: Get rid of the compile time def, and move all MLIR code to separate src files
# and add them to cpu_backend here instead.
target_compile_definitions(cpu_backend PRIVATE "NGRAPH_MLIR_ENABLE")
endif()
install(TARGETS cpu_backend DESTINATION ${NGRAPH_INSTALL_LIB}) install(TARGETS cpu_backend DESTINATION ${NGRAPH_INSTALL_LIB})
endif() endif()
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
#include "halide_generators.hpp" #include "halide_generators.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp" #include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/op/loop_kernel.hpp" #include "ngraph/runtime/cpu/op/compiled_kernel.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
...@@ -49,10 +49,10 @@ namespace ngraph ...@@ -49,10 +49,10 @@ namespace ngraph
namespace cpu namespace cpu
{ {
template <> template <>
void Builder::BUILDER_DECL(ngraph::runtime::cpu::op::LoopKernel) void Builder::BUILDER_DECL(ngraph::op::CompiledKernel)
{ {
const ngraph::runtime::cpu::op::LoopKernel* hs = const ngraph::op::CompiledKernel* hs =
static_cast<const ngraph::runtime::cpu::op::LoopKernel*>(node); static_cast<const ngraph::op::CompiledKernel*>(node);
const auto& generators = ngraph::runtime::cpu::halide::get_halide_generators(); const auto& generators = ngraph::runtime::cpu::halide::get_halide_generators();
...@@ -99,7 +99,7 @@ namespace ngraph ...@@ -99,7 +99,7 @@ namespace ngraph
//a subgraph //a subgraph
if (op->get_outputs().size() > 1) if (op->get_outputs().size() > 1)
{ {
throw ngraph_error("no multi-output ops in a LoopKernel"); throw ngraph_error("no multi-output ops in a CompiledKernel");
} }
halide_functions[op->get_output_tensor_ptr()->get_name()] = halide_functions[op->get_output_tensor_ptr()->get_name()] =
generators.at(TI(*op))(inputs); generators.at(TI(*op))(inputs);
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "contrib/mlir/compiler.hpp"
#include "ngraph/op/experimental/compiled_kernel.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
using namespace ngraph;
using namespace ngraph::op;
using namespace ngraph::runtime::cpu;
using namespace ngraph::runtime::ngmlir;
#define TI(x) type_index(typeid(x))
namespace ngraph
{
namespace runtime
{
namespace cpu
{
// Builder for CompiledKernel ops that are executed through the MLIR compiler.
//
// Appends one functor to the external function's functor list. When invoked, the
// functor resolves the kernel's input and output buffers, then compiles and runs the
// CompiledKernel with MLIRCompiler.
template <>
void Builder::BUILDER_DECL(CompiledKernel)
{
    auto& functors = external_function->get_functors();

    // Tensors haven't been allocated yet, so only the buffer index of each tensor is
    // recorded here; the functor turns the indices into addresses when it runs.
    // Inputs are recorded first, followed by outputs.
    std::vector<size_t> buffer_indices;
    buffer_indices.reserve(args.size() + out.size());
    for (const TensorViewWrapper& arg : args)
    {
        buffer_indices.push_back(external_function->get_buffer_index(arg.get_name()));
    }
    for (const TensorViewWrapper& result : out)
    {
        buffer_indices.push_back(external_function->get_buffer_index(result.get_name()));
    }

    // Create the functor that will be executed to compile and run this CompiledKernel.
    // Note that 'buffer_indices' must be captured by value since it's a local variable
    // that is gone by the time the functor runs. The lambda is constructed directly in
    // the functor list to avoid an extra copy of the captured vector.
    functors.emplace_back([node, buffer_indices](CPURuntimeContext* ctx,
                                                 CPUExecutionContext* /* ectx */) {
        // MLIR requires a list of type-erased pointers to the arguments. Tensors must
        // have been allocated at this point, so the indices can be resolved now.
        std::vector<void*> ptr_args;
        ptr_args.reserve(buffer_indices.size());
        for (size_t buffer_index : buffer_indices)
        {
            ptr_args.push_back(ctx->buffer_data[buffer_index]);
        }

        // Compile nodes within the CompiledKernel op.
        auto* compiled_kernel = static_cast<const CompiledKernel*>(node);
        MLIRCompiler mlir_compiler(compiled_kernel, ptr_args);
        // TODO: Decouple 'compile' and 'run' APIs. We want to be able to run the same
        // jitted code on different arguments.
        mlir_compiler.compile_and_run();
    });
}
}
}
}
#undef TI
...@@ -25,6 +25,10 @@ ...@@ -25,6 +25,10 @@
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp" #include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
#ifdef NGRAPH_MLIR_ENABLE
#include "contrib/mlir/compiler.hpp"
#endif
using namespace ngraph; using namespace ngraph;
using namespace std; using namespace std;
...@@ -90,6 +94,14 @@ shared_ptr<runtime::Executable> ...@@ -90,6 +94,14 @@ shared_ptr<runtime::Executable>
ngraph::pass::PassConfig& pass_config, ngraph::pass::PassConfig& pass_config,
bool performance_counters_enabled) bool performance_counters_enabled)
{ {
#ifdef NGRAPH_MLIR_ENABLE
if (std::getenv("NGRAPH_MLIR") != nullptr)
{
// Initialize MLIR compiler
ngmlir::MLIRCompiler::init_mlir();
}
#endif
shared_ptr<runtime::Executable> rc; shared_ptr<runtime::Executable> rc;
auto it = m_exec_map.find(func); auto it = m_exec_map.find(func);
if (it != m_exec_map.end()) if (it != m_exec_map.end())
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include "ngraph/op/divide.hpp" #include "ngraph/op/divide.hpp"
#include "ngraph/op/equal.hpp" #include "ngraph/op/equal.hpp"
#include "ngraph/op/exp.hpp" #include "ngraph/op/exp.hpp"
#include "ngraph/op/experimental/compiled_kernel.hpp"
#include "ngraph/op/floor.hpp" #include "ngraph/op/floor.hpp"
#include "ngraph/op/get_output_element.hpp" #include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/greater.hpp" #include "ngraph/op/greater.hpp"
...@@ -104,10 +105,13 @@ ...@@ -104,10 +105,13 @@
#include "ngraph/runtime/cpu/kernel/tanh.hpp" #include "ngraph/runtime/cpu/kernel/tanh.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp" #include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/halide_op.hpp" #include "ngraph/runtime/cpu/op/halide_op.hpp"
#include "ngraph/runtime/cpu/op/loop_kernel.hpp"
#include "ngraph/type/element_type.hpp" #include "ngraph/type/element_type.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
#ifdef NGRAPH_MLIR_ENABLE
#include "contrib/mlir/compiler.hpp"
#endif
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
...@@ -472,8 +476,8 @@ namespace ngraph ...@@ -472,8 +476,8 @@ namespace ngraph
{ {
static BuildOpMap build_dispatcher{ static BuildOpMap build_dispatcher{
{TI(ngraph::op::Parameter), &runtime::cpu::Builder::nop}, {TI(ngraph::op::Parameter), &runtime::cpu::Builder::nop},
{TI(ngraph::runtime::cpu::op::LoopKernel), {TI(ngraph::op::CompiledKernel),
&runtime::cpu::Builder::build<ngraph::runtime::cpu::op::LoopKernel>}, &runtime::cpu::Builder::build<ngraph::op::CompiledKernel>},
{TI(ngraph::runtime::cpu::op::HalideOp), {TI(ngraph::runtime::cpu::op::HalideOp),
&runtime::cpu::Builder::build<ngraph::runtime::cpu::op::HalideOp>}}; &runtime::cpu::Builder::build<ngraph::runtime::cpu::op::HalideOp>}};
......
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
#include "ngraph/op/erf.hpp" #include "ngraph/op/erf.hpp"
#include "ngraph/op/exp.hpp" #include "ngraph/op/exp.hpp"
#include "ngraph/op/experimental/batch_mat_mul.hpp" #include "ngraph/op/experimental/batch_mat_mul.hpp"
#include "ngraph/op/experimental/compiled_kernel.hpp"
#include "ngraph/op/experimental/generate_mask.hpp" #include "ngraph/op/experimental/generate_mask.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp" #include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_concat.hpp" #include "ngraph/op/experimental/quantized_concat.hpp"
...@@ -125,7 +126,6 @@ ...@@ -125,7 +126,6 @@
#include "ngraph/runtime/cpu/op/dropout.hpp" #include "ngraph/runtime/cpu/op/dropout.hpp"
#include "ngraph/runtime/cpu/op/group_conv_bias.hpp" #include "ngraph/runtime/cpu/op/group_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/leaky_relu.hpp" #include "ngraph/runtime/cpu/op/leaky_relu.hpp"
#include "ngraph/runtime/cpu/op/loop_kernel.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp" #include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp" #include "ngraph/runtime/cpu/op/matmul_bias.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp" #include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
...@@ -3883,7 +3883,7 @@ namespace ngraph ...@@ -3883,7 +3883,7 @@ namespace ngraph
std::function<std::string(const std::vector<std::string>&)>> std::function<std::string(const std::vector<std::string>&)>>
inline_emitters = initialize_inline_emitters(); inline_emitters = initialize_inline_emitters();
// GOEE doesn't see GOEs in subgraphs that are hidden inside LoopKernels // GOEE doesn't see GOEs in subgraphs that are hidden inside CompiledKernels
// we have to manually propagate the source output // we have to manually propagate the source output
static const ngraph::descriptor::Output* static const ngraph::descriptor::Output*
get_goe_input_output(ngraph::descriptor::Output* output) get_goe_input_output(ngraph::descriptor::Output* output)
...@@ -3898,22 +3898,22 @@ namespace ngraph ...@@ -3898,22 +3898,22 @@ namespace ngraph
} }
template <> template <>
void CPU_Emitter::EMITTER_DECL(ngraph::runtime::cpu::op::LoopKernel) void CPU_Emitter::EMITTER_DECL(ngraph::op::CompiledKernel)
{ {
std::unordered_map<const ngraph::descriptor::Output*, std::string> std::unordered_map<const ngraph::descriptor::Output*, std::string>
loop_symbol_table; loop_symbol_table;
// pre-fill symbol table with inputs // pre-fill symbol table with inputs
const ngraph::runtime::cpu::op::LoopKernel* clk = const ngraph::op::CompiledKernel* ck =
static_cast<const ngraph::runtime::cpu::op::LoopKernel*>(node); static_cast<const ngraph::op::CompiledKernel*>(node);
NodeVector output_nodes = clk->get_kernel_outputs(); NodeVector output_nodes = ck->get_kernel_outputs();
NodeVector node_list = clk->get_node_list(); NodeVector node_list = ck->get_node_list();
for (size_t i = 0; i < args.size(); i++) for (size_t i = 0; i < args.size(); i++)
{ {
std::string sname = std::string(args[i].get_name()) + "[i]"; std::string sname = std::string(args[i].get_name()) + "[i]";
auto entry = std::make_pair(&clk->get_inputs().at(i).get_output(), sname); auto entry = std::make_pair(&ck->get_inputs().at(i).get_output(), sname);
loop_symbol_table.insert(entry); loop_symbol_table.insert(entry);
} }
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include "ngraph/codegen/execution_engine.hpp" #include "ngraph/codegen/execution_engine.hpp"
#endif #endif
#include "contrib/mlir/pass/mlir_subgraph_extraction.hpp"
#include "ngraph/descriptor/input.hpp" #include "ngraph/descriptor/input.hpp"
#include "ngraph/descriptor/output.hpp" #include "ngraph/descriptor/output.hpp"
#include "ngraph/file_util.hpp" #include "ngraph/file_util.hpp"
...@@ -69,6 +70,7 @@ ...@@ -69,6 +70,7 @@
#include "ngraph/op/erf.hpp" #include "ngraph/op/erf.hpp"
#include "ngraph/op/exp.hpp" #include "ngraph/op/exp.hpp"
#include "ngraph/op/experimental/batch_mat_mul.hpp" #include "ngraph/op/experimental/batch_mat_mul.hpp"
#include "ngraph/op/experimental/compiled_kernel.hpp"
#include "ngraph/op/experimental/generate_mask.hpp" #include "ngraph/op/experimental/generate_mask.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp" #include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_concat.hpp" #include "ngraph/op/experimental/quantized_concat.hpp"
...@@ -171,7 +173,6 @@ ...@@ -171,7 +173,6 @@
#include "ngraph/runtime/cpu/op/dropout.hpp" #include "ngraph/runtime/cpu/op/dropout.hpp"
#include "ngraph/runtime/cpu/op/group_conv_bias.hpp" #include "ngraph/runtime/cpu/op/group_conv_bias.hpp"
#include "ngraph/runtime/cpu/op/leaky_relu.hpp" #include "ngraph/runtime/cpu/op/leaky_relu.hpp"
#include "ngraph/runtime/cpu/op/loop_kernel.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp" #include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp" #include "ngraph/runtime/cpu/op/matmul_bias.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp" #include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
...@@ -427,8 +428,7 @@ static const runtime::cpu::OpMap dispatcher{ ...@@ -427,8 +428,7 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::And), &runtime::cpu::CPU_Emitter::emit<op::And>}, {TI(ngraph::op::And), &runtime::cpu::CPU_Emitter::emit<op::And>},
{TI(ngraph::op::Or), &runtime::cpu::CPU_Emitter::emit<op::Or>}, {TI(ngraph::op::Or), &runtime::cpu::CPU_Emitter::emit<op::Or>},
{TI(ngraph::op::CPULeakyRelu), &runtime::cpu::CPU_Emitter::emit<op::CPULeakyRelu>}, {TI(ngraph::op::CPULeakyRelu), &runtime::cpu::CPU_Emitter::emit<op::CPULeakyRelu>},
{TI(ngraph::runtime::cpu::op::LoopKernel), {TI(ngraph::op::CompiledKernel), &runtime::cpu::CPU_Emitter::emit<op::CompiledKernel>},
&runtime::cpu::CPU_Emitter::emit<runtime::cpu::op::LoopKernel>},
{TI(ngraph::op::LRN), &runtime::cpu::CPU_Emitter::emit<ngraph::op::LRN>}, {TI(ngraph::op::LRN), &runtime::cpu::CPU_Emitter::emit<ngraph::op::LRN>},
{TI(ngraph::op::GenerateMask), &runtime::cpu::CPU_Emitter::emit<ngraph::op::GenerateMask>}, {TI(ngraph::op::GenerateMask), &runtime::cpu::CPU_Emitter::emit<ngraph::op::GenerateMask>},
{TI(ngraph::op::ConvolutionAdd), &runtime::cpu::CPU_Emitter::emit<op::ConvolutionAdd>}, {TI(ngraph::op::ConvolutionAdd), &runtime::cpu::CPU_Emitter::emit<op::ConvolutionAdd>},
...@@ -1181,7 +1181,12 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes( ...@@ -1181,7 +1181,12 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(
REGISTER_KNOBBED_PASS(RecurrentReshapeElimination, false, ngraph::pass); REGISTER_KNOBBED_PASS(RecurrentReshapeElimination, false, ngraph::pass);
REGISTER_KNOBBED_PASS_WITH_ARGS( REGISTER_KNOBBED_PASS_WITH_ARGS(
CoreFusion, true, ngraph::pass, ngraph::pass::FusionType::ALL_FUSIONS); CoreFusion, true, ngraph::pass, ngraph::pass::FusionType::ALL_FUSIONS);
REGISTER_KNOBBED_PASS(CPUFusion, true, runtime::cpu::pass);
// Disable CPUFusion if MLIR is enabled to preserve core ops.
if (std::getenv("NGRAPH_MLIR") == nullptr)
{
REGISTER_KNOBBED_PASS(CPUFusion, true, runtime::cpu::pass);
}
REGISTER_KNOBBED_PASS(CPUQuantFusion, true, runtime::cpu::pass); REGISTER_KNOBBED_PASS(CPUQuantFusion, true, runtime::cpu::pass);
REGISTER_KNOBBED_PASS(CPUHorizontalFusion, true, runtime::cpu::pass); REGISTER_KNOBBED_PASS(CPUHorizontalFusion, true, runtime::cpu::pass);
REGISTER_KNOBBED_PASS(CPUCollapseDims, true, runtime::cpu::pass); REGISTER_KNOBBED_PASS(CPUCollapseDims, true, runtime::cpu::pass);
...@@ -1189,6 +1194,13 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes( ...@@ -1189,6 +1194,13 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(
REGISTER_KNOBBED_PASS(HalideSubgraphExtraction, true, ngraph::runtime::cpu::pass); REGISTER_KNOBBED_PASS(HalideSubgraphExtraction, true, ngraph::runtime::cpu::pass);
#endif #endif
#ifdef NGRAPH_MLIR_ENABLE
if (std::getenv("NGRAPH_MLIR") != nullptr)
{
REGISTER_KNOBBED_PASS(MLIRSubgraphExtractionPass, /*enable by default*/ true, ngraph::pass);
}
#endif
NodeVector nv_cwi; // We dont need CPUWorkspaceInsertion to return list of indices NodeVector nv_cwi; // We dont need CPUWorkspaceInsertion to return list of indices
REGISTER_KNOBBED_PASS_WITH_ARGS(CPUWorkspaceInsertion, true, runtime::cpu::pass, nv_cwi, false); REGISTER_KNOBBED_PASS_WITH_ARGS(CPUWorkspaceInsertion, true, runtime::cpu::pass, nv_cwi, false);
REGISTER_KNOBBED_PASS_WITH_ARGS(CPUAssignment, true, runtime::cpu::pass, this); REGISTER_KNOBBED_PASS_WITH_ARGS(CPUAssignment, true, runtime::cpu::pass, this);
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "ngraph/log.hpp" #include "ngraph/log.hpp"
#include "ngraph/op/abs.hpp" #include "ngraph/op/abs.hpp"
#include "ngraph/op/add.hpp" #include "ngraph/op/add.hpp"
#include "ngraph/op/experimental/compiled_kernel.hpp"
#include "ngraph/op/get_output_element.hpp" #include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/maximum.hpp" #include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp" #include "ngraph/op/minimum.hpp"
...@@ -31,8 +32,7 @@ ...@@ -31,8 +32,7 @@
#include "ngraph/op/subtract.hpp" #include "ngraph/op/subtract.hpp"
#include "ngraph/op/util/binary_elementwise_arithmetic.hpp" #include "ngraph/op/util/binary_elementwise_arithmetic.hpp"
#include "ngraph/op/util/unary_elementwise_arithmetic.hpp" #include "ngraph/op/util/unary_elementwise_arithmetic.hpp"
#include "ngraph/runtime/cpu/op/loop_kernel.hpp" #include "ngraph/runtime/cpu/pass/cpu_compiled_kernel_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_loop_kernel_fusion.hpp"
#define TI(x) std::type_index(typeid(x)) #define TI(x) std::type_index(typeid(x))
...@@ -49,10 +49,10 @@ struct LKGraph ...@@ -49,10 +49,10 @@ struct LKGraph
NodeVector m_nodes; NodeVector m_nodes;
}; };
class LoopKernelCollector class CompiledKernelCollector
{ {
public: public:
LoopKernelCollector(std::shared_ptr<Function> f, size_t min_nodes_to_fuse) CompiledKernelCollector(std::shared_ptr<Function> f, size_t min_nodes_to_fuse)
{ {
for (auto n : f->get_ordered_ops()) for (auto n : f->get_ordered_ops())
{ {
...@@ -70,13 +70,13 @@ public: ...@@ -70,13 +70,13 @@ public:
else else
{ {
auto smallest_head = m_heads.at(arg_from_fusible_group); auto smallest_head = m_heads.at(arg_from_fusible_group);
auto& lkgraph = m_graphs.at(smallest_head); auto& ckgraph = m_graphs.at(smallest_head);
lkgraph.m_nodes.push_back(n); ckgraph.m_nodes.push_back(n);
for (auto arg : n->get_arguments()) for (auto arg : n->get_arguments())
{ {
if (is_leaf(arg)) if (is_leaf(arg))
{ {
lkgraph.m_inputs.push_back(arg); ckgraph.m_inputs.push_back(arg);
} }
} }
m_heads.insert(std::make_pair(n, smallest_head)); m_heads.insert(std::make_pair(n, smallest_head));
...@@ -88,18 +88,18 @@ public: ...@@ -88,18 +88,18 @@ public:
prune_graphs(min_nodes_to_fuse); prune_graphs(min_nodes_to_fuse);
} }
const std::vector<std::shared_ptr<runtime::cpu::op::LoopKernel>> get_loop_kernels() const const std::vector<std::shared_ptr<op::CompiledKernel>> get_compiled_kernels() const
{ {
std::vector<std::shared_ptr<runtime::cpu::op::LoopKernel>> lks; std::vector<std::shared_ptr<op::CompiledKernel>> cks;
for (auto e : m_graphs) for (auto e : m_graphs)
{ {
auto& lkg = e.second; auto& ckg = e.second;
NodeVector member_outputs = ngraph::get_subgraph_outputs(lkg.m_nodes, NodeVector{}); NodeVector member_outputs = ngraph::get_subgraph_outputs(ckg.m_nodes, NodeVector{});
auto lk = std::make_shared<runtime::cpu::op::LoopKernel>( auto ck =
lkg.m_nodes, member_outputs, lkg.m_inputs); std::make_shared<op::CompiledKernel>(ckg.m_nodes, member_outputs, ckg.m_inputs);
lks.push_back(lk); cks.push_back(ck);
} }
return lks; return cks;
} }
private: private:
...@@ -172,20 +172,20 @@ private: ...@@ -172,20 +172,20 @@ private:
std::unordered_map<std::shared_ptr<Node>, std::shared_ptr<Node>> m_heads; std::unordered_map<std::shared_ptr<Node>, std::shared_ptr<Node>> m_heads;
}; };
bool ngraph::runtime::cpu::pass::CPULoopKernelFusion::run_on_function( bool ngraph::runtime::cpu::pass::CPUCompiledKernelFusion::run_on_function(
std::shared_ptr<ngraph::Function> function) std::shared_ptr<ngraph::Function> function)
{ {
LoopKernelCollector lkc(function, m_min_kernel_size); CompiledKernelCollector ckc(function, m_min_kernel_size);
auto loop_kernels = lkc.get_loop_kernels(); auto compiled_kernels = ckc.get_compiled_kernels();
for (auto lk : loop_kernels) for (auto ck : compiled_kernels)
{ {
auto outputs = lk->get_kernel_outputs(); auto outputs = ck->get_kernel_outputs();
std::set<std::shared_ptr<Node>> lk_nodes_set(lk->get_node_list().begin(), std::set<std::shared_ptr<Node>> ck_nodes_set(ck->get_node_list().begin(),
lk->get_node_list().end()); ck->get_node_list().end());
for (size_t i = 0; i < outputs.size(); i++) for (size_t i = 0; i < outputs.size(); i++)
{ {
auto ith_goe = std::make_shared<ngraph::op::GetOutputElement>(lk, i); auto ith_goe = std::make_shared<ngraph::op::GetOutputElement>(ck, i);
auto& ith_output = ith_goe->get_outputs().at(0); auto& ith_output = ith_goe->get_outputs().at(0);
if (outputs.at(i)->get_outputs().size() > 1) if (outputs.at(i)->get_outputs().size() > 1)
...@@ -203,8 +203,8 @@ bool ngraph::runtime::cpu::pass::CPULoopKernelFusion::run_on_function( ...@@ -203,8 +203,8 @@ bool ngraph::runtime::cpu::pass::CPULoopKernelFusion::run_on_function(
for (auto input : inputs_copy) for (auto input : inputs_copy)
{ {
// this user is NOT internal to this loop kernel // this user is NOT internal to this loop kernel
// so it needs to be replaced with corresponding lk's GOE // so it needs to be replaced with corresponding ck's GOE
if (lk_nodes_set.count(input->get_node()) == 0) if (ck_nodes_set.count(input->get_node()) == 0)
{ {
input->replace_output(ith_output); input->replace_output(ith_output);
} }
...@@ -212,5 +212,5 @@ bool ngraph::runtime::cpu::pass::CPULoopKernelFusion::run_on_function( ...@@ -212,5 +212,5 @@ bool ngraph::runtime::cpu::pass::CPULoopKernelFusion::run_on_function(
} }
} }
return !loop_kernels.empty(); return !compiled_kernels.empty();
} }
...@@ -26,10 +26,10 @@ namespace ngraph ...@@ -26,10 +26,10 @@ namespace ngraph
{ {
namespace pass namespace pass
{ {
class CPULoopKernelFusion : public ngraph::pass::FunctionPass class CPUCompiledKernelFusion : public ngraph::pass::FunctionPass
{ {
public: public:
CPULoopKernelFusion(size_t min_kernel_size = 2) CPUCompiledKernelFusion(size_t min_kernel_size = 2)
: FunctionPass() : FunctionPass()
, m_min_kernel_size(min_kernel_size) , m_min_kernel_size(min_kernel_size)
{ {
......
...@@ -52,6 +52,7 @@ ...@@ -52,6 +52,7 @@
#include "ngraph/op/erf.hpp" #include "ngraph/op/erf.hpp"
#include "ngraph/op/exp.hpp" #include "ngraph/op/exp.hpp"
#include "ngraph/op/experimental/batch_mat_mul.hpp" #include "ngraph/op/experimental/batch_mat_mul.hpp"
#include "ngraph/op/experimental/compiled_kernel.hpp"
#include "ngraph/op/experimental/dyn_broadcast.hpp" #include "ngraph/op/experimental/dyn_broadcast.hpp"
#include "ngraph/op/experimental/dyn_pad.hpp" #include "ngraph/op/experimental/dyn_pad.hpp"
#include "ngraph/op/experimental/dyn_reshape.hpp" #include "ngraph/op/experimental/dyn_reshape.hpp"
......
...@@ -57,6 +57,36 @@ NGRAPH_TEST(${BACKEND_NAME}, add) ...@@ -57,6 +57,36 @@ NGRAPH_TEST(${BACKEND_NAME}, add)
(test::NDArray<float, 2>({{6, 8}, {10, 12}})).get_vector())); (test::NDArray<float, 2>({{6, 8}, {10, 12}})).get_vector()));
} }
// Dot followed by an elementwise Add: result = (lhs . rhs) + addend,
// with a 2x3 lhs, a 3x3 rhs and a 2x3 addend.
NGRAPH_TEST(${BACKEND_NAME}, dot_add)
{
    const Shape lhs_shape{2, 3};
    const Shape rhs_shape{3, 3};
    const Shape result_shape{2, 3};

    // Graph under test: Add(Dot(lhs, rhs), addend)
    auto lhs = make_shared<op::Parameter>(element::f32, lhs_shape);
    auto rhs = make_shared<op::Parameter>(element::f32, rhs_shape);
    auto product = make_shared<op::Dot>(lhs, rhs);
    auto addend = make_shared<op::Parameter>(element::f32, result_shape);
    auto total = make_shared<op::Add>(product, addend);
    auto func = make_shared<Function>(total, ParameterVector{lhs, rhs, addend});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Backend tensors for the three inputs and the single output
    auto lhs_tensor = backend->create_tensor(element::f32, lhs_shape);
    auto rhs_tensor = backend->create_tensor(element::f32, rhs_shape);
    auto addend_tensor = backend->create_tensor(element::f32, result_shape);
    auto output_tensor = backend->create_tensor(element::f32, result_shape);

    copy_data(lhs_tensor, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
    copy_data(rhs_tensor, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f});
    copy_data(addend_tensor, vector<float>{5.f, 4.f, 3.f, 2.f, 1.f, 0.f});

    auto executable = backend->compile(func);
    executable->call_with_validate({output_tensor}, {lhs_tensor, rhs_tensor, addend_tensor});

    // lhs . rhs is {30, 36, 42, 66, 81, 96}; adding {5, 4, 3, 2, 1, 0} gives the values below.
    const vector<float> expected{35.f, 40.f, 45.f, 68.f, 82.f, 96.f};
    EXPECT_TRUE(test::all_close_f(read_vector<float>(output_tensor), expected));
}
NGRAPH_TEST(${BACKEND_NAME}, add_overload) NGRAPH_TEST(${BACKEND_NAME}, add_overload)
{ {
Shape shape{2, 2}; Shape shape{2, 2};
......
...@@ -421,6 +421,32 @@ NGRAPH_TEST(${BACKEND_NAME}, dot2d) ...@@ -421,6 +421,32 @@ NGRAPH_TEST(${BACKEND_NAME}, dot2d)
EXPECT_TRUE(test::all_close_f((vector<float>{19, 22, 43, 50}), read_vector<float>(result))); EXPECT_TRUE(test::all_close_f((vector<float>{19, 22, 43, 50}), read_vector<float>(result)));
} }
// 2D Dot where the left operand is not square: a 2x3 matrix times a 3x3 matrix.
NGRAPH_TEST(${BACKEND_NAME}, dot2d_non_square)
{
    const Shape lhs_shape{2, 3};
    const Shape rhs_shape{3, 3};
    const Shape result_shape{2, 3};

    // Graph under test: Dot(lhs, rhs)
    auto lhs = make_shared<op::Parameter>(element::f32, lhs_shape);
    auto rhs = make_shared<op::Parameter>(element::f32, rhs_shape);
    auto product = make_shared<op::Dot>(lhs, rhs);
    auto func = make_shared<Function>(product, ParameterVector{lhs, rhs});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Backend tensors for the two inputs and the single output
    auto lhs_tensor = backend->create_tensor(element::f32, lhs_shape);
    auto rhs_tensor = backend->create_tensor(element::f32, rhs_shape);
    auto output_tensor = backend->create_tensor(element::f32, result_shape);

    copy_data(lhs_tensor, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f});
    copy_data(rhs_tensor, vector<float>{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f});

    auto executable = backend->compile(func);
    executable->call_with_validate({output_tensor}, {lhs_tensor, rhs_tensor});

    // Row i of the result is row i of lhs multiplied by rhs.
    const vector<float> expected{30.f, 36.f, 42.f, 66.f, 81.f, 96.f};
    EXPECT_TRUE(test::all_close_f(read_vector<float>(output_tensor), expected));
}
// //
// Here is what numpy does: // Here is what numpy does:
// //
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment