Commit 9348da6d authored by Adam Procter, committed by Scott Cyphers

CPU backend support on macOS (#492)

* Allow caching of external dependencies (everything but TBB, which I can't figure out yet)

* Tweaks to get CPU backend building and linking (but not actually working) on macOS

* Tweaks to codegen/CPU backend to enable macOS (no builtin headers yet)

* Re-enable header caching stuff on macOS

* Suppress unsupported compiler warning for Apple Clang

* Fix includes for macOS compatibility
parent e57bb5c9
@@ -26,20 +26,24 @@ endif()
project (ngraph)
-SET( GCC_MIN_VERSION 4.8)
-SET( CLANG_MIN_VERSION 3.8)
+SET(GCC_MIN_VERSION 4.8)
+SET(CLANG_MIN_VERSION 3.8)
+SET(APPLE_CLANG_MIN_VERSION 9.0)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS GCC_MIN_VERSION)
message(FATAL_ERROR "GCC version must be at least ${GCC_MIN_VERSION}!")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
# require at least clang 3.8
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS CLANG_MIN_VERSION)
message(FATAL_ERROR "Clang version must be at least ${CLANG_MIN_VERSION}!")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS APPLE_CLANG_MIN_VERSION)
message(FATAL_ERROR "Apple Clang version must be at least ${APPLE_CLANG_MIN_VERSION}!")
endif()
else()
message(WARNING "You are using an unsupported compiler! Compilation has only been tested with Clang ( ${CLANG_MIN_VERSION} and up) and GCC ( ${GCC_MIN_VERSION} and up). ")
message(WARNING "You are using an unsupported compiler. Compilation has only been tested with Clang (${CLANG_MIN_VERSION} and up), Apple Clang (${APPLE_CLANG_MIN_VERSION} and up), and GCC (${GCC_MIN_VERSION} and up).")
endif()
if($ENV{NGRAPH_USE_PREBUILT_LLVM})
......
@@ -16,8 +16,7 @@
include(ExternalProject)
-if((NGRAPH_CPU_ENABLE OR NGRAPH_GPU_ENABLE) AND (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") AND
-(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows"))
+if((NGRAPH_CPU_ENABLE OR NGRAPH_GPU_ENABLE) AND (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows"))
set(CMAKE_DISABLE_SOURCE_CHANGES ON)
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
@@ -97,82 +96,163 @@ if((NGRAPH_CPU_ENABLE OR NGRAPH_GPU_ENABLE) AND (NOT ${CMAKE_SYSTEM_NAME} MATCHE
set(LLVM_INCLUDE_DIR "${EXTERNAL_PROJECTS_ROOT}/llvm/include") # used by other external projects in current scope
set(LLVM_LIB_DIR "${EXTERNAL_PROJECTS_ROOT}/llvm/lib" PARENT_SCOPE)
if(APPLE)
set(LLVM_LINK_LIBS
clangTooling
clangFrontendTool
clangFrontend
clangDriver
clangSerialization
clangCodeGen
clangParse
clangSema
clangStaticAnalyzerFrontend
clangStaticAnalyzerCheckers
clangStaticAnalyzerCore
clangAnalysis
clangARCMigrate
clangRewriteFrontend
clangEdit
clangAST
clangLex
clangBasic
LLVMLTO
LLVMPasses
LLVMObjCARCOpts
LLVMSymbolize
LLVMDebugInfoPDB
LLVMDebugInfoDWARF
LLVMMIRParser
LLVMCoverage
LLVMTableGen
LLVMDlltoolDriver
LLVMOrcJIT
LLVMObjectYAML
LLVMLibDriver
LLVMOption
LLVMX86Disassembler
LLVMX86AsmParser
LLVMX86CodeGen
LLVMGlobalISel
LLVMSelectionDAG
LLVMAsmPrinter
LLVMDebugInfoCodeView
LLVMDebugInfoMSF
LLVMX86Desc
LLVMMCDisassembler
LLVMX86Info
LLVMX86AsmPrinter
LLVMX86Utils
LLVMMCJIT
LLVMLineEditor
LLVMInterpreter
LLVMExecutionEngine
LLVMRuntimeDyld
LLVMCodeGen
LLVMTarget
LLVMCoroutines
LLVMipo
LLVMInstrumentation
LLVMVectorize
LLVMScalarOpts
LLVMLinker
LLVMIRReader
LLVMAsmParser
LLVMInstCombine
LLVMTransformUtils
LLVMBitWriter
LLVMAnalysis
LLVMProfileData
LLVMObject
LLVMMCParser
LLVMMC
LLVMBitReader
LLVMCore
LLVMBinaryFormat
LLVMSupport
LLVMDemangle
curses
z
m
PARENT_SCOPE)
else()
set(LLVM_LINK_LIBS
clangTooling
clangFrontendTool
clangFrontend
clangDriver
clangSerialization
clangCodeGen
clangParse
clangSema
clangStaticAnalyzerFrontend
clangStaticAnalyzerCheckers
clangStaticAnalyzerCore
clangAnalysis
clangARCMigrate
clangRewriteFrontend
clangEdit
clangAST
clangLex
clangBasic
LLVMLTO
LLVMPasses
LLVMObjCARCOpts
LLVMSymbolize
LLVMDebugInfoPDB
LLVMDebugInfoDWARF
LLVMMIRParser
LLVMCoverage
LLVMTableGen
LLVMDlltoolDriver
LLVMOrcJIT
LLVMObjectYAML
LLVMLibDriver
LLVMOption
LLVMX86Disassembler
LLVMX86AsmParser
LLVMX86CodeGen
LLVMGlobalISel
LLVMSelectionDAG
LLVMAsmPrinter
LLVMDebugInfoCodeView
LLVMDebugInfoMSF
LLVMX86Desc
LLVMMCDisassembler
LLVMX86Info
LLVMX86AsmPrinter
LLVMX86Utils
LLVMMCJIT
LLVMLineEditor
LLVMInterpreter
LLVMExecutionEngine
LLVMRuntimeDyld
LLVMCodeGen
LLVMTarget
LLVMCoroutines
LLVMipo
LLVMInstrumentation
LLVMVectorize
LLVMScalarOpts
LLVMLinker
LLVMIRReader
LLVMAsmParser
LLVMInstCombine
LLVMTransformUtils
LLVMBitWriter
LLVMAnalysis
LLVMProfileData
LLVMObject
LLVMMCParser
LLVMMC
LLVMBitReader
LLVMCore
LLVMBinaryFormat
LLVMSupport
LLVMDemangle
tinfo
z
m
PARENT_SCOPE)
endif()
endif()
@@ -20,7 +20,7 @@ include(ExternalProject)
# Fetch and install MKL-DNN
#----------------------------------------------------------------------------------------------------------
-if(NGRAPH_CPU_ENABLE AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+if(NGRAPH_CPU_ENABLE)
set(MKLDNN_GIT_REPO_URL https://github.com/intel/mkl-dnn)
set(MKLDNN_GIT_TAG "3e1f8f5")
......
@@ -22,13 +22,11 @@ if(NGRAPH_TBB_ENABLE)
set(TBB_GIT_REPO_URL https://github.com/01org/tbb)
set(TBB_GIT_TAG "tbb_2018")
-if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
configure_file(${CMAKE_SOURCE_DIR}/cmake/tbb_fetch.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/tbb/CMakeLists.txt)
execute_process(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/tbb")
execute_process(COMMAND "${CMAKE_COMMAND}" --build .
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/tbb")
set(TBB_ROOT ${CMAKE_CURRENT_BINARY_DIR}/tbb/tbb-src PARENT_SCOPE)
-endif()
endif()
@@ -329,35 +329,33 @@ install(DIRECTORY
FILES_MATCHING PATTERN "*.hpp"
)
-if (NOT APPLE)
install(DIRECTORY
${MKLDNN_LIB_DIR}/
DESTINATION "${NGRAPH_INSTALL_LIB}"
)
if (NGRAPH_TBB_ENABLE)
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tbb_build/tbb_release/
DESTINATION ${NGRAPH_INSTALL_LIB}
FILES_MATCHING PATTERN "libtbb.so.*"
)
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tbb_build/tbb_debug/
DESTINATION ${NGRAPH_INSTALL_LIB}
FILES_MATCHING PATTERN "libtbb_debug.so.*"
)
endif()
if (NGRAPH_ARGON_ENABLE)
install(DIRECTORY ${ARGON_TRANSFORMER_LIB_DIR}/
DESTINATION ${NGRAPH_INSTALL_LIB}
FILES_MATCHING PATTERN "*.so"
)
install(DIRECTORY ${ARGON_TRANSFORMER_INCLUDE_DIR}/
DESTINATION ${NGRAPH_INSTALL_INCLUDE}
FILES_MATCHING PATTERN "*.hpp"
)
install(DIRECTORY ${ARGON_TRANSFORMER_INCLUDE_DIR}/
DESTINATION ${NGRAPH_INSTALL_INCLUDE}
FILES_MATCHING PATTERN "*.h"
)
endif()
@@ -151,6 +151,8 @@ void codegen::StaticCompiler::initialize()
// Prepare DiagnosticEngine
IntrusiveRefCntPtr<DiagnosticOptions> diag_options = new DiagnosticOptions();
diag_options->ErrorLimit = 20;
+diag_options->ShowCarets = false;
+diag_options->ShowFixits = false;
IntrusiveRefCntPtr<DiagnosticIDs> diag_id(new DiagnosticIDs());
DiagnosticsEngine diag_engine(diag_id, &*diag_options);
@@ -351,6 +353,15 @@ void codegen::StaticCompiler::configure_search_path()
{
#ifdef USE_BUILTIN
load_headers_from_resource();
+#elif defined(__APPLE__)
+add_header_search_path(EIGEN_HEADERS_PATH);
+add_header_search_path(MKLDNN_HEADERS_PATH);
+add_header_search_path(TBB_HEADERS_PATH);
+add_header_search_path(NGRAPH_HEADERS_PATH);
+add_header_search_path(INSTALLED_HEADERS_PATH);
+add_header_search_path(CLANG_BUILTIN_HEADERS_PATH);
+add_header_search_path("/Library/Developer/CommandLineTools/usr/include/c++/v1");
#else
// Add base toolchain-supplied header paths
// Ideally one would use the Linux toolchain definition in clang/lib/Driver/ToolChains.h
......
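The *_HEADERS_PATH tokens above are preprocessor definitions injected at build time that point at the external dependencies' include directories. add_header_search_path presumably forwards to clang's header-search options; a minimal sketch under that assumption (the helper name and wiring are hypothetical, and the real code in compiler.cpp may differ):

    #include <string>
    #include "clang/Lex/HeaderSearchOptions.h"

    // Hypothetical helper: register 'path' as a system include directory on the
    // compiler instance's header-search options.
    static void add_header_search_path_sketch(clang::HeaderSearchOptions& hso,
                                              const std::string& path)
    {
        // AddPath(Path, Group, IsFramework, IgnoreSysRoot)
        hso.AddPath(path, clang::frontend::System, false, false);
    }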
@@ -78,6 +78,13 @@ void codegen::ExecutionEngine::finalize()
void* codegen::ExecutionEngine::get_pointer_to_named_function(const std::string& func_name)
{
+// For whatever reason, macOS seems to expect that we prefix this with an underscore.
+#ifdef __APPLE__
+std::string fname = "_" + func_name;
+#else
+const std::string& fname = func_name;
+#endif
// set AbortOnFailure flag to false so call fails by returning nullptr
-return m_execution_engine->getPointerToNamedFunction(func_name, false);
+return m_execution_engine->getPointerToNamedFunction(fname, false);
}
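Mach-O records the C symbol for a function foo as _foo, which is why the macOS branch above prepends an underscore while callers keep passing the unprefixed name. A usage sketch, assuming an engine that has already had a compiled module added and finalized (the kernel name and cast signature are hypothetical):

    void* addr = engine.get_pointer_to_named_function("my_kernel");
    if (addr != nullptr) // the lookup reports failure by returning nullptr
    {
        auto kernel = reinterpret_cast<void (*)()>(addr);
        kernel();
    }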
@@ -423,10 +423,16 @@ void runtime::cpu::CPU_Emitter::EMITTER_DECL(EmitAbs)
writer << emit_array1d(out[0]) << " =\n";
writer << "Eigen::abs(" << emit_array1d(args[0]) << ");\n";
#else
+// Some C++ implementations don't like it when we call std::abs on unsigned types, so we will
+// avoid doing so here.
+auto& result_element_type = out[0].get_element_type();
writer << "#pragma omp parallel for\n";
writer << "for (size_t i = 0; i < " << out[0].get_size() << "; i++)\n";
writer << "{\n";
writer << " " << out[0].get_name() << "[i] = std::abs(" << args[0].get_name() << "[i]);\n";
writer << " " << out[0].get_name()
<< "[i] = " << (result_element_type.is_signed() ? "std::abs" : "") << "("
<< args[0].get_name() << "[i]);\n";
writer << "}\n";
#endif
writer.indent--;
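With this change the generated kernel for a signed element type reads as sketched below; for an unsigned type the ternary drops std::abs and the parentheses turn the statement into a plain copy (buffer names and the trip count are illustrative):

    #pragma omp parallel for
    for (size_t i = 0; i < 8; i++)
    {
        out0[i] = std::abs(arg0[i]); // unsigned case emits: out0[i] = (arg0[i]);
    }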
......
@@ -831,7 +831,11 @@ using namespace ngraph::runtime;
m_execution_engine->add_module(codegen_module);
m_execution_engine->finalize();
m_compiled_function = m_execution_engine->find_function<EntryPoint_t>(function_name);
-assert(m_compiled_function);
+if (m_compiled_function == nullptr)
+{
+throw runtime_error("could not find compiled function");
+}
m_is_compiled = true;
if (m_release_function)
......
@@ -18,6 +18,7 @@
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include <mkldnn.hpp>
......
@@ -14,6 +14,7 @@
* limitations under the License.
*******************************************************************************/
#include <string>
#include <typeindex>
#include <typeinfo>
#include <unordered_set>
......
@@ -16,6 +16,7 @@
#pragma once
#include <string>
#include <typeindex>
#include <typeinfo>
#include <unordered_set>
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "cpu_fusion.hpp"
#include <algorithm>
#include <iostream>
#include <numeric>
#include <unordered_set>
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/ops/add.hpp"
#include "ngraph/ops/add.hpp"
#include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/divide.hpp"
#include "ngraph/ops/dot.hpp"
#include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/sqrt.hpp"
#include "ngraph/ops/subtract.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/pattern/matcher.hpp"
#include "ngraph/pattern/op/any.hpp"
#include "ngraph/pattern/op/label.hpp"
#include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
static bool init_cblas_arg(std::shared_ptr<ngraph::Node> reshape,
std::shared_ptr<ngraph::Node> arg,
bool& transpose_w,
ngraph::Shape& shape_w)
{
auto r_w = std::dynamic_pointer_cast<ngraph::op::Reshape>(reshape);
if (!r_w)
{
return true; // nothing to do; 'reshape' isn't actually a Reshape
}
if (r_w->get_shape().size() != 2)
{
NGRAPH_DEBUG << "Reshape for " << reshape->get_name() << " doesn't reshape into matrix"
<< ngraph::vector_to_string(r_w->get_shape());
return false;
}
auto io = r_w->get_input_order();
if (r_w->get_shape().size() != arg->get_shape().size()) //reshape
{
ngraph::AxisVector dio(io.size());
std::iota(begin(dio), end(dio), 0);
if (io != dio) //we can't reshape and transpose at the same time
{
NGRAPH_DEBUG << "Reshape for " << reshape->get_name() << " is not in default order "
<< ngraph::vector_to_string(io);
NGRAPH_DEBUG << "r_w shape = " << ngraph::vector_to_string(r_w->get_shape());
NGRAPH_DEBUG << "arg shape = " << ngraph::vector_to_string(arg->get_shape());
return false;
}
shape_w = r_w->get_shape();
}
else
{
if (io == ngraph::AxisVector{1, 0})
{
transpose_w = true;
}
//otherwise no-op reshape
}
return true;
}
template <typename T>
static std::vector<T> apply_permutation(std::vector<T> input, ngraph::AxisVector order)
{
if (input.size() != order.size())
{
throw "input and order sizes don't match!";
}
std::vector<T> output(input.size());
for (size_t i = 0; i < order.size(); i++)
{
output[i] = input.at(order.at(i));
}
return output;
}
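A quick worked example of apply_permutation: output[i] = input[order[i]], so order {2, 0, 1} applied to {10, 20, 30} yields {30, 10, 20} (values hypothetical):

    std::vector<int> permuted =
        apply_permutation(std::vector<int>{10, 20, 30}, ngraph::AxisVector{2, 0, 1});
    // permuted == {30, 10, 20}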
void ngraph::runtime::cpu::pass::CPUFusion::construct_gemm_pattern()
{
Shape shape_w{2, 4};
Shape shape_x{4, 1};
Shape shape_b{1};
Shape shape_dot{2, 1};
auto W = std::make_shared<pattern::op::Label>(element::f32, shape_w);
auto x = std::make_shared<pattern::op::Label>(element::f32, shape_x);
auto reshape_pred = [](std::shared_ptr<Node> n) {
return static_cast<bool>(std::dynamic_pointer_cast<op::Reshape>(n));
};
auto skip_w = std::make_shared<pattern::op::Any>(W, reshape_pred);
auto skip_x = std::make_shared<pattern::op::Any>(x, reshape_pred);
auto pdot = std::make_shared<op::Dot>(skip_w, skip_x);
auto b = std::make_shared<pattern::op::Label>(element::f32, shape_b);
auto pbroadcast = std::make_shared<op::Broadcast>(b, shape_dot, AxisSet{0});
auto padd = pdot + pbroadcast;
ngraph::pattern::gr_callback_fn callback = [W, x, b](pattern::Matcher& m) {
NGRAPH_DEBUG << "In callback for construct_gemm_pattern against node = "
<< m.match_root()->get_name();
auto pattern_map = m.get_pattern_map();
std::shared_ptr<Node> nn = nullptr;
auto mpattern = m.match_root();
if (mpattern->get_element_type() != element::f32)
{
NGRAPH_DEBUG << "mpattern = " << mpattern->get_name() << " type is not float!";
return nn;
}
auto dot = mpattern->get_input_op(0);
if (dot->get_shape().size() != 2)
{
NGRAPH_DEBUG << "dot = " << dot->get_name() << " shape is not equal to 2!";
return nn;
}
bool transpose_w = false;
Shape shape_arg0{pattern_map[W]->get_shape()};
if (!init_cblas_arg(dot->get_input_op(0), pattern_map[W], transpose_w, shape_arg0))
{
return nn;
}
bool transpose_x = false;
Shape shape_arg1{pattern_map[x]->get_shape()};
if (!init_cblas_arg(dot->get_input_op(1), pattern_map[x], transpose_x, shape_arg1))
{
return nn;
}
auto cg = std::shared_ptr<Node>(new op::MatmulBias(pattern_map[W],
pattern_map[x],
mpattern->get_input_op(1),
shape_arg0,
shape_arg1,
transpose_w,
transpose_x));
return cg;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(padd, callback);
this->add_matcher(m);
}
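The rewrite replaces add(dot(W, x), broadcast(b)) with a single MatmulBias node. Semantically the fused op is an ordinary GEMM plus a broadcast bias; a plain-loop sketch of those semantics for the pattern's shapes (W: 2x4, x: 4x1, b broadcast along axis 0), not the emitter's actual code:

    // C[i][j] = sum over k of W[i][k] * x[k][j], plus the bias b[j]
    for (size_t i = 0; i < 2; i++)
    {
        for (size_t j = 0; j < 1; j++)
        {
            float acc = 0.0f;
            for (size_t k = 0; k < 4; k++)
            {
                acc += W[i][k] * x[k][j];
            }
            C[i][j] = acc + b[j];
        }
    }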
void ngraph::runtime::cpu::pass::CPUFusion::construct_fprop_bn()
{
// construct variance
auto N = op::Constant::create(element::f32, Shape{3}, {2, 2, 2});
auto input = std::make_shared<pattern::op::Label>(element::f32, Shape{2, 3});
auto input_sq = std::make_shared<op::Multiply>(input, input);
auto sum_input = std::make_shared<op::Sum>(input, AxisSet{0});
auto square_summed_input = std::make_shared<op::Multiply>(sum_input, sum_input);
auto sum_squared_input = std::make_shared<op::Sum>(input_sq, AxisSet{0});
auto avg_input_sum_sq = std::make_shared<op::Divide>(square_summed_input, N);
auto xmu = std::make_shared<op::Subtract>(sum_squared_input, avg_input_sum_sq);
auto variance = std::make_shared<op::Divide>(xmu, N);
auto variance_label = std::make_shared<pattern::op::Label>(variance, nullptr, Nodes{variance});
auto variance_with_broadcast =
std::make_shared<op::Broadcast>(variance_label, Shape{2, 3}, AxisSet{0});
// construct mean
auto sum_input1 = std::make_shared<op::Sum>(input, AxisSet{0});
auto mean = std::make_shared<op::Divide>(sum_input1, N);
auto mean_label = std::make_shared<pattern::op::Label>(mean, nullptr, Nodes{mean});
auto mean_with_broadcast = std::make_shared<op::Broadcast>(mean_label, Shape{2, 3}, AxisSet{0});
auto input_diff_mean = std::make_shared<op::Subtract>(input, mean_with_broadcast);
// Eps
auto eps_label = std::make_shared<pattern::op::Label>(element::f32, Shape{3});
auto eps_with_broadcast = std::make_shared<op::Broadcast>(eps_label, Shape{2, 3}, AxisSet{0});
auto add1 = std::make_shared<op::Add>(eps_with_broadcast, variance_with_broadcast);
auto sqrt_variance_eps = std::make_shared<op::Sqrt>(add1);
auto divide_mean_variance = std::make_shared<op::Divide>(input_diff_mean, sqrt_variance_eps);
//Gamma
auto gamma_label = std::make_shared<pattern::op::Label>(element::f32, Shape{3});
auto gamma_with_broadcast =
std::make_shared<op::Broadcast>(gamma_label, Shape{2, 3}, AxisSet{0});
auto multiply_gamma =
std::make_shared<op::Multiply>(gamma_with_broadcast, divide_mean_variance);
//Beta
auto beta_label = std::make_shared<pattern::op::Label>(element::f32, Shape{3});
auto beta_with_broadcast = std::make_shared<op::Broadcast>(beta_label, Shape{2, 3}, AxisSet{0});
auto add_beta = std::make_shared<op::Add>(beta_with_broadcast, multiply_gamma);
// This completes fprop bn pattern
// Define a callback that is called once the DFG matches the pattern
ngraph::pattern::gr_callback_fn callback =
[variance_label, mean_label, input, eps_label, gamma_label, beta_label](
pattern::Matcher& m) {
NGRAPH_DEBUG << "In a callback for construct_fprop_bn pattern against "
<< m.match_root()->get_name();
std::shared_ptr<Node> nn = nullptr;
// TODO: add asserts based on the matched node
auto pattern_map = m.get_pattern_map();
NGRAPH_DEBUG << "Input: " << pattern_map[input]->get_name() << " "
<< pattern_map[input]->get_shape().size();
NGRAPH_DEBUG << "Variance: " << pattern_map[variance_label]->get_name() << " "
<< pattern_map[variance_label]->get_shape().size();
NGRAPH_DEBUG << "Mean: " << pattern_map[mean_label]->get_name() << " "
<< pattern_map[mean_label]->get_shape().size();
NGRAPH_DEBUG << "eps: " << pattern_map[eps_label]->get_name() << " "
<< pattern_map[eps_label]->get_shape().size();
NGRAPH_DEBUG << "gamma: " << pattern_map[gamma_label]->get_name() << " "
<< pattern_map[gamma_label]->get_shape().size();
NGRAPH_DEBUG << "beta: " << pattern_map[beta_label]->get_name() << " "
<< pattern_map[beta_label]->get_shape().size();
// don't fuse if the input doesn't have 4 dims
if (pattern_map[input]->get_shape().size() != 4)
{
NGRAPH_DEBUG << "Input to bn doesnt not have a rank=4, so not fusing";
return nn;
}
Shape bn_output_shape{m.match_root()->get_shape()};
Shape m_bn_mean_shape{pattern_map[mean_label]->get_shape()};
Shape m_bn_variance_shape{pattern_map[variance_label]->get_shape()};
// get epsilon value
auto eps_ptr = std::dynamic_pointer_cast<op::Constant>(pattern_map[eps_label]);
double epsilon = *(reinterpret_cast<const double*>(eps_ptr->get_data_ptr()));
auto bn_node = std::shared_ptr<Node>(new op::BatchNorm(epsilon,
pattern_map[gamma_label],
pattern_map[beta_label],
pattern_map[input],
pattern_map[mean_label],
pattern_map[variance_label]));
return bn_node;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(add_beta, callback);
this->add_matcher(m);
}
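For reference, the variance subgraph constructed above encodes the one-pass shortcut formula rather than the subtract-the-mean definition:

    variance = (sum(x^2) - (sum(x))^2 / N) / N = E[x^2] - (E[x])^2

which is why the pattern sums both the inputs and their squares before dividing by N.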
@@ -16,6 +16,7 @@
#include <algorithm>
#include <memory>
#include <string>
#include <mkldnn.hpp>
......
@@ -14,7 +14,7 @@
# limitations under the License.
# ******************************************************************************
-if (NGRAPH_CPU_ENABLE AND NOT APPLE)
+if (NGRAPH_CPU_ENABLE)
set (SRC
main.cpp
util.cpp
......
@@ -83,10 +83,21 @@ int main(int argc, char** argv)
}
}
+vector<ResourceInfo> include_paths;
+#ifdef __APPLE__
+include_paths.push_back({EIGEN_HEADERS_PATH, {}, true});
+include_paths.push_back({MKLDNN_HEADERS_PATH, {}, true});
+#ifdef NGRAPH_TBB_ENABLE
+include_paths.push_back({TBB_HEADERS_PATH, {}, true});
+#endif
+include_paths.push_back({NGRAPH_HEADERS_PATH, {}, true});
+include_paths.push_back({CLANG_BUILTIN_HEADERS_PATH, {}, true});
+include_paths.push_back({"/Library/Developer/CommandLineTools/usr/include/c++/v1", {}});
+#else // __APPLE__
string cpp0 = find_path("/usr/include/x86_64-linux-gnu/c++/");
string cpp1 = find_path("/usr/include/c++/");
-vector<ResourceInfo> include_paths;
include_paths.push_back({CLANG_BUILTIN_HEADERS_PATH, {}, true});
include_paths.push_back({"/usr/include/x86_64-linux-gnu", {"asm", "sys", "bits", "gnu"}});
include_paths.push_back({"/usr/include", {"asm", "sys", "bits", "gnu"}});
@@ -99,6 +110,7 @@ int main(int argc, char** argv)
include_paths.push_back({NGRAPH_HEADERS_PATH, {}, true});
#ifdef NGRAPH_TBB_ENABLE
include_paths.push_back({TBB_HEADERS_PATH, {}, true});
#endif
+#endif
if (output_path.empty())
......
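Judging from the initializers above, ResourceInfo bundles a base search path, an optional list of subdirectories to restrict the scan to, and a recursion flag; a hypothetical reconstruction inferred purely from this usage (the real definition lives elsewhere in the resource generator):

    struct ResourceInfo
    {
        std::string search_path;          // base directory to embed headers from
        std::vector<std::string> subdirs; // subdirectories to include ({} = no restriction)
        bool recursive = false;           // whether to descend into the whole tree
    };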