Unverified Commit f5768063 authored by Robert Kimball's avatar Robert Kimball Committed by GitHub

Merge pull request #222 from NervanaSystems/jmenon/cpu_kernels

CPU Backend: More ops and kernels
parents 69a2d4aa 792d3328
...@@ -99,18 +99,21 @@ include_directories( ...@@ -99,18 +99,21 @@ include_directories(
"${EIGEN_INCLUDE_DIR}" "${EIGEN_INCLUDE_DIR}"
) )
if(LLVM_INCLUDE_DIR) if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
MKLDNN_INCLUDE_DIR)
find_package(ZLIB REQUIRED) find_package(ZLIB REQUIRED)
include_directories(SYSTEM ${LLVM_INCLUDE_DIR}) include_directories(SYSTEM ${LLVM_INCLUDE_DIR} ${MKLDNN_INCLUDE_DIR})
link_directories(${LLVM_LIB_DIR}) link_directories(${LLVM_LIB_DIR} ${MKLDNN_LIB_DIR})
# Add sources for the CPU backend # Add sources for the CPU backend
# and all its dependencies # and all its dependencies
set(SRC ${SRC} set(SRC ${SRC}
codegen/compiler.cpp codegen/compiler.cpp
runtime/cpu/call_frame.cpp runtime/cpu/call_frame.cpp
runtime/cpu/cpu_manager.cpp
runtime/cpu/cpu_backend.cpp runtime/cpu/cpu_backend.cpp
runtime/cpu/cpu_manager.cpp
runtime/cpu/cpu_kernels.cpp
runtime/cpu/emitter.cpp runtime/cpu/emitter.cpp
runtime/cpu/external_function.cpp runtime/cpu/external_function.cpp
) )
...@@ -129,7 +132,7 @@ endif() ...@@ -129,7 +132,7 @@ endif()
add_library(ngraph SHARED ${SRC}) add_library(ngraph SHARED ${SRC})
target_include_directories(ngraph PUBLIC "${NGRAPH_INCLUDE_PATH}") target_include_directories(ngraph PUBLIC "${NGRAPH_INCLUDE_PATH}")
if(LLVM_LINK_LIBS) if(NGRAPH_CPU_ENABLE AND LLVM_LINK_LIBS)
target_link_libraries(ngraph LINK_PRIVATE ${LLVM_LINK_LIBS}) target_link_libraries(ngraph LINK_PRIVATE ${LLVM_LINK_LIBS})
endif() endif()
...@@ -137,8 +140,10 @@ if (APPLE) ...@@ -137,8 +140,10 @@ if (APPLE)
set_property(TARGET ngraph PROPERTY PREFIX "lib") set_property(TARGET ngraph PROPERTY PREFIX "lib")
set_property(TARGET ngraph PROPERTY OUTPUT_NAME "ngraph.so") set_property(TARGET ngraph PROPERTY OUTPUT_NAME "ngraph.so")
set_property(TARGET ngraph PROPERTY SUFFIX "") set_property(TARGET ngraph PROPERTY SUFFIX "")
else() endif()
include_directories("${MKLDNN_INCLUDE_DIR}")
if(NGRAPH_CPU_ENABLE AND MKLDNN_LIB_DIR)
target_link_libraries(ngraph LINK_PRIVATE mkldnn)
endif() endif()
#----------------------------------------------------------------------------------------------- #-----------------------------------------------------------------------------------------------
...@@ -178,6 +183,10 @@ endif() ...@@ -178,6 +183,10 @@ endif()
add_dependencies(ngraph eigen) add_dependencies(ngraph eigen)
if(NOT LLVM_PACKAGED AND LLVM_INCLUDE_DIR) if(NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR)
add_dependencies(ngraph ext_llvm) add_dependencies(ngraph ext_llvm)
endif() endif()
if(NGRAPH_CPU_ENABLE AND MKLDNN_INCLUDE_DIR)
add_dependencies(ngraph ext_mkldnn)
endif()
...@@ -145,10 +145,19 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con ...@@ -145,10 +145,19 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con
LO->OpenMP = 1; LO->OpenMP = 1;
LO->OpenMPUseTLS = 1; LO->OpenMPUseTLS = 1;
// CodeGen options
auto& CGO = Clang->getInvocation().getCodeGenOpts();
CGO.OptimizationLevel = 3;
CGO.RelocationModel = "static";
CGO.ThreadModel = "posix";
CGO.FloatABI = "hard";
CGO.OmitLeafFramePointer = 1;
CGO.VectorizeLoop = 1;
CGO.VectorizeSLP = 1;
CGO.CXAAtExit = 0;
if (debuginfo_enabled) if (debuginfo_enabled)
{ {
// CodeGen options
auto& CGO = Clang->getInvocation().getCodeGenOpts();
CGO.setDebugInfo(codegenoptions::FullDebugInfo); CGO.setDebugInfo(codegenoptions::FullDebugInfo);
} }
...@@ -163,6 +172,12 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con ...@@ -163,6 +172,12 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con
// Enable various target features // Enable various target features
// Most of these are for Eigen // Most of these are for Eigen
auto& TO = Clang->getInvocation().getTargetOpts(); auto& TO = Clang->getInvocation().getTargetOpts();
// TODO: This needs to be configurable and selected carefully
TO.CPU = "broadwell";
TO.FeaturesAsWritten.emplace_back("+sse");
TO.FeaturesAsWritten.emplace_back("+sse2");
TO.FeaturesAsWritten.emplace_back("+sse3");
TO.FeaturesAsWritten.emplace_back("+ssse3");
TO.FeaturesAsWritten.emplace_back("+sse4.1"); TO.FeaturesAsWritten.emplace_back("+sse4.1");
TO.FeaturesAsWritten.emplace_back("+sse4.2"); TO.FeaturesAsWritten.emplace_back("+sse4.2");
TO.FeaturesAsWritten.emplace_back("+avx"); TO.FeaturesAsWritten.emplace_back("+avx");
......
...@@ -22,12 +22,13 @@ using namespace ngraph::runtime::cpu; ...@@ -22,12 +22,13 @@ using namespace ngraph::runtime::cpu;
CallFrame::CallFrame(EntryPoint compiled_function, CallFrame::CallFrame(EntryPoint compiled_function,
size_t n_outputs, size_t n_outputs,
size_t n_inputs, size_t n_inputs,
const TensorViewPtrs& temps) const TensorViewPtrs& temps,
const std::vector<std::shared_ptr<CallFrame>>& callees)
: m_n_outputs(n_outputs) : m_n_outputs(n_outputs)
, m_n_inputs(n_inputs) , m_n_inputs(n_inputs)
, m_tensor_views(n_inputs + n_outputs + temps.size()) , m_tensor_views(n_outputs + n_inputs + temps.size())
, m_compiled_function(compiled_function) , m_compiled_function(compiled_function)
, m_callees(callees)
{ {
copy(temps.begin(), temps.end(), m_tensor_views.begin() + m_n_outputs + m_n_inputs); copy(temps.begin(), temps.end(), m_tensor_views.begin() + m_n_outputs + m_n_inputs);
} }
...@@ -40,7 +41,7 @@ void CallFrame::tensor_call( ...@@ -40,7 +41,7 @@ void CallFrame::tensor_call(
copy(inputs.begin(), inputs.end(), m_tensor_views.begin() + m_n_outputs); copy(inputs.begin(), inputs.end(), m_tensor_views.begin() + m_n_outputs);
// Invoke compiled computation // Invoke compiled computation
m_compiled_function(this, m_tensor_views); m_compiled_function(this, m_tensor_views, m_callees);
// Don't hold onto inputs/outputs // Don't hold onto inputs/outputs
fill_n(m_tensor_views.begin(), m_n_outputs + m_n_inputs, nullptr); fill_n(m_tensor_views.begin(), m_n_outputs + m_n_inputs, nullptr);
......
...@@ -31,8 +31,10 @@ namespace ngraph ...@@ -31,8 +31,10 @@ namespace ngraph
namespace cpu namespace cpu
{ {
class CallFrame; class CallFrame;
using EntryPoint = std::function<void(ngraph::runtime::cpu::CallFrame*, using EntryPoint = std::function<void(ngraph::runtime::cpu::CallFrame*,
ngraph::runtime::TensorViewPtrs&)>; ngraph::runtime::TensorViewPtrs&,
const std::vector<std::shared_ptr<CallFrame>>&)>;
// Compile and execute graphs // Compile and execute graphs
class CallFrame : public ngraph::runtime::CallFrame class CallFrame : public ngraph::runtime::CallFrame
...@@ -41,7 +43,8 @@ namespace ngraph ...@@ -41,7 +43,8 @@ namespace ngraph
CallFrame(EntryPoint compiled_function, CallFrame(EntryPoint compiled_function,
size_t n_outputs, size_t n_outputs,
size_t n_inputs, size_t n_inputs,
const TensorViewPtrs& temps); const TensorViewPtrs& temps,
const std::vector<std::shared_ptr<CallFrame>>& callees);
/// @brief Invoke the function with values matching the signature of the function. /// @brief Invoke the function with values matching the signature of the function.
/// ///
...@@ -73,6 +76,7 @@ namespace ngraph ...@@ -73,6 +76,7 @@ namespace ngraph
TensorViewPtrs m_tensor_views; TensorViewPtrs m_tensor_views;
bool m_return; bool m_return;
EntryPoint m_compiled_function; EntryPoint m_compiled_function;
std::vector<std::shared_ptr<CallFrame>> m_callees;
}; };
} }
} }
......
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#pragma once
#include "ngraph/types/element_type.hpp"
// CBLAS types and wrappers
// Minimal local declarations of the CBLAS interface so that code emitted by
// the CPU backend can call sgemm without including the full MKL/CBLAS header.
// NOTE(review): enum values appear to mirror the standard CBLAS integer
// constants (e.g. CblasRowMajor == 101) — confirm against the linked BLAS.
namespace cblas
{
    // Matrix storage order.
    enum class Layout
    {
        RowMajor = 101,
        ColMajor = 102
    };

    // Transposition to apply to an operand.
    enum class Transpose
    {
        None = 111,
        Transpose = 112,
        ConjTrans = 113
    };

    // Which triangle of a matrix is referenced.
    enum class UpperLower
    {
        Upper = 121,
        Lower = 122
    };

    // Whether the diagonal is assumed to be all ones.
    enum class Diag
    {
        NonUnit = 131,
        Unit = 132
    };

    // Side on which a matrix operand is applied.
    enum class Side
    {
        Left = 141,
        Right = 142
    };

    // Packed-storage indicator.
    enum class Storage
    {
        Packed = 151
    };

    // Identifies which operand (A or B) a parameter refers to.
    enum class Ident
    {
        AMatrix = 161,
        BMatrix = 162
    };

    // Offset interpretation for offset-accepting GEMM variants.
    enum class Offset
    {
        RowOffset = 171,
        ColOffset = 172,
        FixOffset = 173
    };

    extern "C" {
    // Single-precision general matrix multiply: C = alpha*op(A)*op(B) + beta*C.
    // M/N/K are the result and inner dimensions; lda/ldb/ldc are leading dims.
    void cblas_sgemm(const Layout layout,
                     const Transpose TransA,
                     const Transpose TransB,
                     const ngraph::element::Int64::type M,
                     const ngraph::element::Int64::type N,
                     const ngraph::element::Int64::type K,
                     const ngraph::element::Float32::type alpha,
                     const ngraph::element::Float32::type* A,
                     const ngraph::element::Int64::type lda,
                     const ngraph::element::Float32::type* B,
                     const ngraph::element::Int64::type ldb,
                     const ngraph::element::Float32::type beta,
                     ngraph::element::Float32::type* C,
                     const ngraph::element::Int64::type ldc);
    }
}
// Minimal local declaration of MKL's out-of-place matrix copy/transpose so
// emitted code can link against it without the MKL headers.
namespace mkl
{
    extern "C" {
    // Out-of-place scaled copy/transpose of a rows x cols matrix:
    // B = alpha * op(A). 'ordering' selects row/column major ('R'/'C');
    // 'trans' selects transpose ('T') or plain copy ('N').
    // See Intel MKL mkl_somatcopy documentation for exact semantics.
    void MKL_Somatcopy(char ordering,
                       char trans,
                       size_t rows,
                       size_t cols,
                       const ngraph::element::Float32::type alpha,
                       const ngraph::element::Float32::type* A,
                       size_t lda,
                       ngraph::element::Float32::type* B,
                       size_t ldb);
    }
}
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
#include <algorithm>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <typeindex> #include <typeindex>
...@@ -23,7 +24,13 @@ ...@@ -23,7 +24,13 @@
#include "ngraph/ops/broadcast.hpp" #include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/concatenate.hpp" #include "ngraph/ops/concatenate.hpp"
#include "ngraph/ops/constant.hpp" #include "ngraph/ops/constant.hpp"
#include "ngraph/ops/function_call.hpp"
#include "ngraph/ops/get_tuple_element.hpp" #include "ngraph/ops/get_tuple_element.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/runtime/cpu/call_frame.hpp"
#include "ngraph/runtime/cpu/emitter.hpp" #include "ngraph/runtime/cpu/emitter.hpp"
#include "ngraph/runtime/cpu/external_function.hpp" #include "ngraph/runtime/cpu/external_function.hpp"
#include "ngraph/runtime/tensor_view_info.hpp" #include "ngraph/runtime/tensor_view_info.hpp"
...@@ -177,30 +184,58 @@ void Emitter::EMITTER_DECL(EmitDot) ...@@ -177,30 +184,58 @@ void Emitter::EMITTER_DECL(EmitDot)
auto arg1_layout = inputs[1].get_layout<DenseTensorViewLayout>(); auto arg1_layout = inputs[1].get_layout<DenseTensorViewLayout>();
auto out_layout = outputs[0].get_layout<DenseTensorViewLayout>(); auto out_layout = outputs[0].get_layout<DenseTensorViewLayout>();
TU += // Emit an MKL SGEMM call if possible
" {\n" // clang-format off
" auto arg0 = call_frame->get_tensor_view_data<" + if (arg0_element_type == ngraph::element::Float32::element_type())
element_type_names[TI(arg0_element_type)] + ">(" + to_string(inputs[0].get_index()) + {
");\n" TU +=
" auto arg1 = call_frame->get_tensor_view_data<" + " {\n"
element_type_names[TI(arg0_element_type)] + ">(" + to_string(inputs[1].get_index()) + " auto arg0 = call_frame->get_tensor_view_data<" +
");\n" element_type_names[TI(arg0_element_type)] + ">(" + to_string(inputs[0].get_index()) +
" auto out = call_frame->get_tensor_view_data<" + ");\n"
element_type_names[TI(arg0_element_type)] + ">(" + to_string(outputs[0].get_index()) + " auto arg1 = call_frame->get_tensor_view_data<" +
");\n" element_type_names[TI(arg0_element_type)] + ">(" + to_string(inputs[1].get_index()) +
" EigenMatrix<" + ");\n"
element_type_names[TI(arg0_element_type)] + ">(out, " + " auto out = call_frame->get_tensor_view_data<" +
EIGEN_MATRIX_FORMAT(out_layout->get_shape(), out_layout->get_strides()) + element_type_names[TI(arg0_element_type)] + ">(" + to_string(outputs[0].get_index()) +
") = \n" ");\n"
" EigenMatrix<" + " cblas::cblas_sgemm(cblas::Layout::RowMajor, cblas::Transpose::None, cblas::Transpose::None, " +
element_type_names[TI(arg0_element_type)] + ">(arg0, " + to_string(arg0_shape[0]) + ", " + to_string(arg1_shape[1]) + ", " + to_string(arg0_shape[1]) + ",\n"
EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) + " 1.0f, arg0, " + to_string(max(1UL, arg0_shape[1])) + ", arg1, " + to_string(max(1UL, arg1_shape[1])) + ", 0.0f,\n"
") * " " out, " + to_string(max(1UL, arg1_shape[1])) + ");\n"
"EigenMatrix<" + " }\n";
element_type_names[TI(arg0_element_type)] + ">(arg1, " + }
EIGEN_MATRIX_FORMAT(arg1_layout->get_shape(), arg1_layout->get_strides()) + // clang-format on
");\n" else
" }\n"; {
TU +=
" {\n"
" auto arg0 = call_frame->get_tensor_view_data<" +
element_type_names[TI(arg0_element_type)] + ">(" +
to_string(inputs[0].get_index()) +
");\n"
" auto arg1 = call_frame->get_tensor_view_data<" +
element_type_names[TI(arg0_element_type)] + ">(" +
to_string(inputs[1].get_index()) +
");\n"
" auto out = call_frame->get_tensor_view_data<" +
element_type_names[TI(arg0_element_type)] + ">(" +
to_string(outputs[0].get_index()) +
");\n"
" EigenMatrix<" +
element_type_names[TI(arg0_element_type)] + ">(out, " +
EIGEN_MATRIX_FORMAT(out_layout->get_shape(), out_layout->get_strides()) +
") = \n"
" EigenMatrix<" +
element_type_names[TI(arg0_element_type)] + ">(arg0, " +
EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) +
") * "
"EigenMatrix<" +
element_type_names[TI(arg0_element_type)] + ">(arg1, " +
EIGEN_MATRIX_FORMAT(arg1_layout->get_shape(), arg1_layout->get_strides()) +
");\n"
" }\n";
}
} }
else else
{ {
...@@ -501,7 +536,26 @@ void Emitter::EMITTER_DECL(EmitMaximum) ...@@ -501,7 +536,26 @@ void Emitter::EMITTER_DECL(EmitMaximum)
" EigenArray1d<" + element_type_names[TI(et)] + ">(out, " " EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n" EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, " " EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").max(" EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").max(\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(arg1, "
EIGEN_VECTOR_FORMAT(inputs[1].get_layout<DenseTensorViewLayout>()->get_size()) "));\n"
" }\n";
}
void Emitter::EMITTER_DECL(EmitMinimum)
{
const element::Type& et =
(dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
->get_element_type();
TU += " {\n"
" auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(inputs[0].get_index()) + ");\n"
" auto arg1 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(inputs[1].get_index()) + ");\n"
" auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(outputs[0].get_index()) + ");\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").min(\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(arg1, " " EigenArray1d<" + element_type_names[TI(et)] + ">(arg1, "
EIGEN_VECTOR_FORMAT(inputs[1].get_layout<DenseTensorViewLayout>()->get_size()) "));\n" EIGEN_VECTOR_FORMAT(inputs[1].get_layout<DenseTensorViewLayout>()->get_size()) "));\n"
" }\n"; " }\n";
...@@ -882,3 +936,775 @@ void Emitter::EMITTER_DECL(EmitConvert) ...@@ -882,3 +936,775 @@ void Emitter::EMITTER_DECL(EmitConvert)
".template cast<typename " + element_type_names[TI(result_element_type)] + "::type>();\n" ".template cast<typename " + element_type_names[TI(result_element_type)] + "::type>();\n"
" }\n"; " }\n";
} }
// Emits code that assigns the constant's literal values (as a braced
// std::vector initializer) into the output tensor view.
void Emitter::EMITTER_DECL(EmitConstant)
{
    auto constant_op = static_cast<const op::Constant*>(n);
    auto tensor_type = dynamic_pointer_cast<const TensorViewType>(constant_op->get_value_type());
    assert(tensor_type);
    auto& element_type = tensor_type->get_element_type();

    // Join the constant's value strings with ", " into one initializer list.
    std::string initializer;
    bool first_value = true;
    for (const auto& value_string : constant_op->get_value_strings())
    {
        if (!first_value)
        {
            initializer += ", ";
        }
        initializer += value_string;
        first_value = false;
    }

    const std::string& type_name = element_type_names[TI(element_type)];
    TU +=
        " {\n"
        " call_frame->get_parameterized_tensor_view<" +
        type_name + ">(" + to_string(outputs[0].get_index()) +
        ")->get_vector() = std::vector<" + type_name + "::type>{" +
        initializer + "};\n }\n";
}
// Emits code for Reshape. Three cases are handled:
//   1. No axis reordering (or a degenerate result) -> plain vector copy.
//   2. Rank-2 input with reordered axes -> a 2D transpose, via MKL for
//      Float32 or Eigen otherwise.
//   3. Rank > 2 with reordered axes -> not implemented yet (throws).
void Emitter::EMITTER_DECL(EmitReshape)
{
    auto reshape = static_cast<const op::Reshape*>(n);

    auto arg_type = reshape->get_arguments().at(0)->get_value_type();
    auto arg_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(arg_type);
    assert(arg_tensor_view_type);
    auto arg_shape = arg_tensor_view_type->get_shape();
    auto arg_rank = arg_shape.size();

    auto result_type = reshape->get_value_type();
    auto result_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(result_type);
    assert(result_tensor_view_type);
    auto result_shape = result_tensor_view_type->get_shape();
    auto& result_element_type = result_tensor_view_type->get_element_type();

    auto input_order = reshape->get_input_order();

    // A sorted input order means axes keep their relative order, i.e. the
    // element layout in memory is unchanged.
    bool same_layout = std::is_sorted(input_order.begin(), input_order.end());

    size_t result_shape_product = 1;
    for (auto i : result_shape)
    {
        result_shape_product *= i;
    }

    // If there is no layout change or we are just going from 1^n to 1^m or a zero-size tensor, we can just copy.
    if (same_layout || result_shape_product < 2)
    {
        TU +=
            " {\n"
            " call_frame->get_parameterized_tensor_view<" +
            element_type_names[TI(result_element_type)] + ">(" +
            to_string(outputs.at(0).get_index()) +
            ")->get_vector() =\n"
            " call_frame->get_parameterized_tensor_view<" +
            element_type_names[TI(result_element_type)] + ">(" +
            to_string(inputs.at(0).get_index()) +
            ")->get_vector();\n"
            " }\n";
    }
    // If there *is* a layout change in the 2D case, we transpose the input.
    else if (arg_rank == 2)
    {
        auto arg0_layout = inputs[0].get_layout<DenseTensorViewLayout>();
        auto out_layout = outputs[0].get_layout<DenseTensorViewLayout>();

        // Emit an MKL transpose call if possible
        // clang-format off
        if (result_element_type == ngraph::element::Float32::element_type())
        {
            // 'R' = row-major ordering, 'T' = transpose; shapes are baked into
            // the emitted call as compile-time constants of the generated TU.
            TU +=
                " {\n"
                " auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(result_element_type)] +
                ">(" + to_string(inputs[0].get_index()) + ");\n"
                " auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(result_element_type)] +
                ">(" + to_string(outputs[0].get_index()) + ");\n"
                " mkl::MKL_Somatcopy('R', 'T', " + to_string(arg_shape[0]) + ",\n"
                " " + to_string(arg_shape[1]) + ", 1.0f,\n"
                " arg0, " + to_string(arg_shape[1]) + ",\n"
                " out, " + to_string(arg_shape[0]) + ");\n"
                " }\n";
        }
        // clang-format on
        else
        {
            // Fallback: Eigen matrix transpose for non-Float32 element types.
            TU +=
                " {\n"
                " auto arg0 = call_frame->get_tensor_view_data<" +
                element_type_names[TI(result_element_type)] + ">(" +
                to_string(inputs[0].get_index()) +
                ");\n"
                " auto out = call_frame->get_tensor_view_data<" +
                element_type_names[TI(result_element_type)] + ">(" +
                to_string(outputs[0].get_index()) +
                ");\n"
                " EigenMatrix<" +
                element_type_names[TI(result_element_type)] + ">(out, " +
                EIGEN_MATRIX_FORMAT(out_layout->get_shape(), out_layout->get_strides()) +
                ") =\n"
                " EigenMatrix<" +
                element_type_names[TI(result_element_type)] + ">(arg0, " +
                EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) +
                ").transpose();\n"
                " }\n";
        }
    }
    // Other cases (reordering of axes for tensors with rank>2) are not handled yet.
    else
    {
        throw ngraph_error(
            "Axis permutation in reshape is not implemented yet for tensors with rank>2");
    }
}
// Emits code for FunctionCall: compiles (or reuses) the callee's
// ExternalFunction, registers a CallFrame for it in ef's callee list, and
// emits code that forwards this op's tensor views into that frame.
void Emitter::EMITTER_DECL(EmitFunctionCall)
{
    auto function_call = static_cast<const op::FunctionCall*>(n);
    auto function = function_call->get_function();

    // Look up (or lazily build) the callee's ExternalFunction. find() replaces
    // the original at() + catch, which used exceptions for ordinary control
    // flow and caught std::out_of_range by value.
    std::shared_ptr<ExternalFunction> external;
    auto it = function_map.find(function);
    if (it != function_map.end())
    {
        external = it->second;
    }
    else
    {
        external = make_shared<ExternalFunction>(function);
        function_map.insert({function, external});
    }

    // The compiled code indexes into `callees` at runtime, so record the
    // call frame's position now and bake that index into the emitted source.
    std::shared_ptr<CallFrame> cf =
        std::dynamic_pointer_cast<CallFrame>(external->make_call_frame());
    ef->get_callees().emplace_back(cf);

    TU +=
        " {\n"
        " auto cf = callees.at(" +
        to_string(ef->get_callees().size() - 1) +
        ");\n"
        " std::vector<std::shared_ptr<ngraph::runtime::Value>> inputs;\n"
        " std::vector<std::shared_ptr<ngraph::runtime::Value>> outputs;\n";

    // Forward each of this op's tensor views into the callee's input/output
    // lists in order.
    for (const auto& in : inputs)
    {
        TU += " inputs.emplace_back(call_frame->get_tensor_view(" +
              to_string(in.get_index()) + "));\n";
    }

    for (const auto& out : outputs)
    {
        TU += " outputs.emplace_back(call_frame->get_tensor_view(" +
              to_string(out.get_index()) + "));\n";
    }

    TU +=
        " (*cf)(inputs, outputs);\n"
        " }\n";
}
// TODO: This and other ops include comments/notes that
// we don't want to just copy-paste here. Figure out a better way
// or just point to ngvm/external_function.cpp with a note that
// the compiled version of these ops is intended to have semantics identical
// to what's seen there (for now atleast)
// Emits code for Reduce over vectors and matrices. The reduction function is
// compiled to its own CallFrame and invoked elementwise through a lambda in
// the emitted code. Zero-length reduction axes are special-cased to copy or
// broadcast the base element instead (see the long comment below).
void Emitter::EMITTER_DECL(EmitReduce)
{
    auto reduce = static_cast<const op::Reduce*>(n);
    auto reduction_function = reduce->get_reduction_function();

    // Look up (or lazily build) the reduction function. find() replaces the
    // original at() + catch, which used exceptions for ordinary control flow
    // and caught std::out_of_range by value.
    std::shared_ptr<ExternalFunction> external;
    auto it = function_map.find(reduction_function);
    if (it != function_map.end())
    {
        external = it->second;
    }
    else
    {
        external = make_shared<ExternalFunction>(reduction_function);
        function_map.insert({reduction_function, external});
    }

    auto reductee_type = reduce->get_arguments().at(0)->get_value_type();
    auto reductee_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(reductee_type);
    assert(reductee_tensor_view_type);
    auto reductee_shape = reductee_tensor_view_type->get_shape();

    auto f_result_type = reduction_function->get_result_type();
    auto f_result_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(f_result_type);
    assert(f_result_tensor_view_type);
    auto& f_result_element_type = f_result_tensor_view_type->get_element_type();

    auto result_type = reduce->get_value_type();
    auto result_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(result_type);
    assert(result_tensor_view_type);
    auto result_shape = result_tensor_view_type->get_shape();

    auto& reduction_axes = reduce->get_reduction_axes();
    auto arg0_layout = inputs[0].get_layout<DenseTensorViewLayout>();

    // Trivial case: no reduction axes (this includes the scalar-reductee case).
    if (reduction_axes.empty())
    {
        TU +=
            " {\n"
            " call_frame->get_parameterized_tensor_view<" +
            element_type_names[TI(f_result_element_type)] + ">(" +
            to_string(outputs.at(0).get_index()) +
            ")->get_vector() =\n"
            " call_frame->get_parameterized_tensor_view<" +
            element_type_names[TI(f_result_element_type)] + ">(" +
            to_string(inputs.at(0).get_index()) +
            ")->get_vector();\n"
            " }\n";
    }
    // Behavior for zero-size axes bears some explanation here. XLA's reduce
    // operator provides an "base" element (usually, but not necessarily,
    // an identity element) that it apparently *may* choose to insert anywhere
    // in the reduction any number of times. For example, given:
    //
    //   reduce{{1,2,3},b,+)
    //
    // any of the following are valid reductions (I think!):
    //
    //   b+(b+1+2)+3
    //   b+(1+(2+3))
    //   (1+2)+3 (I think!)
    //
    // etc. Here we will choose never to instantiate the base element, which
    // works well with Eigen's default behavior for non-zero-length axes. The
    // exceptional case is when we reduce on a zero-length axis. In this case,
    // Eigen's default behavior is to put a zero in the output,  which is not
    // what we want, so we detect that case here and override with a copy
    // instruction (for reduce-to-scalar) or a broadcast (for reduce-to-vector)
    // from the base element.
    //
    // What I'm actually not sure about is whether the identity element is
    // required to appear at least once. If so, this will need to be reworked,
    // assuming we actually want to mimic XLA's semantics that closely, which
    // we may not.
    else if ((reductee_shape.size() == 1 && reduction_axes == AxisSet{0}) ||
             (reductee_shape.size() == 2 && reduction_axes == AxisSet{0, 1}))
    {
        if (reductee_shape.at(0) == 0 || (reductee_shape.size() == 2 && reductee_shape.at(1) == 0))
        {
            // Zero-length axis: copy the base element (second input) through.
            TU +=
                " {\n"
                " call_frame->get_parameterized_tensor_view<" +
                element_type_names[TI(f_result_element_type)] + ">(" +
                to_string(outputs.at(0).get_index()) +
                ")->get_vector() =\n"
                " call_frame->get_parameterized_tensor_view<" +
                element_type_names[TI(f_result_element_type)] + ">(" +
                to_string(inputs.at(1).get_index()) +
                ")->get_vector();\n"
                " }\n";
        }
        else
        {
            // Reduce-to-scalar: wrap the compiled reduction function in a
            // scalar binary lambda and hand it to Eigen's redux().
            std::shared_ptr<CallFrame> cf =
                std::dynamic_pointer_cast<CallFrame>(external->make_call_frame());
            ef->get_callees().emplace_back(cf);

            TU +=
                " {\n"
                " using ET = " + element_type_names[TI(f_result_element_type)] + ";\n"
                " auto cf = callees.at(" + to_string(ef->get_callees().size() - 1) + ");\n"
                " auto f = [cf](typename ET::type x, typename ET::type y) -> typename ET::type {\n"
                " auto tx = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});\n"
                " *tx = std::vector<typename ET::type>({x});\n"
                " auto ty = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});\n"
                " *ty = std::vector<typename ET::type>({y});\n"
                " auto tr = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});\n"
                " (*cf)({tx, ty}, {tr});\n"
                " return tr->get_vector()[0];\n"
                " };\n"
                " auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(f_result_element_type)] +
                ">(" + to_string(inputs[0].get_index()) + ");\n"
                " auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(f_result_element_type)] +
                ">(" + to_string(outputs[0].get_index()) + ");\n"
                " EigenArray1d<" + element_type_names[TI(f_result_element_type)] + ">(out, "
                EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
                " EigenArray1d<" + element_type_names[TI(f_result_element_type)] + ">(arg0, "
                EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").redux(f);\n"
                " }\n";
        }
    }
    else if (reductee_shape.size() == 2 && reduction_axes == AxisSet{1})
    {
        if (reductee_shape.at(1) == 0)
        {
            // Zero-length reduction axis: broadcast the base element.
            TU += " {\n"
                " auto arg1 = call_frame->get_tensor_view_data<" + element_type_names[TI(f_result_element_type)] +
                ">(" + to_string(inputs[1].get_index()) + ");\n"
                " auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(f_result_element_type)] +
                ">(" + to_string(outputs[0].get_index()) + ");\n"
                " EigenArray1d<" + element_type_names[TI(f_result_element_type)] + ">(out, "
                EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
                " EigenArray1d<" + element_type_names[TI(f_result_element_type)] + ">(arg1, "
                EIGEN_VECTOR_FORMAT(inputs[1].get_layout<DenseTensorViewLayout>()->get_size()) ")(0, 0);\n"
                " }\n";
        }
        else
        {
            // Row-wise reduction via Eigen's rowwise().redux(f).
            std::shared_ptr<CallFrame> cf =
                std::dynamic_pointer_cast<CallFrame>(external->make_call_frame());
            ef->get_callees().emplace_back(cf);

            TU +=
                " {\n"
                " using ET = " + element_type_names[TI(f_result_element_type)] + ";\n"
                " auto cf = callees.at(" + to_string(ef->get_callees().size() - 1) + ");\n"
                " auto f = [cf](typename ET::type x, typename ET::type y) -> typename ET::type {\n"
                " auto tx = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});\n"
                " *tx = std::vector<typename ET::type>({x});\n"
                " auto ty = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});\n"
                " *ty = std::vector<typename ET::type>({y});\n"
                " auto tr = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});\n"
                " (*cf)({tx, ty}, {tr});\n"
                " return tr->get_vector()[0];\n"
                " };\n"
                " auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(f_result_element_type)] +
                ">(" + to_string(inputs[0].get_index()) + ");\n"
                " auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(f_result_element_type)] +
                ">(" + to_string(outputs[0].get_index()) + ");\n"
                " EigenVector<" + element_type_names[TI(f_result_element_type)] + ">(out, "
                EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
                " EigenMatrix<" + element_type_names[TI(f_result_element_type)] + ">(arg0, " +
                EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) + ").rowwise().redux(f);\n"
                " }\n";
        }
    }
    else if (reductee_shape.size() == 2 && reduction_axes == AxisSet{0})
    {
        if (reductee_shape.at(0) == 0)
        {
            // Zero-length reduction axis: broadcast the base element.
            TU += " {\n"
                " auto arg1 = call_frame->get_tensor_view_data<" + element_type_names[TI(f_result_element_type)] +
                ">(" + to_string(inputs[1].get_index()) + ");\n"
                " auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(f_result_element_type)] +
                ">(" + to_string(outputs[0].get_index()) + ");\n"
                " EigenArray1d<" + element_type_names[TI(f_result_element_type)] + ">(out, "
                EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
                " EigenArray1d<" + element_type_names[TI(f_result_element_type)] + ">(arg1, "
                EIGEN_VECTOR_FORMAT(inputs[1].get_layout<DenseTensorViewLayout>()->get_size()) ")(0, 0);\n"
                " }\n";
        }
        else
        {
            // Column-wise reduction via Eigen's colwise().redux(f).
            std::shared_ptr<CallFrame> cf =
                std::dynamic_pointer_cast<CallFrame>(external->make_call_frame());
            ef->get_callees().emplace_back(cf);

            TU +=
                " {\n"
                " using ET = " + element_type_names[TI(f_result_element_type)] + ";\n"
                " auto cf = callees.at(" + to_string(ef->get_callees().size() - 1) + ");\n"
                " auto f = [cf](typename ET::type x, typename ET::type y) -> typename ET::type {\n"
                " auto tx = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});\n"
                " *tx = std::vector<typename ET::type>({x});\n"
                " auto ty = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});\n"
                " *ty = std::vector<typename ET::type>({y});\n"
                " auto tr = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});\n"
                " (*cf)({tx, ty}, {tr});\n"
                " return tr->get_vector()[0];\n"
                " };\n"
                " auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(f_result_element_type)] +
                ">(" + to_string(inputs[0].get_index()) + ");\n"
                " auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(f_result_element_type)] +
                ">(" + to_string(outputs[0].get_index()) + ");\n"
                " EigenVector<" + element_type_names[TI(f_result_element_type)] + ">(out, "
                EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
                " EigenMatrix<" + element_type_names[TI(f_result_element_type)] + ">(arg0, " +
                EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) + ").colwise().redux(f);\n"
                " }\n";
        }
    }
    else
    {
        throw ngraph_error("Reduce: only vectors and matrices are currently supported");
    }
}
// Emits code applying the elementwise sign function to the single input,
// via Eigen's Array::sign(), writing the result to the output tensor view.
void Emitter::EMITTER_DECL(EmitSign)
{
    // Element type of the input; the output is emitted with the same type.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    TU += " {\n"
        " auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(inputs[0].get_index()) + ");\n"
        " auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(outputs[0].get_index()) + ");\n"
        " EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        " EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").sign();\n"
        " }\n";
}
void Emitter::EMITTER_DECL(EmitSlice)
{
    // Emits generated code copying a sub-region of the input tensor into the
    // output. Only unit step is supported; rank 0 (scalar copy), rank 1
    // (vector segment) and rank 2 (matrix block) inputs are handled.
    auto slice = static_cast<const op::Slice*>(n);

    // Strided slices would need strided Eigen maps; reject them for now.
    for (auto d : slice->get_step())
    {
        if (1 != d)
        {
            throw ngraph_error("Slice does not support non-unit step yet");
        }
    }

    auto arg_type = slice->get_arguments().at(0)->get_value_type();
    auto arg_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(arg_type);
    assert(arg_tensor_view_type);
    auto arg_shape = arg_tensor_view_type->get_shape();
    auto arg_rank = arg_shape.size();
    auto& arg_element_type = arg_tensor_view_type->get_element_type();

    auto& lower_bounds = slice->get_lower_bounds();
    auto& upper_bounds = slice->get_upper_bounds();

    // Scalar slice is necessarily just a copy.
    if (arg_rank == 0)
    {
        TU +=
            "    {\n"
            "        call_frame->get_parameterized_tensor_view<" +
            element_type_names[TI(arg_element_type)] + ">(" + to_string(outputs.at(0).get_index()) +
            ")->get_vector() =\n"
            "        call_frame->get_parameterized_tensor_view<" +
            element_type_names[TI(arg_element_type)] + ">(" + to_string(inputs.at(0).get_index()) +
            ")->get_vector();\n"
            "    }\n";
    }
    // Vector slice: contiguous segment [lower, upper) of the 1-d data.
    else if (arg_rank == 1)
    {
        TU +=
            "    {\n"
            "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(arg_element_type)] +
            ">(" + to_string(inputs[0].get_index()) + ");\n"
            "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(arg_element_type)] +
            ">(" + to_string(outputs[0].get_index()) + ");\n"
            "        EigenVector<" + element_type_names[TI(arg_element_type)] +
            ">(out, " EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
            "            EigenVector<" + element_type_names[TI(arg_element_type)] +
            ">(arg0, " EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").segment(\n"
            "            " + to_string(lower_bounds[0]) + ", " + to_string(upper_bounds[0] - lower_bounds[0]) + ");\n"
            "    }\n";
    }
    // Matrix slice: Eigen .block(row, col, n_rows, n_cols) on the 2-d map.
    else if (arg_rank == 2)
    {
        auto arg0_layout = inputs[0].get_layout<DenseTensorViewLayout>();
        auto out_layout = outputs[0].get_layout<DenseTensorViewLayout>();

        TU +=
            "    {\n"
            "        auto arg0 = call_frame->get_tensor_view_data<" +
            element_type_names[TI(arg_element_type)] + ">(" + to_string(inputs[0].get_index()) +
            ");\n"
            "        auto out = call_frame->get_tensor_view_data<" +
            element_type_names[TI(arg_element_type)] + ">(" + to_string(outputs[0].get_index()) +
            ");\n"
            "        EigenMatrix<" +
            element_type_names[TI(arg_element_type)] + ">(out, " +
            EIGEN_MATRIX_FORMAT(out_layout->get_shape(), out_layout->get_strides()) +
            ") = \n"
            "            EigenMatrix<" +
            element_type_names[TI(arg_element_type)] + ">(arg0, " +
            EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) + ").block(" +
            to_string(lower_bounds[0]) + ", " + to_string(lower_bounds[1]) +
            ",\n"
            "            " +
            to_string(upper_bounds[0] - lower_bounds[0]) +
            ",\n"
            "            " +
            to_string(upper_bounds[1] - lower_bounds[1]) +
            ");\n"
            "    }\n";
    }
    // Other cases (reordering of axes for tensors with rank>2) are not handled yet.
    else
    {
        throw ngraph_error("Slice is not implemented yet for tensors with rank>2");
    }
}
void Emitter::EMITTER_DECL(EmitSum)
{
    // Emits generated code summing the input tensor over the requested
    // reduction axes. Supported cases: no reduction (plain copy), full
    // reduction of a vector or matrix (sum to scalar), and single-axis
    // reduction of a matrix (row-wise / column-wise sum).
    auto s = static_cast<const op::Sum*>(n);
    auto s_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(s->get_value_type());
    assert(s_tensor_view_type);
    auto& s_element_type = s_tensor_view_type->get_element_type();
    auto s_shape = s_tensor_view_type->get_shape();

    auto arg = s->get_arguments().at(0);
    auto arg_type = arg->get_value_type();
    auto arg_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(arg_type);
    assert(arg_tensor_view_type);
    auto arg_shape = arg_tensor_view_type->get_shape();
    auto arg_rank = arg_shape.size();

    auto& reduction_axes = s->get_reduction_axes();

    // Trivial case: no reduction axes.
    if (reduction_axes.size() == 0)
    {
        TU +=
            "    {\n"
            "        call_frame->get_parameterized_tensor_view<" +
            element_type_names[TI(s_element_type)] + ">(" + to_string(outputs.at(0).get_index()) +
            ")->get_vector() =\n"
            "        call_frame->get_parameterized_tensor_view<" +
            element_type_names[TI(s_element_type)] + ">(" + to_string(inputs.at(0).get_index()) +
            ")->get_vector();\n"
            "    }\n";
    }
    // Full reduction? Then sum to scalar.
    else if ((arg_rank == 1 && reduction_axes == AxisSet{0}) ||
             (arg_rank == 2 && reduction_axes == AxisSet{0, 1}))
    {
        TU +=
            "    {\n"
            "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(s_element_type)] +
            ">(" + to_string(inputs[0].get_index()) + ");\n"
            "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(s_element_type)] +
            ">(" + to_string(outputs[0].get_index()) + ");\n"
            "        EigenArray1d<" + element_type_names[TI(s_element_type)] + ">(out, "
            EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
            "            EigenArray1d<" + element_type_names[TI(s_element_type)] + ">(arg0, "
            EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").sum();\n"
            "    }\n";
    }
    // Reduce along axis 1: one sum per row.
    else if (arg_rank == 2 && reduction_axes == AxisSet{1})
    {
        auto arg0_layout = inputs[0].get_layout<DenseTensorViewLayout>();

        TU +=
            "    {\n"
            "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(s_element_type)] +
            ">(" + to_string(inputs[0].get_index()) + ");\n"
            "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(s_element_type)] +
            ">(" + to_string(outputs[0].get_index()) + ");\n"
            "        EigenVector<" + element_type_names[TI(s_element_type)] + ">(out, "
            EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
            "            EigenMatrix<" + element_type_names[TI(s_element_type)] + ">(arg0, " +
            EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) + ").rowwise().sum();\n"
            "    }\n";
    }
    // Reduce along axis 0: one sum per column.
    else if (arg_rank == 2 && reduction_axes == AxisSet{0})
    {
        auto arg0_layout = inputs[0].get_layout<DenseTensorViewLayout>();

        TU +=
            "    {\n"
            "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(s_element_type)] +
            ">(" + to_string(inputs[0].get_index()) + ");\n"
            "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(s_element_type)] +
            ">(" + to_string(outputs[0].get_index()) + ");\n"
            "        EigenVector<" + element_type_names[TI(s_element_type)] + ">(out, "
            EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
            "            EigenMatrix<" + element_type_names[TI(s_element_type)] + ">(arg0, " +
            EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) + ").colwise().sum();\n"
            "    }\n";
    }
    else
    {
        throw ngraph_error("Sum: only vectors and matrices are currently supported");
    }
}
void Emitter::EMITTER_DECL(EmitExp)
{
    // Emits generated code for element-wise exp: out = exp(arg0), applied
    // over the flattened tensor via Eigen's 1-d array .exp().
    //
    // Fix: assert the dynamic_pointer_cast result before dereferencing it,
    // matching the validation style used by EmitSlice/EmitSum in this file.
    auto arg_tensor_view_type =
        dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type());
    assert(arg_tensor_view_type);
    const element::Type& et = arg_tensor_view_type->get_element_type();

    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "            EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").exp();\n"
        "    }\n";
}
void Emitter::EMITTER_DECL(EmitSin)
{
    // Emits generated code for element-wise sine: out = sin(arg0), applied
    // over the flattened tensor via Eigen's 1-d array .sin().
    //
    // Fix: assert the dynamic_pointer_cast result before dereferencing it,
    // matching the validation style used by EmitSlice/EmitSum in this file.
    auto arg_tensor_view_type =
        dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type());
    assert(arg_tensor_view_type);
    const element::Type& et = arg_tensor_view_type->get_element_type();

    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "            EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").sin();\n"
        "    }\n";
}
void Emitter::EMITTER_DECL(EmitSinh)
{
    // Emits generated code for element-wise hyperbolic sine: out = sinh(arg0),
    // applied over the flattened tensor via Eigen's 1-d array .sinh().
    //
    // Fix: assert the dynamic_pointer_cast result before dereferencing it,
    // matching the validation style used by EmitSlice/EmitSum in this file.
    auto arg_tensor_view_type =
        dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type());
    assert(arg_tensor_view_type);
    const element::Type& et = arg_tensor_view_type->get_element_type();

    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "            EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").sinh();\n"
        "    }\n";
}
void Emitter::EMITTER_DECL(EmitCos)
{
    // Emits generated code for element-wise cosine: out = cos(arg0), applied
    // over the flattened tensor via Eigen's 1-d array .cos().
    //
    // Fix: assert the dynamic_pointer_cast result before dereferencing it,
    // matching the validation style used by EmitSlice/EmitSum in this file.
    auto arg_tensor_view_type =
        dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type());
    assert(arg_tensor_view_type);
    const element::Type& et = arg_tensor_view_type->get_element_type();

    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "            EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").cos();\n"
        "    }\n";
}
void Emitter::EMITTER_DECL(EmitCosh)
{
    // Emits generated code for element-wise hyperbolic cosine: out = cosh(arg0),
    // applied over the flattened tensor via Eigen's 1-d array .cosh().
    //
    // Fix: assert the dynamic_pointer_cast result before dereferencing it,
    // matching the validation style used by EmitSlice/EmitSum in this file.
    auto arg_tensor_view_type =
        dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type());
    assert(arg_tensor_view_type);
    const element::Type& et = arg_tensor_view_type->get_element_type();

    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "            EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").cosh();\n"
        "    }\n";
}
void Emitter::EMITTER_DECL(EmitTan)
{
    // Emits generated code for element-wise tangent: out = tan(arg0), applied
    // over the flattened tensor via Eigen's 1-d array .tan().
    //
    // Fix: assert the dynamic_pointer_cast result before dereferencing it,
    // matching the validation style used by EmitSlice/EmitSum in this file.
    auto arg_tensor_view_type =
        dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type());
    assert(arg_tensor_view_type);
    const element::Type& et = arg_tensor_view_type->get_element_type();

    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "            EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").tan();\n"
        "    }\n";
}
void Emitter::EMITTER_DECL(EmitTanh)
{
    // Emits generated code for element-wise hyperbolic tangent over the
    // tensor's backing vector, one element at a time via std::transform.
    //
    // Fix: assert the dynamic_pointer_cast result before dereferencing it,
    // matching the validation style used by EmitSlice/EmitSum in this file.
    auto arg_tensor_view_type =
        dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type());
    assert(arg_tensor_view_type);
    const element::Type& et = arg_tensor_view_type->get_element_type();

    // Eigen's generic_fast_tanh_float<float> is currently miscompiled by Clang/LLVM
    // so we fall-back to std::tanh
    // TODO: Implement our own internal fast/approximate tanh if this actually gets used
    // by models
    TU +=
        "    {\n"
        "        auto& arg0 = call_frame->get_parameterized_tensor_view<" +
        element_type_names[TI(et)] + ">(" + to_string(inputs[0].get_index()) +
        ")->get_vector();\n"
        "        auto& out = call_frame->get_parameterized_tensor_view<" +
        element_type_names[TI(et)] + ">(" + to_string(outputs[0].get_index()) +
        ")->get_vector();\n"
        "        std::transform(arg0.begin(), arg0.end(), out.begin(), [](" +
        element_type_names[TI(et)] + "::type x) -> " + element_type_names[TI(et)] +
        "::type { return std::tanh(x); });\n"
        "    }\n";
}
void Emitter::EMITTER_DECL(EmitAsin)
{
    // Emits generated code for element-wise arcsine: out = asin(arg0), applied
    // over the flattened tensor via Eigen's 1-d array .asin().
    //
    // Fix: assert the dynamic_pointer_cast result before dereferencing it,
    // matching the validation style used by EmitSlice/EmitSum in this file.
    auto arg_tensor_view_type =
        dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type());
    assert(arg_tensor_view_type);
    const element::Type& et = arg_tensor_view_type->get_element_type();

    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "            EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").asin();\n"
        "    }\n";
}
void Emitter::EMITTER_DECL(EmitAcos)
{
    // Emits generated code for element-wise arccosine: out = acos(arg0),
    // applied over the flattened tensor via Eigen's 1-d array .acos().
    //
    // Fix: assert the dynamic_pointer_cast result before dereferencing it,
    // matching the validation style used by EmitSlice/EmitSum in this file.
    auto arg_tensor_view_type =
        dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type());
    assert(arg_tensor_view_type);
    const element::Type& et = arg_tensor_view_type->get_element_type();

    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "            EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").acos();\n"
        "    }\n";
}
void Emitter::EMITTER_DECL(EmitAtan)
{
    // Emits generated code for element-wise arctangent: out = atan(arg0),
    // applied over the flattened tensor via Eigen's 1-d array .atan().
    //
    // Fix: assert the dynamic_pointer_cast result before dereferencing it,
    // matching the validation style used by EmitSlice/EmitSum in this file.
    auto arg_tensor_view_type =
        dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type());
    assert(arg_tensor_view_type);
    const element::Type& et = arg_tensor_view_type->get_element_type();

    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" +
        to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "            EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").atan();\n"
        "    }\n";
}
...@@ -61,6 +61,7 @@ namespace ngraph ...@@ -61,6 +61,7 @@ namespace ngraph
void EMITTER_DECL(EmitLessEq); void EMITTER_DECL(EmitLessEq);
void EMITTER_DECL(EmitLog); void EMITTER_DECL(EmitLog);
void EMITTER_DECL(EmitMaximum); void EMITTER_DECL(EmitMaximum);
void EMITTER_DECL(EmitMinimum);
void EMITTER_DECL(EmitNegative); void EMITTER_DECL(EmitNegative);
void EMITTER_DECL(EmitNotEqual); void EMITTER_DECL(EmitNotEqual);
void EMITTER_DECL(EmitSelect); void EMITTER_DECL(EmitSelect);
...@@ -75,6 +76,23 @@ namespace ngraph ...@@ -75,6 +76,23 @@ namespace ngraph
void EMITTER_DECL(EmitParameterizedConstantUInt64); void EMITTER_DECL(EmitParameterizedConstantUInt64);
void EMITTER_DECL(EmitBroadcast); void EMITTER_DECL(EmitBroadcast);
void EMITTER_DECL(EmitConvert); void EMITTER_DECL(EmitConvert);
void EMITTER_DECL(EmitConstant);
void EMITTER_DECL(EmitReshape);
void EMITTER_DECL(EmitFunctionCall);
void EMITTER_DECL(EmitReduce);
void EMITTER_DECL(EmitSign);
void EMITTER_DECL(EmitSlice);
void EMITTER_DECL(EmitSum);
void EMITTER_DECL(EmitExp);
void EMITTER_DECL(EmitSin);
void EMITTER_DECL(EmitSinh);
void EMITTER_DECL(EmitCos);
void EMITTER_DECL(EmitCosh);
void EMITTER_DECL(EmitTan);
void EMITTER_DECL(EmitTanh);
void EMITTER_DECL(EmitAsin);
void EMITTER_DECL(EmitAcos);
void EMITTER_DECL(EmitAtan);
}; };
} }
} }
......
...@@ -27,14 +27,20 @@ ...@@ -27,14 +27,20 @@
#include "ngraph/function.hpp" #include "ngraph/function.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/ops/abs.hpp" #include "ngraph/ops/abs.hpp"
#include "ngraph/ops/acos.hpp"
#include "ngraph/ops/add.hpp" #include "ngraph/ops/add.hpp"
#include "ngraph/ops/asin.hpp"
#include "ngraph/ops/atan.hpp"
#include "ngraph/ops/broadcast.hpp" #include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/concatenate.hpp" #include "ngraph/ops/concatenate.hpp"
#include "ngraph/ops/constant.hpp" #include "ngraph/ops/constant.hpp"
#include "ngraph/ops/convert.hpp" #include "ngraph/ops/convert.hpp"
#include "ngraph/ops/cos.hpp"
#include "ngraph/ops/cosh.hpp"
#include "ngraph/ops/divide.hpp" #include "ngraph/ops/divide.hpp"
#include "ngraph/ops/dot.hpp" #include "ngraph/ops/dot.hpp"
#include "ngraph/ops/equal.hpp" #include "ngraph/ops/equal.hpp"
#include "ngraph/ops/exp.hpp"
#include "ngraph/ops/function_call.hpp" #include "ngraph/ops/function_call.hpp"
#include "ngraph/ops/get_tuple_element.hpp" #include "ngraph/ops/get_tuple_element.hpp"
#include "ngraph/ops/greater.hpp" #include "ngraph/ops/greater.hpp"
...@@ -43,12 +49,21 @@ ...@@ -43,12 +49,21 @@
#include "ngraph/ops/less_eq.hpp" #include "ngraph/ops/less_eq.hpp"
#include "ngraph/ops/log.hpp" #include "ngraph/ops/log.hpp"
#include "ngraph/ops/maximum.hpp" #include "ngraph/ops/maximum.hpp"
#include "ngraph/ops/minimum.hpp"
#include "ngraph/ops/multiply.hpp" #include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/negative.hpp" #include "ngraph/ops/negative.hpp"
#include "ngraph/ops/not_equal.hpp" #include "ngraph/ops/not_equal.hpp"
#include "ngraph/ops/reduce.hpp" #include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/select.hpp" #include "ngraph/ops/select.hpp"
#include "ngraph/ops/sign.hpp"
#include "ngraph/ops/sin.hpp"
#include "ngraph/ops/sinh.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/subtract.hpp" #include "ngraph/ops/subtract.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/ops/tan.hpp"
#include "ngraph/ops/tanh.hpp"
#include "ngraph/ops/tuple.hpp" #include "ngraph/ops/tuple.hpp"
#include "ngraph/pass/assign_layout.hpp" #include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/assign_tensors.hpp" #include "ngraph/pass/assign_tensors.hpp"
...@@ -84,6 +99,7 @@ static const OpMap dispatcher{ ...@@ -84,6 +99,7 @@ static const OpMap dispatcher{
{TI(ngraph::op::LessEq), &Emitter::EmitLessEq}, {TI(ngraph::op::LessEq), &Emitter::EmitLessEq},
{TI(ngraph::op::Log), &Emitter::EmitLog}, {TI(ngraph::op::Log), &Emitter::EmitLog},
{TI(ngraph::op::Maximum), &Emitter::EmitMaximum}, {TI(ngraph::op::Maximum), &Emitter::EmitMaximum},
{TI(ngraph::op::Minimum), &Emitter::EmitMinimum},
{TI(ngraph::op::Negative), &Emitter::EmitNegative}, {TI(ngraph::op::Negative), &Emitter::EmitNegative},
{TI(ngraph::op::NotEqual), &Emitter::EmitNotEqual}, {TI(ngraph::op::NotEqual), &Emitter::EmitNotEqual},
{TI(ngraph::op::Select), &Emitter::EmitSelect}, {TI(ngraph::op::Select), &Emitter::EmitSelect},
...@@ -106,6 +122,23 @@ static const OpMap dispatcher{ ...@@ -106,6 +122,23 @@ static const OpMap dispatcher{
&Emitter::EmitParameterizedConstantUInt64}, &Emitter::EmitParameterizedConstantUInt64},
{TI(ngraph::op::Broadcast), &Emitter::EmitBroadcast}, {TI(ngraph::op::Broadcast), &Emitter::EmitBroadcast},
{TI(ngraph::op::Convert), &Emitter::EmitConvert}, {TI(ngraph::op::Convert), &Emitter::EmitConvert},
{TI(ngraph::op::Constant), &Emitter::EmitConstant},
{TI(ngraph::op::Reshape), &Emitter::EmitReshape},
{TI(ngraph::op::FunctionCall), &Emitter::EmitFunctionCall},
{TI(ngraph::op::Reduce), &Emitter::EmitReduce},
{TI(ngraph::op::Sign), &Emitter::EmitSign},
{TI(ngraph::op::Slice), &Emitter::EmitSlice},
{TI(ngraph::op::Sum), &Emitter::EmitSum},
{TI(ngraph::op::Exp), &Emitter::EmitExp},
{TI(ngraph::op::Sin), &Emitter::EmitSin},
{TI(ngraph::op::Sinh), &Emitter::EmitSinh},
{TI(ngraph::op::Cos), &Emitter::EmitCos},
{TI(ngraph::op::Cosh), &Emitter::EmitCosh},
{TI(ngraph::op::Tan), &Emitter::EmitTan},
{TI(ngraph::op::Tanh), &Emitter::EmitTanh},
{TI(ngraph::op::Asin), &Emitter::EmitAsin},
{TI(ngraph::op::Acos), &Emitter::EmitAcos},
{TI(ngraph::op::Atan), &Emitter::EmitAtan},
}; };
#undef TI #undef TI
...@@ -174,7 +207,9 @@ void ExternalFunction::compile(FunctionMap& function_map) ...@@ -174,7 +207,9 @@ void ExternalFunction::compile(FunctionMap& function_map)
// Now we build the TU // Now we build the TU
Emitter emitter; Emitter emitter;
auto& TU = emitter.GetTU(); auto& TU = emitter.GetTU();
TU += R"( TU += R"(// Generated by the NGraph CPU backend
#include <algorithm>
#include <cmath>
#include <memory> #include <memory>
#include <vector> #include <vector>
...@@ -182,17 +217,18 @@ void ExternalFunction::compile(FunctionMap& function_map) ...@@ -182,17 +217,18 @@ void ExternalFunction::compile(FunctionMap& function_map)
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp" #include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
#include "ngraph/runtime/cpu/call_frame.hpp" #include "ngraph/runtime/cpu/call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/cpu/eigen_utils.hpp" #include "ngraph/runtime/cpu/eigen_utils.hpp"
#include "ngraph/runtime/tensor_view_info.hpp" #include "ngraph/runtime/utils.hpp"
void *__dso_handle = 0;
using namespace ngraph::element; using namespace ngraph::element;
using namespace ngraph::runtime; using namespace ngraph::runtime;
using namespace ngraph::runtime::cpu::eigen; using namespace ngraph::runtime::cpu::eigen;
extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame, extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
ngraph::runtime::TensorViewPtrs& tensor_views) ngraph::runtime::TensorViewPtrs& tensor_views,
const std::vector<std::shared_ptr<ngraph::runtime::cpu::CallFrame>>& callees)
{ {
)"; )";
...@@ -243,8 +279,10 @@ extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame, ...@@ -243,8 +279,10 @@ extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
assert(llvm_module); assert(llvm_module);
estate.add_module(llvm_module); estate.add_module(llvm_module);
estate.finalize(); estate.finalize();
compiled_function = estate.find_function<void( compiled_function =
ngraph::runtime::cpu::CallFrame*, ngraph::runtime::TensorViewPtrs&)>("__entrypoint"); estate.find_function<void(ngraph::runtime::cpu::CallFrame*,
ngraph::runtime::TensorViewPtrs&,
const std::vector<std::shared_ptr<CallFrame>>&)>("__entrypoint");
assert(compiled_function); assert(compiled_function);
m_is_compiled = true; m_is_compiled = true;
...@@ -322,5 +360,5 @@ shared_ptr<ngraph::runtime::CallFrame> ExternalFunction::make_call_frame() ...@@ -322,5 +360,5 @@ shared_ptr<ngraph::runtime::CallFrame> ExternalFunction::make_call_frame()
#undef M #undef M
} }
return make_shared<ngraph::runtime::cpu::CallFrame>( return make_shared<ngraph::runtime::cpu::CallFrame>(
compiled_function, m_n_outputs, m_n_inputs, temps); compiled_function, m_n_outputs, m_n_inputs, temps, callees);
} }
...@@ -47,8 +47,10 @@ namespace ngraph ...@@ -47,8 +47,10 @@ namespace ngraph
using OpMap = std::unordered_map<std::type_index, OpFunction>; using OpMap = std::unordered_map<std::type_index, OpFunction>;
using EntryPoint = std::function<void(ngraph::runtime::cpu::CallFrame*, using EntryPoint = std::function<void(
ngraph::runtime::TensorViewPtrs&)>; ngraph::runtime::cpu::CallFrame*,
ngraph::runtime::TensorViewPtrs&,
const std::vector<std::shared_ptr<ngraph::runtime::cpu::CallFrame>>&)>;
class ExternalFunction : public ngraph::runtime::ExternalFunction class ExternalFunction : public ngraph::runtime::ExternalFunction
{ {
...@@ -56,7 +58,7 @@ namespace ngraph ...@@ -56,7 +58,7 @@ namespace ngraph
ExternalFunction(const std::shared_ptr<ngraph::Function>& function, ExternalFunction(const std::shared_ptr<ngraph::Function>& function,
bool release_function = true); bool release_function = true);
std::shared_ptr<ngraph::runtime::CallFrame> make_call_frame(); std::shared_ptr<ngraph::runtime::CallFrame> make_call_frame();
std::vector<std::shared_ptr<CallFrame>>& get_callees() { return callees; }
protected: protected:
void compile(FunctionMap& function_map); void compile(FunctionMap& function_map);
...@@ -64,6 +66,7 @@ namespace ngraph ...@@ -64,6 +66,7 @@ namespace ngraph
size_t m_n_outputs; size_t m_n_outputs;
ngraph::descriptor::TensorViewPtrs m_temp_views; ngraph::descriptor::TensorViewPtrs m_temp_views;
EntryPoint compiled_function; EntryPoint compiled_function;
std::vector<std::shared_ptr<CallFrame>> callees;
}; };
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment