Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
f5768063
Unverified
Commit
f5768063
authored
Nov 07, 2017
by
Robert Kimball
Committed by
GitHub
Nov 07, 2017
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #222 from NervanaSystems/jmenon/cpu_kernels
CPU Backend: More ops and kernels
parents
69a2d4aa
792d3328
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
1084 additions
and
52 deletions
+1084
-52
CMakeLists.txt
src/ngraph/CMakeLists.txt
+17
-8
compiler.cpp
src/ngraph/codegen/compiler.cpp
+17
-2
call_frame.cpp
src/ngraph/runtime/cpu/call_frame.cpp
+5
-4
call_frame.hpp
src/ngraph/runtime/cpu/call_frame.hpp
+6
-2
cpu_kernels.cpp
src/ngraph/runtime/cpu/cpu_kernels.cpp
+15
-0
cpu_kernels.hpp
src/ngraph/runtime/cpu/cpu_kernels.hpp
+103
-0
emitter.cpp
src/ngraph/runtime/cpu/emitter.cpp
+851
-25
emitter.hpp
src/ngraph/runtime/cpu/emitter.hpp
+18
-0
external_function.cpp
src/ngraph/runtime/cpu/external_function.cpp
+46
-8
external_function.hpp
src/ngraph/runtime/cpu/external_function.hpp
+6
-3
No files found.
src/ngraph/CMakeLists.txt
View file @
f5768063
...
@@ -99,18 +99,21 @@ include_directories(
...
@@ -99,18 +99,21 @@ include_directories(
"
${
EIGEN_INCLUDE_DIR
}
"
"
${
EIGEN_INCLUDE_DIR
}
"
)
)
if
(
LLVM_INCLUDE_DIR
)
if
(
NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
MKLDNN_INCLUDE_DIR
)
find_package
(
ZLIB REQUIRED
)
find_package
(
ZLIB REQUIRED
)
include_directories
(
SYSTEM
${
LLVM_INCLUDE_DIR
}
)
include_directories
(
SYSTEM
${
LLVM_INCLUDE_DIR
}
${
MKLDNN_INCLUDE_DIR
}
)
link_directories
(
${
LLVM_LIB_DIR
}
)
link_directories
(
${
LLVM_LIB_DIR
}
${
MKLDNN_LIB_DIR
}
)
# Add sources for the CPU backend
# Add sources for the CPU backend
# and all its dependencies
# and all its dependencies
set
(
SRC
${
SRC
}
set
(
SRC
${
SRC
}
codegen/compiler.cpp
codegen/compiler.cpp
runtime/cpu/call_frame.cpp
runtime/cpu/call_frame.cpp
runtime/cpu/cpu_manager.cpp
runtime/cpu/cpu_backend.cpp
runtime/cpu/cpu_backend.cpp
runtime/cpu/cpu_manager.cpp
runtime/cpu/cpu_kernels.cpp
runtime/cpu/emitter.cpp
runtime/cpu/emitter.cpp
runtime/cpu/external_function.cpp
runtime/cpu/external_function.cpp
)
)
...
@@ -129,7 +132,7 @@ endif()
...
@@ -129,7 +132,7 @@ endif()
add_library
(
ngraph SHARED
${
SRC
}
)
add_library
(
ngraph SHARED
${
SRC
}
)
target_include_directories
(
ngraph PUBLIC
"
${
NGRAPH_INCLUDE_PATH
}
"
)
target_include_directories
(
ngraph PUBLIC
"
${
NGRAPH_INCLUDE_PATH
}
"
)
if
(
LLVM_LINK_LIBS
)
if
(
NGRAPH_CPU_ENABLE AND
LLVM_LINK_LIBS
)
target_link_libraries
(
ngraph LINK_PRIVATE
${
LLVM_LINK_LIBS
}
)
target_link_libraries
(
ngraph LINK_PRIVATE
${
LLVM_LINK_LIBS
}
)
endif
()
endif
()
...
@@ -137,8 +140,10 @@ if (APPLE)
...
@@ -137,8 +140,10 @@ if (APPLE)
set_property
(
TARGET ngraph PROPERTY PREFIX
"lib"
)
set_property
(
TARGET ngraph PROPERTY PREFIX
"lib"
)
set_property
(
TARGET ngraph PROPERTY OUTPUT_NAME
"ngraph.so"
)
set_property
(
TARGET ngraph PROPERTY OUTPUT_NAME
"ngraph.so"
)
set_property
(
TARGET ngraph PROPERTY SUFFIX
""
)
set_property
(
TARGET ngraph PROPERTY SUFFIX
""
)
else
()
endif
()
include_directories
(
"
${
MKLDNN_INCLUDE_DIR
}
"
)
if
(
NGRAPH_CPU_ENABLE AND MKLDNN_LIB_DIR
)
target_link_libraries
(
ngraph LINK_PRIVATE mkldnn
)
endif
()
endif
()
#-----------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------
...
@@ -178,6 +183,10 @@ endif()
...
@@ -178,6 +183,10 @@ endif()
add_dependencies
(
ngraph eigen
)
add_dependencies
(
ngraph eigen
)
if
(
N
OT LLVM_PACKAGED
AND LLVM_INCLUDE_DIR
)
if
(
N
GRAPH_CPU_ENABLE
AND LLVM_INCLUDE_DIR
)
add_dependencies
(
ngraph ext_llvm
)
add_dependencies
(
ngraph ext_llvm
)
endif
()
endif
()
if
(
NGRAPH_CPU_ENABLE AND MKLDNN_INCLUDE_DIR
)
add_dependencies
(
ngraph ext_mkldnn
)
endif
()
src/ngraph/codegen/compiler.cpp
View file @
f5768063
...
@@ -145,10 +145,19 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con
...
@@ -145,10 +145,19 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con
LO
->
OpenMP
=
1
;
LO
->
OpenMP
=
1
;
LO
->
OpenMPUseTLS
=
1
;
LO
->
OpenMPUseTLS
=
1
;
// CodeGen options
auto
&
CGO
=
Clang
->
getInvocation
().
getCodeGenOpts
();
CGO
.
OptimizationLevel
=
3
;
CGO
.
RelocationModel
=
"static"
;
CGO
.
ThreadModel
=
"posix"
;
CGO
.
FloatABI
=
"hard"
;
CGO
.
OmitLeafFramePointer
=
1
;
CGO
.
VectorizeLoop
=
1
;
CGO
.
VectorizeSLP
=
1
;
CGO
.
CXAAtExit
=
0
;
if
(
debuginfo_enabled
)
if
(
debuginfo_enabled
)
{
{
// CodeGen options
auto
&
CGO
=
Clang
->
getInvocation
().
getCodeGenOpts
();
CGO
.
setDebugInfo
(
codegenoptions
::
FullDebugInfo
);
CGO
.
setDebugInfo
(
codegenoptions
::
FullDebugInfo
);
}
}
...
@@ -163,6 +172,12 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con
...
@@ -163,6 +172,12 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con
// Enable various target features
// Enable various target features
// Most of these are for Eigen
// Most of these are for Eigen
auto
&
TO
=
Clang
->
getInvocation
().
getTargetOpts
();
auto
&
TO
=
Clang
->
getInvocation
().
getTargetOpts
();
// TODO: This needs to be configurable and selected carefully
TO
.
CPU
=
"broadwell"
;
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse2"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse3"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+ssse3"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse4.1"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse4.1"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse4.2"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse4.2"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+avx"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+avx"
);
...
...
src/ngraph/runtime/cpu/call_frame.cpp
View file @
f5768063
...
@@ -22,12 +22,13 @@ using namespace ngraph::runtime::cpu;
...
@@ -22,12 +22,13 @@ using namespace ngraph::runtime::cpu;
CallFrame
::
CallFrame
(
EntryPoint
compiled_function
,
CallFrame
::
CallFrame
(
EntryPoint
compiled_function
,
size_t
n_outputs
,
size_t
n_outputs
,
size_t
n_inputs
,
size_t
n_inputs
,
const
TensorViewPtrs
&
temps
)
const
TensorViewPtrs
&
temps
,
const
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>&
callees
)
:
m_n_outputs
(
n_outputs
)
:
m_n_outputs
(
n_outputs
)
,
m_n_inputs
(
n_inputs
)
,
m_n_inputs
(
n_inputs
)
,
m_tensor_views
(
n_
inputs
+
n_out
puts
+
temps
.
size
())
,
m_tensor_views
(
n_
outputs
+
n_in
puts
+
temps
.
size
())
,
m_compiled_function
(
compiled_function
)
,
m_compiled_function
(
compiled_function
)
,
m_callees
(
callees
)
{
{
copy
(
temps
.
begin
(),
temps
.
end
(),
m_tensor_views
.
begin
()
+
m_n_outputs
+
m_n_inputs
);
copy
(
temps
.
begin
(),
temps
.
end
(),
m_tensor_views
.
begin
()
+
m_n_outputs
+
m_n_inputs
);
}
}
...
@@ -40,7 +41,7 @@ void CallFrame::tensor_call(
...
@@ -40,7 +41,7 @@ void CallFrame::tensor_call(
copy
(
inputs
.
begin
(),
inputs
.
end
(),
m_tensor_views
.
begin
()
+
m_n_outputs
);
copy
(
inputs
.
begin
(),
inputs
.
end
(),
m_tensor_views
.
begin
()
+
m_n_outputs
);
// Invoke compiled computation
// Invoke compiled computation
m_compiled_function
(
this
,
m_tensor_views
);
m_compiled_function
(
this
,
m_tensor_views
,
m_callees
);
// Don't hold onto inputs/outputs
// Don't hold onto inputs/outputs
fill_n
(
m_tensor_views
.
begin
(),
m_n_outputs
+
m_n_inputs
,
nullptr
);
fill_n
(
m_tensor_views
.
begin
(),
m_n_outputs
+
m_n_inputs
,
nullptr
);
...
...
src/ngraph/runtime/cpu/call_frame.hpp
View file @
f5768063
...
@@ -31,8 +31,10 @@ namespace ngraph
...
@@ -31,8 +31,10 @@ namespace ngraph
namespace
cpu
namespace
cpu
{
{
class
CallFrame
;
class
CallFrame
;
using
EntryPoint
=
std
::
function
<
void
(
ngraph
::
runtime
::
cpu
::
CallFrame
*
,
using
EntryPoint
=
std
::
function
<
void
(
ngraph
::
runtime
::
cpu
::
CallFrame
*
,
ngraph
::
runtime
::
TensorViewPtrs
&
)
>
;
ngraph
::
runtime
::
TensorViewPtrs
&
,
const
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>&
)
>
;
// Compile and execute graphs
// Compile and execute graphs
class
CallFrame
:
public
ngraph
::
runtime
::
CallFrame
class
CallFrame
:
public
ngraph
::
runtime
::
CallFrame
...
@@ -41,7 +43,8 @@ namespace ngraph
...
@@ -41,7 +43,8 @@ namespace ngraph
CallFrame
(
EntryPoint
compiled_function
,
CallFrame
(
EntryPoint
compiled_function
,
size_t
n_outputs
,
size_t
n_outputs
,
size_t
n_inputs
,
size_t
n_inputs
,
const
TensorViewPtrs
&
temps
);
const
TensorViewPtrs
&
temps
,
const
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>&
callees
);
/// @brief Invoke the function with values matching the signature of the function.
/// @brief Invoke the function with values matching the signature of the function.
///
///
...
@@ -73,6 +76,7 @@ namespace ngraph
...
@@ -73,6 +76,7 @@ namespace ngraph
TensorViewPtrs
m_tensor_views
;
TensorViewPtrs
m_tensor_views
;
bool
m_return
;
bool
m_return
;
EntryPoint
m_compiled_function
;
EntryPoint
m_compiled_function
;
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>
m_callees
;
};
};
}
}
}
}
...
...
src/ngraph/runtime/cpu/cpu_kernels.cpp
0 → 100644
View file @
f5768063
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
src/ngraph/runtime/cpu/cpu_kernels.hpp
0 → 100644
View file @
f5768063
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#pragma once
#include "ngraph/types/element_type.hpp"
// CBLAS types and wrappers
namespace
cblas
{
enum
class
Layout
{
RowMajor
=
101
,
ColMajor
=
102
};
enum
class
Transpose
{
None
=
111
,
Transpose
=
112
,
ConjTrans
=
113
};
enum
class
UpperLower
{
Upper
=
121
,
Lower
=
122
};
enum
class
Diag
{
NonUnit
=
131
,
Unit
=
132
};
enum
class
Side
{
Left
=
141
,
Right
=
142
};
enum
class
Storage
{
Packed
=
151
};
enum
class
Ident
{
AMatrix
=
161
,
BMatrix
=
162
};
enum
class
Offset
{
RowOffset
=
171
,
ColOffset
=
172
,
FixOffset
=
173
};
extern
"C"
{
void
cblas_sgemm
(
const
Layout
layout
,
const
Transpose
TransA
,
const
Transpose
TransB
,
const
ngraph
::
element
::
Int64
::
type
M
,
const
ngraph
::
element
::
Int64
::
type
N
,
const
ngraph
::
element
::
Int64
::
type
K
,
const
ngraph
::
element
::
Float32
::
type
alpha
,
const
ngraph
::
element
::
Float32
::
type
*
A
,
const
ngraph
::
element
::
Int64
::
type
lda
,
const
ngraph
::
element
::
Float32
::
type
*
B
,
const
ngraph
::
element
::
Int64
::
type
ldb
,
const
ngraph
::
element
::
Float32
::
type
beta
,
ngraph
::
element
::
Float32
::
type
*
C
,
const
ngraph
::
element
::
Int64
::
type
ldc
);
}
}
namespace
mkl
{
extern
"C"
{
void
MKL_Somatcopy
(
char
ordering
,
char
trans
,
size_t
rows
,
size_t
cols
,
const
ngraph
::
element
::
Float32
::
type
alpha
,
const
ngraph
::
element
::
Float32
::
type
*
A
,
size_t
lda
,
ngraph
::
element
::
Float32
::
type
*
B
,
size_t
ldb
);
}
}
src/ngraph/runtime/cpu/emitter.cpp
View file @
f5768063
...
@@ -12,6 +12,7 @@
...
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
#include <algorithm>
#include <iostream>
#include <iostream>
#include <string>
#include <string>
#include <typeindex>
#include <typeindex>
...
@@ -23,7 +24,13 @@
...
@@ -23,7 +24,13 @@
#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/concatenate.hpp"
#include "ngraph/ops/concatenate.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/function_call.hpp"
#include "ngraph/ops/get_tuple_element.hpp"
#include "ngraph/ops/get_tuple_element.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/runtime/cpu/call_frame.hpp"
#include "ngraph/runtime/cpu/emitter.hpp"
#include "ngraph/runtime/cpu/emitter.hpp"
#include "ngraph/runtime/cpu/external_function.hpp"
#include "ngraph/runtime/cpu/external_function.hpp"
#include "ngraph/runtime/tensor_view_info.hpp"
#include "ngraph/runtime/tensor_view_info.hpp"
...
@@ -177,30 +184,58 @@ void Emitter::EMITTER_DECL(EmitDot)
...
@@ -177,30 +184,58 @@ void Emitter::EMITTER_DECL(EmitDot)
auto
arg1_layout
=
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
();
auto
arg1_layout
=
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
();
auto
out_layout
=
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
();
auto
out_layout
=
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
();
TU
+=
// Emit an MKL SGEMM call if possible
" {
\n
"
// clang-format off
" auto arg0 = call_frame->get_tensor_view_data<"
+
if
(
arg0_element_type
==
ngraph
::
element
::
Float32
::
element_type
())
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
{
");
\n
"
TU
+=
" auto arg1 = call_frame->get_tensor_view_data<"
+
" {
\n
"
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
inputs
[
1
].
get_index
())
+
" auto arg0 = call_frame->get_tensor_view_data<"
+
");
\n
"
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
" auto out = call_frame->get_tensor_view_data<"
+
");
\n
"
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
" auto arg1 = call_frame->get_tensor_view_data<"
+
");
\n
"
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
inputs
[
1
].
get_index
())
+
" EigenMatrix<"
+
");
\n
"
element_type_names
[
TI
(
arg0_element_type
)]
+
">(out, "
+
" auto out = call_frame->get_tensor_view_data<"
+
EIGEN_MATRIX_FORMAT
(
out_layout
->
get_shape
(),
out_layout
->
get_strides
())
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
") =
\n
"
");
\n
"
" EigenMatrix<"
+
" cblas::cblas_sgemm(cblas::Layout::RowMajor, cblas::Transpose::None, cblas::Transpose::None, "
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">(arg0, "
+
to_string
(
arg0_shape
[
0
])
+
", "
+
to_string
(
arg1_shape
[
1
])
+
", "
+
to_string
(
arg0_shape
[
1
])
+
",
\n
"
EIGEN_MATRIX_FORMAT
(
arg0_layout
->
get_shape
(),
arg0_layout
->
get_strides
())
+
" 1.0f, arg0, "
+
to_string
(
max
(
1UL
,
arg0_shape
[
1
]))
+
", arg1, "
+
to_string
(
max
(
1UL
,
arg1_shape
[
1
]))
+
", 0.0f,
\n
"
") * "
" out, "
+
to_string
(
max
(
1UL
,
arg1_shape
[
1
]))
+
");
\n
"
"EigenMatrix<"
+
" }
\n
"
;
element_type_names
[
TI
(
arg0_element_type
)]
+
">(arg1, "
+
}
EIGEN_MATRIX_FORMAT
(
arg1_layout
->
get_shape
(),
arg1_layout
->
get_strides
())
+
// clang-format on
");
\n
"
else
" }
\n
"
;
{
TU
+=
" {
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto arg1 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
inputs
[
1
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenMatrix<"
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">(out, "
+
EIGEN_MATRIX_FORMAT
(
out_layout
->
get_shape
(),
out_layout
->
get_strides
())
+
") =
\n
"
" EigenMatrix<"
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">(arg0, "
+
EIGEN_MATRIX_FORMAT
(
arg0_layout
->
get_shape
(),
arg0_layout
->
get_strides
())
+
") * "
"EigenMatrix<"
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">(arg1, "
+
EIGEN_MATRIX_FORMAT
(
arg1_layout
->
get_shape
(),
arg1_layout
->
get_strides
())
+
");
\n
"
" }
\n
"
;
}
}
}
else
else
{
{
...
@@ -501,7 +536,26 @@ void Emitter::EMITTER_DECL(EmitMaximum)
...
@@ -501,7 +536,26 @@ void Emitter::EMITTER_DECL(EmitMaximum)
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(out, "
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(arg0, "
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(arg0, "
EIGEN_VECTOR_FORMAT
(
inputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
").max("
EIGEN_VECTOR_FORMAT
(
inputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
").max(
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(arg1, "
EIGEN_VECTOR_FORMAT
(
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
"));
\n
"
" }
\n
"
;
}
void
Emitter
::
EMITTER_DECL
(
EmitMinimum
)
{
const
element
::
Type
&
et
=
(
dynamic_pointer_cast
<
const
TensorViewType
>
(
n
->
get_arguments
().
at
(
0
)
->
get_value_type
()))
->
get_element_type
();
TU
+=
" {
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
et
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto arg1 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
et
)]
+
">("
+
to_string
(
inputs
[
1
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
et
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(arg0, "
EIGEN_VECTOR_FORMAT
(
inputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
").min(
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(arg1, "
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(arg1, "
EIGEN_VECTOR_FORMAT
(
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
"));
\n
"
EIGEN_VECTOR_FORMAT
(
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
"));
\n
"
" }
\n
"
;
" }
\n
"
;
...
@@ -882,3 +936,775 @@ void Emitter::EMITTER_DECL(EmitConvert)
...
@@ -882,3 +936,775 @@ void Emitter::EMITTER_DECL(EmitConvert)
".template cast<typename "
+
element_type_names
[
TI
(
result_element_type
)]
+
"::type>();
\n
"
".template cast<typename "
+
element_type_names
[
TI
(
result_element_type
)]
+
"::type>();
\n
"
" }
\n
"
;
" }
\n
"
;
}
}
void
Emitter
::
EMITTER_DECL
(
EmitConstant
)
{
auto
c
=
static_cast
<
const
op
::
Constant
*>
(
n
);
auto
c_tensor_type
=
dynamic_pointer_cast
<
const
TensorViewType
>
(
c
->
get_value_type
());
assert
(
c_tensor_type
);
auto
&
c_element_type
=
c_tensor_type
->
get_element_type
();
auto
c_value_strings
=
c
->
get_value_strings
();
TU
+=
" {
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
c_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
")->get_vector() = std::vector<"
+
element_type_names
[
TI
(
c_element_type
)]
+
"::type>{"
;
for
(
size_t
i
=
0
;
i
<
c_value_strings
.
size
();
i
++
)
{
if
(
i
)
TU
+=
", "
;
TU
+=
c_value_strings
[
i
];
}
TU
+=
"};
\n
}
\n
"
;
}
void
Emitter
::
EMITTER_DECL
(
EmitReshape
)
{
auto
reshape
=
static_cast
<
const
op
::
Reshape
*>
(
n
);
auto
arg_type
=
reshape
->
get_arguments
().
at
(
0
)
->
get_value_type
();
auto
arg_tensor_view_type
=
dynamic_pointer_cast
<
const
TensorViewType
>
(
arg_type
);
assert
(
arg_tensor_view_type
);
auto
arg_shape
=
arg_tensor_view_type
->
get_shape
();
auto
arg_rank
=
arg_shape
.
size
();
auto
result_type
=
reshape
->
get_value_type
();
auto
result_tensor_view_type
=
dynamic_pointer_cast
<
const
TensorViewType
>
(
result_type
);
assert
(
result_tensor_view_type
);
auto
result_shape
=
result_tensor_view_type
->
get_shape
();
auto
&
result_element_type
=
result_tensor_view_type
->
get_element_type
();
auto
input_order
=
reshape
->
get_input_order
();
bool
same_layout
=
std
::
is_sorted
(
input_order
.
begin
(),
input_order
.
end
());
size_t
result_shape_product
=
1
;
for
(
auto
i
:
result_shape
)
{
result_shape_product
*=
i
;
}
// If there is no layout change or we are just going from 1^n to 1^m or a zero-size tensor, we can just copy.
if
(
same_layout
||
result_shape_product
<
2
)
{
TU
+=
" {
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">("
+
to_string
(
outputs
.
at
(
0
).
get_index
())
+
")->get_vector() =
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">("
+
to_string
(
inputs
.
at
(
0
).
get_index
())
+
")->get_vector();
\n
"
" }
\n
"
;
}
// If there *is* a layout change in the 2D case, we transpose the input.
else
if
(
arg_rank
==
2
)
{
auto
arg0_layout
=
inputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
();
auto
out_layout
=
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
();
// Emit an MKL transpose call if possible
// clang-format off
if
(
result_element_type
==
ngraph
::
element
::
Float32
::
element_type
())
{
TU
+=
" {
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" mkl::MKL_Somatcopy('R', 'T', "
+
to_string
(
arg_shape
[
0
])
+
",
\n
"
" "
+
to_string
(
arg_shape
[
1
])
+
", 1.0f,
\n
"
" arg0, "
+
to_string
(
arg_shape
[
1
])
+
",
\n
"
" out, "
+
to_string
(
arg_shape
[
0
])
+
");
\n
"
" }
\n
"
;
}
// clang-format on
else
{
TU
+=
" {
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenMatrix<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">(out, "
+
EIGEN_MATRIX_FORMAT
(
out_layout
->
get_shape
(),
out_layout
->
get_strides
())
+
") =
\n
"
" EigenMatrix<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">(arg0, "
+
EIGEN_MATRIX_FORMAT
(
arg0_layout
->
get_shape
(),
arg0_layout
->
get_strides
())
+
").transpose();
\n
"
" }
\n
"
;
}
}
// Other cases (reordering of axes for tensors with rank>2) are not handled yet.
else
{
throw
ngraph_error
(
"Axis permutation in reshape is not implemented yet for tensors with rank>2"
);
}
}
void
Emitter
::
EMITTER_DECL
(
EmitFunctionCall
)
{
auto
function_call
=
static_cast
<
const
op
::
FunctionCall
*>
(
n
);
auto
function
=
function_call
->
get_function
();
std
::
shared_ptr
<
ExternalFunction
>
external
;
try
{
external
=
function_map
.
at
(
function
);
}
catch
(
const
std
::
out_of_range
)
{
external
=
make_shared
<
ExternalFunction
>
(
function
);
function_map
.
insert
({
function
,
external
});
}
std
::
shared_ptr
<
CallFrame
>
cf
=
std
::
dynamic_pointer_cast
<
CallFrame
>
(
external
->
make_call_frame
());
ef
->
get_callees
().
emplace_back
(
cf
);
TU
+=
" {
\n
"
" auto cf = callees.at("
+
to_string
(
ef
->
get_callees
().
size
()
-
1
)
+
");
\n
"
" std::vector<std::shared_ptr<ngraph::runtime::Value>> inputs;
\n
"
" std::vector<std::shared_ptr<ngraph::runtime::Value>> outputs;
\n
"
;
for
(
const
auto
&
in
:
inputs
)
{
TU
+=
" inputs.emplace_back(call_frame->get_tensor_view("
+
to_string
(
in
.
get_index
())
+
"));
\n
"
;
}
for
(
const
auto
&
out
:
outputs
)
{
TU
+=
" outputs.emplace_back(call_frame->get_tensor_view("
+
to_string
(
out
.
get_index
())
+
"));
\n
"
;
}
TU
+=
" (*cf)(inputs, outputs);
\n
"
" }
\n
"
;
}
// TODO: This and other ops include comments/notes that
// we don't want to just copy-paste here. Figure out a better way
// or just point to ngvm/external_function.cpp with a note that
// the compiled version of these ops is intended to have semantics identical
// to what's seen there (for now atleast)
void
Emitter
::
EMITTER_DECL
(
EmitReduce
)
{
auto
reduce
=
static_cast
<
const
op
::
Reduce
*>
(
n
);
auto
reduction_function
=
reduce
->
get_reduction_function
();
std
::
shared_ptr
<
ExternalFunction
>
external
;
try
{
external
=
function_map
.
at
(
reduction_function
);
}
catch
(
const
std
::
out_of_range
)
{
external
=
make_shared
<
ExternalFunction
>
(
reduction_function
);
function_map
.
insert
({
reduction_function
,
external
});
}
auto
reductee_type
=
reduce
->
get_arguments
().
at
(
0
)
->
get_value_type
();
auto
reductee_tensor_view_type
=
dynamic_pointer_cast
<
const
TensorViewType
>
(
reductee_type
);
assert
(
reductee_tensor_view_type
);
auto
reductee_shape
=
reductee_tensor_view_type
->
get_shape
();
auto
f_result_type
=
reduction_function
->
get_result_type
();
auto
f_result_tensor_view_type
=
dynamic_pointer_cast
<
const
TensorViewType
>
(
f_result_type
);
assert
(
f_result_tensor_view_type
);
auto
&
f_result_element_type
=
f_result_tensor_view_type
->
get_element_type
();
auto
result_type
=
reduce
->
get_value_type
();
auto
result_tensor_view_type
=
dynamic_pointer_cast
<
const
TensorViewType
>
(
result_type
);
assert
(
result_tensor_view_type
);
auto
result_shape
=
result_tensor_view_type
->
get_shape
();
auto
&
reduction_axes
=
reduce
->
get_reduction_axes
();
auto
arg0_layout
=
inputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
();
// Trivial case: no reduction axes (this includes the scalar-reductee case).
if
(
reduction_axes
.
empty
())
{
TU
+=
" {
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
.
at
(
0
).
get_index
())
+
")->get_vector() =
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
.
at
(
0
).
get_index
())
+
")->get_vector();
\n
"
" }
\n
"
;
}
// Behavior for zero-size axes bears some explanation here. XLA's reduce
// operator provides an "base" element (usually, but not necessarily,
// an identity element) that it apparently *may* choose to insert anywhere
// in the reduction any number of times. For example, given:
//
// reduce{{1,2,3},b,+)
//
// any of the following are valid reductions (I think!):
//
// b+(b+1+2)+3
// b+(1+(2+3))
// (1+2)+3 (I think!)
//
// etc. Here we will choose never to instantiate the base element, which
// works well with Eigen's default behavior for non-zero-length axes. The
// exceptional case is when we reduce on a zero-length axis. In this case,
// Eigen's default behavior is to put a zero in the output, which is not
// what we want, so we detect that case here and override with a copy
// instruction (for reduce-to-scalar) or a broadcast (for reduce-to-vector)
// from the base element.
//
// What I'm actually not sure about is whether the identity element is
// required to appear at least once. If so, this will need to be reworked,
// assuming we actually want to mimic XLA's semantics that closely, which
// we may not.
else
if
((
reductee_shape
.
size
()
==
1
&&
reduction_axes
==
AxisSet
{
0
})
||
(
reductee_shape
.
size
()
==
2
&&
reduction_axes
==
AxisSet
{
0
,
1
}))
{
if
(
reductee_shape
.
at
(
0
)
==
0
||
(
reductee_shape
.
size
()
==
2
&&
reductee_shape
.
at
(
1
)
==
0
))
{
TU
+=
" {
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
.
at
(
0
).
get_index
())
+
")->get_vector() =
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
.
at
(
1
).
get_index
())
+
")->get_vector();
\n
"
" }
\n
"
;
}
else
{
std
::
shared_ptr
<
CallFrame
>
cf
=
std
::
dynamic_pointer_cast
<
CallFrame
>
(
external
->
make_call_frame
());
ef
->
get_callees
().
emplace_back
(
cf
);
TU
+=
" {
\n
"
" using ET = "
+
element_type_names
[
TI
(
f_result_element_type
)]
+
";
\n
"
" auto cf = callees.at("
+
to_string
(
ef
->
get_callees
().
size
()
-
1
)
+
");
\n
"
" auto f = [cf](typename ET::type x, typename ET::type y) -> typename ET::type {
\n
"
" auto tx = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" *tx = std::vector<typename ET::type>({x});
\n
"
" auto ty = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" *ty = std::vector<typename ET::type>({y});
\n
"
" auto tr = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" (*cf)({tx, ty}, {tr});
\n
"
" return tr->get_vector()[0];
\n
"
" };
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(arg0, "
EIGEN_VECTOR_FORMAT
(
inputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
").redux(f);
\n
"
" }
\n
"
;
}
}
else
if
(
reductee_shape
.
size
()
==
2
&&
reduction_axes
==
AxisSet
{
1
})
{
if
(
reductee_shape
.
at
(
1
)
==
0
)
{
TU
+=
" {
\n
"
" auto arg1 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
[
1
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(arg1, "
EIGEN_VECTOR_FORMAT
(
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
")(0, 0);
\n
"
" }
\n
"
;
}
else
{
std
::
shared_ptr
<
CallFrame
>
cf
=
std
::
dynamic_pointer_cast
<
CallFrame
>
(
external
->
make_call_frame
());
ef
->
get_callees
().
emplace_back
(
cf
);
TU
+=
" {
\n
"
" using ET = "
+
element_type_names
[
TI
(
f_result_element_type
)]
+
";
\n
"
" auto cf = callees.at("
+
to_string
(
ef
->
get_callees
().
size
()
-
1
)
+
");
\n
"
" auto f = [cf](typename ET::type x, typename ET::type y) -> typename ET::type {
\n
"
" auto tx = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" *tx = std::vector<typename ET::type>({x});
\n
"
" auto ty = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" *ty = std::vector<typename ET::type>({y});
\n
"
" auto tr = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" (*cf)({tx, ty}, {tr});
\n
"
" return tr->get_vector()[0];
\n
"
" };
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenVector<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenMatrix<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(arg0, "
+
EIGEN_MATRIX_FORMAT
(
arg0_layout
->
get_shape
(),
arg0_layout
->
get_strides
())
+
").rowwise().redux(f);
\n
"
" }
\n
"
;
}
}
else
if
(
reductee_shape
.
size
()
==
2
&&
reduction_axes
==
AxisSet
{
0
})
{
if
(
reductee_shape
.
at
(
0
)
==
0
)
{
TU
+=
" {
\n
"
" auto arg1 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
[
1
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(arg1, "
EIGEN_VECTOR_FORMAT
(
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
")(0, 0);
\n
"
" }
\n
"
;
}
else
{
std
::
shared_ptr
<
CallFrame
>
cf
=
std
::
dynamic_pointer_cast
<
CallFrame
>
(
external
->
make_call_frame
());
ef
->
get_callees
().
emplace_back
(
cf
);
TU
+=
" {
\n
"
" using ET = "
+
element_type_names
[
TI
(
f_result_element_type
)]
+
";
\n
"
" auto cf = callees.at("
+
to_string
(
ef
->
get_callees
().
size
()
-
1
)
+
");
\n
"
" auto f = [cf](typename ET::type x, typename ET::type y) -> typename ET::type {
\n
"
" auto tx = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" *tx = std::vector<typename ET::type>({x});
\n
"
" auto ty = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" *ty = std::vector<typename ET::type>({y});
\n
"
" auto tr = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" (*cf)({tx, ty}, {tr});
\n
"
" return tr->get_vector()[0];
\n
"
" };
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenVector<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenMatrix<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(arg0, "
+
EIGEN_MATRIX_FORMAT
(
arg0_layout
->
get_shape
(),
arg0_layout
->
get_strides
())
+
").colwise().redux(f);
\n
"
" }
\n
"
;
}
}
else
{
throw
ngraph_error
(
"Reduce: only vectors and matrices are currently supported"
);
}
}
// Emits code for the Sign op: element-wise sign of the input tensor.
void Emitter::EMITTER_DECL(EmitSign)
{
    // Element type of the sole input; the op is emitted as an element-wise
    // Eigen array expression over the flattened tensors.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    const auto& etn = element_type_names[TI(et)];
    const auto arg0_index = to_string(inputs[0].get_index());
    const auto out_index = to_string(outputs[0].get_index());
    const auto arg0_size = inputs[0].get_layout<DenseTensorViewLayout>()->get_size();
    const auto out_size = outputs[0].get_layout<DenseTensorViewLayout>()->get_size();

    // Emit a scoped body that maps both tensors as flat 1-D Eigen arrays and
    // assigns arg0.sign() to out.
    TU += " {\n";
    TU += " auto arg0 = call_frame->get_tensor_view_data<" + etn + ">(" + arg0_index + ");\n";
    TU += " auto out = call_frame->get_tensor_view_data<" + etn + ">(" + out_index + ");\n";
    TU += " EigenArray1d<" + etn + ">(out, " EIGEN_VECTOR_FORMAT(out_size) ") =\n"
          " EigenArray1d<" + etn + ">(arg0, " EIGEN_VECTOR_FORMAT(arg0_size) ").sign();\n";
    TU += " }\n";
}
// Emits code for the Slice op.  Only unit step is supported; the emitted
// Eigen expression depends on the input rank:
//   rank 0 -> whole-tensor copy, rank 1 -> vector .segment(),
//   rank 2 -> matrix .block().  Higher ranks are rejected.
void Emitter::EMITTER_DECL(EmitSlice)
{
    auto slice = static_cast<const op::Slice*>(n);

    // Non-unit strides are not implemented by any of the branches below.
    for (auto d : slice->get_step())
    {
        if (1 != d)
        {
            throw ngraph_error("Slice does not support non-unit step yet");
        }
    }

    // Shape, rank and element type of the tensor being sliced.
    auto arg_type = slice->get_arguments().at(0)->get_value_type();
    auto arg_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(arg_type);
    assert(arg_tensor_view_type);
    auto arg_shape = arg_tensor_view_type->get_shape();
    auto arg_rank = arg_shape.size();
    auto& arg_element_type = arg_tensor_view_type->get_element_type();

    // Per-axis slice window: [lower_bounds[i], upper_bounds[i]).
    auto& lower_bounds = slice->get_lower_bounds();
    auto& upper_bounds = slice->get_upper_bounds();

    // Scalar slice is necessarily just a copy.
    if (arg_rank == 0)
    {
        TU += " {\n"
              " call_frame->get_parameterized_tensor_view<" + element_type_names[TI(arg_element_type)] + ">(" + to_string(outputs.at(0).get_index()) + ")->get_vector() =\n"
              " call_frame->get_parameterized_tensor_view<" + element_type_names[TI(arg_element_type)] + ">(" + to_string(inputs.at(0).get_index()) + ")->get_vector();\n"
              " }\n";
    }
    // Vector slice: contiguous segment starting at lower_bounds[0].
    else if (arg_rank == 1)
    {
        TU += " {\n"
              " auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(arg_element_type)] + ">(" + to_string(inputs[0].get_index()) + ");\n"
              " auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(arg_element_type)] + ">(" + to_string(outputs[0].get_index()) + ");\n"
              " EigenVector<" + element_type_names[TI(arg_element_type)] + ">(out, " EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
              " EigenVector<" + element_type_names[TI(arg_element_type)] + ">(arg0, " EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").segment(\n"
              " " + to_string(lower_bounds[0]) + ", " + to_string(upper_bounds[0] - lower_bounds[0]) + ");\n"
              " }\n";
    }
    // Matrix slice: rectangular block at (lower_bounds[0], lower_bounds[1])
    // with extents (upper - lower) in each dimension.
    else if (arg_rank == 2)
    {
        auto arg0_layout = inputs[0].get_layout<DenseTensorViewLayout>();
        auto out_layout = outputs[0].get_layout<DenseTensorViewLayout>();

        TU += " {\n"
              " auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(arg_element_type)] + ">(" + to_string(inputs[0].get_index()) + ");\n"
              " auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(arg_element_type)] + ">(" + to_string(outputs[0].get_index()) + ");\n"
              " EigenMatrix<" + element_type_names[TI(arg_element_type)] + ">(out, " + EIGEN_MATRIX_FORMAT(out_layout->get_shape(), out_layout->get_strides()) + ") =\n"
              " EigenMatrix<" + element_type_names[TI(arg_element_type)] + ">(arg0, " + EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) + ").block(" + to_string(lower_bounds[0]) + ", " + to_string(lower_bounds[1]) + ",\n"
              " " + to_string(upper_bounds[0] - lower_bounds[0]) + ",\n"
              " " + to_string(upper_bounds[1] - lower_bounds[1]) + ");\n"
              " }\n";
    }
    // Other cases (reordering of axes for tensors with rank>2) are not handled yet.
    else
    {
        throw ngraph_error("Slice is not implemented yet for tensors with rank>2");
    }
}
// Emits code for the Sum op: reduces the input tensor by addition over the
// requested axes.  Supported cases: no axes (copy), full reduction of a
// vector/matrix to a scalar, and per-row / per-column matrix reductions.
// Anything beyond rank 2 is rejected.
void Emitter::EMITTER_DECL(EmitSum)
{
    auto s = static_cast<const op::Sum*>(n);
    // Result element type/shape come from the Sum node's own value type.
    auto s_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(s->get_value_type());
    assert(s_tensor_view_type);
    auto& s_element_type = s_tensor_view_type->get_element_type();
    // NOTE(review): s_shape is currently unused below.
    auto s_shape = s_tensor_view_type->get_shape();

    // Shape/rank of the tensor being reduced.
    auto arg = s->get_arguments().at(0);
    auto arg_type = arg->get_value_type();
    auto arg_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(arg_type);
    assert(arg_tensor_view_type);
    auto arg_shape = arg_tensor_view_type->get_shape();
    auto arg_rank = arg_shape.size();

    auto& reduction_axes = s->get_reduction_axes();

    // Trivial case: no reduction axes.
    if (reduction_axes.size() == 0)
    {
        // Nothing to sum over: the result is a straight copy of the input.
        TU += " {\n"
              " call_frame->get_parameterized_tensor_view<" + element_type_names[TI(s_element_type)] + ">(" + to_string(outputs.at(0).get_index()) + ")->get_vector() =\n"
              " call_frame->get_parameterized_tensor_view<" + element_type_names[TI(s_element_type)] + ">(" + to_string(inputs.at(0).get_index()) + ")->get_vector();\n"
              " }\n";
    }
    // Full reduction? Then sum to scalar.
    else if ((arg_rank == 1 && reduction_axes == AxisSet{0}) ||
             (arg_rank == 2 && reduction_axes == AxisSet{0, 1}))
    {
        TU += " {\n"
              " auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(s_element_type)] + ">(" + to_string(inputs[0].get_index()) + ");\n"
              " auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(s_element_type)] + ">(" + to_string(outputs[0].get_index()) + ");\n"
              " EigenArray1d<" + element_type_names[TI(s_element_type)] + ">(out, " EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
              " EigenArray1d<" + element_type_names[TI(s_element_type)] + ">(arg0, " EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").sum();\n"
              " }\n";
    }
    // Matrix reduced along axis 1: one sum per row.
    else if (arg_rank == 2 && reduction_axes == AxisSet{1})
    {
        auto arg0_layout = inputs[0].get_layout<DenseTensorViewLayout>();

        TU += " {\n"
              " auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(s_element_type)] + ">(" + to_string(inputs[0].get_index()) + ");\n"
              " auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(s_element_type)] + ">(" + to_string(outputs[0].get_index()) + ");\n"
              " EigenVector<" + element_type_names[TI(s_element_type)] + ">(out, " EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
              " EigenMatrix<" + element_type_names[TI(s_element_type)] + ">(arg0, " + EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) + ").rowwise().sum();\n"
              " }\n";
    }
    // Matrix reduced along axis 0: one sum per column.
    else if (arg_rank == 2 && reduction_axes == AxisSet{0})
    {
        auto arg0_layout = inputs[0].get_layout<DenseTensorViewLayout>();

        TU += " {\n"
              " auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(s_element_type)] + ">(" + to_string(inputs[0].get_index()) + ");\n"
              " auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(s_element_type)] + ">(" + to_string(outputs[0].get_index()) + ");\n"
              " EigenVector<" + element_type_names[TI(s_element_type)] + ">(out, " EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
              " EigenMatrix<" + element_type_names[TI(s_element_type)] + ">(arg0, " + EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) + ").colwise().sum();\n"
              " }\n";
    }
    else
    {
        throw ngraph_error("Sum: only vectors and matrices are currently supported");
    }
}
// Emits code for the Exp op: element-wise exponential of the input tensor.
void Emitter::EMITTER_DECL(EmitExp)
{
    // Element type of the sole input; the op is emitted as an element-wise
    // Eigen array expression over the flattened tensors.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    const auto& etn = element_type_names[TI(et)];
    const auto arg0_index = to_string(inputs[0].get_index());
    const auto out_index = to_string(outputs[0].get_index());
    const auto arg0_size = inputs[0].get_layout<DenseTensorViewLayout>()->get_size();
    const auto out_size = outputs[0].get_layout<DenseTensorViewLayout>()->get_size();

    // Emit: map input/output as flat 1-D Eigen arrays, assign arg0.exp() to out.
    TU += " {\n";
    TU += " auto arg0 = call_frame->get_tensor_view_data<" + etn + ">(" + arg0_index + ");\n";
    TU += " auto out = call_frame->get_tensor_view_data<" + etn + ">(" + out_index + ");\n";
    TU += " EigenArray1d<" + etn + ">(out, " EIGEN_VECTOR_FORMAT(out_size) ") =\n"
          " EigenArray1d<" + etn + ">(arg0, " EIGEN_VECTOR_FORMAT(arg0_size) ").exp();\n";
    TU += " }\n";
}
// Emits code for the Sin op: element-wise sine of the input tensor.
void Emitter::EMITTER_DECL(EmitSin)
{
    // Element type of the sole input; the op is emitted as an element-wise
    // Eigen array expression over the flattened tensors.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    const auto& etn = element_type_names[TI(et)];
    const auto arg0_index = to_string(inputs[0].get_index());
    const auto out_index = to_string(outputs[0].get_index());
    const auto arg0_size = inputs[0].get_layout<DenseTensorViewLayout>()->get_size();
    const auto out_size = outputs[0].get_layout<DenseTensorViewLayout>()->get_size();

    // Emit: map input/output as flat 1-D Eigen arrays, assign arg0.sin() to out.
    TU += " {\n";
    TU += " auto arg0 = call_frame->get_tensor_view_data<" + etn + ">(" + arg0_index + ");\n";
    TU += " auto out = call_frame->get_tensor_view_data<" + etn + ">(" + out_index + ");\n";
    TU += " EigenArray1d<" + etn + ">(out, " EIGEN_VECTOR_FORMAT(out_size) ") =\n"
          " EigenArray1d<" + etn + ">(arg0, " EIGEN_VECTOR_FORMAT(arg0_size) ").sin();\n";
    TU += " }\n";
}
// Emits code for the Sinh op: element-wise hyperbolic sine of the input tensor.
void Emitter::EMITTER_DECL(EmitSinh)
{
    // Element type of the sole input; the op is emitted as an element-wise
    // Eigen array expression over the flattened tensors.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    const auto& etn = element_type_names[TI(et)];
    const auto arg0_index = to_string(inputs[0].get_index());
    const auto out_index = to_string(outputs[0].get_index());
    const auto arg0_size = inputs[0].get_layout<DenseTensorViewLayout>()->get_size();
    const auto out_size = outputs[0].get_layout<DenseTensorViewLayout>()->get_size();

    // Emit: map input/output as flat 1-D Eigen arrays, assign arg0.sinh() to out.
    TU += " {\n";
    TU += " auto arg0 = call_frame->get_tensor_view_data<" + etn + ">(" + arg0_index + ");\n";
    TU += " auto out = call_frame->get_tensor_view_data<" + etn + ">(" + out_index + ");\n";
    TU += " EigenArray1d<" + etn + ">(out, " EIGEN_VECTOR_FORMAT(out_size) ") =\n"
          " EigenArray1d<" + etn + ">(arg0, " EIGEN_VECTOR_FORMAT(arg0_size) ").sinh();\n";
    TU += " }\n";
}
// Emits code for the Cos op: element-wise cosine of the input tensor.
void Emitter::EMITTER_DECL(EmitCos)
{
    // Element type of the sole input; the op is emitted as an element-wise
    // Eigen array expression over the flattened tensors.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    const auto& etn = element_type_names[TI(et)];
    const auto arg0_index = to_string(inputs[0].get_index());
    const auto out_index = to_string(outputs[0].get_index());
    const auto arg0_size = inputs[0].get_layout<DenseTensorViewLayout>()->get_size();
    const auto out_size = outputs[0].get_layout<DenseTensorViewLayout>()->get_size();

    // Emit: map input/output as flat 1-D Eigen arrays, assign arg0.cos() to out.
    TU += " {\n";
    TU += " auto arg0 = call_frame->get_tensor_view_data<" + etn + ">(" + arg0_index + ");\n";
    TU += " auto out = call_frame->get_tensor_view_data<" + etn + ">(" + out_index + ");\n";
    TU += " EigenArray1d<" + etn + ">(out, " EIGEN_VECTOR_FORMAT(out_size) ") =\n"
          " EigenArray1d<" + etn + ">(arg0, " EIGEN_VECTOR_FORMAT(arg0_size) ").cos();\n";
    TU += " }\n";
}
// Emits code for the Cosh op: element-wise hyperbolic cosine of the input tensor.
void Emitter::EMITTER_DECL(EmitCosh)
{
    // Element type of the sole input; the op is emitted as an element-wise
    // Eigen array expression over the flattened tensors.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    const auto& etn = element_type_names[TI(et)];
    const auto arg0_index = to_string(inputs[0].get_index());
    const auto out_index = to_string(outputs[0].get_index());
    const auto arg0_size = inputs[0].get_layout<DenseTensorViewLayout>()->get_size();
    const auto out_size = outputs[0].get_layout<DenseTensorViewLayout>()->get_size();

    // Emit: map input/output as flat 1-D Eigen arrays, assign arg0.cosh() to out.
    TU += " {\n";
    TU += " auto arg0 = call_frame->get_tensor_view_data<" + etn + ">(" + arg0_index + ");\n";
    TU += " auto out = call_frame->get_tensor_view_data<" + etn + ">(" + out_index + ");\n";
    TU += " EigenArray1d<" + etn + ">(out, " EIGEN_VECTOR_FORMAT(out_size) ") =\n"
          " EigenArray1d<" + etn + ">(arg0, " EIGEN_VECTOR_FORMAT(arg0_size) ").cosh();\n";
    TU += " }\n";
}
// Emits code for the Tan op: element-wise tangent of the input tensor.
void Emitter::EMITTER_DECL(EmitTan)
{
    // Element type of the sole input; the op is emitted as an element-wise
    // Eigen array expression over the flattened tensors.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    const auto& etn = element_type_names[TI(et)];
    const auto arg0_index = to_string(inputs[0].get_index());
    const auto out_index = to_string(outputs[0].get_index());
    const auto arg0_size = inputs[0].get_layout<DenseTensorViewLayout>()->get_size();
    const auto out_size = outputs[0].get_layout<DenseTensorViewLayout>()->get_size();

    // Emit: map input/output as flat 1-D Eigen arrays, assign arg0.tan() to out.
    TU += " {\n";
    TU += " auto arg0 = call_frame->get_tensor_view_data<" + etn + ">(" + arg0_index + ");\n";
    TU += " auto out = call_frame->get_tensor_view_data<" + etn + ">(" + out_index + ");\n";
    TU += " EigenArray1d<" + etn + ">(out, " EIGEN_VECTOR_FORMAT(out_size) ") =\n"
          " EigenArray1d<" + etn + ">(arg0, " EIGEN_VECTOR_FORMAT(arg0_size) ").tan();\n";
    TU += " }\n";
}
// Emits code for the Tanh op: element-wise hyperbolic tangent of the input
// tensor.  Unlike the other unary ops this does NOT use an Eigen array
// expression (see rationale below); it emits a std::transform over the
// underlying vectors instead.
void Emitter::EMITTER_DECL(EmitTanh)
{
    // Element type of the (single) input tensor.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    // Eigen's generic_fast_tanh_float<float> is currently miscompiled by Clang/LLVM
    // so we fall-back to std::tanh
    // TODO: Implement our own internal fast/approximate tanh if this actually gets used
    // by models
    TU += " {\n"
          " auto& arg0 = call_frame->get_parameterized_tensor_view<" + element_type_names[TI(et)] + ">(" + to_string(inputs[0].get_index()) + ")->get_vector();\n"
          " auto& out = call_frame->get_parameterized_tensor_view<" + element_type_names[TI(et)] + ">(" + to_string(outputs[0].get_index()) + ")->get_vector();\n"
          " std::transform(arg0.begin(), arg0.end(), out.begin(), [](" + element_type_names[TI(et)] + "::type x) -> " + element_type_names[TI(et)] + "::type { return std::tanh(x); });\n"
          " }\n";
}
// Emits code for the Asin op: element-wise arcsine of the input tensor.
void Emitter::EMITTER_DECL(EmitAsin)
{
    // Element type of the sole input; the op is emitted as an element-wise
    // Eigen array expression over the flattened tensors.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    const auto& etn = element_type_names[TI(et)];
    const auto arg0_index = to_string(inputs[0].get_index());
    const auto out_index = to_string(outputs[0].get_index());
    const auto arg0_size = inputs[0].get_layout<DenseTensorViewLayout>()->get_size();
    const auto out_size = outputs[0].get_layout<DenseTensorViewLayout>()->get_size();

    // Emit: map input/output as flat 1-D Eigen arrays, assign arg0.asin() to out.
    TU += " {\n";
    TU += " auto arg0 = call_frame->get_tensor_view_data<" + etn + ">(" + arg0_index + ");\n";
    TU += " auto out = call_frame->get_tensor_view_data<" + etn + ">(" + out_index + ");\n";
    TU += " EigenArray1d<" + etn + ">(out, " EIGEN_VECTOR_FORMAT(out_size) ") =\n"
          " EigenArray1d<" + etn + ">(arg0, " EIGEN_VECTOR_FORMAT(arg0_size) ").asin();\n";
    TU += " }\n";
}
// Emits code for the Acos op: element-wise arccosine of the input tensor.
void Emitter::EMITTER_DECL(EmitAcos)
{
    // Element type of the sole input; the op is emitted as an element-wise
    // Eigen array expression over the flattened tensors.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    const auto& etn = element_type_names[TI(et)];
    const auto arg0_index = to_string(inputs[0].get_index());
    const auto out_index = to_string(outputs[0].get_index());
    const auto arg0_size = inputs[0].get_layout<DenseTensorViewLayout>()->get_size();
    const auto out_size = outputs[0].get_layout<DenseTensorViewLayout>()->get_size();

    // Emit: map input/output as flat 1-D Eigen arrays, assign arg0.acos() to out.
    TU += " {\n";
    TU += " auto arg0 = call_frame->get_tensor_view_data<" + etn + ">(" + arg0_index + ");\n";
    TU += " auto out = call_frame->get_tensor_view_data<" + etn + ">(" + out_index + ");\n";
    TU += " EigenArray1d<" + etn + ">(out, " EIGEN_VECTOR_FORMAT(out_size) ") =\n"
          " EigenArray1d<" + etn + ">(arg0, " EIGEN_VECTOR_FORMAT(arg0_size) ").acos();\n";
    TU += " }\n";
}
// Emits code for the Atan op: element-wise arctangent of the input tensor.
void Emitter::EMITTER_DECL(EmitAtan)
{
    // Element type of the sole input; the op is emitted as an element-wise
    // Eigen array expression over the flattened tensors.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    const auto& etn = element_type_names[TI(et)];
    const auto arg0_index = to_string(inputs[0].get_index());
    const auto out_index = to_string(outputs[0].get_index());
    const auto arg0_size = inputs[0].get_layout<DenseTensorViewLayout>()->get_size();
    const auto out_size = outputs[0].get_layout<DenseTensorViewLayout>()->get_size();

    // Emit: map input/output as flat 1-D Eigen arrays, assign arg0.atan() to out.
    TU += " {\n";
    TU += " auto arg0 = call_frame->get_tensor_view_data<" + etn + ">(" + arg0_index + ");\n";
    TU += " auto out = call_frame->get_tensor_view_data<" + etn + ">(" + out_index + ");\n";
    TU += " EigenArray1d<" + etn + ">(out, " EIGEN_VECTOR_FORMAT(out_size) ") =\n"
          " EigenArray1d<" + etn + ">(arg0, " EIGEN_VECTOR_FORMAT(arg0_size) ").atan();\n";
    TU += " }\n";
}
src/ngraph/runtime/cpu/emitter.hpp
View file @
f5768063
...
@@ -61,6 +61,7 @@ namespace ngraph
...
@@ -61,6 +61,7 @@ namespace ngraph
void
EMITTER_DECL
(
EmitLessEq
);
void
EMITTER_DECL
(
EmitLessEq
);
void
EMITTER_DECL
(
EmitLog
);
void
EMITTER_DECL
(
EmitLog
);
void
EMITTER_DECL
(
EmitMaximum
);
void
EMITTER_DECL
(
EmitMaximum
);
void
EMITTER_DECL
(
EmitMinimum
);
void
EMITTER_DECL
(
EmitNegative
);
void
EMITTER_DECL
(
EmitNegative
);
void
EMITTER_DECL
(
EmitNotEqual
);
void
EMITTER_DECL
(
EmitNotEqual
);
void
EMITTER_DECL
(
EmitSelect
);
void
EMITTER_DECL
(
EmitSelect
);
...
@@ -75,6 +76,23 @@ namespace ngraph
...
@@ -75,6 +76,23 @@ namespace ngraph
void
EMITTER_DECL
(
EmitParameterizedConstantUInt64
);
void
EMITTER_DECL
(
EmitParameterizedConstantUInt64
);
void
EMITTER_DECL
(
EmitBroadcast
);
void
EMITTER_DECL
(
EmitBroadcast
);
void
EMITTER_DECL
(
EmitConvert
);
void
EMITTER_DECL
(
EmitConvert
);
void
EMITTER_DECL
(
EmitConstant
);
void
EMITTER_DECL
(
EmitReshape
);
void
EMITTER_DECL
(
EmitFunctionCall
);
void
EMITTER_DECL
(
EmitReduce
);
void
EMITTER_DECL
(
EmitSign
);
void
EMITTER_DECL
(
EmitSlice
);
void
EMITTER_DECL
(
EmitSum
);
void
EMITTER_DECL
(
EmitExp
);
void
EMITTER_DECL
(
EmitSin
);
void
EMITTER_DECL
(
EmitSinh
);
void
EMITTER_DECL
(
EmitCos
);
void
EMITTER_DECL
(
EmitCosh
);
void
EMITTER_DECL
(
EmitTan
);
void
EMITTER_DECL
(
EmitTanh
);
void
EMITTER_DECL
(
EmitAsin
);
void
EMITTER_DECL
(
EmitAcos
);
void
EMITTER_DECL
(
EmitAtan
);
};
};
}
}
}
}
...
...
src/ngraph/runtime/cpu/external_function.cpp
View file @
f5768063
...
@@ -27,14 +27,20 @@
...
@@ -27,14 +27,20 @@
#include "ngraph/function.hpp"
#include "ngraph/function.hpp"
#include "ngraph/node.hpp"
#include "ngraph/node.hpp"
#include "ngraph/ops/abs.hpp"
#include "ngraph/ops/abs.hpp"
#include "ngraph/ops/acos.hpp"
#include "ngraph/ops/add.hpp"
#include "ngraph/ops/add.hpp"
#include "ngraph/ops/asin.hpp"
#include "ngraph/ops/atan.hpp"
#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/concatenate.hpp"
#include "ngraph/ops/concatenate.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/convert.hpp"
#include "ngraph/ops/convert.hpp"
#include "ngraph/ops/cos.hpp"
#include "ngraph/ops/cosh.hpp"
#include "ngraph/ops/divide.hpp"
#include "ngraph/ops/divide.hpp"
#include "ngraph/ops/dot.hpp"
#include "ngraph/ops/dot.hpp"
#include "ngraph/ops/equal.hpp"
#include "ngraph/ops/equal.hpp"
#include "ngraph/ops/exp.hpp"
#include "ngraph/ops/function_call.hpp"
#include "ngraph/ops/function_call.hpp"
#include "ngraph/ops/get_tuple_element.hpp"
#include "ngraph/ops/get_tuple_element.hpp"
#include "ngraph/ops/greater.hpp"
#include "ngraph/ops/greater.hpp"
...
@@ -43,12 +49,21 @@
...
@@ -43,12 +49,21 @@
#include "ngraph/ops/less_eq.hpp"
#include "ngraph/ops/less_eq.hpp"
#include "ngraph/ops/log.hpp"
#include "ngraph/ops/log.hpp"
#include "ngraph/ops/maximum.hpp"
#include "ngraph/ops/maximum.hpp"
#include "ngraph/ops/minimum.hpp"
#include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/negative.hpp"
#include "ngraph/ops/negative.hpp"
#include "ngraph/ops/not_equal.hpp"
#include "ngraph/ops/not_equal.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/select.hpp"
#include "ngraph/ops/select.hpp"
#include "ngraph/ops/sign.hpp"
#include "ngraph/ops/sin.hpp"
#include "ngraph/ops/sinh.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/subtract.hpp"
#include "ngraph/ops/subtract.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/ops/tan.hpp"
#include "ngraph/ops/tanh.hpp"
#include "ngraph/ops/tuple.hpp"
#include "ngraph/ops/tuple.hpp"
#include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/assign_tensors.hpp"
#include "ngraph/pass/assign_tensors.hpp"
...
@@ -84,6 +99,7 @@ static const OpMap dispatcher{
...
@@ -84,6 +99,7 @@ static const OpMap dispatcher{
{
TI
(
ngraph
::
op
::
LessEq
),
&
Emitter
::
EmitLessEq
},
{
TI
(
ngraph
::
op
::
LessEq
),
&
Emitter
::
EmitLessEq
},
{
TI
(
ngraph
::
op
::
Log
),
&
Emitter
::
EmitLog
},
{
TI
(
ngraph
::
op
::
Log
),
&
Emitter
::
EmitLog
},
{
TI
(
ngraph
::
op
::
Maximum
),
&
Emitter
::
EmitMaximum
},
{
TI
(
ngraph
::
op
::
Maximum
),
&
Emitter
::
EmitMaximum
},
{
TI
(
ngraph
::
op
::
Minimum
),
&
Emitter
::
EmitMinimum
},
{
TI
(
ngraph
::
op
::
Negative
),
&
Emitter
::
EmitNegative
},
{
TI
(
ngraph
::
op
::
Negative
),
&
Emitter
::
EmitNegative
},
{
TI
(
ngraph
::
op
::
NotEqual
),
&
Emitter
::
EmitNotEqual
},
{
TI
(
ngraph
::
op
::
NotEqual
),
&
Emitter
::
EmitNotEqual
},
{
TI
(
ngraph
::
op
::
Select
),
&
Emitter
::
EmitSelect
},
{
TI
(
ngraph
::
op
::
Select
),
&
Emitter
::
EmitSelect
},
...
@@ -106,6 +122,23 @@ static const OpMap dispatcher{
...
@@ -106,6 +122,23 @@ static const OpMap dispatcher{
&
Emitter
::
EmitParameterizedConstantUInt64
},
&
Emitter
::
EmitParameterizedConstantUInt64
},
{
TI
(
ngraph
::
op
::
Broadcast
),
&
Emitter
::
EmitBroadcast
},
{
TI
(
ngraph
::
op
::
Broadcast
),
&
Emitter
::
EmitBroadcast
},
{
TI
(
ngraph
::
op
::
Convert
),
&
Emitter
::
EmitConvert
},
{
TI
(
ngraph
::
op
::
Convert
),
&
Emitter
::
EmitConvert
},
{
TI
(
ngraph
::
op
::
Constant
),
&
Emitter
::
EmitConstant
},
{
TI
(
ngraph
::
op
::
Reshape
),
&
Emitter
::
EmitReshape
},
{
TI
(
ngraph
::
op
::
FunctionCall
),
&
Emitter
::
EmitFunctionCall
},
{
TI
(
ngraph
::
op
::
Reduce
),
&
Emitter
::
EmitReduce
},
{
TI
(
ngraph
::
op
::
Sign
),
&
Emitter
::
EmitSign
},
{
TI
(
ngraph
::
op
::
Slice
),
&
Emitter
::
EmitSlice
},
{
TI
(
ngraph
::
op
::
Sum
),
&
Emitter
::
EmitSum
},
{
TI
(
ngraph
::
op
::
Exp
),
&
Emitter
::
EmitExp
},
{
TI
(
ngraph
::
op
::
Sin
),
&
Emitter
::
EmitSin
},
{
TI
(
ngraph
::
op
::
Sinh
),
&
Emitter
::
EmitSinh
},
{
TI
(
ngraph
::
op
::
Cos
),
&
Emitter
::
EmitCos
},
{
TI
(
ngraph
::
op
::
Cosh
),
&
Emitter
::
EmitCosh
},
{
TI
(
ngraph
::
op
::
Tan
),
&
Emitter
::
EmitTan
},
{
TI
(
ngraph
::
op
::
Tanh
),
&
Emitter
::
EmitTanh
},
{
TI
(
ngraph
::
op
::
Asin
),
&
Emitter
::
EmitAsin
},
{
TI
(
ngraph
::
op
::
Acos
),
&
Emitter
::
EmitAcos
},
{
TI
(
ngraph
::
op
::
Atan
),
&
Emitter
::
EmitAtan
},
};
};
#undef TI
#undef TI
...
@@ -174,7 +207,9 @@ void ExternalFunction::compile(FunctionMap& function_map)
...
@@ -174,7 +207,9 @@ void ExternalFunction::compile(FunctionMap& function_map)
// Now we build the TU
// Now we build the TU
Emitter
emitter
;
Emitter
emitter
;
auto
&
TU
=
emitter
.
GetTU
();
auto
&
TU
=
emitter
.
GetTU
();
TU
+=
R"(
TU
+=
R"(// Generated by the NGraph CPU backend
#include <algorithm>
#include <cmath>
#include <memory>
#include <memory>
#include <vector>
#include <vector>
...
@@ -182,17 +217,18 @@ void ExternalFunction::compile(FunctionMap& function_map)
...
@@ -182,17 +217,18 @@ void ExternalFunction::compile(FunctionMap& function_map)
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
#include "ngraph/runtime/cpu/call_frame.hpp"
#include "ngraph/runtime/cpu/call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/cpu/eigen_utils.hpp"
#include "ngraph/runtime/cpu/eigen_utils.hpp"
#include "ngraph/runtime/tensor_view_info.hpp"
#include "ngraph/runtime/utils.hpp"
void *__dso_handle = 0;
using namespace ngraph::element;
using namespace ngraph::element;
using namespace ngraph::runtime;
using namespace ngraph::runtime;
using namespace ngraph::runtime::cpu::eigen;
using namespace ngraph::runtime::cpu::eigen;
extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
ngraph::runtime::TensorViewPtrs& tensor_views)
ngraph::runtime::TensorViewPtrs& tensor_views,
const std::vector<std::shared_ptr<ngraph::runtime::cpu::CallFrame>>& callees)
{
{
)"
;
)"
;
...
@@ -243,8 +279,10 @@ extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
...
@@ -243,8 +279,10 @@ extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
assert
(
llvm_module
);
assert
(
llvm_module
);
estate
.
add_module
(
llvm_module
);
estate
.
add_module
(
llvm_module
);
estate
.
finalize
();
estate
.
finalize
();
compiled_function
=
estate
.
find_function
<
void
(
compiled_function
=
ngraph
::
runtime
::
cpu
::
CallFrame
*
,
ngraph
::
runtime
::
TensorViewPtrs
&
)
>
(
"__entrypoint"
);
estate
.
find_function
<
void
(
ngraph
::
runtime
::
cpu
::
CallFrame
*
,
ngraph
::
runtime
::
TensorViewPtrs
&
,
const
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>&
)
>
(
"__entrypoint"
);
assert
(
compiled_function
);
assert
(
compiled_function
);
m_is_compiled
=
true
;
m_is_compiled
=
true
;
...
@@ -322,5 +360,5 @@ shared_ptr<ngraph::runtime::CallFrame> ExternalFunction::make_call_frame()
...
@@ -322,5 +360,5 @@ shared_ptr<ngraph::runtime::CallFrame> ExternalFunction::make_call_frame()
#undef M
#undef M
}
}
return
make_shared
<
ngraph
::
runtime
::
cpu
::
CallFrame
>
(
return
make_shared
<
ngraph
::
runtime
::
cpu
::
CallFrame
>
(
compiled_function
,
m_n_outputs
,
m_n_inputs
,
temps
);
compiled_function
,
m_n_outputs
,
m_n_inputs
,
temps
,
callees
);
}
}
src/ngraph/runtime/cpu/external_function.hpp
View file @
f5768063
...
@@ -47,8 +47,10 @@ namespace ngraph
...
@@ -47,8 +47,10 @@ namespace ngraph
using
OpMap
=
std
::
unordered_map
<
std
::
type_index
,
OpFunction
>
;
using
OpMap
=
std
::
unordered_map
<
std
::
type_index
,
OpFunction
>
;
using
EntryPoint
=
std
::
function
<
void
(
ngraph
::
runtime
::
cpu
::
CallFrame
*
,
using
EntryPoint
=
std
::
function
<
void
(
ngraph
::
runtime
::
TensorViewPtrs
&
)
>
;
ngraph
::
runtime
::
cpu
::
CallFrame
*
,
ngraph
::
runtime
::
TensorViewPtrs
&
,
const
std
::
vector
<
std
::
shared_ptr
<
ngraph
::
runtime
::
cpu
::
CallFrame
>>&
)
>
;
class
ExternalFunction
:
public
ngraph
::
runtime
::
ExternalFunction
class
ExternalFunction
:
public
ngraph
::
runtime
::
ExternalFunction
{
{
...
@@ -56,7 +58,7 @@ namespace ngraph
...
@@ -56,7 +58,7 @@ namespace ngraph
ExternalFunction
(
const
std
::
shared_ptr
<
ngraph
::
Function
>&
function
,
ExternalFunction
(
const
std
::
shared_ptr
<
ngraph
::
Function
>&
function
,
bool
release_function
=
true
);
bool
release_function
=
true
);
std
::
shared_ptr
<
ngraph
::
runtime
::
CallFrame
>
make_call_frame
();
std
::
shared_ptr
<
ngraph
::
runtime
::
CallFrame
>
make_call_frame
();
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>&
get_callees
()
{
return
callees
;
}
protected
:
protected
:
void
compile
(
FunctionMap
&
function_map
);
void
compile
(
FunctionMap
&
function_map
);
...
@@ -64,6 +66,7 @@ namespace ngraph
...
@@ -64,6 +66,7 @@ namespace ngraph
size_t
m_n_outputs
;
size_t
m_n_outputs
;
ngraph
::
descriptor
::
TensorViewPtrs
m_temp_views
;
ngraph
::
descriptor
::
TensorViewPtrs
m_temp_views
;
EntryPoint
compiled_function
;
EntryPoint
compiled_function
;
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>
callees
;
};
};
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment