Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
f5768063
Unverified
Commit
f5768063
authored
7 years ago
by
Robert Kimball
Committed by
GitHub
7 years ago
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #222 from NervanaSystems/jmenon/cpu_kernels
CPU Backend: More ops and kernels
parents
69a2d4aa
792d3328
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
1084 additions
and
52 deletions
+1084
-52
CMakeLists.txt
src/ngraph/CMakeLists.txt
+17
-8
compiler.cpp
src/ngraph/codegen/compiler.cpp
+17
-2
call_frame.cpp
src/ngraph/runtime/cpu/call_frame.cpp
+5
-4
call_frame.hpp
src/ngraph/runtime/cpu/call_frame.hpp
+6
-2
cpu_kernels.cpp
src/ngraph/runtime/cpu/cpu_kernels.cpp
+15
-0
cpu_kernels.hpp
src/ngraph/runtime/cpu/cpu_kernels.hpp
+103
-0
emitter.cpp
src/ngraph/runtime/cpu/emitter.cpp
+851
-25
emitter.hpp
src/ngraph/runtime/cpu/emitter.hpp
+18
-0
external_function.cpp
src/ngraph/runtime/cpu/external_function.cpp
+46
-8
external_function.hpp
src/ngraph/runtime/cpu/external_function.hpp
+6
-3
No files found.
src/ngraph/CMakeLists.txt
View file @
f5768063
...
@@ -99,18 +99,21 @@ include_directories(
...
@@ -99,18 +99,21 @@ include_directories(
"
${
EIGEN_INCLUDE_DIR
}
"
"
${
EIGEN_INCLUDE_DIR
}
"
)
)
if
(
LLVM_INCLUDE_DIR
)
if
(
NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
MKLDNN_INCLUDE_DIR
)
find_package
(
ZLIB REQUIRED
)
find_package
(
ZLIB REQUIRED
)
include_directories
(
SYSTEM
${
LLVM_INCLUDE_DIR
}
)
include_directories
(
SYSTEM
${
LLVM_INCLUDE_DIR
}
${
MKLDNN_INCLUDE_DIR
}
)
link_directories
(
${
LLVM_LIB_DIR
}
)
link_directories
(
${
LLVM_LIB_DIR
}
${
MKLDNN_LIB_DIR
}
)
# Add sources for the CPU backend
# Add sources for the CPU backend
# and all its dependencies
# and all its dependencies
set
(
SRC
${
SRC
}
set
(
SRC
${
SRC
}
codegen/compiler.cpp
codegen/compiler.cpp
runtime/cpu/call_frame.cpp
runtime/cpu/call_frame.cpp
runtime/cpu/cpu_manager.cpp
runtime/cpu/cpu_backend.cpp
runtime/cpu/cpu_backend.cpp
runtime/cpu/cpu_manager.cpp
runtime/cpu/cpu_kernels.cpp
runtime/cpu/emitter.cpp
runtime/cpu/emitter.cpp
runtime/cpu/external_function.cpp
runtime/cpu/external_function.cpp
)
)
...
@@ -129,7 +132,7 @@ endif()
...
@@ -129,7 +132,7 @@ endif()
add_library
(
ngraph SHARED
${
SRC
}
)
add_library
(
ngraph SHARED
${
SRC
}
)
target_include_directories
(
ngraph PUBLIC
"
${
NGRAPH_INCLUDE_PATH
}
"
)
target_include_directories
(
ngraph PUBLIC
"
${
NGRAPH_INCLUDE_PATH
}
"
)
if
(
LLVM_LINK_LIBS
)
if
(
NGRAPH_CPU_ENABLE AND
LLVM_LINK_LIBS
)
target_link_libraries
(
ngraph LINK_PRIVATE
${
LLVM_LINK_LIBS
}
)
target_link_libraries
(
ngraph LINK_PRIVATE
${
LLVM_LINK_LIBS
}
)
endif
()
endif
()
...
@@ -137,8 +140,10 @@ if (APPLE)
...
@@ -137,8 +140,10 @@ if (APPLE)
set_property
(
TARGET ngraph PROPERTY PREFIX
"lib"
)
set_property
(
TARGET ngraph PROPERTY PREFIX
"lib"
)
set_property
(
TARGET ngraph PROPERTY OUTPUT_NAME
"ngraph.so"
)
set_property
(
TARGET ngraph PROPERTY OUTPUT_NAME
"ngraph.so"
)
set_property
(
TARGET ngraph PROPERTY SUFFIX
""
)
set_property
(
TARGET ngraph PROPERTY SUFFIX
""
)
else
()
endif
()
include_directories
(
"
${
MKLDNN_INCLUDE_DIR
}
"
)
if
(
NGRAPH_CPU_ENABLE AND MKLDNN_LIB_DIR
)
target_link_libraries
(
ngraph LINK_PRIVATE mkldnn
)
endif
()
endif
()
#-----------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------
...
@@ -178,6 +183,10 @@ endif()
...
@@ -178,6 +183,10 @@ endif()
add_dependencies
(
ngraph eigen
)
add_dependencies
(
ngraph eigen
)
if
(
N
OT LLVM_PACKAGED
AND LLVM_INCLUDE_DIR
)
if
(
N
GRAPH_CPU_ENABLE
AND LLVM_INCLUDE_DIR
)
add_dependencies
(
ngraph ext_llvm
)
add_dependencies
(
ngraph ext_llvm
)
endif
()
endif
()
if
(
NGRAPH_CPU_ENABLE AND MKLDNN_INCLUDE_DIR
)
add_dependencies
(
ngraph ext_mkldnn
)
endif
()
This diff is collapsed.
Click to expand it.
src/ngraph/codegen/compiler.cpp
View file @
f5768063
...
@@ -145,10 +145,19 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con
...
@@ -145,10 +145,19 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con
LO
->
OpenMP
=
1
;
LO
->
OpenMP
=
1
;
LO
->
OpenMPUseTLS
=
1
;
LO
->
OpenMPUseTLS
=
1
;
// CodeGen options
auto
&
CGO
=
Clang
->
getInvocation
().
getCodeGenOpts
();
CGO
.
OptimizationLevel
=
3
;
CGO
.
RelocationModel
=
"static"
;
CGO
.
ThreadModel
=
"posix"
;
CGO
.
FloatABI
=
"hard"
;
CGO
.
OmitLeafFramePointer
=
1
;
CGO
.
VectorizeLoop
=
1
;
CGO
.
VectorizeSLP
=
1
;
CGO
.
CXAAtExit
=
0
;
if
(
debuginfo_enabled
)
if
(
debuginfo_enabled
)
{
{
// CodeGen options
auto
&
CGO
=
Clang
->
getInvocation
().
getCodeGenOpts
();
CGO
.
setDebugInfo
(
codegenoptions
::
FullDebugInfo
);
CGO
.
setDebugInfo
(
codegenoptions
::
FullDebugInfo
);
}
}
...
@@ -163,6 +172,12 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con
...
@@ -163,6 +172,12 @@ std::unique_ptr<llvm::Module> execution_state::compile(const string& source, con
// Enable various target features
// Enable various target features
// Most of these are for Eigen
// Most of these are for Eigen
auto
&
TO
=
Clang
->
getInvocation
().
getTargetOpts
();
auto
&
TO
=
Clang
->
getInvocation
().
getTargetOpts
();
// TODO: This needs to be configurable and selected carefully
TO
.
CPU
=
"broadwell"
;
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse2"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse3"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+ssse3"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse4.1"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse4.1"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse4.2"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+sse4.2"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+avx"
);
TO
.
FeaturesAsWritten
.
emplace_back
(
"+avx"
);
...
...
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/cpu/call_frame.cpp
View file @
f5768063
...
@@ -22,12 +22,13 @@ using namespace ngraph::runtime::cpu;
...
@@ -22,12 +22,13 @@ using namespace ngraph::runtime::cpu;
CallFrame
::
CallFrame
(
EntryPoint
compiled_function
,
CallFrame
::
CallFrame
(
EntryPoint
compiled_function
,
size_t
n_outputs
,
size_t
n_outputs
,
size_t
n_inputs
,
size_t
n_inputs
,
const
TensorViewPtrs
&
temps
)
const
TensorViewPtrs
&
temps
,
const
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>&
callees
)
:
m_n_outputs
(
n_outputs
)
:
m_n_outputs
(
n_outputs
)
,
m_n_inputs
(
n_inputs
)
,
m_n_inputs
(
n_inputs
)
,
m_tensor_views
(
n_
inputs
+
n_out
puts
+
temps
.
size
())
,
m_tensor_views
(
n_
outputs
+
n_in
puts
+
temps
.
size
())
,
m_compiled_function
(
compiled_function
)
,
m_compiled_function
(
compiled_function
)
,
m_callees
(
callees
)
{
{
copy
(
temps
.
begin
(),
temps
.
end
(),
m_tensor_views
.
begin
()
+
m_n_outputs
+
m_n_inputs
);
copy
(
temps
.
begin
(),
temps
.
end
(),
m_tensor_views
.
begin
()
+
m_n_outputs
+
m_n_inputs
);
}
}
...
@@ -40,7 +41,7 @@ void CallFrame::tensor_call(
...
@@ -40,7 +41,7 @@ void CallFrame::tensor_call(
copy
(
inputs
.
begin
(),
inputs
.
end
(),
m_tensor_views
.
begin
()
+
m_n_outputs
);
copy
(
inputs
.
begin
(),
inputs
.
end
(),
m_tensor_views
.
begin
()
+
m_n_outputs
);
// Invoke compiled computation
// Invoke compiled computation
m_compiled_function
(
this
,
m_tensor_views
);
m_compiled_function
(
this
,
m_tensor_views
,
m_callees
);
// Don't hold onto inputs/outputs
// Don't hold onto inputs/outputs
fill_n
(
m_tensor_views
.
begin
(),
m_n_outputs
+
m_n_inputs
,
nullptr
);
fill_n
(
m_tensor_views
.
begin
(),
m_n_outputs
+
m_n_inputs
,
nullptr
);
...
...
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/cpu/call_frame.hpp
View file @
f5768063
...
@@ -31,8 +31,10 @@ namespace ngraph
...
@@ -31,8 +31,10 @@ namespace ngraph
namespace
cpu
namespace
cpu
{
{
class
CallFrame
;
class
CallFrame
;
using
EntryPoint
=
std
::
function
<
void
(
ngraph
::
runtime
::
cpu
::
CallFrame
*
,
using
EntryPoint
=
std
::
function
<
void
(
ngraph
::
runtime
::
cpu
::
CallFrame
*
,
ngraph
::
runtime
::
TensorViewPtrs
&
)
>
;
ngraph
::
runtime
::
TensorViewPtrs
&
,
const
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>&
)
>
;
// Compile and execute graphs
// Compile and execute graphs
class
CallFrame
:
public
ngraph
::
runtime
::
CallFrame
class
CallFrame
:
public
ngraph
::
runtime
::
CallFrame
...
@@ -41,7 +43,8 @@ namespace ngraph
...
@@ -41,7 +43,8 @@ namespace ngraph
CallFrame
(
EntryPoint
compiled_function
,
CallFrame
(
EntryPoint
compiled_function
,
size_t
n_outputs
,
size_t
n_outputs
,
size_t
n_inputs
,
size_t
n_inputs
,
const
TensorViewPtrs
&
temps
);
const
TensorViewPtrs
&
temps
,
const
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>&
callees
);
/// @brief Invoke the function with values matching the signature of the function.
/// @brief Invoke the function with values matching the signature of the function.
///
///
...
@@ -73,6 +76,7 @@ namespace ngraph
...
@@ -73,6 +76,7 @@ namespace ngraph
TensorViewPtrs
m_tensor_views
;
TensorViewPtrs
m_tensor_views
;
bool
m_return
;
bool
m_return
;
EntryPoint
m_compiled_function
;
EntryPoint
m_compiled_function
;
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>
m_callees
;
};
};
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/cpu/cpu_kernels.cpp
0 → 100644
View file @
f5768063
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/cpu/cpu_kernels.hpp
0 → 100644
View file @
f5768063
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#pragma once
#include "ngraph/types/element_type.hpp"
// CBLAS types and wrappers
namespace
cblas
{
enum
class
Layout
{
RowMajor
=
101
,
ColMajor
=
102
};
enum
class
Transpose
{
None
=
111
,
Transpose
=
112
,
ConjTrans
=
113
};
enum
class
UpperLower
{
Upper
=
121
,
Lower
=
122
};
enum
class
Diag
{
NonUnit
=
131
,
Unit
=
132
};
enum
class
Side
{
Left
=
141
,
Right
=
142
};
enum
class
Storage
{
Packed
=
151
};
enum
class
Ident
{
AMatrix
=
161
,
BMatrix
=
162
};
enum
class
Offset
{
RowOffset
=
171
,
ColOffset
=
172
,
FixOffset
=
173
};
extern
"C"
{
void
cblas_sgemm
(
const
Layout
layout
,
const
Transpose
TransA
,
const
Transpose
TransB
,
const
ngraph
::
element
::
Int64
::
type
M
,
const
ngraph
::
element
::
Int64
::
type
N
,
const
ngraph
::
element
::
Int64
::
type
K
,
const
ngraph
::
element
::
Float32
::
type
alpha
,
const
ngraph
::
element
::
Float32
::
type
*
A
,
const
ngraph
::
element
::
Int64
::
type
lda
,
const
ngraph
::
element
::
Float32
::
type
*
B
,
const
ngraph
::
element
::
Int64
::
type
ldb
,
const
ngraph
::
element
::
Float32
::
type
beta
,
ngraph
::
element
::
Float32
::
type
*
C
,
const
ngraph
::
element
::
Int64
::
type
ldc
);
}
}
namespace
mkl
{
extern
"C"
{
void
MKL_Somatcopy
(
char
ordering
,
char
trans
,
size_t
rows
,
size_t
cols
,
const
ngraph
::
element
::
Float32
::
type
alpha
,
const
ngraph
::
element
::
Float32
::
type
*
A
,
size_t
lda
,
ngraph
::
element
::
Float32
::
type
*
B
,
size_t
ldb
);
}
}
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/cpu/emitter.cpp
View file @
f5768063
...
@@ -12,6 +12,7 @@
...
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
#include <algorithm>
#include <iostream>
#include <iostream>
#include <string>
#include <string>
#include <typeindex>
#include <typeindex>
...
@@ -23,7 +24,13 @@
...
@@ -23,7 +24,13 @@
#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/concatenate.hpp"
#include "ngraph/ops/concatenate.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/function_call.hpp"
#include "ngraph/ops/get_tuple_element.hpp"
#include "ngraph/ops/get_tuple_element.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/runtime/cpu/call_frame.hpp"
#include "ngraph/runtime/cpu/emitter.hpp"
#include "ngraph/runtime/cpu/emitter.hpp"
#include "ngraph/runtime/cpu/external_function.hpp"
#include "ngraph/runtime/cpu/external_function.hpp"
#include "ngraph/runtime/tensor_view_info.hpp"
#include "ngraph/runtime/tensor_view_info.hpp"
...
@@ -177,30 +184,58 @@ void Emitter::EMITTER_DECL(EmitDot)
...
@@ -177,30 +184,58 @@ void Emitter::EMITTER_DECL(EmitDot)
auto
arg1_layout
=
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
();
auto
arg1_layout
=
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
();
auto
out_layout
=
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
();
auto
out_layout
=
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
();
TU
+=
// Emit an MKL SGEMM call if possible
" {
\n
"
// clang-format off
" auto arg0 = call_frame->get_tensor_view_data<"
+
if
(
arg0_element_type
==
ngraph
::
element
::
Float32
::
element_type
())
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
{
");
\n
"
TU
+=
" auto arg1 = call_frame->get_tensor_view_data<"
+
" {
\n
"
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
inputs
[
1
].
get_index
())
+
" auto arg0 = call_frame->get_tensor_view_data<"
+
");
\n
"
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
" auto out = call_frame->get_tensor_view_data<"
+
");
\n
"
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
" auto arg1 = call_frame->get_tensor_view_data<"
+
");
\n
"
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
inputs
[
1
].
get_index
())
+
" EigenMatrix<"
+
");
\n
"
element_type_names
[
TI
(
arg0_element_type
)]
+
">(out, "
+
" auto out = call_frame->get_tensor_view_data<"
+
EIGEN_MATRIX_FORMAT
(
out_layout
->
get_shape
(),
out_layout
->
get_strides
())
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
") =
\n
"
");
\n
"
" EigenMatrix<"
+
" cblas::cblas_sgemm(cblas::Layout::RowMajor, cblas::Transpose::None, cblas::Transpose::None, "
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">(arg0, "
+
to_string
(
arg0_shape
[
0
])
+
", "
+
to_string
(
arg1_shape
[
1
])
+
", "
+
to_string
(
arg0_shape
[
1
])
+
",
\n
"
EIGEN_MATRIX_FORMAT
(
arg0_layout
->
get_shape
(),
arg0_layout
->
get_strides
())
+
" 1.0f, arg0, "
+
to_string
(
max
(
1UL
,
arg0_shape
[
1
]))
+
", arg1, "
+
to_string
(
max
(
1UL
,
arg1_shape
[
1
]))
+
", 0.0f,
\n
"
") * "
" out, "
+
to_string
(
max
(
1UL
,
arg1_shape
[
1
]))
+
");
\n
"
"EigenMatrix<"
+
" }
\n
"
;
element_type_names
[
TI
(
arg0_element_type
)]
+
">(arg1, "
+
}
EIGEN_MATRIX_FORMAT
(
arg1_layout
->
get_shape
(),
arg1_layout
->
get_strides
())
+
// clang-format on
");
\n
"
else
" }
\n
"
;
{
TU
+=
" {
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto arg1 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
inputs
[
1
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenMatrix<"
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">(out, "
+
EIGEN_MATRIX_FORMAT
(
out_layout
->
get_shape
(),
out_layout
->
get_strides
())
+
") =
\n
"
" EigenMatrix<"
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">(arg0, "
+
EIGEN_MATRIX_FORMAT
(
arg0_layout
->
get_shape
(),
arg0_layout
->
get_strides
())
+
") * "
"EigenMatrix<"
+
element_type_names
[
TI
(
arg0_element_type
)]
+
">(arg1, "
+
EIGEN_MATRIX_FORMAT
(
arg1_layout
->
get_shape
(),
arg1_layout
->
get_strides
())
+
");
\n
"
" }
\n
"
;
}
}
}
else
else
{
{
...
@@ -501,7 +536,26 @@ void Emitter::EMITTER_DECL(EmitMaximum)
...
@@ -501,7 +536,26 @@ void Emitter::EMITTER_DECL(EmitMaximum)
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(out, "
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(arg0, "
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(arg0, "
EIGEN_VECTOR_FORMAT
(
inputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
").max("
EIGEN_VECTOR_FORMAT
(
inputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
").max(
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(arg1, "
EIGEN_VECTOR_FORMAT
(
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
"));
\n
"
" }
\n
"
;
}
void
Emitter
::
EMITTER_DECL
(
EmitMinimum
)
{
const
element
::
Type
&
et
=
(
dynamic_pointer_cast
<
const
TensorViewType
>
(
n
->
get_arguments
().
at
(
0
)
->
get_value_type
()))
->
get_element_type
();
TU
+=
" {
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
et
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto arg1 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
et
)]
+
">("
+
to_string
(
inputs
[
1
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
et
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(arg0, "
EIGEN_VECTOR_FORMAT
(
inputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
").min(
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(arg1, "
" EigenArray1d<"
+
element_type_names
[
TI
(
et
)]
+
">(arg1, "
EIGEN_VECTOR_FORMAT
(
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
"));
\n
"
EIGEN_VECTOR_FORMAT
(
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
"));
\n
"
" }
\n
"
;
" }
\n
"
;
...
@@ -882,3 +936,775 @@ void Emitter::EMITTER_DECL(EmitConvert)
...
@@ -882,3 +936,775 @@ void Emitter::EMITTER_DECL(EmitConvert)
".template cast<typename "
+
element_type_names
[
TI
(
result_element_type
)]
+
"::type>();
\n
"
".template cast<typename "
+
element_type_names
[
TI
(
result_element_type
)]
+
"::type>();
\n
"
" }
\n
"
;
" }
\n
"
;
}
}
void
Emitter
::
EMITTER_DECL
(
EmitConstant
)
{
auto
c
=
static_cast
<
const
op
::
Constant
*>
(
n
);
auto
c_tensor_type
=
dynamic_pointer_cast
<
const
TensorViewType
>
(
c
->
get_value_type
());
assert
(
c_tensor_type
);
auto
&
c_element_type
=
c_tensor_type
->
get_element_type
();
auto
c_value_strings
=
c
->
get_value_strings
();
TU
+=
" {
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
c_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
")->get_vector() = std::vector<"
+
element_type_names
[
TI
(
c_element_type
)]
+
"::type>{"
;
for
(
size_t
i
=
0
;
i
<
c_value_strings
.
size
();
i
++
)
{
if
(
i
)
TU
+=
", "
;
TU
+=
c_value_strings
[
i
];
}
TU
+=
"};
\n
}
\n
"
;
}
void
Emitter
::
EMITTER_DECL
(
EmitReshape
)
{
auto
reshape
=
static_cast
<
const
op
::
Reshape
*>
(
n
);
auto
arg_type
=
reshape
->
get_arguments
().
at
(
0
)
->
get_value_type
();
auto
arg_tensor_view_type
=
dynamic_pointer_cast
<
const
TensorViewType
>
(
arg_type
);
assert
(
arg_tensor_view_type
);
auto
arg_shape
=
arg_tensor_view_type
->
get_shape
();
auto
arg_rank
=
arg_shape
.
size
();
auto
result_type
=
reshape
->
get_value_type
();
auto
result_tensor_view_type
=
dynamic_pointer_cast
<
const
TensorViewType
>
(
result_type
);
assert
(
result_tensor_view_type
);
auto
result_shape
=
result_tensor_view_type
->
get_shape
();
auto
&
result_element_type
=
result_tensor_view_type
->
get_element_type
();
auto
input_order
=
reshape
->
get_input_order
();
bool
same_layout
=
std
::
is_sorted
(
input_order
.
begin
(),
input_order
.
end
());
size_t
result_shape_product
=
1
;
for
(
auto
i
:
result_shape
)
{
result_shape_product
*=
i
;
}
// If there is no layout change or we are just going from 1^n to 1^m or a zero-size tensor, we can just copy.
if
(
same_layout
||
result_shape_product
<
2
)
{
TU
+=
" {
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">("
+
to_string
(
outputs
.
at
(
0
).
get_index
())
+
")->get_vector() =
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">("
+
to_string
(
inputs
.
at
(
0
).
get_index
())
+
")->get_vector();
\n
"
" }
\n
"
;
}
// If there *is* a layout change in the 2D case, we transpose the input.
else
if
(
arg_rank
==
2
)
{
auto
arg0_layout
=
inputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
();
auto
out_layout
=
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
();
// Emit an MKL transpose call if possible
// clang-format off
if
(
result_element_type
==
ngraph
::
element
::
Float32
::
element_type
())
{
TU
+=
" {
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" mkl::MKL_Somatcopy('R', 'T', "
+
to_string
(
arg_shape
[
0
])
+
",
\n
"
" "
+
to_string
(
arg_shape
[
1
])
+
", 1.0f,
\n
"
" arg0, "
+
to_string
(
arg_shape
[
1
])
+
",
\n
"
" out, "
+
to_string
(
arg_shape
[
0
])
+
");
\n
"
" }
\n
"
;
}
// clang-format on
else
{
TU
+=
" {
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenMatrix<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">(out, "
+
EIGEN_MATRIX_FORMAT
(
out_layout
->
get_shape
(),
out_layout
->
get_strides
())
+
") =
\n
"
" EigenMatrix<"
+
element_type_names
[
TI
(
result_element_type
)]
+
">(arg0, "
+
EIGEN_MATRIX_FORMAT
(
arg0_layout
->
get_shape
(),
arg0_layout
->
get_strides
())
+
").transpose();
\n
"
" }
\n
"
;
}
}
// Other cases (reordering of axes for tensors with rank>2) are not handled yet.
else
{
throw
ngraph_error
(
"Axis permutation in reshape is not implemented yet for tensors with rank>2"
);
}
}
void
Emitter
::
EMITTER_DECL
(
EmitFunctionCall
)
{
auto
function_call
=
static_cast
<
const
op
::
FunctionCall
*>
(
n
);
auto
function
=
function_call
->
get_function
();
std
::
shared_ptr
<
ExternalFunction
>
external
;
try
{
external
=
function_map
.
at
(
function
);
}
catch
(
const
std
::
out_of_range
)
{
external
=
make_shared
<
ExternalFunction
>
(
function
);
function_map
.
insert
({
function
,
external
});
}
std
::
shared_ptr
<
CallFrame
>
cf
=
std
::
dynamic_pointer_cast
<
CallFrame
>
(
external
->
make_call_frame
());
ef
->
get_callees
().
emplace_back
(
cf
);
TU
+=
" {
\n
"
" auto cf = callees.at("
+
to_string
(
ef
->
get_callees
().
size
()
-
1
)
+
");
\n
"
" std::vector<std::shared_ptr<ngraph::runtime::Value>> inputs;
\n
"
" std::vector<std::shared_ptr<ngraph::runtime::Value>> outputs;
\n
"
;
for
(
const
auto
&
in
:
inputs
)
{
TU
+=
" inputs.emplace_back(call_frame->get_tensor_view("
+
to_string
(
in
.
get_index
())
+
"));
\n
"
;
}
for
(
const
auto
&
out
:
outputs
)
{
TU
+=
" outputs.emplace_back(call_frame->get_tensor_view("
+
to_string
(
out
.
get_index
())
+
"));
\n
"
;
}
TU
+=
" (*cf)(inputs, outputs);
\n
"
" }
\n
"
;
}
// TODO: This and other ops include comments/notes that
// we don't want to just copy-paste here. Figure out a better way
// or just point to ngvm/external_function.cpp with a note that
// the compiled version of these ops is intended to have semantics identical
// to what's seen there (for now atleast)
void
Emitter
::
EMITTER_DECL
(
EmitReduce
)
{
auto
reduce
=
static_cast
<
const
op
::
Reduce
*>
(
n
);
auto
reduction_function
=
reduce
->
get_reduction_function
();
std
::
shared_ptr
<
ExternalFunction
>
external
;
try
{
external
=
function_map
.
at
(
reduction_function
);
}
catch
(
const
std
::
out_of_range
)
{
external
=
make_shared
<
ExternalFunction
>
(
reduction_function
);
function_map
.
insert
({
reduction_function
,
external
});
}
auto
reductee_type
=
reduce
->
get_arguments
().
at
(
0
)
->
get_value_type
();
auto
reductee_tensor_view_type
=
dynamic_pointer_cast
<
const
TensorViewType
>
(
reductee_type
);
assert
(
reductee_tensor_view_type
);
auto
reductee_shape
=
reductee_tensor_view_type
->
get_shape
();
auto
f_result_type
=
reduction_function
->
get_result_type
();
auto
f_result_tensor_view_type
=
dynamic_pointer_cast
<
const
TensorViewType
>
(
f_result_type
);
assert
(
f_result_tensor_view_type
);
auto
&
f_result_element_type
=
f_result_tensor_view_type
->
get_element_type
();
auto
result_type
=
reduce
->
get_value_type
();
auto
result_tensor_view_type
=
dynamic_pointer_cast
<
const
TensorViewType
>
(
result_type
);
assert
(
result_tensor_view_type
);
auto
result_shape
=
result_tensor_view_type
->
get_shape
();
auto
&
reduction_axes
=
reduce
->
get_reduction_axes
();
auto
arg0_layout
=
inputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
();
// Trivial case: no reduction axes (this includes the scalar-reductee case).
if
(
reduction_axes
.
empty
())
{
TU
+=
" {
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
.
at
(
0
).
get_index
())
+
")->get_vector() =
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
.
at
(
0
).
get_index
())
+
")->get_vector();
\n
"
" }
\n
"
;
}
// Behavior for zero-size axes bears some explanation here. XLA's reduce
// operator provides an "base" element (usually, but not necessarily,
// an identity element) that it apparently *may* choose to insert anywhere
// in the reduction any number of times. For example, given:
//
// reduce{{1,2,3},b,+)
//
// any of the following are valid reductions (I think!):
//
// b+(b+1+2)+3
// b+(1+(2+3))
// (1+2)+3 (I think!)
//
// etc. Here we will choose never to instantiate the base element, which
// works well with Eigen's default behavior for non-zero-length axes. The
// exceptional case is when we reduce on a zero-length axis. In this case,
// Eigen's default behavior is to put a zero in the output, which is not
// what we want, so we detect that case here and override with a copy
// instruction (for reduce-to-scalar) or a broadcast (for reduce-to-vector)
// from the base element.
//
// What I'm actually not sure about is whether the identity element is
// required to appear at least once. If so, this will need to be reworked,
// assuming we actually want to mimic XLA's semantics that closely, which
// we may not.
else
if
((
reductee_shape
.
size
()
==
1
&&
reduction_axes
==
AxisSet
{
0
})
||
(
reductee_shape
.
size
()
==
2
&&
reduction_axes
==
AxisSet
{
0
,
1
}))
{
if
(
reductee_shape
.
at
(
0
)
==
0
||
(
reductee_shape
.
size
()
==
2
&&
reductee_shape
.
at
(
1
)
==
0
))
{
TU
+=
" {
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
.
at
(
0
).
get_index
())
+
")->get_vector() =
\n
"
" call_frame->get_parameterized_tensor_view<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
.
at
(
1
).
get_index
())
+
")->get_vector();
\n
"
" }
\n
"
;
}
else
{
std
::
shared_ptr
<
CallFrame
>
cf
=
std
::
dynamic_pointer_cast
<
CallFrame
>
(
external
->
make_call_frame
());
ef
->
get_callees
().
emplace_back
(
cf
);
TU
+=
" {
\n
"
" using ET = "
+
element_type_names
[
TI
(
f_result_element_type
)]
+
";
\n
"
" auto cf = callees.at("
+
to_string
(
ef
->
get_callees
().
size
()
-
1
)
+
");
\n
"
" auto f = [cf](typename ET::type x, typename ET::type y) -> typename ET::type {
\n
"
" auto tx = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" *tx = std::vector<typename ET::type>({x});
\n
"
" auto ty = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" *ty = std::vector<typename ET::type>({y});
\n
"
" auto tr = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" (*cf)({tx, ty}, {tr});
\n
"
" return tr->get_vector()[0];
\n
"
" };
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(arg0, "
EIGEN_VECTOR_FORMAT
(
inputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
").redux(f);
\n
"
" }
\n
"
;
}
}
else
if
(
reductee_shape
.
size
()
==
2
&&
reduction_axes
==
AxisSet
{
1
})
{
if
(
reductee_shape
.
at
(
1
)
==
0
)
{
TU
+=
" {
\n
"
" auto arg1 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
[
1
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(arg1, "
EIGEN_VECTOR_FORMAT
(
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
")(0, 0);
\n
"
" }
\n
"
;
}
else
{
std
::
shared_ptr
<
CallFrame
>
cf
=
std
::
dynamic_pointer_cast
<
CallFrame
>
(
external
->
make_call_frame
());
ef
->
get_callees
().
emplace_back
(
cf
);
TU
+=
" {
\n
"
" using ET = "
+
element_type_names
[
TI
(
f_result_element_type
)]
+
";
\n
"
" auto cf = callees.at("
+
to_string
(
ef
->
get_callees
().
size
()
-
1
)
+
");
\n
"
" auto f = [cf](typename ET::type x, typename ET::type y) -> typename ET::type {
\n
"
" auto tx = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" *tx = std::vector<typename ET::type>({x});
\n
"
" auto ty = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" *ty = std::vector<typename ET::type>({y});
\n
"
" auto tr = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" (*cf)({tx, ty}, {tr});
\n
"
" return tr->get_vector()[0];
\n
"
" };
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenVector<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenMatrix<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(arg0, "
+
EIGEN_MATRIX_FORMAT
(
arg0_layout
->
get_shape
(),
arg0_layout
->
get_strides
())
+
").rowwise().redux(f);
\n
"
" }
\n
"
;
}
}
else
if
(
reductee_shape
.
size
()
==
2
&&
reduction_axes
==
AxisSet
{
0
})
{
if
(
reductee_shape
.
at
(
0
)
==
0
)
{
TU
+=
" {
\n
"
" auto arg1 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
[
1
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenArray1d<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(arg1, "
EIGEN_VECTOR_FORMAT
(
inputs
[
1
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
")(0, 0);
\n
"
" }
\n
"
;
}
else
{
std
::
shared_ptr
<
CallFrame
>
cf
=
std
::
dynamic_pointer_cast
<
CallFrame
>
(
external
->
make_call_frame
());
ef
->
get_callees
().
emplace_back
(
cf
);
TU
+=
" {
\n
"
" using ET = "
+
element_type_names
[
TI
(
f_result_element_type
)]
+
";
\n
"
" auto cf = callees.at("
+
to_string
(
ef
->
get_callees
().
size
()
-
1
)
+
");
\n
"
" auto f = [cf](typename ET::type x, typename ET::type y) -> typename ET::type {
\n
"
" auto tx = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" *tx = std::vector<typename ET::type>({x});
\n
"
" auto ty = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" *ty = std::vector<typename ET::type>({y});
\n
"
" auto tr = ngraph::runtime::make_tensor<ET>(ngraph::Shape{});
\n
"
" (*cf)({tx, ty}, {tr});
\n
"
" return tr->get_vector()[0];
\n
"
" };
\n
"
" auto arg0 = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
inputs
[
0
].
get_index
())
+
");
\n
"
" auto out = call_frame->get_tensor_view_data<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">("
+
to_string
(
outputs
[
0
].
get_index
())
+
");
\n
"
" EigenVector<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(out, "
EIGEN_VECTOR_FORMAT
(
outputs
[
0
].
get_layout
<
DenseTensorViewLayout
>
()
->
get_size
())
") =
\n
"
" EigenMatrix<"
+
element_type_names
[
TI
(
f_result_element_type
)]
+
">(arg0, "
+
EIGEN_MATRIX_FORMAT
(
arg0_layout
->
get_shape
(),
arg0_layout
->
get_strides
())
+
").colwise().redux(f);
\n
"
" }
\n
"
;
}
}
else
{
throw
ngraph_error
(
"Reduce: only vectors and matrices are currently supported"
);
}
}
// Emits C++ source (appended to the translation-unit string TU) that applies
// Eigen's element-wise sign() to the single input tensor and writes the
// result, of the same element type and size, into the output tensor.
// NOTE(review): the parameter list (n, inputs, outputs, ...) is hidden behind
// the EMITTER_DECL macro, defined outside this view -- confirm in emitter.hpp.
void Emitter::EMITTER_DECL(EmitSign)
{
    // Element type of the (single) argument; the result shares it.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    // EIGEN_VECTOR_FORMAT expands to string-literal pieces, so it is joined by
    // adjacent-literal concatenation (no '+' around it).
    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").sign();\n"
        "    }\n";
}
// Emits code that copies a rectangular slice of the input tensor into the
// output tensor. Supports rank 0 (trivial copy), rank 1 (Eigen .segment) and
// rank 2 (Eigen .block); only unit step is implemented.
// NOTE(review): the parameter list is hidden behind the EMITTER_DECL macro --
// confirm in emitter.hpp.
void Emitter::EMITTER_DECL(EmitSlice)
{
    auto slice = static_cast<const op::Slice*>(n);

    // Non-unit stepping is not implemented yet; reject it up front.
    for (auto d : slice->get_step())
    {
        if (1 != d)
        {
            throw ngraph_error("Slice does not support non-unit step yet");
        }
    }

    auto arg_type = slice->get_arguments().at(0)->get_value_type();
    auto arg_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(arg_type);
    assert(arg_tensor_view_type);
    auto arg_shape = arg_tensor_view_type->get_shape();
    auto arg_rank = arg_shape.size();
    auto& arg_element_type = arg_tensor_view_type->get_element_type();

    auto& lower_bounds = slice->get_lower_bounds();
    auto& upper_bounds = slice->get_upper_bounds();

    // Scalar slice is necessarily just a copy.
    if (arg_rank == 0)
    {
        TU +=
            "    {\n"
            "        call_frame->get_parameterized_tensor_view<" +
            element_type_names[TI(arg_element_type)] + ">(" +
            to_string(outputs.at(0).get_index()) + ")->get_vector() =\n"
            "        call_frame->get_parameterized_tensor_view<" +
            element_type_names[TI(arg_element_type)] + ">(" +
            to_string(inputs.at(0).get_index()) + ")->get_vector();\n"
            "    }\n";
    }
    else if (arg_rank == 1)
    {
        // Vector slice: contiguous segment [lower, upper) of the input.
        // EIGEN_VECTOR_FORMAT expands to string-literal pieces, hence the
        // adjacent-literal concatenation (no '+') around it.
        TU +=
            "    {\n"
            "        auto arg0 = call_frame->get_tensor_view_data<" +
            element_type_names[TI(arg_element_type)] + ">(" +
            to_string(inputs[0].get_index()) + ");\n"
            "        auto out = call_frame->get_tensor_view_data<" +
            element_type_names[TI(arg_element_type)] + ">(" +
            to_string(outputs[0].get_index()) + ");\n"
            "        EigenVector<" + element_type_names[TI(arg_element_type)] + ">(out, "
            EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
            "        EigenVector<" + element_type_names[TI(arg_element_type)] + ">(arg0, "
            EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").segment(\n"
            "        " + to_string(lower_bounds[0]) + ", " +
            to_string(upper_bounds[0] - lower_bounds[0]) + ");\n"
            "    }\n";
    }
    else if (arg_rank == 2)
    {
        // Matrix slice: Eigen block(start_row, start_col, rows, cols).
        auto arg0_layout = inputs[0].get_layout<DenseTensorViewLayout>();
        auto out_layout = outputs[0].get_layout<DenseTensorViewLayout>();

        TU +=
            "    {\n"
            "        auto arg0 = call_frame->get_tensor_view_data<" +
            element_type_names[TI(arg_element_type)] + ">(" +
            to_string(inputs[0].get_index()) + ");\n"
            "        auto out = call_frame->get_tensor_view_data<" +
            element_type_names[TI(arg_element_type)] + ">(" +
            to_string(outputs[0].get_index()) + ");\n"
            "        EigenMatrix<" + element_type_names[TI(arg_element_type)] + ">(out, " +
            EIGEN_MATRIX_FORMAT(out_layout->get_shape(), out_layout->get_strides()) + ") =\n"
            "        EigenMatrix<" + element_type_names[TI(arg_element_type)] + ">(arg0, " +
            EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) + ").block(" +
            to_string(lower_bounds[0]) + ", " + to_string(lower_bounds[1]) + ",\n"
            "        " + to_string(upper_bounds[0] - lower_bounds[0]) + ",\n"
            "        " + to_string(upper_bounds[1] - lower_bounds[1]) + ");\n"
            "    }\n";
    }
    // Other cases (reordering of axes for tensors with rank>2) are not handled yet.
    else
    {
        throw ngraph_error("Slice is not implemented yet for tensors with rank>2");
    }
}
// Emits code that sums the input tensor over the requested reduction axes.
// Handles: no axes (plain copy), full reduction of a vector or matrix to a
// scalar (.sum()), and single-axis matrix reductions (.rowwise()/.colwise()).
// NOTE(review): the parameter list is hidden behind the EMITTER_DECL macro --
// confirm in emitter.hpp.
void Emitter::EMITTER_DECL(EmitSum)
{
    auto s = static_cast<const op::Sum*>(n);
    auto s_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(s->get_value_type());
    assert(s_tensor_view_type);
    auto& s_element_type = s_tensor_view_type->get_element_type();
    auto s_shape = s_tensor_view_type->get_shape();

    auto arg = s->get_arguments().at(0);
    auto arg_type = arg->get_value_type();
    auto arg_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(arg_type);
    assert(arg_tensor_view_type);
    auto arg_shape = arg_tensor_view_type->get_shape();
    auto arg_rank = arg_shape.size();

    auto& reduction_axes = s->get_reduction_axes();

    // Trivial case: no reduction axes.
    if (reduction_axes.size() == 0)
    {
        TU +=
            "    {\n"
            "        call_frame->get_parameterized_tensor_view<" +
            element_type_names[TI(s_element_type)] + ">(" +
            to_string(outputs.at(0).get_index()) + ")->get_vector() =\n"
            "        call_frame->get_parameterized_tensor_view<" +
            element_type_names[TI(s_element_type)] + ">(" +
            to_string(inputs.at(0).get_index()) + ")->get_vector();\n"
            "    }\n";
    }
    // Full reduction? Then sum to scalar.
    else if ((arg_rank == 1 && reduction_axes == AxisSet{0}) ||
             (arg_rank == 2 && reduction_axes == AxisSet{0, 1}))
    {
        // EIGEN_VECTOR_FORMAT expands to string-literal pieces, hence the
        // adjacent-literal concatenation (no '+') around it.
        TU +=
            "    {\n"
            "        auto arg0 = call_frame->get_tensor_view_data<" +
            element_type_names[TI(s_element_type)] + ">(" +
            to_string(inputs[0].get_index()) + ");\n"
            "        auto out = call_frame->get_tensor_view_data<" +
            element_type_names[TI(s_element_type)] + ">(" +
            to_string(outputs[0].get_index()) + ");\n"
            "        EigenArray1d<" + element_type_names[TI(s_element_type)] + ">(out, "
            EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
            "        EigenArray1d<" + element_type_names[TI(s_element_type)] + ">(arg0, "
            EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").sum();\n"
            "    }\n";
    }
    else if (arg_rank == 2 && reduction_axes == AxisSet{1})
    {
        // Reduce along columns: each output element is a row sum.
        auto arg0_layout = inputs[0].get_layout<DenseTensorViewLayout>();

        TU +=
            "    {\n"
            "        auto arg0 = call_frame->get_tensor_view_data<" +
            element_type_names[TI(s_element_type)] + ">(" +
            to_string(inputs[0].get_index()) + ");\n"
            "        auto out = call_frame->get_tensor_view_data<" +
            element_type_names[TI(s_element_type)] + ">(" +
            to_string(outputs[0].get_index()) + ");\n"
            "        EigenVector<" + element_type_names[TI(s_element_type)] + ">(out, "
            EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
            "        EigenMatrix<" + element_type_names[TI(s_element_type)] + ">(arg0, " +
            EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) +
            ").rowwise().sum();\n"
            "    }\n";
    }
    else if (arg_rank == 2 && reduction_axes == AxisSet{0})
    {
        // Reduce along rows: each output element is a column sum.
        auto arg0_layout = inputs[0].get_layout<DenseTensorViewLayout>();

        TU +=
            "    {\n"
            "        auto arg0 = call_frame->get_tensor_view_data<" +
            element_type_names[TI(s_element_type)] + ">(" +
            to_string(inputs[0].get_index()) + ");\n"
            "        auto out = call_frame->get_tensor_view_data<" +
            element_type_names[TI(s_element_type)] + ">(" +
            to_string(outputs[0].get_index()) + ");\n"
            "        EigenVector<" + element_type_names[TI(s_element_type)] + ">(out, "
            EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
            "        EigenMatrix<" + element_type_names[TI(s_element_type)] + ">(arg0, " +
            EIGEN_MATRIX_FORMAT(arg0_layout->get_shape(), arg0_layout->get_strides()) +
            ").colwise().sum();\n"
            "    }\n";
    }
    else
    {
        throw ngraph_error("Sum: only vectors and matrices are currently supported");
    }
}
// Emits code applying Eigen's element-wise exp() to the input tensor, writing
// into the output tensor of the same element type and size.
// NOTE(review): the parameter list is hidden behind the EMITTER_DECL macro --
// confirm in emitter.hpp.
void Emitter::EMITTER_DECL(EmitExp)
{
    // Element type of the (single) argument; the result shares it.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    // EIGEN_VECTOR_FORMAT expands to string-literal pieces, hence the
    // adjacent-literal concatenation (no '+') around it.
    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").exp();\n"
        "    }\n";
}
// Emits code applying Eigen's element-wise sin() to the input tensor, writing
// into the output tensor of the same element type and size.
// NOTE(review): the parameter list is hidden behind the EMITTER_DECL macro --
// confirm in emitter.hpp.
void Emitter::EMITTER_DECL(EmitSin)
{
    // Element type of the (single) argument; the result shares it.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    // EIGEN_VECTOR_FORMAT expands to string-literal pieces, hence the
    // adjacent-literal concatenation (no '+') around it.
    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").sin();\n"
        "    }\n";
}
// Emits code applying Eigen's element-wise sinh() to the input tensor,
// writing into the output tensor of the same element type and size.
// NOTE(review): the parameter list is hidden behind the EMITTER_DECL macro --
// confirm in emitter.hpp.
void Emitter::EMITTER_DECL(EmitSinh)
{
    // Element type of the (single) argument; the result shares it.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    // EIGEN_VECTOR_FORMAT expands to string-literal pieces, hence the
    // adjacent-literal concatenation (no '+') around it.
    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").sinh();\n"
        "    }\n";
}
// Emits code applying Eigen's element-wise cos() to the input tensor, writing
// into the output tensor of the same element type and size.
// NOTE(review): the parameter list is hidden behind the EMITTER_DECL macro --
// confirm in emitter.hpp.
void Emitter::EMITTER_DECL(EmitCos)
{
    // Element type of the (single) argument; the result shares it.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    // EIGEN_VECTOR_FORMAT expands to string-literal pieces, hence the
    // adjacent-literal concatenation (no '+') around it.
    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").cos();\n"
        "    }\n";
}
// Emits code applying Eigen's element-wise cosh() to the input tensor,
// writing into the output tensor of the same element type and size.
// NOTE(review): the parameter list is hidden behind the EMITTER_DECL macro --
// confirm in emitter.hpp.
void Emitter::EMITTER_DECL(EmitCosh)
{
    // Element type of the (single) argument; the result shares it.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    // EIGEN_VECTOR_FORMAT expands to string-literal pieces, hence the
    // adjacent-literal concatenation (no '+') around it.
    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").cosh();\n"
        "    }\n";
}
// Emits code applying Eigen's element-wise tan() to the input tensor, writing
// into the output tensor of the same element type and size.
// NOTE(review): the parameter list is hidden behind the EMITTER_DECL macro --
// confirm in emitter.hpp.
void Emitter::EMITTER_DECL(EmitTan)
{
    // Element type of the (single) argument; the result shares it.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    // EIGEN_VECTOR_FORMAT expands to string-literal pieces, hence the
    // adjacent-literal concatenation (no '+') around it.
    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").tan();\n"
        "    }\n";
}
// Emits code applying tanh element-wise to the input tensor, writing into the
// output tensor of the same element type and size. Unlike the sibling
// emitters, this one generates a std::transform over std::tanh rather than an
// Eigen expression (see the comment below for why).
// NOTE(review): the parameter list is hidden behind the EMITTER_DECL macro --
// confirm in emitter.hpp.
void Emitter::EMITTER_DECL(EmitTanh)
{
    // Element type of the (single) argument; the result shares it.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    // Eigen's generic_fast_tanh_float<float> is currently miscompiled by Clang/LLVM
    // so we fall-back to std::tanh
    // TODO: Implement our own internal fast/approximate tanh if this actually gets used
    // by models
    TU +=
        "    {\n"
        "        auto& arg0 = call_frame->get_parameterized_tensor_view<" +
        element_type_names[TI(et)] + ">(" + to_string(inputs[0].get_index()) +
        ")->get_vector();\n"
        "        auto& out = call_frame->get_parameterized_tensor_view<" +
        element_type_names[TI(et)] + ">(" + to_string(outputs[0].get_index()) +
        ")->get_vector();\n"
        "        std::transform(arg0.begin(), arg0.end(), out.begin(), [](" +
        element_type_names[TI(et)] + "::type x) -> " + element_type_names[TI(et)] +
        "::type { return std::tanh(x); });\n"
        "    }\n";
}
// Emits code applying Eigen's element-wise asin() to the input tensor,
// writing into the output tensor of the same element type and size.
// NOTE(review): the parameter list is hidden behind the EMITTER_DECL macro --
// confirm in emitter.hpp.
void Emitter::EMITTER_DECL(EmitAsin)
{
    // Element type of the (single) argument; the result shares it.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    // EIGEN_VECTOR_FORMAT expands to string-literal pieces, hence the
    // adjacent-literal concatenation (no '+') around it.
    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").asin();\n"
        "    }\n";
}
// Emits code applying Eigen's element-wise acos() to the input tensor,
// writing into the output tensor of the same element type and size.
// NOTE(review): the parameter list is hidden behind the EMITTER_DECL macro --
// confirm in emitter.hpp.
void Emitter::EMITTER_DECL(EmitAcos)
{
    // Element type of the (single) argument; the result shares it.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    // EIGEN_VECTOR_FORMAT expands to string-literal pieces, hence the
    // adjacent-literal concatenation (no '+') around it.
    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").acos();\n"
        "    }\n";
}
// Emits code applying Eigen's element-wise atan() to the input tensor,
// writing into the output tensor of the same element type and size.
// NOTE(review): the parameter list is hidden behind the EMITTER_DECL macro --
// confirm in emitter.hpp.
void Emitter::EMITTER_DECL(EmitAtan)
{
    // Element type of the (single) argument; the result shares it.
    const element::Type& et =
        (dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
            ->get_element_type();

    // EIGEN_VECTOR_FORMAT expands to string-literal pieces, hence the
    // adjacent-literal concatenation (no '+') around it.
    TU +=
        "    {\n"
        "        auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(inputs[0].get_index()) + ");\n"
        "        auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] +
        ">(" + to_string(outputs[0].get_index()) + ");\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
        EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
        "        EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
        EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ").atan();\n"
        "    }\n";
}
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/cpu/emitter.hpp
View file @
f5768063
...
@@ -61,6 +61,7 @@ namespace ngraph
...
@@ -61,6 +61,7 @@ namespace ngraph
void
EMITTER_DECL
(
EmitLessEq
);
void
EMITTER_DECL
(
EmitLessEq
);
void
EMITTER_DECL
(
EmitLog
);
void
EMITTER_DECL
(
EmitLog
);
void
EMITTER_DECL
(
EmitMaximum
);
void
EMITTER_DECL
(
EmitMaximum
);
void
EMITTER_DECL
(
EmitMinimum
);
void
EMITTER_DECL
(
EmitNegative
);
void
EMITTER_DECL
(
EmitNegative
);
void
EMITTER_DECL
(
EmitNotEqual
);
void
EMITTER_DECL
(
EmitNotEqual
);
void
EMITTER_DECL
(
EmitSelect
);
void
EMITTER_DECL
(
EmitSelect
);
...
@@ -75,6 +76,23 @@ namespace ngraph
...
@@ -75,6 +76,23 @@ namespace ngraph
void
EMITTER_DECL
(
EmitParameterizedConstantUInt64
);
void
EMITTER_DECL
(
EmitParameterizedConstantUInt64
);
void
EMITTER_DECL
(
EmitBroadcast
);
void
EMITTER_DECL
(
EmitBroadcast
);
void
EMITTER_DECL
(
EmitConvert
);
void
EMITTER_DECL
(
EmitConvert
);
void
EMITTER_DECL
(
EmitConstant
);
void
EMITTER_DECL
(
EmitReshape
);
void
EMITTER_DECL
(
EmitFunctionCall
);
void
EMITTER_DECL
(
EmitReduce
);
void
EMITTER_DECL
(
EmitSign
);
void
EMITTER_DECL
(
EmitSlice
);
void
EMITTER_DECL
(
EmitSum
);
void
EMITTER_DECL
(
EmitExp
);
void
EMITTER_DECL
(
EmitSin
);
void
EMITTER_DECL
(
EmitSinh
);
void
EMITTER_DECL
(
EmitCos
);
void
EMITTER_DECL
(
EmitCosh
);
void
EMITTER_DECL
(
EmitTan
);
void
EMITTER_DECL
(
EmitTanh
);
void
EMITTER_DECL
(
EmitAsin
);
void
EMITTER_DECL
(
EmitAcos
);
void
EMITTER_DECL
(
EmitAtan
);
};
};
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/cpu/external_function.cpp
View file @
f5768063
...
@@ -27,14 +27,20 @@
...
@@ -27,14 +27,20 @@
#include "ngraph/function.hpp"
#include "ngraph/function.hpp"
#include "ngraph/node.hpp"
#include "ngraph/node.hpp"
#include "ngraph/ops/abs.hpp"
#include "ngraph/ops/abs.hpp"
#include "ngraph/ops/acos.hpp"
#include "ngraph/ops/add.hpp"
#include "ngraph/ops/add.hpp"
#include "ngraph/ops/asin.hpp"
#include "ngraph/ops/atan.hpp"
#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/concatenate.hpp"
#include "ngraph/ops/concatenate.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/convert.hpp"
#include "ngraph/ops/convert.hpp"
#include "ngraph/ops/cos.hpp"
#include "ngraph/ops/cosh.hpp"
#include "ngraph/ops/divide.hpp"
#include "ngraph/ops/divide.hpp"
#include "ngraph/ops/dot.hpp"
#include "ngraph/ops/dot.hpp"
#include "ngraph/ops/equal.hpp"
#include "ngraph/ops/equal.hpp"
#include "ngraph/ops/exp.hpp"
#include "ngraph/ops/function_call.hpp"
#include "ngraph/ops/function_call.hpp"
#include "ngraph/ops/get_tuple_element.hpp"
#include "ngraph/ops/get_tuple_element.hpp"
#include "ngraph/ops/greater.hpp"
#include "ngraph/ops/greater.hpp"
...
@@ -43,12 +49,21 @@
...
@@ -43,12 +49,21 @@
#include "ngraph/ops/less_eq.hpp"
#include "ngraph/ops/less_eq.hpp"
#include "ngraph/ops/log.hpp"
#include "ngraph/ops/log.hpp"
#include "ngraph/ops/maximum.hpp"
#include "ngraph/ops/maximum.hpp"
#include "ngraph/ops/minimum.hpp"
#include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/negative.hpp"
#include "ngraph/ops/negative.hpp"
#include "ngraph/ops/not_equal.hpp"
#include "ngraph/ops/not_equal.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/select.hpp"
#include "ngraph/ops/select.hpp"
#include "ngraph/ops/sign.hpp"
#include "ngraph/ops/sin.hpp"
#include "ngraph/ops/sinh.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/subtract.hpp"
#include "ngraph/ops/subtract.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/ops/tan.hpp"
#include "ngraph/ops/tanh.hpp"
#include "ngraph/ops/tuple.hpp"
#include "ngraph/ops/tuple.hpp"
#include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/assign_tensors.hpp"
#include "ngraph/pass/assign_tensors.hpp"
...
@@ -84,6 +99,7 @@ static const OpMap dispatcher{
...
@@ -84,6 +99,7 @@ static const OpMap dispatcher{
{
TI
(
ngraph
::
op
::
LessEq
),
&
Emitter
::
EmitLessEq
},
{
TI
(
ngraph
::
op
::
LessEq
),
&
Emitter
::
EmitLessEq
},
{
TI
(
ngraph
::
op
::
Log
),
&
Emitter
::
EmitLog
},
{
TI
(
ngraph
::
op
::
Log
),
&
Emitter
::
EmitLog
},
{
TI
(
ngraph
::
op
::
Maximum
),
&
Emitter
::
EmitMaximum
},
{
TI
(
ngraph
::
op
::
Maximum
),
&
Emitter
::
EmitMaximum
},
{
TI
(
ngraph
::
op
::
Minimum
),
&
Emitter
::
EmitMinimum
},
{
TI
(
ngraph
::
op
::
Negative
),
&
Emitter
::
EmitNegative
},
{
TI
(
ngraph
::
op
::
Negative
),
&
Emitter
::
EmitNegative
},
{
TI
(
ngraph
::
op
::
NotEqual
),
&
Emitter
::
EmitNotEqual
},
{
TI
(
ngraph
::
op
::
NotEqual
),
&
Emitter
::
EmitNotEqual
},
{
TI
(
ngraph
::
op
::
Select
),
&
Emitter
::
EmitSelect
},
{
TI
(
ngraph
::
op
::
Select
),
&
Emitter
::
EmitSelect
},
...
@@ -106,6 +122,23 @@ static const OpMap dispatcher{
...
@@ -106,6 +122,23 @@ static const OpMap dispatcher{
&
Emitter
::
EmitParameterizedConstantUInt64
},
&
Emitter
::
EmitParameterizedConstantUInt64
},
{
TI
(
ngraph
::
op
::
Broadcast
),
&
Emitter
::
EmitBroadcast
},
{
TI
(
ngraph
::
op
::
Broadcast
),
&
Emitter
::
EmitBroadcast
},
{
TI
(
ngraph
::
op
::
Convert
),
&
Emitter
::
EmitConvert
},
{
TI
(
ngraph
::
op
::
Convert
),
&
Emitter
::
EmitConvert
},
{
TI
(
ngraph
::
op
::
Constant
),
&
Emitter
::
EmitConstant
},
{
TI
(
ngraph
::
op
::
Reshape
),
&
Emitter
::
EmitReshape
},
{
TI
(
ngraph
::
op
::
FunctionCall
),
&
Emitter
::
EmitFunctionCall
},
{
TI
(
ngraph
::
op
::
Reduce
),
&
Emitter
::
EmitReduce
},
{
TI
(
ngraph
::
op
::
Sign
),
&
Emitter
::
EmitSign
},
{
TI
(
ngraph
::
op
::
Slice
),
&
Emitter
::
EmitSlice
},
{
TI
(
ngraph
::
op
::
Sum
),
&
Emitter
::
EmitSum
},
{
TI
(
ngraph
::
op
::
Exp
),
&
Emitter
::
EmitExp
},
{
TI
(
ngraph
::
op
::
Sin
),
&
Emitter
::
EmitSin
},
{
TI
(
ngraph
::
op
::
Sinh
),
&
Emitter
::
EmitSinh
},
{
TI
(
ngraph
::
op
::
Cos
),
&
Emitter
::
EmitCos
},
{
TI
(
ngraph
::
op
::
Cosh
),
&
Emitter
::
EmitCosh
},
{
TI
(
ngraph
::
op
::
Tan
),
&
Emitter
::
EmitTan
},
{
TI
(
ngraph
::
op
::
Tanh
),
&
Emitter
::
EmitTanh
},
{
TI
(
ngraph
::
op
::
Asin
),
&
Emitter
::
EmitAsin
},
{
TI
(
ngraph
::
op
::
Acos
),
&
Emitter
::
EmitAcos
},
{
TI
(
ngraph
::
op
::
Atan
),
&
Emitter
::
EmitAtan
},
};
};
#undef TI
#undef TI
...
@@ -174,7 +207,9 @@ void ExternalFunction::compile(FunctionMap& function_map)
...
@@ -174,7 +207,9 @@ void ExternalFunction::compile(FunctionMap& function_map)
// Now we build the TU
// Now we build the TU
Emitter
emitter
;
Emitter
emitter
;
auto
&
TU
=
emitter
.
GetTU
();
auto
&
TU
=
emitter
.
GetTU
();
TU
+=
R"(
TU
+=
R"(// Generated by the NGraph CPU backend
#include <algorithm>
#include <cmath>
#include <memory>
#include <memory>
#include <vector>
#include <vector>
...
@@ -182,17 +217,18 @@ void ExternalFunction::compile(FunctionMap& function_map)
...
@@ -182,17 +217,18 @@ void ExternalFunction::compile(FunctionMap& function_map)
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
#include "ngraph/runtime/cpu/call_frame.hpp"
#include "ngraph/runtime/cpu/call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/cpu/eigen_utils.hpp"
#include "ngraph/runtime/cpu/eigen_utils.hpp"
#include "ngraph/runtime/tensor_view_info.hpp"
#include "ngraph/runtime/utils.hpp"
void *__dso_handle = 0;
using namespace ngraph::element;
using namespace ngraph::element;
using namespace ngraph::runtime;
using namespace ngraph::runtime;
using namespace ngraph::runtime::cpu::eigen;
using namespace ngraph::runtime::cpu::eigen;
extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
ngraph::runtime::TensorViewPtrs& tensor_views)
ngraph::runtime::TensorViewPtrs& tensor_views,
const std::vector<std::shared_ptr<ngraph::runtime::cpu::CallFrame>>& callees)
{
{
)"
;
)"
;
...
@@ -243,8 +279,10 @@ extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
...
@@ -243,8 +279,10 @@ extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
assert
(
llvm_module
);
assert
(
llvm_module
);
estate
.
add_module
(
llvm_module
);
estate
.
add_module
(
llvm_module
);
estate
.
finalize
();
estate
.
finalize
();
compiled_function
=
estate
.
find_function
<
void
(
compiled_function
=
ngraph
::
runtime
::
cpu
::
CallFrame
*
,
ngraph
::
runtime
::
TensorViewPtrs
&
)
>
(
"__entrypoint"
);
estate
.
find_function
<
void
(
ngraph
::
runtime
::
cpu
::
CallFrame
*
,
ngraph
::
runtime
::
TensorViewPtrs
&
,
const
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>&
)
>
(
"__entrypoint"
);
assert
(
compiled_function
);
assert
(
compiled_function
);
m_is_compiled
=
true
;
m_is_compiled
=
true
;
...
@@ -322,5 +360,5 @@ shared_ptr<ngraph::runtime::CallFrame> ExternalFunction::make_call_frame()
...
@@ -322,5 +360,5 @@ shared_ptr<ngraph::runtime::CallFrame> ExternalFunction::make_call_frame()
#undef M
#undef M
}
}
return
make_shared
<
ngraph
::
runtime
::
cpu
::
CallFrame
>
(
return
make_shared
<
ngraph
::
runtime
::
cpu
::
CallFrame
>
(
compiled_function
,
m_n_outputs
,
m_n_inputs
,
temps
);
compiled_function
,
m_n_outputs
,
m_n_inputs
,
temps
,
callees
);
}
}
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/cpu/external_function.hpp
View file @
f5768063
...
@@ -47,8 +47,10 @@ namespace ngraph
...
@@ -47,8 +47,10 @@ namespace ngraph
using
OpMap
=
std
::
unordered_map
<
std
::
type_index
,
OpFunction
>
;
using
OpMap
=
std
::
unordered_map
<
std
::
type_index
,
OpFunction
>
;
using
EntryPoint
=
std
::
function
<
void
(
ngraph
::
runtime
::
cpu
::
CallFrame
*
,
using
EntryPoint
=
std
::
function
<
void
(
ngraph
::
runtime
::
TensorViewPtrs
&
)
>
;
ngraph
::
runtime
::
cpu
::
CallFrame
*
,
ngraph
::
runtime
::
TensorViewPtrs
&
,
const
std
::
vector
<
std
::
shared_ptr
<
ngraph
::
runtime
::
cpu
::
CallFrame
>>&
)
>
;
class
ExternalFunction
:
public
ngraph
::
runtime
::
ExternalFunction
class
ExternalFunction
:
public
ngraph
::
runtime
::
ExternalFunction
{
{
...
@@ -56,7 +58,7 @@ namespace ngraph
...
@@ -56,7 +58,7 @@ namespace ngraph
ExternalFunction
(
const
std
::
shared_ptr
<
ngraph
::
Function
>&
function
,
ExternalFunction
(
const
std
::
shared_ptr
<
ngraph
::
Function
>&
function
,
bool
release_function
=
true
);
bool
release_function
=
true
);
std
::
shared_ptr
<
ngraph
::
runtime
::
CallFrame
>
make_call_frame
();
std
::
shared_ptr
<
ngraph
::
runtime
::
CallFrame
>
make_call_frame
();
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>&
get_callees
()
{
return
callees
;
}
protected
:
protected
:
void
compile
(
FunctionMap
&
function_map
);
void
compile
(
FunctionMap
&
function_map
);
...
@@ -64,6 +66,7 @@ namespace ngraph
...
@@ -64,6 +66,7 @@ namespace ngraph
size_t
m_n_outputs
;
size_t
m_n_outputs
;
ngraph
::
descriptor
::
TensorViewPtrs
m_temp_views
;
ngraph
::
descriptor
::
TensorViewPtrs
m_temp_views
;
EntryPoint
compiled_function
;
EntryPoint
compiled_function
;
std
::
vector
<
std
::
shared_ptr
<
CallFrame
>>
callees
;
};
};
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment