ngraph — Commit b50c17bf (unverified)
Authored Jul 24, 2019 by Robert Kimball; committed by GitHub on Jul 24, 2019
Parents: b5549e0d, 1eda1350

    Merge branch 'master' into tsocha/improve-cmake-grama

Showing 42 changed files with 1220 additions and 200 deletions (+1220, -200)
Changed files:

    doc/examples/abc/abc.cpp                                  +1    -1
    doc/examples/abc_operator/abc_operator.cpp                +1    -1
    doc/examples/mnist_mlp/dist_mnist_mlp.cpp                 +4    -4
    doc/examples/mnist_mlp/mnist_mlp.cpp                      +4    -4
    doc/sphinx/source/buildlb.rst                             +13   -13
    doc/sphinx/source/core/constructing-graphs/execute.rst    +2    -2
    doc/sphinx/source/project/introduction.rst                +4    -4
    doc/sphinx/source/project/release-notes.rst               +9    -5
    doc/sphinx/source/sitemap.rst                             +10   -10
    python/pyngraph/function.cpp                              +39   -0
    src/contrib/mlir/compiler.cpp                             +0    -0
    src/contrib/mlir/compiler.hpp                             +9    -13
    src/contrib/mlir/dialect/ops.cpp                          +33   -0
    src/contrib/mlir/dialect/ops.td                           +21   -3
    src/contrib/mlir/dialect/type.hpp                         +1    -0
    src/contrib/mlir/lowerer.cpp                              +118  -1
    src/contrib/mlir/op_lowerers.inc                          +1    -0
    src/contrib/mlir/ops_supported.inc                        +1    -0
    src/contrib/mlir/pass/mlir_subgraph_extraction.cpp        +1    -0
    src/ngraph/CMakeLists.txt                                 +2    -0
    src/ngraph/autodiff/adjoints.cpp                          +0    -5
    src/ngraph/autodiff/adjoints.hpp                          +0    -2
    src/ngraph/function.cpp                                   +29   -0
    src/ngraph/function.hpp                                   +6    -0
    src/ngraph/runtime/chrome_trace.cpp                       +240  -0
    src/ngraph/runtime/chrome_trace.hpp                       +144  -0
    src/ngraph/runtime/host_tensor.cpp                        +4    -0
    src/ngraph/runtime/interpreter/int_executable.cpp         +75   -0
    src/ngraph/runtime/interpreter/int_executable.hpp         +12   -0
    src/tools/nbench/CMakeLists.txt                           +2    -0
    src/tools/nbench/benchmark.cpp                            +4    -108
    src/tools/nbench/benchmark.hpp                            +0    -4
    src/tools/nbench/benchmark_pipelined.cpp                  +186  -0
    src/tools/nbench/benchmark_pipelined.hpp                  +33   -0
    src/tools/nbench/benchmark_utils.cpp                      +116  -0
    src/tools/nbench/benchmark_utils.hpp                      +57   -0
    src/tools/nbench/nbench.cpp                               +18   -2
    test/backend/batch_norm.in.cpp                            +2    -2
    test/backend/binary_elementwise.in.cpp                    +3    -3
    test/cpu_fusion.cpp                                       +11   -9
    test/util/autodiff/backprop_derivative.hpp                +1    -1
    test/util/autodiff/backprop_function.cpp                  +3    -3
doc/examples/abc/abc.cpp
@@ -32,7 +32,7 @@ int main()
     auto t1 = std::make_shared<op::Multiply>(t0, c);

     // Make the function
-    auto f = std::make_shared<Function>(NodeVector{t1},
+    auto f = std::make_shared<Function>(OutputVector{t1},
                                         ParameterVector{a, b, c});

     // Create the backend
doc/examples/abc_operator/abc_operator.cpp
@@ -31,7 +31,7 @@ int main()
     auto t1 = (a + b) * c;

     // Make the function
-    auto f = std::make_shared<Function>(NodeVector{t1},
+    auto f = std::make_shared<Function>(OutputVector{t1},
                                         ParameterVector{a, b, c});

     // Get the backend
doc/examples/mnist_mlp/dist_mnist_mlp.cpp
@@ -175,8 +175,8 @@ int main(int argc, char* argv[])
     auto delta = -learning_rate * loss;

     // Updates
-    ngraph::autodiff::Adjoints adjoints(NodeVector{loss}, NodeVector{delta});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{loss}, OutputVector{delta});
     auto grad_W0 = adjoints.backprop_node(W0);
     auto grad_b0 = adjoints.backprop_node(b0);
     auto grad_W1 = adjoints.backprop_node(W1);

@@ -231,7 +231,7 @@ int main(int argc, char* argv[])
     NodeMap train_node_map;
     auto train_function = clone_function(
-        Function(NodeVector{loss, softmax, W0_next, b0_next, W1_next, b1_next},
+        Function(OutputVector{loss, softmax, W0_next, b0_next, W1_next, b1_next},
                  ParameterVector{X, Y, N, learning_rate, W0, b0, W1, b1}),
         train_node_map);
     auto train_exec = backend->compile(train_function);

@@ -240,7 +240,7 @@ int main(int argc, char* argv[])
     // X, W0, b0, W1, b1 -> softmax
     NodeMap inference_node_map;
     auto inference_function = clone_function(
-        Function(NodeVector{softmax}, ParameterVector{X, W0, b0, W1, b1}),
+        Function(OutputVector{softmax}, ParameterVector{X, W0, b0, W1, b1}),
         inference_node_map);
     auto inference_exec = backend->compile(inference_function);
doc/examples/mnist_mlp/mnist_mlp.cpp
@@ -172,8 +172,8 @@ int main(int argc, const char* argv[])
     auto delta = -learning_rate * loss;

     // Updates
-    ngraph::autodiff::Adjoints adjoints(NodeVector{loss}, NodeVector{delta});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{loss}, OutputVector{delta});
     auto W0_next = W0 + adjoints.backprop_node(W0);
     auto b0_next = b0 + adjoints.backprop_node(b0);
     auto W1_next = W1 + adjoints.backprop_node(W1);

@@ -218,7 +218,7 @@ int main(int argc, const char* argv[])
     NodeMap train_node_map;
     auto train_function = clone_function(
-        Function(NodeVector{loss, softmax, W0_next, b0_next, W1_next, b1_next},
+        Function(OutputVector{loss, softmax, W0_next, b0_next, W1_next, b1_next},
                  ParameterVector{X, Y, N, learning_rate, W0, b0, W1, b1}),
         train_node_map);
     auto train_exec = backend->compile(train_function);

@@ -227,7 +227,7 @@ int main(int argc, const char* argv[])
     // X, W0, b0, W1, b1 -> softmax
     NodeMap inference_node_map;
     auto inference_function = clone_function(
-        Function(NodeVector{softmax}, ParameterVector{X, W0, b0, W1, b1}),
+        Function(OutputVector{softmax}, ParameterVector{X, W0, b0, W1, b1}),
         inference_node_map);
     auto inference_exe = backend->compile(inference_function);
doc/sphinx/source/buildlb.rst
@@ -5,7 +5,7 @@ Build and Test
 ###############

 * :ref:`default_ngflags`
-* :ref:`ngraph_plaidml_backend`
+.. :ref:`ngraph_plaidml_backend`

 There are a few common paths to take when manually building the |project|
 from source code. Today nGraph supports various developers working on all

@@ -161,17 +161,17 @@ The process documented here will work on CentOS 7.4.
    $ make && sudo make install

-.. _ngraph_plaidml_backend:
+.. .. _ngraph_plaidml_backend: hide this until announcement is official

-Building nGraph-PlaidML from source
-===================================
+.. Building nGraph-PlaidML from source
+.. ===================================

-The following instructions will create the ``~/ngraph_plaidml_dist``
-locally:
+.. The following instructions will create the ``~/ngraph_plaidml_dist``
+.. locally:

-#. Ensure you have installed the :ref:`prerequisites` for your OS.
+.. #. Ensure you have installed the :ref:`prerequisites` for your OS.

-#. Install the prerequisites for the backend. Our hybrid ``NGRAPH_PLAIDML``
+.. #. Install the prerequisites for the backend. Our hybrid ``NGRAPH_PLAIDML``
    backend works best with Python3 versions. We recommend that you use a
    virtual environment, due to some of the difficulties that users have
    seen when trying to install outside of a venv.

@@ -182,20 +182,20 @@ locally:
       $ pip install plaidml
       $ plaidml-setup

-#. Clone the source code, create and enter your build directory:
+.. #. Clone the source code, create and enter your build directory:

    .. code-block:: console

       $ git clone https://github.com/NervanaSystems/ngraph.git
       $ cd ngraph && mkdir build && cd build

-#. Prepare the CMake files as follows:
+.. #. Prepare the CMake files as follows:

    .. code-block:: console

       $ cmake .. -DCMAKE_INSTALL_PREFIX=~/ngraph_plaidml_dist -DNGRAPH_CPU_ENABLE=OFF -DNGRAPH_PLAIDML_ENABLE=ON

-#. Run :command:`make` and ``make install``. Note that if you are building
+.. #. Run :command:`make` and ``make install``. Note that if you are building
    outside a local or user path, you may need to run ``make install`` as the
    root user.

@@ -210,8 +210,8 @@ locally:
    tests can be run when PlaidML devices are available at the machine
    level.

-For more about working with the PlaidML backend from nGraph, see our
-API documentation :doc:`backends/plaidml-ng-api/index`.
+.. For more about working with the PlaidML backend from nGraph, see our
+.. API documentation :doc:`backends/plaidml-ng-api/index`.

 macOS\* development
doc/sphinx/source/core/constructing-graphs/execute.rst
@@ -99,8 +99,8 @@ Once the graph is built, we need to package it in a ``Function``:
    :lines: 35-36

 The first argument to the constructor specifies the nodes that the function will
-return; in this case, the product. A ``NodeVector`` is a vector of shared
-pointers of ``op::Node``. The second argument specifies the parameters of the
+return; in this case, the product. An ``OutputVector`` is a vector of references to
+outputs of ``op::Node``. The second argument specifies the parameters of the
 function, in the order they are to be passed to the compiled function. A
 ``ParameterVector`` is a vector of shared pointers to ``op::Parameter``.
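The NodeVector-to-OutputVector migration above is the thread running through all of the
example changes in this commit: a ``Function``'s results are now passed as ``Output<Node>``
handles rather than shared node pointers. A minimal sketch of the new construction pattern,
assuming the standard nGraph headers and the a/b/c graph from the abc example:

    // Sketch only: mirrors the updated doc examples; assumes ngraph headers on the
    // include path and float32 parameters of a fixed shape.
    #include <memory>
    #include "ngraph/ngraph.hpp"

    using namespace ngraph;

    std::shared_ptr<Function> make_abc_function()
    {
        Shape shape{2, 3};
        auto a = std::make_shared<op::Parameter>(element::f32, shape);
        auto b = std::make_shared<op::Parameter>(element::f32, shape);
        auto c = std::make_shared<op::Parameter>(element::f32, shape);
        auto t0 = std::make_shared<op::Add>(a, b);
        auto t1 = std::make_shared<op::Multiply>(t0, c);
        // OutputVector holds Output<Node> handles; shared_ptr<Node> converts implicitly
        return std::make_shared<Function>(OutputVector{t1}, ParameterVector{a, b, c});
    }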
doc/sphinx/source/project/introduction.rst
@@ -62,14 +62,14 @@ more detail and describe how nGraph addresses them.
 Problem 1: Kernel libraries do not support graph-level optimizations
 --------------------------------------------------------------------

-The example diagrams below shows how a deep learning framework, when integrated
+The example diagrams below show how a deep learning framework, when integrated
 with a kernel library, can optimally run each operation in a computational
 graph, but the choice of operations in the graph may not be optimal.

 .. _figure-A:

 .. figure:: ../graphics/kernel-problem-1.png
-   :width: 555px
+   :width: 100%
    :alt:

@@ -95,7 +95,7 @@ diagram.
 .. _figure-B:

 .. figure:: ../graphics/kernel-problem-2.png
-   :width: 555px
+   :width: 100%
    :alt:

 Each framework must be manually integrated with each hardware-specific kernel

@@ -130,7 +130,7 @@ work for what will ultimately be a fragile setup that is costly to maintain.
 .. _figure-C:

 .. figure:: ../graphics/kernel-problem-3.png
-   :width: 555px
+   :width: 100%
    :alt:
doc/sphinx/source/project/release-notes.rst
@@ -16,14 +16,19 @@ We are pleased to announce the release of version |version|-doc.
 Core updates for |version|
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~

++ Better PlaidML support
++ More ONNX ops
++ Elementwise divide defaults to Python semantics
++ GenerateMask seed optional
 + Graph visualization improvements
 + Preserve control dependencies in more places
 + GetOutputElement has single input

-Latest doc updates
-~~~~~~~~~~~~~~~~~~
-
-+ Add instructions how to build ``NGRAPH_PLAIDML`` backend.
+.. Latest doc updates
+.. ~~~~~~~~~~~~~~~~~~
+.. + Add instructions how to build ``NGRAPH_PLAIDML`` backend.

 .. important:: Pre-releases (``-rc-0.*``) have newer features, and are less stable.

@@ -35,7 +40,6 @@ Changelog on Previous Releases
 0.23
 ----

-+ PlaidML support
 + More ONNX ops
 + Elementwise divide defaults to Python semantics
 + GenerateMask seed optional
doc/sphinx/source/sitemap.rst
 :orphan:

 .. toctree::
-   :caption: Sitemap
-   :maxdepth: 1
+   :includehidden:

-   frameworks/index
-   python_api/index
-   inspection/index
-   core/overview
-   backends/index
-   project/index
+   frameworks/index
+   project/index
+   python_api/index
+   inspection/index
+   core/overview
+   backends/index
+   project/extras/index
python/pyngraph/function.cpp
@@ -23,6 +23,8 @@
 namespace py = pybind11;

+static const char* CAPSULE_NAME = "ngraph_function";
+
 void regclass_pyngraph_Function(py::module m)
 {
     py::class_<ngraph::Function, std::shared_ptr<ngraph::Function>> function(m, "Function");

@@ -49,4 +51,41 @@ void regclass_pyngraph_Function(py::module m)
             py::cast(self.get_output_shape(0)).attr("__str__")().cast<std::string>();
         return "<" + class_name + ": '" + self.get_friendly_name() + "' (" + shape + ")>";
     });
+    function.def_static("from_capsule", [](py::object* capsule) {
+        // get the underlying PyObject* which is a PyCapsule pointer
+        auto* pybind_capsule_ptr = capsule->ptr();
+        // extract the pointer stored in the PyCapsule under the name CAPSULE_NAME
+        auto* capsule_ptr = PyCapsule_GetPointer(pybind_capsule_ptr, CAPSULE_NAME);
+        auto* ngraph_function = static_cast<std::shared_ptr<ngraph::Function>*>(capsule_ptr);
+        if (ngraph_function)
+        {
+            return *ngraph_function;
+        }
+        else
+        {
+            throw std::runtime_error("The provided capsule does not contain an ngraph::Function");
+        }
+    });
+    function.def_static("to_capsule", [](std::shared_ptr<ngraph::Function>& ngraph_function) {
+        // create a shared pointer on the heap before putting it in the capsule
+        // this secures the lifetime of the object transferred by the capsule
+        auto* sp_copy = new std::shared_ptr<ngraph::Function>(ngraph_function);
+        // a destructor callback that will delete the heap allocated shared_ptr
+        // when the capsule is destructed
+        auto sp_deleter = [](PyObject* capsule) {
+            auto* capsule_ptr = PyCapsule_GetPointer(capsule, CAPSULE_NAME);
+            auto* function_sp = static_cast<std::shared_ptr<ngraph::Function>*>(capsule_ptr);
+            if (function_sp)
+            {
+                delete function_sp;
+            }
+        };
+        // put the shared_ptr in a new capsule under the same name as in "from_capsule"
+        auto pybind_capsule = py::capsule(sp_copy, CAPSULE_NAME, sp_deleter);
+        return pybind_capsule;
+    });
 }
src/contrib/mlir/compiler.cpp
(diff collapsed in this view; +0 -0)
src/contrib/mlir/compiler.hpp
@@ -98,25 +98,21 @@ namespace ngraph
            void build_ng_dialect();

-           template <typename OP>
-           static mlir::Value* create_op(MLIRCompiler& compiler, const ngraph::Node* ng_node)
+           template <typename Op>
+           static mlir::Operation* create_op(MLIRCompiler& compiler, const ngraph::Node* ng_node)
            {
                throw std::runtime_error("Unimplemented op '" + ng_node->description() +
                                         "' in MLIR Compiler");
            }

-           template <typename UnaryOp>
-           mlir::Value* create_unary_op(const ngraph::Node* ng_node);
-
-           template <typename BinOp>
-           mlir::Value* create_binary_op(const ngraph::Node* ng_node);
-
-           // TODO(amprocte): Can we have a create_variadic_op that is able to handle the
-           // attributes?
-           mlir::Value* create_concat(const ngraph::Node* ng_node);
+           // Generic op lowerer to ng dialect.
+           // Simply maps ngraph tensors to values and generate an OP. No op-specific logic.
+           template <typename Op>
+           mlir::Operation* create_generic_op(const ngraph::Node* ng_node);

-           template <typename RedOp>
-           mlir::Value* create_index_reduction(const ngraph::Node* ng_node);
+           mlir::Operation* create_index_reduction(const ngraph::Node* ng_node);

            void create_return();

@@ -150,7 +146,7 @@ namespace ngraph
            using TensorToInfo = std::pair<descriptor::Tensor*, TensorInfo>;
            using TensorToInfoMap = std::unordered_map<descriptor::Tensor*, TensorInfo>;
            using MLIRCompOpFunction =
-               std::function<mlir::Value*(MLIRCompiler& compiler, const ngraph::Node*)>;
+               std::function<mlir::Operation*(MLIRCompiler& compiler, const ngraph::Node*)>;
            using MLIRCompOpMap = std::unordered_map<std::type_index, MLIRCompOpFunction>;

            // Maps tensor to the value it represents in the IR
src/contrib/mlir/dialect/ops.cpp
@@ -168,6 +168,39 @@ static mlir::LogicalResult verifyCmpOp(T* op)
     return mlir::success();
 }

+template <>
+mlir::LogicalResult verifyOp(NGGatherOp* op)
+{
+    Type ty = op->params()->getType();
+    NGTensorType inputType = ty.cast<NGTensorType>();
+
+    ty = op->indices()->getType();
+    NGTensorType indicesType = ty.cast<NGTensorType>();
+
+    // ensure axis < params rank
+    if (op->axis().getSExtValue() >= inputType.getRank())
+        return op->emitOpError("Gather axis is larger than input rank");
+
+    ty = indicesType.getElementType();
+
+    // ensure indices are I32 or I64
+    if (!ty.isa<NGIntegerType>())
+        return op->emitOpError("Indices tensor is not of Integer type");
+
+    NGIntegerType indicesEltType = ty.cast<NGIntegerType>();
+    if (!indicesEltType.isInt32() && !indicesEltType.isInt64())
+        return op->emitOpError("Indices tensor is not of I32 or I64 type");
+
+    mlir::Type r0 = op->res()->getType();
+    NGTensorType resType = r0.cast<NGTensorType>();
+
+    // ensure result is compatible with input: rank(res) == rank(params) + rank(indices) - 1
+    if (resType.getRank() != inputType.getRank() + indicesType.getRank() - 1)
+        return op->emitOpError("Incompatible result shape and/or type");
+
+    return mlir::success();
+}
+
 namespace mlir
 {
 #define GET_OP_CLASSES
src/contrib/mlir/dialect/ops.td
@@ -186,8 +186,8 @@ def NGDotOp : NG_Binary_Op<"dot">
 // class, but I'm not sure how to add concatenation_axis into the args if we
 // do that.
 def NGConcatOp :
-    NG_OneResult_Op<"concat", [NoSideEffect]>,
-    Arguments<(ins Variadic<NG_TensorType>:$args, I64Attr:$concatenation_axis)>
+    NG_OneResult_Op<"concat", [NoSideEffect]>,
+    Arguments<(ins Variadic<NG_TensorType>:$args, I64Attr:$concatenation_axis)>
 {
   let parser = [{ NGRAPH_CHECK(false, "No parser support"); return mlir::failure(); }];

@@ -200,7 +200,7 @@ class NG_Axis_Reduction_Op<string mnemonic, list<OpTrait> traits = []> :
 {
   let summary = "Base class for reduction operations that perform a reduction "
                 "across the axes of a single tensor.";
-  let description = "Axes are represented as an array of I64 attributes.";
+  let description = [{Axes are represented as an array of I64 attributes.}];

   let parser = [{ NGRAPH_CHECK(false, "No parser support"); return mlir::failure(); }];

@@ -257,6 +257,24 @@ def NGAnyRedOp : NG_Axis_Reduction_Op<"any.red">
   let verifier = [{ return verifyLogicalReductionOp(this); }];
 }

+// Gather
+def NGGatherOp :
+    NG_OneResult_Op<"gather", [NoSideEffect]>,
+    Arguments<(ins NG_TensorType:$params, NG_TensorType:$indices, I64Attr:$axis)>
+{
+    let summary = "Gather slices from params along the specified axis according to indices";
+    let description = [{
+        Gather slices from axis of params according to indices
+        params  The tensor from which slices are gathered
+        indices Index tensor. Data type must be `element::i32` or `element::i64`
+        axis    Axis in params to gather
+    }];
+
+    let parser = [{ NGRAPH_CHECK(false, "No parser support"); return mlir::failure(); }];
+    let verifier = [{ return verifyOp(this); }];
+}
+
 // Terminator Ops
 def NGReturnOp : NG_Terminator_Op<"return">;
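To make the verifier's shape rule concrete: gather replaces the axis dimension of params
with all of the indices dimensions, so the result rank is rank(params) + rank(indices) - 1.
An illustrative helper, not part of this commit (the function name and types are assumptions),
that computes the expected result shape:

    // Hypothetical shape helper matching the NGGatherOp verifier rule above.
    #include <cstddef>
    #include <vector>

    std::vector<size_t> gather_result_shape(const std::vector<size_t>& params,
                                            const std::vector<size_t>& indices,
                                            size_t axis)
    {
        std::vector<size_t> result;
        for (size_t i = 0; i < axis; i++)                  // dims before the gather axis
            result.push_back(params[i]);
        for (size_t d : indices)                           // axis dim replaced by indices dims
            result.push_back(d);
        for (size_t i = axis + 1; i < params.size(); i++)  // dims after the gather axis
            result.push_back(params[i]);
        return result;                                     // rank = N + M - 1
    }

For example, params shape {5, 4, 3} with indices shape {2, 2} and axis 1 yields result
shape {5, 2, 2, 3}.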
src/contrib/mlir/dialect/type.hpp
@@ -199,6 +199,7 @@ namespace mlir
        }

        Shape getShape() const { return m_shape; }
+       int64_t getRank() const { return m_shape.size(); }
        EltType getElementType() const { return m_eltType; }

    private:
        NGTensorTypeStorage(EltType eltType, Shape shape)
src/contrib/mlir/lowerer.cpp
@@ -646,6 +646,123 @@ namespace
         return matchSuccess();
     }

+    REWRITER(NGGatherOp)
+    {
+        auto gatherOp = cast<NGGatherOp>(op);
+        auto loc = gatherOp.getLoc();
+        ScopedContext scope(rewriter, loc);
+
+        // Get operands
+        Value* result = m_pass.buildOutputDefs(op, rewriter)[0];
+        NGRAPH_CHECK(result, "Unexpected null result in GatherOp");
+        auto resultTy = result->getType().cast<MemRefType>();
+
+        Value* params = operands[0];
+        Value* indices = operands[1];
+        auto axis = gatherOp.axis().getSExtValue();
+
+        // Create view to write into result.
+        MemRefView vRes(result), vParams(params), vIndices(indices);
+        // Indexed Values
+        IndexedValue iRes(result), iParams(params), iIndices(indices);
+
+        // Construct outer loop for params dims. Exclude the axis dim.
+        SmallVector<ValueHandle, 4> paramsLbs, paramsUbs;
+        SmallVector<IndexHandle, 4> paramsIVs;
+        SmallVector<int64_t, 4> paramsSteps;
+        SmallVector<ValueHandle*, 4> paramsIVPtrs;
+        for (auto i = 0; i < vParams.rank(); i++)
+        {
+            // skip gather axis
+            if (i == axis)
+                continue;
+            paramsLbs.push_back(IndexHandle(vParams.lb(i)));
+            paramsUbs.push_back(IndexHandle(vParams.ub(i)));
+            paramsSteps.push_back(vParams.step(i));
+        }
+        NGRAPH_CHECK(paramsLbs.size() == vParams.rank() - 1 &&
+                         paramsUbs.size() == paramsLbs.size() &&
+                         paramsSteps.size() == paramsLbs.size(),
+                     "Incorrect loop nest bounds size for gather params");
+
+        paramsIVs = IndexHandle::makeIndexHandles(vParams.rank() - 1);
+        paramsIVPtrs = IndexHandle::makeIndexHandlePointers(paramsIVs);
+
+        auto indicesLbs = vIndices.getLbs();
+        auto indicesUbs = vIndices.getUbs();
+        auto indicesSteps = vIndices.getSteps();
+
+        auto indicesIVs = IndexHandle::makeIndexHandles(vIndices.rank());
+        auto indicesIVPtrs = IndexHandle::makeIndexHandlePointers(indicesIVs);
+
+        SmallVector<IndexHandle, 8> paramsIndices, resIndices;
+
+        // Make sure we are going to create loops
+        NGRAPH_CHECK(vParams.rank() > 0, "Invalid size for indices steps");
+
+        // Let params rank : N
+        // Let indices rank : M
+        // Let axis be A
+        // Generate
+        // params loops
+        //   for P_0: 0 -> params.dim[0]
+        //     for P_1: 0 -> params.dim[1]
+        //       for P_2: 0 -> params.dim[2]
+        //       ...
+        //       for P_(A-1): 0 -> params.dim[A-1]
+        //         for P_(A+1): 0 -> params.dim[A+1]
+        //         ...
+        //         for P_(N-1): 0 -> params.dim[N-1]
+        //           indices loops
+        //           for I_0: 0 -> indices.dim[0]
+        //           ...
+        //             for I_(M-1): 0 -> indices.dim[M-1]
+        //               res[P_0, P_1, .. P_(A-1), I_0, .., I_(M-1), P_(A+1), ... P_(N-1)] =
+        //                 params[P_0, P_1, .. P_(A-1), indices[I_0, .., I_(M-1)],
+        //                        P_(A+1), ... P_(N-1)];
+
+        LoopNestBuilder(paramsIVPtrs, paramsLbs, paramsUbs, paramsSteps)([&] {
+            LoopNestBuilder(indicesIVPtrs, indicesLbs, indicesUbs, indicesSteps)([&] {
+                // Load axis value from indices array and cast it to Index Type
+                ValueHandle axisIdx = ValueHandle::create<IndexCastOp>(
+                    (ValueHandle)iIndices(indicesIVs), rewriter.getIndexType());
+                // construct indices for param
+                // [P_0, P_1, .. P_axis-1, Indices[I0, I1, .. I_k-1], P_axis+1, P_axis+2, .. P_n-1]
+                for (auto i = 0, j = 0; i < vParams.rank(); i++)
+                {
+                    if (i == axis)
+                    {
+                        paramsIndices.push_back(IndexHandle(axisIdx));
+                    }
+                    else
+                    {
+                        paramsIndices.push_back(paramsIVs[j++]);
+                    }
+                }
+                // construct indices for result
+                // [P_0, P_1, .. P_axis-1, I0, I1, .. I_k-1, P_axis+1, P_axis+2, .. P_n-1]
+                for (auto i = 0, j = 0; i < vParams.rank() + vIndices.rank() - 1;)
+                {
+                    if (i == axis && indicesIVs.size() > 0)
+                    {
+                        resIndices.append(indicesIVs.begin(), indicesIVs.end());
+                        i += indicesIVs.size();
+                    }
+                    else
+                    {
+                        resIndices.push_back(paramsIVs[j++]);
+                        i++;
+                    }
+                }
+                // Store into result
+                iRes(resIndices) = iParams(paramsIndices);
+            });
+        });
+
+        rewriter.replaceOp(op, {result});
+        return matchSuccess();
+    }
+
     REWRITER(NGReturnOp)
     {
         rewriter.replaceOpWithNewOp<ReturnOp>(op);

@@ -653,7 +770,7 @@ namespace
     }
 #undef REWRITER
     /// End of pattern matchers

     template <typename OP>
     void lower_binary_elementwise(Operation* op,
                                   ArrayRef<Value*> operands,
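The comment block inside the rewriter describes the generated loop nest abstractly. As a
plain-C++ reference for the simplest instance (rank-2 params, rank-1 indices, axis 0) — an
illustrative sketch only, with assumed names and row-major flat buffers, not code from this
commit:

    // Reference semantics of the gather loop nest: one indices loop (I_0) and one
    // remaining params loop (P_1); res[I_0, P_1] = params[indices[I_0], P_1].
    #include <cstdint>
    #include <vector>

    std::vector<float> gather_axis0(const std::vector<float>& params,
                                    int64_t cols,
                                    const std::vector<int64_t>& indices)
    {
        std::vector<float> result(indices.size() * cols);
        for (size_t i = 0; i < indices.size(); i++)  // indices loop (I_0)
        {
            for (int64_t j = 0; j < cols; j++)       // params loop (P_1)
            {
                result[i * cols + j] = params[indices[i] * cols + j];
            }
        }
        return result;
    }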
src/contrib/mlir/op_lowerers.inc
@@ -29,6 +29,7 @@ MLIR_OP(NGArgMinRedOp)
 MLIR_OP(NGConcatOp)
 MLIR_OP(NGDivOp)
 MLIR_OP(NGDotOp)
+MLIR_OP(NGGatherOp)
 MLIR_OP(NGGreaterOp)
 MLIR_OP(NGLessOp)
 MLIR_OP(NGMulOp)
src/contrib/mlir/ops_supported.inc
@@ -9,6 +9,7 @@ MLIR_OP(ArgMax)
 MLIR_OP(Divide)
 MLIR_OP(Dot)
 MLIR_OP(Concat)
+MLIR_OP(Gather)
 MLIR_OP(Greater)
 MLIR_OP(Less)
 MLIR_OP(Maximum)
src/contrib/mlir/pass/mlir_subgraph_extraction.cpp
@@ -25,6 +25,7 @@
 #include "ngraph/op/divide.hpp"
 #include "ngraph/op/dot.hpp"
 #include "ngraph/op/experimental/compiled_kernel.hpp"
+#include "ngraph/op/gather.hpp"
 #include "ngraph/op/get_output_element.hpp"
 #include "ngraph/op/greater.hpp"
 #include "ngraph/op/less.hpp"
src/ngraph/CMakeLists.txt
@@ -463,6 +463,8 @@ set (SRC
     runtime/backend.hpp
     runtime/backend_manager.cpp
     runtime/backend_manager.hpp
+    runtime/chrome_trace.cpp
+    runtime/chrome_trace.hpp
     runtime/executable.cpp
     runtime/executable.hpp
     runtime/host_tensor.cpp
src/ngraph/autodiff/adjoints.cpp
@@ -51,11 +51,6 @@ OutputVector make_zeros(std::shared_ptr<Node> x)
     return zeros;
 }

-autodiff::Adjoints::Adjoints(const NodeVector& ys, const NodeVector& cs)
-    : Adjoints(OutputVector(ys.begin(), ys.end()), OutputVector(cs.begin(), cs.end()))
-{
-}
-
 autodiff::Adjoints::Adjoints(const OutputVector& ys, const OutputVector& cs)
 {
     if (ys.size() != cs.size())
src/ngraph/autodiff/adjoints.hpp
@@ -46,8 +46,6 @@ namespace ngraph
             /// \param c An expression for where to evaluate the derivatives
             Adjoints(const OutputVector& y, const OutputVector& c);

-            Adjoints(const NodeVector& y, const NodeVector& c);
-
             Adjoints(const Adjoints& adjoints) = default;
             Adjoints& operator=(const Adjoints& adjoints) = default;
             Adjoints() = default;
src/ngraph/function.cpp
@@ -41,6 +41,30 @@ Function::Function(const ResultVector& results,
     init();
 }

+Function::Function(const OutputVector& results,
+                   const ParameterVector& parameters,
+                   const std::string& name)
+    : m_results(results.size())
+    , m_parameters(parameters)
+    , m_temporary_pool_size(0)
+    , m_instance_id(m_next_instance_id.fetch_add(1))
+    , m_name(name)
+    , m_unique_name("Function_" + to_string(m_instance_id))
+{
+    if (std::any_of(results.cbegin(), results.cend(), [](Output<Node> n) {
+            return std::dynamic_pointer_cast<op::Result>(n.get_node_shared_ptr());
+        }))
+    {
+        throw ngraph_error(
+            " Results already contain op::Results. Use a c-tor that takes a ResultVector");
+    }
+
+    std::transform(results.begin(), results.end(), m_results.begin(), [](Output<Node> n) {
+        return std::make_shared<op::Result>(n);
+    });
+    init();
+}
+
 Function::Function(const NodeVector& results,
                    const ParameterVector& parameters,
                    const std::string& name)

@@ -208,6 +232,11 @@ shared_ptr<Node> Function::get_output_op(size_t i) const
     return m_results.at(i);
 }

+Output<Node> Function::output(size_t i) const
+{
+    return m_results.at(i);
+}
+
 shared_ptr<Node> Function::get_result() const
 {
     if (m_results.size() != 1)
src/ngraph/function.hpp
@@ -37,6 +37,10 @@ namespace ngraph
                  const ParameterVector& parameters,
                  const std::string& name = "");

+        Function(const OutputVector& results,
+                 const ParameterVector& parameters,
+                 const std::string& name = "");
+
         Function(const std::shared_ptr<Node>& result,
                  const ParameterVector& parameters,
                  const std::string& name = "");

@@ -55,6 +59,8 @@ namespace ngraph
         /// Return the op that generates output i
         std::shared_ptr<Node> get_output_op(size_t i) const;

+        Output<Node> output(size_t i) const;
+
         /// Return the element type of output i
         const element::Type& get_output_element_type(size_t i) const;
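A short sketch of how the new accessor might be used (assumed usage; ``get_output_size``
and ``get_output_op`` are pre-existing Function API):

    // output(i) yields an Output<Node> handle for result i, complementing
    // get_output_op(i), which returns the op::Result node itself.
    #include <memory>
    #include "ngraph/ngraph.hpp"

    using namespace ngraph;

    void inspect_outputs(const std::shared_ptr<Function>& f)
    {
        for (size_t i = 0; i < f->get_output_size(); i++)
        {
            Output<Node> out = f->output(i);   // handle to result i
            auto node = f->get_output_op(i);   // the op::Result node
            (void)out;
            (void)node;
        }
    }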
src/ngraph/runtime/chrome_trace.cpp (new file)
//*****************************************************************************
// Copyright 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#include <iostream>
#include <map>
#include <sstream>
#include <string>

#include "chrome_trace.hpp"
#include "ngraph/log.hpp"

using namespace std;
using namespace ngraph;

static bool read_tracing_env_var()
{
    static const bool is_enabled = (getenv("NGRAPH_ENABLE_TRACING") != nullptr);
    return is_enabled;
}

mutex runtime::event::Manager::s_file_mutex;
bool runtime::event::Manager::s_tracing_enabled = read_tracing_env_var();

runtime::event::Duration::Duration(const string& name, const string& category, const string& args)
{
    if (Manager::is_tracing_enabled())
    {
        m_start = Manager::get_current_microseconds();
        m_stop = 0;
        m_name = name;
        m_category = category;
        m_args = args;
    }
}

void runtime::event::Duration::stop()
{
    if (Manager::is_tracing_enabled())
    {
        m_stop = Manager::get_current_microseconds();
    }
}

void runtime::event::Duration::write()
{
    if (Manager::is_tracing_enabled())
    {
        size_t stop_time = (m_stop != 0 ? m_stop : Manager::get_current_microseconds());
        lock_guard<mutex> lock(Manager::get_mutex());
        ofstream& out = runtime::event::Manager::get_output_stream();
        if (out.is_open() == false)
        {
            runtime::event::Manager::open();
        }
        else
        {
            Manager::get_output_stream() << ",\n";
        }
        Manager::get_output_stream()
            << R"({"name":")" << m_name << R"(","cat":")" << m_category
            << R"(","ph":"X","pid":)" << Manager::get_process_id() << R"(,"tid":)"
            << Manager::get_thread_id() << R"(,"ts":)" << m_start << R"(,"dur":)"
            << (stop_time - m_start);
        if (!m_args.empty())
        {
            out << R"(,"args":)" << m_args;
        }
        out << "}";
    }
}

runtime::event::Object::Object(const string& name, const string& args)
    : m_name{name}
    , m_id{static_cast<size_t>(chrono::high_resolution_clock::now().time_since_epoch().count())}
{
    if (Manager::is_tracing_enabled())
    {
        lock_guard<mutex> lock(Manager::get_mutex());
        ofstream& out = runtime::event::Manager::get_output_stream();
        if (out.is_open() == false)
        {
            runtime::event::Manager::open();
        }
        else
        {
            Manager::get_output_stream() << ",\n";
        }
        out << R"({"name":")" << m_name << R"(","ph":"N","id":")" << m_id << R"(","ts":)"
            << Manager::get_current_microseconds() << R"(,"pid":)" << Manager::get_process_id()
            << R"(,"tid":)" << Manager::get_thread_id();
        if (!args.empty())
        {
            out << R"(,"args":)" << args;
        }
        out << "}";
        write_snapshot(out, args);
    }
}

void runtime::event::Object::snapshot(const string& args)
{
    if (Manager::is_tracing_enabled())
    {
        lock_guard<mutex> lock(Manager::get_mutex());
        ofstream& out = runtime::event::Manager::get_output_stream();
        if (out.is_open() == false)
        {
            runtime::event::Manager::open();
        }
        else
        {
            Manager::get_output_stream() << ",\n";
        }
        write_snapshot(out, args);
    }
}

void runtime::event::Object::write_snapshot(ostream& out, const string& args)
{
    out << R"({"name":")" << m_name << R"(","ph":"O","id":")" << m_id << R"(","ts":)"
        << Manager::get_current_microseconds() << R"(,"pid":)" << Manager::get_process_id()
        << R"(,"tid":)" << Manager::get_thread_id();
    if (!args.empty())
    {
        out << R"(,"args":)" << args;
    }
    out << "}";
}

void runtime::event::Object::destroy()
{
    if (Manager::is_tracing_enabled())
    {
        lock_guard<mutex> lock(Manager::get_mutex());
        ofstream& out = runtime::event::Manager::get_output_stream();
        if (out.is_open() == false)
        {
            runtime::event::Manager::open();
        }
        else
        {
            Manager::get_output_stream() << ",\n";
        }
        out << R"({"name":")" << m_name << R"(","ph":"D","id":")" << m_id << R"(","ts":)"
            << Manager::get_current_microseconds() << R"(,"pid":)" << Manager::get_process_id()
            << R"(,"tid":)" << Manager::get_thread_id() << "}";
    }
}

void runtime::event::Manager::open(const string& path)
{
    ofstream& out = get_output_stream();
    if (out.is_open() == false)
    {
        out.open(path, ios_base::trunc);
        out << "[\n";
    }
}

void runtime::event::Manager::close()
{
    ofstream& out = get_output_stream();
    if (out.is_open())
    {
        out << "\n]\n";
        out.close();
    }
}

ofstream& runtime::event::Manager::get_output_stream()
{
    static ofstream s_event_log;
    return s_event_log;
}

const string& runtime::event::Manager::get_process_id()
{
    static const string s_pid = to_string(getpid());
    return s_pid;
}

void runtime::event::Manager::enable_event_tracing()
{
    s_tracing_enabled = true;
}

void runtime::event::Manager::disable_event_tracing()
{
    s_tracing_enabled = false;
}

bool runtime::event::Manager::is_event_tracing_enabled()
{
    return s_tracing_enabled;
}

string runtime::event::Manager::get_thread_id()
{
    thread::id tid = this_thread::get_id();
    static map<thread::id, string> tid_map;
    auto it = tid_map.find(tid);
    string rc;
    if (it == tid_map.end())
    {
        stringstream ss;
        ss << "\"" << tid << "\"";
        rc = ss.str();
        tid_map.insert({tid, rc});
    }
    else
    {
        rc = it->second;
    }
    return rc;
}
src/ngraph/runtime/chrome_trace.hpp (new file)
//*****************************************************************************
// Copyright 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#pragma once

#include <chrono>
#include <fstream>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>

#ifdef _WIN32
#include <windows.h>
// windows.h must be before processthreadsapi.h so we need this comment
#include <processthreadsapi.h>
#define getpid() GetCurrentProcessId()
#else
#include <unistd.h>
#endif

namespace ngraph
{
    namespace runtime
    {
        namespace event
        {
            class Duration;
            class Object;
            class Manager;
        }
    }
}

//
// This class records timestamps for a given user defined event and
// produces output in the chrome tracing format that can be used to view
// the events of a running program
//
// Following is the format of a trace event
//
// {
//   "name": "myName",
//   "cat": "category,list",
//   "ph": "B",
//   "ts": 12345,
//   "pid": 123,
//   "tid": 456,
//   "args": {
//     "someArg": 1,
//     "anotherArg": {
//       "value": "my value"
//     }
//   }
// }
//
// The trace file format is defined here:
// https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
//
// The trace file can be viewed by Chrome browser using the
// URL: chrome://tracing/
//
// More information about this is at:
// http://dev.chromium.org/developers/how-tos/trace-event-profiling-tool
class ngraph::runtime::event::Manager
{
    friend class Duration;
    friend class Object;

public:
    static void open(const std::string& path = "runtime_event_trace.json");
    static void close();
    static bool is_tracing_enabled() { return s_tracing_enabled; }
    static void enable_event_tracing();
    static void disable_event_tracing();
    static bool is_event_tracing_enabled();

private:
    static std::ofstream& get_output_stream();
    static const std::string& get_process_id();
    static size_t get_current_microseconds()
    {
        return std::chrono::high_resolution_clock::now().time_since_epoch().count() / 1000;
    }
    static std::string get_thread_id();
    static std::mutex& get_mutex() { return s_file_mutex; }

    static std::ostream s_ostream;
    static std::mutex s_file_mutex;
    static bool s_tracing_enabled;
};

class ngraph::runtime::event::Duration
{
public:
    explicit Duration(const std::string& name,
                      const std::string& category,
                      const std::string& args = "");

    ~Duration() { write(); }

    /// \brief stop the timer without writing the data to the log file. To write the data
    ///        call the `write` method. Calls to stop() are optional.
    void stop();

    /// \brief write the log data to the log file for this event
    ///        This function has an implicit stop() if stop() has not been previously called
    void write();

    Duration(const Duration&) = delete;
    Duration& operator=(Duration const&) = delete;

private:
    std::string to_json() const;
    size_t m_start;
    size_t m_stop;
    std::string m_name;
    std::string m_category;
    std::string m_args;
};

class ngraph::runtime::event::Object
{
public:
    Object(const std::string& name, const std::string& args);
    void snapshot(const std::string& args);
    void destroy();

private:
    void write_snapshot(std::ostream& out, const std::string& args);

    const std::string m_name;
    size_t m_id;
};
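A usage sketch for this API (assuming tracing has been enabled via the
``NGRAPH_ENABLE_TRACING`` environment variable or ``Manager::enable_event_tracing()``): a
``Duration`` emits one complete "X" duration event when it goes out of scope.

    #include "ngraph/runtime/chrome_trace.hpp"

    using namespace ngraph;

    void do_work(); // assumed stand-in for the region being profiled

    void traced_region()
    {
        runtime::event::Duration d("my_region", "example");
        do_work();
        // d's destructor calls write(), appending the event to
        // runtime_event_trace.json, viewable at chrome://tracing/
    }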
src/ngraph/runtime/host_tensor.cpp
@@ -18,6 +18,7 @@
 #include <memory>

 #include "ngraph/descriptor/layout/dense_tensor_layout.hpp"
+#include "ngraph/runtime/chrome_trace.hpp"
 #include "ngraph/runtime/host_tensor.hpp"
 #include "ngraph/util.hpp"

@@ -96,6 +97,8 @@ const char* runtime::HostTensor::get_data_ptr() const
 void runtime::HostTensor::write(const void* source, size_t n)
 {
+    runtime::event::Duration d1("write", "HostTensor");
+
     if (n > m_buffer_size)
     {
         throw out_of_range("write access past end of tensor");

@@ -106,6 +109,7 @@ void runtime::HostTensor::write(const void* source, size_t n)
 void runtime::HostTensor::read(void* target, size_t n) const
 {
+    runtime::event::Duration d1("read", "HostTensor");
     if (n > m_buffer_size)
     {
         throw out_of_range("read access past end of tensor");
src/ngraph/runtime/interpreter/int_executable.cpp
@@ -30,6 +30,7 @@
 #include "ngraph/pass/manager.hpp"
 #include "ngraph/pass/memory_layout.hpp"
 #include "ngraph/runtime/backend_manager.hpp"
+#include "ngraph/runtime/chrome_trace.hpp"
 #include "ngraph/serializer.hpp"
 #include "ngraph/util.hpp"

@@ -74,6 +75,8 @@ runtime::interpreter::INTExecutable::INTExecutable(const std::string& model_stri
 bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
                                               const vector<shared_ptr<runtime::Tensor>>& inputs)
 {
+    runtime::event::Duration d1("call", "Interpreter");
+
     // convert inputs to HostTensor
     vector<shared_ptr<HostTensor>> func_inputs;
     for (auto tensor : inputs)

@@ -122,6 +125,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
     for (const NodeWrapper& wrapped : m_wrapped_nodes)
     {
         auto op = wrapped.get_node();
+        runtime::event::Duration d2(op->description(), "Interpreter");
         auto type_id = wrapped.get_typeid();
         if (type_id == OP_TYPEID::Parameter)
         {

@@ -304,3 +308,74 @@ void runtime::interpreter::INTExecutable::save(ostream& out)
     string model = serialize(m_function, 0);
     writer.write("model", model.data(), model.size());
 }
+
+shared_ptr<ngraph::op::Parameter>
+    runtime::interpreter::INTExecutable::get_parameter(size_t index) const
+{
+    const ParameterVector& parameters = get_parameters();
+    NGRAPH_CHECK(index < parameters.size(), "create_tensor for input out of bounds");
+    return parameters[index];
+}
+
+shared_ptr<ngraph::op::Result> runtime::interpreter::INTExecutable::get_result(size_t index) const
+{
+    const ResultVector& results = get_results();
+    NGRAPH_CHECK(index < results.size(), "create_tensor for input out of bounds");
+    return results[index];
+}
+
+shared_ptr<runtime::Tensor>
+    runtime::interpreter::INTExecutable::create_input_tensor(size_t input_index)
+{
+    shared_ptr<op::Parameter> parameter = get_parameter(input_index);
+    return make_shared<runtime::HostTensor>(parameter->get_element_type(), parameter->get_shape());
+}
+
+shared_ptr<runtime::Tensor>
+    runtime::interpreter::INTExecutable::create_output_tensor(size_t output_index)
+{
+    shared_ptr<op::Result> result = get_result(output_index);
+    return make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape());
+}
+
+vector<shared_ptr<runtime::Tensor>>
+    runtime::interpreter::INTExecutable::create_input_tensor(size_t input_index,
+                                                             size_t pipeline_depth)
+{
+    vector<shared_ptr<runtime::HostTensor>> tensors;
+    shared_ptr<op::Parameter> parameter = get_parameter(input_index);
+    for (size_t i = 0; i < pipeline_depth; i++)
+    {
+        shared_ptr<runtime::HostTensor> tensor;
+        auto t = make_shared<runtime::HostTensor>(parameter->get_element_type(),
+                                                  parameter->get_shape());
+        tensor = static_pointer_cast<runtime::HostTensor>(t);
+        tensors.push_back(tensor);
+    }
+    vector<shared_ptr<runtime::Tensor>> result_tensors;
+    for (const shared_ptr<runtime::HostTensor>& tensor : tensors)
+    {
+        result_tensors.push_back(tensor);
+    }
+    return result_tensors;
+}
+
+vector<shared_ptr<runtime::Tensor>>
+    runtime::interpreter::INTExecutable::create_output_tensor(size_t output_index,
+                                                              size_t pipeline_depth)
+{
+    vector<shared_ptr<runtime::HostTensor>> tensors;
+    shared_ptr<op::Result> result = get_result(output_index);
+    for (size_t i = 0; i < pipeline_depth; i++)
+    {
+        shared_ptr<runtime::HostTensor> tensor;
+        auto t = make_shared<runtime::HostTensor>(result->get_element_type(),
+                                                  result->get_shape());
+        tensor = static_pointer_cast<runtime::HostTensor>(t);
+        tensors.push_back(tensor);
+    }
+    vector<shared_ptr<runtime::Tensor>> result_tensors;
+    for (const shared_ptr<runtime::HostTensor>& tensor : tensors)
+    {
+        result_tensors.push_back(tensor);
+    }
+    return result_tensors;
+}
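A sketch of how a caller pairs these pipelined tensor factories with an executable — it
mirrors what the new nbench pipelined benchmark (below) does; the backend name is an
assumption:

    #include "ngraph/ngraph.hpp"
    #include "ngraph/runtime/backend.hpp"

    using namespace ngraph;

    void make_pipelined_tensors(const std::shared_ptr<Function>& f)
    {
        auto backend = runtime::Backend::create("INTERPRETER");
        auto exec = backend->compile(f);
        constexpr size_t pipeline_depth = 2;
        // one tensor per pipeline stage for input 0 and output 0
        auto inputs = exec->create_input_tensor(0, pipeline_depth);
        auto outputs = exec->create_output_tensor(0, pipeline_depth);
    }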
src/ngraph/runtime/interpreter/int_executable.hpp
@@ -186,9 +186,21 @@ public:
     std::vector<PerformanceCounter> get_performance_data() const override;

+    std::shared_ptr<runtime::Tensor> create_input_tensor(size_t input_index) override;
+
+    std::shared_ptr<runtime::Tensor> create_output_tensor(size_t output_index) override;
+
+    std::vector<std::shared_ptr<runtime::Tensor>>
+        create_input_tensor(size_t input_index, size_t pipeline_depth) override;
+
+    std::vector<std::shared_ptr<runtime::Tensor>>
+        create_output_tensor(size_t output_index, size_t pipeline_depth) override;
+
 private:
     INTExecutable(const std::string& model_string);

+    std::shared_ptr<ngraph::op::Parameter> get_parameter(size_t index) const;
+    std::shared_ptr<ngraph::op::Result> get_result(size_t index) const;
     int get_alignment() const { return 64; }
     bool m_is_compiled = false;
     bool m_nan_check_enabled = false;
src/tools/nbench/CMakeLists.txt
@@ -17,6 +17,8 @@
 set(SRC
     nbench.cpp
     benchmark.cpp
+    benchmark_pipelined.cpp
+    benchmark_utils.cpp
 )

 add_executable(nbench ${SRC})
src/tools/nbench/benchmark.cpp
@@ -14,12 +14,8 @@
 // limitations under the License.
 //*****************************************************************************

 #include <random>
-#if defined(__x86_64__) || defined(__amd64__)
-#include <xmmintrin.h>
-#endif

 #include "benchmark.hpp"
+#include "benchmark_utils.hpp"
 #include "ngraph/file_util.hpp"
 #include "ngraph/runtime/backend.hpp"
 #include "ngraph/runtime/host_tensor.hpp"

@@ -30,106 +26,6 @@
 using namespace std;
 using namespace ngraph;

-static default_random_engine s_random_engine;
-
-void set_denormals_flush_to_zero()
-{
-#if defined(__x86_64__) || defined(__amd64__)
-    // Avoids perf impact from denormals while benchmarking with random data
-    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
-    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
-#endif
-}
-
-template <typename T>
-void init_int_tv(shared_ptr<runtime::Tensor> tv, T min, T max)
-{
-    size_t size = tv->get_element_count();
-    uniform_int_distribution<T> dist(min, max);
-    vector<T> vec(size);
-    for (T& element : vec)
-    {
-        element = dist(s_random_engine);
-    }
-    tv->write(vec.data(), vec.size() * sizeof(T));
-}
-
-template <>
-void init_int_tv<char>(shared_ptr<runtime::Tensor> tv, char min, char max)
-{
-    size_t size = tv->get_element_count();
-    uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
-    vector<char> vec(size);
-    for (char& element : vec)
-    {
-        element = static_cast<char>(dist(s_random_engine));
-    }
-    tv->write(vec.data(), vec.size() * sizeof(char));
-}
-
-template <>
-void init_int_tv<int8_t>(shared_ptr<runtime::Tensor> tv, int8_t min, int8_t max)
-{
-    size_t size = tv->get_element_count();
-    uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
-    vector<int8_t> vec(size);
-    for (int8_t& element : vec)
-    {
-        element = static_cast<int8_t>(dist(s_random_engine));
-    }
-    tv->write(vec.data(), vec.size() * sizeof(int8_t));
-}
-
-template <>
-void init_int_tv<uint8_t>(shared_ptr<runtime::Tensor> tv, uint8_t min, uint8_t max)
-{
-    size_t size = tv->get_element_count();
-    uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
-    vector<uint8_t> vec(size);
-    for (uint8_t& element : vec)
-    {
-        element = static_cast<uint8_t>(dist(s_random_engine));
-    }
-    tv->write(vec.data(), vec.size() * sizeof(uint8_t));
-}
-
-template <typename T>
-void init_real_tv(shared_ptr<runtime::Tensor> tv, T min, T max)
-{
-    size_t size = tv->get_element_count();
-    uniform_real_distribution<T> dist(min, max);
-    vector<T> vec(size);
-    for (T& element : vec)
-    {
-        element = dist(s_random_engine);
-    }
-    tv->write(vec.data(), vec.size() * sizeof(T));
-}
-
-static void random_init(shared_ptr<runtime::Tensor> tv)
-{
-    element::Type et = tv->get_element_type();
-    switch (et.get_type_enum())
-    {
-    case element::Type_t::boolean: init_int_tv<char>(tv, 0, 1); break;
-    case element::Type_t::f32: init_real_tv<float>(tv, -1, 1); break;
-    case element::Type_t::f64: init_real_tv<double>(tv, -1, 1); break;
-    case element::Type_t::i8: init_int_tv<int8_t>(tv, -1, 1); break;
-    case element::Type_t::i16: init_int_tv<int16_t>(tv, -1, 1); break;
-    case element::Type_t::i32: init_int_tv<int32_t>(tv, 0, 1); break;
-    case element::Type_t::i64: init_int_tv<int64_t>(tv, 0, 1); break;
-    case element::Type_t::u8: init_int_tv<uint8_t>(tv, 0, 1); break;
-    case element::Type_t::u16: init_int_tv<uint16_t>(tv, 0, 1); break;
-    case element::Type_t::u32: init_int_tv<uint32_t>(tv, 0, 1); break;
-    case element::Type_t::u64: init_int_tv<uint64_t>(tv, 0, 1); break;
-    case element::Type_t::undefined:
-    case element::Type_t::dynamic:
-    case element::Type_t::bf16:
-    case element::Type_t::f16:
-    default: throw runtime_error("unsupported type");
-    }
-}
-
 vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
                                                   const string& backend_name,
                                                   size_t iterations,

@@ -140,7 +36,7 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
     stopwatch timer;
     timer.start();
     auto backend = runtime::Backend::create(backend_name);
-    auto compiled_func = backend->compile(f, timing_detail);
+    auto exec = backend->compile(f, timing_detail);
     timer.stop();
     cout.imbue(locale(""));
     cout << "compile time: " << timer.get_milliseconds() << "ms" << endl;

@@ -201,7 +97,7 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
             }
         }
     }
-    compiled_func->call(results, args);
+    exec->call(results, args);
     if (copy_data)
     {
         for (size_t result_index = 0; result_index < results.size(); result_index++)

@@ -217,6 +113,6 @@ vector<runtime::PerformanceCounter> run_benchmark(shared_ptr<Function> f,
     float time = t1.get_milliseconds();
     cout << time / iterations << "ms per iteration" << endl;

-    vector<runtime::PerformanceCounter> perf_data = compiled_func->get_performance_data();
+    vector<runtime::PerformanceCounter> perf_data = exec->get_performance_data();
     return perf_data;
 }
src/tools/nbench/benchmark.hpp
@@ -24,10 +24,6 @@
 #include "ngraph/function.hpp"
 #include "ngraph/runtime/performance_counter.hpp"

 /// performance test utilities
-std::multimap<size_t, std::string>
-    aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data);
-
 std::vector<ngraph::runtime::PerformanceCounter> run_benchmark(std::shared_ptr<ngraph::Function> f,
                                                                const std::string& backend_name,
                                                                size_t iterations,
src/tools/nbench/benchmark_pipelined.cpp (new file)
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#include <array>
#include <condition_variable>
#include <mutex>
#include <thread>

#include "benchmark.hpp"
#include "benchmark_utils.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"

using namespace std;
using namespace ngraph;

class TensorCollection
{
public:
    vector<shared_ptr<runtime::HostTensor>> parameter_data;
    vector<shared_ptr<runtime::HostTensor>> result_data;

    vector<shared_ptr<runtime::Tensor>> input_tensors;
    vector<shared_ptr<runtime::Tensor>> output_tensors;

private:
};

static mutex s_mutex;
static condition_variable s_condition;
static size_t current_iteration = 0;
static size_t s_iterations;
static size_t s_warmup_iterations;
static stopwatch s_timer;

static void thread_entry(runtime::Executable* exec,
                         const TensorCollection& tensors,
                         size_t pipeline_stage)
{
    bool data_written = false;
    const vector<shared_ptr<runtime::Tensor>>& args = tensors.input_tensors;
    const vector<shared_ptr<runtime::Tensor>>& results = tensors.output_tensors;
    while (current_iteration < s_iterations + s_warmup_iterations)
    {
        if (!data_written)
        {
            for (size_t arg_index = 0; arg_index < args.size(); arg_index++)
            {
                const shared_ptr<runtime::Tensor>& arg = args[arg_index];
                if (arg->get_stale())
                {
                    const shared_ptr<runtime::HostTensor>& data =
                        tensors.parameter_data[arg_index];
                    arg->write(data->get_data_ptr(),
                               data->get_element_count() * data->get_element_type().size());
                }
            }
            data_written = true;
        }
        unique_lock<mutex> lock(s_mutex);
        if ((current_iteration & 1) != pipeline_stage)
        {
            s_condition.wait(lock);
        }
        else
        {
            if (current_iteration == s_warmup_iterations)
            {
                s_timer.start();
            }
            // our turn to run
            exec->call(results, args);
            current_iteration++;
            data_written = false;
            s_condition.notify_all();
            lock.unlock();
            for (size_t result_index = 0; result_index < results.size(); result_index++)
            {
                const shared_ptr<runtime::HostTensor>& data = tensors.result_data[result_index];
                const shared_ptr<runtime::Tensor>& result = results[result_index];
                result->read(data->get_data_ptr(),
                             data->get_element_count() * data->get_element_type().size());
            }
        }
    }
}

vector<runtime::PerformanceCounter> run_benchmark_pipelined(shared_ptr<Function> f,
                                                            const string& backend_name,
                                                            size_t iterations,
                                                            bool timing_detail,
                                                            int warmup_iterations,
                                                            bool copy_data)
{
    constexpr size_t pipeline_depth = 2;
    s_iterations = iterations;
    s_warmup_iterations = warmup_iterations;
    array<TensorCollection, pipeline_depth> tensor_collections;
    stopwatch timer;
    timer.start();
    auto backend = runtime::Backend::create(backend_name);
    auto exec = backend->compile(f, timing_detail);
    timer.stop();
    cout.imbue(locale(""));
    cout << "compile time: " << timer.get_milliseconds() << "ms" << endl;

    set_denormals_flush_to_zero();

    // Create random input data for all input tensors
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        for (shared_ptr<op::Parameter> param : f->get_parameters())
        {
            auto tensor_data =
                make_shared<runtime::HostTensor>(param->get_element_type(), param->get_shape());
            random_init(tensor_data);
            tensor_collections[i].parameter_data.push_back(tensor_data);
        }
    }

    // Create output tensors for all outputs
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        for (shared_ptr<Node> result : f->get_results())
        {
            auto tensor_data =
                make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape());
            tensor_collections[i].result_data.push_back(tensor_data);
        }
    }

    // Create input tensors for all Parameters
    array<vector<shared_ptr<runtime::Tensor>>, pipeline_depth> input_tensors_array;
    size_t input_index = 0;
    for (shared_ptr<op::Parameter> param : f->get_parameters())
    {
        auto input_tensors = exec->create_input_tensor(input_index++, pipeline_depth);
        for (size_t i = 0; i < pipeline_depth; i++)
        {
            tensor_collections[i].input_tensors.push_back(input_tensors[i]);
        }
    }

    // Create output tensors for all Results
    array<vector<shared_ptr<runtime::Tensor>>, pipeline_depth> output_tensors_array;
    size_t output_index = 0;
    for (shared_ptr<Node> result : f->get_results())
    {
        auto output_tensors = exec->create_output_tensor(output_index++, pipeline_depth);
        for (size_t i = 0; i < pipeline_depth; i++)
        {
            tensor_collections[i].output_tensors.push_back(output_tensors[i]);
        }
    }

    thread threads[pipeline_depth];
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        threads[i] = thread(thread_entry, exec.get(), tensor_collections[i], i);
    }

    for (size_t i = 0; i < pipeline_depth; i++)
    {
        threads[i].join();
    }
    s_timer.stop();
    float time = s_timer.get_milliseconds();
    cout << time / iterations << "ms per iteration" << endl;

    vector<runtime::PerformanceCounter> perf_data = exec->get_performance_data();
    return perf_data;
}
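Design note: the pipeline is fixed at depth 2, and the parity check
``(current_iteration & 1) != pipeline_stage`` is what serializes the two threads — each
stage only executes ``call`` on its own iterations, so one stage's tensor writes and
reads overlap with the other stage's execution, and ``notify_all`` hands the turn over.
An invocation sketch (the deserialization step and argument values are assumptions):

    #include "benchmark_pipelined.hpp"
    #include "ngraph/serializer.hpp"

    using namespace ngraph;

    void bench(const std::string& json_model)
    {
        std::shared_ptr<Function> f = deserialize(json_model);
        auto perf = run_benchmark_pipelined(
            f, "INTERPRETER", /*iterations=*/10, /*timing_detail=*/false,
            /*warmup_iterations=*/2, /*copy_data=*/true);
    }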
src/tools/nbench/benchmark_pipelined.hpp
0 → 100644
View file @
b50c17bf
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "ngraph/function.hpp"
#include "ngraph/runtime/performance_counter.hpp"
std::vector<ngraph::runtime::PerformanceCounter>
    run_benchmark_pipelined(std::shared_ptr<ngraph::Function> f,
                            const std::string& backend_name,
                            size_t iterations,
                            bool timing_detail,
                            int warmup_iterations,
                            bool copy_data);
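For orientation, a caller deserializes a Function and hands it to this entry point, which is exactly what nbench.cpp does further below when --double_buffer is passed. A minimal sketch of such a driver (the backend name and iteration counts are illustrative):

#include "benchmark_pipelined.hpp"
#include "ngraph/serializer.hpp"

// Runs the pipelined benchmark on a serialized model; 'model_json' is the
// JSON produced by ngraph's serializer.
std::vector<ngraph::runtime::PerformanceCounter> bench(const std::string& model_json)
{
    std::shared_ptr<ngraph::Function> f = ngraph::deserialize(model_json);
    return run_benchmark_pipelined(f,
                                   "CPU", // backend name
                                   100,   // iterations
                                   false, // timing_detail
                                   1,     // warmup_iterations
                                   true); // copy_data
}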
src/tools/nbench/benchmark_utils.cpp
0 → 100644
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#if defined(__x86_64__) || defined(__amd64__)
#include <xmmintrin.h>
#endif
#include "benchmark_utils.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
template <>
void init_int_tensor<char>(shared_ptr<runtime::Tensor> tensor, char min, char max)
{
    size_t size = tensor->get_element_count();
    uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
    vector<char> vec(size);
    for (char& element : vec)
    {
        element = static_cast<char>(dist(get_random_engine()));
    }
    tensor->write(vec.data(), vec.size() * sizeof(char));
}

template <>
void init_int_tensor<int8_t>(shared_ptr<runtime::Tensor> tensor, int8_t min, int8_t max)
{
    size_t size = tensor->get_element_count();
    uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
    vector<int8_t> vec(size);
    for (int8_t& element : vec)
    {
        element = static_cast<int8_t>(dist(get_random_engine()));
    }
    tensor->write(vec.data(), vec.size() * sizeof(int8_t));
}

template <>
void init_int_tensor<uint8_t>(shared_ptr<runtime::Tensor> tensor, uint8_t min, uint8_t max)
{
    size_t size = tensor->get_element_count();
    uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
    vector<uint8_t> vec(size);
    for (uint8_t& element : vec)
    {
        element = static_cast<uint8_t>(dist(get_random_engine()));
    }
    tensor->write(vec.data(), vec.size() * sizeof(uint8_t));
}
void set_denormals_flush_to_zero()
{
#if defined(__x86_64__) || defined(__amd64__)
    // Avoids perf impact from denormals while benchmarking with random data
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#endif
}
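Background on the two intrinsics: they set the FTZ and DAZ bits of the x86 MXCSR register, so subnormal ("denormal") results are flushed to zero and subnormal inputs are read as zero. Without them, randomly initialized data near zero can hit the much slower microcoded path for subnormal arithmetic and skew the timings. A small x86-only illustration of the effect (a sketch, not part of nbench):

#include <cstdio>
#include <limits>
#include <pmmintrin.h>
#include <xmmintrin.h>

int main()
{
    volatile float x = std::numeric_limits<float>::min(); // smallest normal float
    volatile float y = x / 2.0f;                          // subnormal result
    std::printf("without FTZ: %g\n", static_cast<double>(y)); // ~5.9e-39

    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

    volatile float z = x / 2.0f; // the same operation now flushes to zero
    std::printf("with FTZ:    %g\n", static_cast<double>(z)); // prints 0
}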
void random_init(shared_ptr<runtime::Tensor> tensor)
{
    element::Type et = tensor->get_element_type();
#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
#endif
    switch (et.get_type_enum())
    {
    case element::Type_t::boolean: init_int_tensor<char>(tensor, 0, 1); break;
    case element::Type_t::f32: init_real_tensor<float>(tensor, -1, 1); break;
    case element::Type_t::f64: init_real_tensor<double>(tensor, -1, 1); break;
    case element::Type_t::i8: init_int_tensor<int8_t>(tensor, -1, 1); break;
    case element::Type_t::i16: init_int_tensor<int16_t>(tensor, -1, 1); break;
    case element::Type_t::i32: init_int_tensor<int32_t>(tensor, 0, 1); break;
    case element::Type_t::i64: init_int_tensor<int64_t>(tensor, 0, 1); break;
    case element::Type_t::u8: init_int_tensor<uint8_t>(tensor, 0, 1); break;
    case element::Type_t::u16: init_int_tensor<uint16_t>(tensor, 0, 1); break;
    case element::Type_t::u32: init_int_tensor<uint32_t>(tensor, 0, 1); break;
    case element::Type_t::u64: init_int_tensor<uint64_t>(tensor, 0, 1); break;
    case element::Type_t::undefined:
    case element::Type_t::dynamic:
    case element::Type_t::bf16:
    case element::Type_t::f16:
    default: throw runtime_error("unsupported type");
    }
#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
#pragma GCC diagnostic pop
#endif
}

default_random_engine& get_random_engine()
{
    static std::default_random_engine s_random_engine;
    return s_random_engine;
}
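A note on why the char, int8_t, and uint8_t specializations above exist at all: the C++ standard only defines std::uniform_int_distribution for short, int, long, long long and their unsigned counterparts, so instantiating it directly with a byte-sized type is undefined behavior (and a hard error on some standard libraries). The specializations therefore sample from an int16_t distribution and narrow the result, the same pattern as this stand-alone sketch:

#include <cstdint>
#include <random>

// Draw a uniformly distributed int8_t without instantiating
// std::uniform_int_distribution<int8_t>, which the standard does not permit.
std::int8_t random_int8(std::default_random_engine& rng, std::int8_t min, std::int8_t max)
{
    std::uniform_int_distribution<std::int16_t> dist(min, max);
    return static_cast<std::int8_t>(dist(rng));
}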
src/tools/nbench/benchmark_utils.hpp
0 → 100644
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include <random>
#include "benchmark.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
void set_denormals_flush_to_zero();

void random_init(std::shared_ptr<ngraph::runtime::Tensor> tensor);

std::default_random_engine& get_random_engine();
template <typename T>
void init_int_tensor(std::shared_ptr<ngraph::runtime::Tensor> tensor, T min, T max)
{
    size_t size = tensor->get_element_count();
    std::uniform_int_distribution<T> dist(min, max);
    std::vector<T> vec(size);
    for (T& element : vec)
    {
        element = dist(get_random_engine());
    }
    tensor->write(vec.data(), vec.size() * sizeof(T));
}

template <typename T>
void init_real_tensor(std::shared_ptr<ngraph::runtime::Tensor> tensor, T min, T max)
{
    size_t size = tensor->get_element_count();
    std::uniform_real_distribution<T> dist(min, max);
    std::vector<T> vec(size);
    for (T& element : vec)
    {
        element = dist(get_random_engine());
    }
    tensor->write(vec.data(), vec.size() * sizeof(T));
}
src/tools/nbench/nbench.cpp
...
...
@@ -24,6 +24,7 @@
#include <iomanip>
#include "benchmark.hpp"
#include "benchmark_pipelined.hpp"
#include "ngraph/distributed.hpp"
#include "ngraph/except.hpp"
#include "ngraph/file_util.hpp"
...
...
@@ -181,6 +182,7 @@ int main(int argc, char** argv)
    int warmup_iterations = 1;
    bool copy_data = true;
    bool dot_file = false;
+   bool double_buffer = false;
    for (size_t i = 1; i < argc; i++)
    {
...
...
@@ -229,6 +231,10 @@ int main(int argc, char** argv)
        {
            directory = argv[++i];
        }
+       else if (arg == "--double_buffer")
+       {
+           double_buffer = true;
+       }
        else if (arg == "-w" || arg == "--warmup_iterations")
        {
            try
...
...
@@ -283,6 +289,7 @@ OPTIONS
    -w|--warmup_iterations  Number of warm-up iterations
    --no_copy_data          Disable copy of input/result data every iteration
    --dot                   Generate Graphviz dot file
+   --double_buffer         Double buffer inputs and outputs
)###";
        return 1;
    }
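With the new flag wired up, a double-buffered run looks something like the line below; the model file name is a placeholder and the -f/-b/-i short options are illustrative, so check nbench's complete OPTIONS listing if they differ:

nbench -f model.json -b CPU -i 100 --double_buffer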
...
...
@@ -420,8 +427,17 @@ OPTIONS
        {
            cout << "\n---- Benchmark ----\n";
            shared_ptr<Function> f = deserialize(model);
-           auto perf_data = run_benchmark(
-               f, backend, iterations, timing_detail, warmup_iterations, copy_data);
+           vector<runtime::PerformanceCounter> perf_data;
+           if (double_buffer)
+           {
+               perf_data = run_benchmark_pipelined(
+                   f, backend, iterations, timing_detail, warmup_iterations, copy_data);
+           }
+           else
+           {
+               perf_data = run_benchmark(
+                   f, backend, iterations, timing_detail, warmup_iterations, copy_data);
+           }
            auto perf_shape = to_perf_shape(f, perf_data);
            aggregate_perf_data.insert(
                aggregate_perf_data.end(), perf_shape.begin(), perf_shape.end());
...
...
test/backend/batch_norm.in.cpp
...
...
@@ -733,8 +733,8 @@ NGRAPH_TEST(${BACKEND_NAME}, batch_norm_bprop_n4c3h2w2)
    auto C = std::make_shared<op::Parameter>(element::f32, shape_r);
    auto zero = ngraph::make_zero(bn_dgamma->get_element_type(), bn_dgamma->get_shape());
-   ngraph::autodiff::Adjoints adjoints(NodeVector{bn_dx, bn_dgamma, bn_dbeta},
-                                       NodeVector{C, zero, zero});
+   ngraph::autodiff::Adjoints adjoints(OutputVector{bn_dx, bn_dgamma, bn_dbeta},
+                                       OutputVector{C, zero, zero});
    auto dinput = adjoints.backprop_node(input);
    auto dgamma = adjoints.backprop_node(gamma);
...
...
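The NodeVector-to-OutputVector edits in this and the remaining test files all track one API migration: ngraph::autodiff::Adjoints is now constructed from Output<Node> handles, which identify a particular output of a node, rather than from bare node pointers. Output<Node> is a small value type, so element type and shape are reached with '.' instead of '->', and the owning node is recovered with get_node_shared_ptr(), as the hunks below show. Distilled into one sketch (any single-output Function f works here):

#include "ngraph/autodiff/adjoints.hpp"
#include "ngraph/ngraph.hpp"

// Post-migration pattern; before this commit it was f->get_output_op(0)
// plus NodeVector arguments.
std::shared_ptr<ngraph::Node> backprop_first_param(std::shared_ptr<ngraph::Function> f)
{
    ngraph::Output<ngraph::Node> y = f->output(0);
    auto c = std::make_shared<ngraph::op::Parameter>(y.get_element_type(), // '.' not '->'
                                                     y.get_shape());
    ngraph::autodiff::Adjoints adjoints(ngraph::OutputVector{y}, ngraph::OutputVector{c});
    return adjoints.backprop_node(f->get_parameters().at(0));
}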
test/backend/binary_elementwise.in.cpp
...
...
@@ -257,10 +257,10 @@ NGRAPH_TEST(${BACKEND_NAME}, divide_adjoint_stability)
    auto B = make_shared<op::Parameter>(element::f32, shape);
    auto f = make_shared<Function>(make_shared<op::Divide>(A, B), ParameterVector{A, B});
-   auto Y_out = f->get_output_op(0);
+   auto Y_out = f->output(0);
    auto Xs = f->get_parameters();
-   auto C = std::make_shared<op::Parameter>(Y_out->get_element_type(), Y_out->get_shape());
-   ngraph::autodiff::Adjoints adjoints(NodeVector{Y_out}, NodeVector{C});
+   auto C = std::make_shared<op::Parameter>(Y_out.get_element_type(), Y_out.get_shape());
+   ngraph::autodiff::Adjoints adjoints(OutputVector{Y_out}, OutputVector{C});
    std::vector<std::shared_ptr<Node>> dYdXs(Xs.size());
    transform(Xs.begin(), Xs.end(), dYdXs.begin(), [C, &adjoints](const std::shared_ptr<Node>& X) {
...
test/cpu_fusion.cpp
...
...
@@ -507,7 +507,8 @@ TEST(cpu_fusion, conv_bias_bprop_n1c1h3w3)
    auto f = make_shared<Function>(
        convolution_bias, ParameterVector{conv_test.data, conv_test.weights, conv_test.bias});
-   ngraph::autodiff::Adjoints adjoints(NodeVector{convolution_bias}, NodeVector{conv_test.delta});
+   ngraph::autodiff::Adjoints adjoints(OutputVector{convolution_bias},
+                                       OutputVector{conv_test.delta});
    auto d_data = adjoints.backprop_node(conv_test.data);
    auto d_weights = adjoints.backprop_node(conv_test.weights);
...
...
@@ -546,7 +547,7 @@ TEST(cpu_fusion, conv_bias_bprop)
    pass_manager.register_pass<pass::VisualizeTree>("conv_bias_bprop_fusion.png");
    auto f = make_shared<Function>(conv_bias, ParameterVector{data_batch, filters, bias});
-   ngraph::autodiff::Adjoints adjoints(NodeVector{conv_bias}, NodeVector{delta});
+   ngraph::autodiff::Adjoints adjoints(OutputVector{conv_bias}, OutputVector{delta});
    auto d_data = adjoints.backprop_node(data_batch);
    auto d_weights = adjoints.backprop_node(filters);
...
...
@@ -1452,7 +1453,7 @@ TEST(cpu_fusion, max_pool_with_indices)
    auto max_pool = std::make_shared<op::MaxPool>(input, window_shape);
    auto C = std::make_shared<op::Parameter>(element::f32, max_pool->get_shape());
-   ngraph::autodiff::Adjoints adjoints(NodeVector{max_pool}, NodeVector{C});
+   ngraph::autodiff::Adjoints adjoints(ngraph::OutputVector{max_pool}, ngraph::OutputVector{C});
    auto dinput = adjoints.backprop_node(input);
...
...
@@ -1789,14 +1790,14 @@ static std::shared_ptr<ngraph::Function> make_forward_function()
    return std::make_shared<Function>(NodeVector{max_pool, neg, absn}, ParameterVector{input});
}

-static std::pair<std::shared_ptr<ngraph::Function>,
-                 std::vector<std::shared_ptr<ngraph::Node>>>
+static std::pair<std::shared_ptr<ngraph::Function>, OutputVector>
    make_backward_function(std::shared_ptr<ngraph::Function> f)
{
    // get parameters
    std::vector<std::shared_ptr<ngraph::op::Parameter>> back_parameters = f->get_parameters();

-   ngraph::NodeVector adjoints;
-   ngraph::NodeVector outputs;
+   ngraph::OutputVector adjoints;
+   ngraph::OutputVector outputs;
    for (auto Y : f->get_results())
    {
        // Get the output
...
...
@@ -1809,7 +1810,7 @@ static std::pair<std::shared_ptr<ngraph::Function>, std::vector<std::shared_ptr<
    ngraph::autodiff::Adjoints adjoint{outputs, adjoints};

    // Perform autodiff
-   std::vector<std::shared_ptr<Node>> dYdXs(back_parameters.size());
+   OutputVector dYdXs(back_parameters.size());
    transform(back_parameters.begin(),
              back_parameters.end(),
              dYdXs.begin(),
...
...
@@ -1818,7 +1819,8 @@ static std::pair<std::shared_ptr<ngraph::Function>, std::vector<std::shared_ptr<
    // create the backward function
    std::vector<std::shared_ptr<ngraph::op::Parameter>> param_adjoints;
    for (auto n : adjoints)
-       param_adjoints.push_back(std::dynamic_pointer_cast<ngraph::op::Parameter>(n));
+       param_adjoints.push_back(
+           std::dynamic_pointer_cast<ngraph::op::Parameter>(n.get_node_shared_ptr()));
    back_parameters.insert(back_parameters.begin(), param_adjoints.begin(), param_adjoints.end());

    return {std::make_shared<ngraph::Function>(dYdXs, back_parameters), adjoints};
...
...
@@ -2703,7 +2705,7 @@ void sigmoid_multiply_fusion_backward_compute(runtime::Backend* backend,
    auto sigmoid_mul =
        make_shared<op::SigmoidMultiply>(input_0_alt, input_1_alt, input_0_type, input_1_type);
-   ngraph::autodiff::Adjoints adjoints(NodeVector{sigmoid_mul}, NodeVector{delta_param});
+   ngraph::autodiff::Adjoints adjoints(OutputVector{sigmoid_mul}, OutputVector{delta_param});
    auto d_input_0 = adjoints.backprop_node(input_0_adjoint);
    auto d_input_1 = adjoints.backprop_node(input_1_adjoint);
    auto df = make_shared<Function>(NodeVector{d_input_0, d_input_1}, back_params);
...
...
test/util/autodiff/backprop_derivative.hpp
...
...
@@ -144,7 +144,7 @@ namespace ngraph
    // df/dX*
    std::vector<std::shared_ptr<Node>> df_output_params;
-   Adjoints adjoints(NodeVector{f->get_output_op(0)}, NodeVector{c_param});
+   Adjoints adjoints(OutputVector{f->output(0)}, OutputVector{c_param});

    // for each x "of interest"
    for (auto x : indep_params)
...
...
test/util/autodiff/backprop_function.cpp
...
...
@@ -32,10 +32,10 @@ using namespace ngraph;
std::shared_ptr<Function> autodiff::backprop_function(const std::shared_ptr<Function>& f)
{
-   auto Y_out = f->get_output_op(0);
+   auto Y_out = f->output(0);
    auto Xs = f->get_parameters();
-   auto C = std::make_shared<op::Parameter>(Y_out->get_element_type(), Y_out->get_shape());
-   Adjoints adjoints(NodeVector{Y_out}, NodeVector{C});
+   auto C = std::make_shared<op::Parameter>(Y_out.get_element_type(), Y_out.get_shape());
+   Adjoints adjoints(OutputVector{Y_out}, OutputVector{C});
    std::vector<std::shared_ptr<Node>> dYdXs(Xs.size());
    transform(Xs.begin(), Xs.end(), dYdXs.begin(), [C, &adjoints](const std::shared_ptr<Node>& X) {
        return adjoints.backprop_node(X);
...
...