Commit ee463b66 authored by fenglei.tian

Merge branch 'tfl/gpu_fix_constant_bug' of github.com:NervanaSystems/ngraph-cpp…

Merge branch 'tfl/gpu_fix_constant_bug' of github.com:NervanaSystems/ngraph-cpp into tfl/gpu_fix_constant_bug
parents 24b72581 20e2a098
# Environment to build and unit-test ngraph-cpp
FROM ubuntu:16.04
......
# Environment to build and unit-test ngraph-cpp on centos74
# with gcc 4.8.5
# with python 2.7
# with cmake3
......
# Environment to build and unit-test ngraph-cpp
FROM ubuntu:16.04
......
# Environment to build and unit-test ngraph-cpp
FROM ubuntu:16.04
......
# Basic Makefile for contrib/docker. This can be expanded later as more targets
# are added.
#
# Building LLVM from source has been observed to trigger the oom-killer
# on systems with a large number of cores
# running with make -j
#
# Default is to build with -j 22 for parallel cmake/make.
# Override with make PARALLEL="-j <num_parallel_processes>" where
# <num_parallel_processes> = the number of make processes to run in parallel
# Turn off with make PARALLEL=
PARALLEL=-j 22
# DIR is an internal variable that serves as an anchor to this cloned git
# repository. DIR is mounted into the docker container, so that builds
......
...@@ -14,6 +14,8 @@
# limitations under the License.
# ******************************************************************************

add_subdirectory(examples)

if ("${NGRAPH_BUILD_DOCS}" MATCHES "^ON$")
    add_custom_target( docs
        COMMENT "Build all of the documentation types selected during CMake configuration."
......
<!-- XSLT script to combine the generated output into a single file.
If you have xsltproc you could use:
xsltproc combine.xslt index.xml >all.xml
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" version="1.0" indent="no" standalone="yes" />
<xsl:template match="/">
<doxygen version="{doxygenindex/@version}">
<!-- Load all doxygen generated xml files -->
<xsl:for-each select="doxygenindex/compound">
<xsl:copy-of select="document( concat( @refid, '.xml' ) )/doxygen/*" />
</xsl:for-each>
</doxygen>
</xsl:template>
</xsl:stylesheet>
<?xml version='1.0' encoding='utf-8' ?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="doxygenindex" type="DoxygenType"/>
<xsd:complexType name="DoxygenType">
<xsd:sequence>
<xsd:element name="compound" type="CompoundType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence>
<xsd:attribute name="version" type="xsd:string" use="required"/>
</xsd:complexType>
<xsd:complexType name="CompoundType">
<xsd:sequence>
<xsd:element name="name" type="xsd:string"/>
<xsd:element name="member" type="MemberType" minOccurs="0" maxOccurs="unbounded"/>
</xsd:sequence>
<xsd:attribute name="refid" type="xsd:string" use="required"/>
<xsd:attribute name="kind" type="CompoundKind" use="required"/>
</xsd:complexType>
<xsd:complexType name="MemberType">
<xsd:sequence>
<xsd:element name="name" type="xsd:string"/>
</xsd:sequence>
<xsd:attribute name="refid" type="xsd:string" use="required"/>
<xsd:attribute name="kind" type="MemberKind" use="required"/>
</xsd:complexType>
<xsd:simpleType name="CompoundKind">
<xsd:restriction base="xsd:string">
<xsd:enumeration value="class"/>
<xsd:enumeration value="struct"/>
<xsd:enumeration value="union"/>
<xsd:enumeration value="interface"/>
<xsd:enumeration value="protocol"/>
<xsd:enumeration value="category"/>
<xsd:enumeration value="exception"/>
<xsd:enumeration value="file"/>
<xsd:enumeration value="namespace"/>
<xsd:enumeration value="group"/>
<xsd:enumeration value="page"/>
<xsd:enumeration value="example"/>
<xsd:enumeration value="dir"/>
</xsd:restriction>
</xsd:simpleType>
<xsd:simpleType name="MemberKind">
<xsd:restriction base="xsd:string">
<xsd:enumeration value="define"/>
<xsd:enumeration value="property"/>
<xsd:enumeration value="event"/>
<xsd:enumeration value="variable"/>
<xsd:enumeration value="typedef"/>
<xsd:enumeration value="enum"/>
<xsd:enumeration value="enumvalue"/>
<xsd:enumeration value="function"/>
<xsd:enumeration value="signal"/>
<xsd:enumeration value="prototype"/>
<xsd:enumeration value="friend"/>
<xsd:enumeration value="dcop"/>
<xsd:enumeration value="slot"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:schema>
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
if(MKLDNN_INCLUDE_DIR)
link_directories(${MKLDNN_LIB_DIR})
endif()
if (NGRAPH_CPU_ENABLE)
set (SRC
abc.cpp
${PROJECT_SOURCE_DIR}/doc/examples/abc.cpp
)
add_executable(abc ${SRC})
add_dependencies(abc ngraph)
set(HEADER_SEARCH_DEFINES
"NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\""
)
target_link_libraries(abc ngraph)
include_directories(SYSTEM ${JSON_INCLUDE_DIR})
set_source_files_properties(abc.cpp PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
endif()
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <iostream>
#include <ngraph/ngraph.hpp>
using namespace ngraph;
int main()
{
// Build the graph
Shape s{2, 3};
auto a = std::make_shared<op::Parameter>(element::f32, s);
auto b = std::make_shared<op::Parameter>(element::f32, s);
auto c = std::make_shared<op::Parameter>(element::f32, s);
auto t0 = std::make_shared<op::Add>(a, b);
auto t1 = std::make_shared<op::Multiply>(t0, c);
// Make the function
auto f = std::make_shared<Function>(NodeVector{t1}, op::ParameterVector{a, b, c});
// Get the backend
auto manager = runtime::Manager::get("CPU");
auto backend = manager->allocate_backend();
// Compile the function
auto external = manager->compile(f);
auto cf = backend->make_call_frame(external);
// Allocate tensors
auto t_a = backend->make_primary_tensor_view(element::f32, s);
auto t_b = backend->make_primary_tensor_view(element::f32, s);
auto t_c = backend->make_primary_tensor_view(element::f32, s);
auto t_result = backend->make_primary_tensor_view(element::f32, s);
// Initialize tensors
float v_a[2][3] = {{1, 2, 3}, {4, 5, 6}};
float v_b[2][3] = {{7, 8, 9}, {10, 11, 12}};
float v_c[2][3] = {{1, 0, -1}, {-1, 1, 2}};
t_a->write(&v_a, 0, sizeof(v_a));
t_b->write(&v_b, 0, sizeof(v_b));
t_c->write(&v_c, 0, sizeof(v_c));
// Invoke the function
cf->call({t_a, t_b, t_c}, {t_result});
// Get the result
float r[2][3];
t_result->read(&r, 0, sizeof(r));
std::cout << "[" << std::endl;
for (size_t i = 0; i < s[0]; ++i)
{
std::cout << " [";
for (size_t j = 0; j < s[1]; ++j)
{
std::cout << r[i][j] << ' ';
}
std::cout << ']' << std::endl;
}
std::cout << ']' << std::endl;
return 0;
}
...@@ -1837,18 +1837,19 @@ div[class^='highlight'] td.code {
}

code, p.caption, caption-text {
    font-family: Inconsolata, sans, monospace;
    color: #A79992;
    font-size: 0.99em;
    line-height: 1.39em;
}

.code-block-caption {
    font-variant: small-caps;
    font-size: 0.88em;
    background-color: #d0dfdf;
    padding-right: 0.43em;
    padding-top: 0.23em;
    padding-left: 0.11em;
    padding-bottom: 0.23em;
    text-align: right;
}
......
...@@ -18,6 +18,7 @@ Compile MXNet\* with ``libngraph``

If the |nGl| code has not yet been installed to your system, please go back
and return here to finish compiling MXNet with ``libngraph``.

#. Set the ``LD_LIBRARY_PATH`` path to the location where we built the nGraph
   libraries:
......
...@@ -6,36 +6,94 @@ Glossary
========

.. glossary::
   :sorted:

   backend

      A component that can execute computations.

   bridge

      A component of nGraph that acts as a backend for a framework,
      allowing the framework to define and execute computations.

   framework

      A machine learning environment, such as TensorFlow, MXNet, or
      neon.

   function graph

      The Intel nGraph library uses a function graph to represent an
      ``op``'s parameters and results.

   op

      An op represents an operation. Ops are stateless and have zero
      or more inputs and zero or more outputs. Some ops have
      additional constant attributes. Every output of an op
      corresponds to a tensor and has an element type and a shape. The
      element types and shapes of the outputs of an op are determined
      by the inputs and attributes of the op.

   parameter

      In the context of a function graph, a "parameter" refers to what
      "stands in" for an argument in an ``op`` definition.

   result

      In the context of a function graph, the term "result" refers to
      what stands in for the returned value.

   shape

      The shape of a tensor is a tuple of non-negative integers that
      represents an exclusive upper bound for coordinate values.
   shared pointer

      The C++ standard template library has the template
      ``std::shared_ptr<X>``. A shared pointer is used like an ``X*``
      pointer, but maintains a reference count to the underlying
      object. Each new shared pointer to the object increases the
      count. When a shared pointer goes out of scope, the reference
      count is decremented, and, when the count reaches 0, the
      underlying object is deleted. The function template
      ``std::make_shared<X>(...)`` can be used similarly to ``new
      X(...)``, except it returns a ``std::shared_ptr<X>`` instead of
      an ``X*``.

      If there is a chain of shared pointers from an object back to
      itself, every object in the chain is referenced, so the
      reference counts will never reach 0 and the objects will never
      be deleted.

      If ``a`` referenced ``b`` and ``b`` wanted to track all
      references to itself, and shared pointers were used in both
      directions, there would be a chain of pointers from ``a`` to
      itself. We avoid this by using shared pointers in only one
      direction, and raw pointers for the inverse direction.
      ``std::enable_shared_from_this`` is a class template that
      defines a method ``shared_from_this`` that provides a shared
      pointer from a raw pointer.

      nGraph makes use of shared pointers for objects whose lifetime
      is hard to determine when they are allocated.
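      A minimal sketch (not from the nGraph sources) of the
      reference-counting behavior described above; the type ``Thing``
      is hypothetical:

      .. code-block:: cpp

         #include <memory>

         struct Thing
         {
             int value = 0;
         };

         int main()
         {
             auto p = std::make_shared<Thing>(); // reference count == 1
             {
                 std::shared_ptr<Thing> q = p;   // reference count == 2
                 q->value = 42;
             }                                   // q destroyed, count back to 1
             return 0;                           // p destroyed, Thing deleted
         }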
   step

      An abstract "action" that produces zero or more tensor outputs
      from zero or more tensor inputs. Steps correspond to *ops* that
      connect *nodes*.

   tensors

      Tensors are maps from *coordinates* to scalar values, all of the
      same type, called the *element type* of the tensor.

   model description

      A description of a program's fundamental operations that are
      used by a framework to generate inputs for computation.
.. execute-cmp.rst
######################
Execute a Computation
######################
This section explains how to manually perform the steps that would normally be
performed by a framework :term:`bridge` to execute a computation. The Intel® nGraph++
library is targeted toward automatic construction; it is far easier for a
processing unit (GPU, CPU, or an `Intel Nervana NNP`_) to run a computation than
it is for a user to map out how that computation happens. Unfortunately, things
that make by-hand graph construction simpler tend to make automatic construction
more difficult, and vice versa.
Here we will do all the bridge steps manually. The :term:`model description`
we're explaining is based on the :file:`abc.cpp` file in the ``/doc/examples/``
directory. We'll be deconstructing the steps that an entity (framework or
user) must be able to carry out in order to successfully execute a computation:
* :ref:`define_cmp`
* :ref:`specify_bkd`
* :ref:`compile_cmp`
* :ref:`allocate_bkd_storage`
* :ref:`initialize_inputs`
* :ref:`invoke_cmp`
* :ref:`access_outputs`
The final code is at the :ref:`end of this page <all_together>`.
.. _define_cmp:
Define the computation
======================
To a :term:`framework`, a computation is simply a transformation of inputs to
outputs. While a *framework bridge* can programmatically construct the graph
from a framework's representation of the computation, graph construction can be
somewhat more tedious for users. To a user, who is usually interested in
specific nodes (vertices) or edges of a computation that reveal "what is
happening where", it can be helpful to think of a computation as a zoomed-out
and *stateless* dataflow graph where all of the nodes are well-defined tensor
operations and all of the edges denote use of an output from one operation as
an input for another operation.
.. TODO
.. image for representing nodes and edges of (a+b)*c
Most of the public portion of the nGraph API is in the ``ngraph`` namespace, so
we will omit the namespace. Any namespace other than ``std`` should be assumed
to be nested within ``ngraph``; for example, ``op::Add`` refers to
``ngraph::op::Add``.
A computation's graph is constructed from ops; each is an instance of a subclass
of ``op::Op``, which, in turn, is a subclass of ``Node``. Not all graphs are
computations, but all graphs are composed entirely of instances of ``Node``.
Computation graphs contain only ``op::Op`` nodes.
We mostly use :term:`shared pointers<shared pointer>` for nodes, i.e.
``std::shared_ptr<Node>`` so that they will be automatically
deallocated when they are no longer needed. A brief summary of shared
pointers is given in the glossary.
Every node has zero or more *inputs*, zero or more *outputs*, and zero or more
*attributes*. The specifics permitted for each core op are documented in our
:doc:`../ops/index` docs. For the purpose of :ref:`defining a computation
<define_cmp>`, nodes should be thought of as essentially immutable; that is,
when constructing a node, we need to supply all of its inputs. We get this
process started with ops that have no inputs, since any op that does have
inputs must ultimately be fed by ops that have none.
``op::Parameter`` specifies the tensors that will be passed to the computation.
They receive their values from outside of the graph, so they have no inputs.
They have attributes for the element type and the shape of the tensor that will
be passed to them.
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 26-29
Here we have made three parameter nodes, each a 32-bit float of shape ``(2, 3)``
using a row-major element layout.
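For reference, the referenced lines of :file:`abc.cpp` are essentially:

.. code-block:: cpp

   Shape s{2, 3};
   auto a = std::make_shared<op::Parameter>(element::f32, s);
   auto b = std::make_shared<op::Parameter>(element::f32, s);
   auto c = std::make_shared<op::Parameter>(element::f32, s);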
We can create a graph for ``(a+b)*c`` by creating an ``op::Add`` node with inputs
from ``a`` and ``b``, and an ``op::Multiply`` node from the add node and ``c``:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 31-32
When the ``op::Add`` op is constructed, it will check that the element types and
shapes of its inputs match; to support multiple frameworks, ngraph does not do
automatic type conversion or broadcasting. In this case, they match, and the
unique output of ``t0`` will be a 32-bit float tensor with shape ``(2, 3)``.
Similarly, ``op::Multiply`` checks that its inputs match and sets the element
type and shape of its unique output.
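For reference, the graph-building lines from :file:`abc.cpp` are:

.. code-block:: cpp

   auto t0 = std::make_shared<op::Add>(a, b);
   auto t1 = std::make_shared<op::Multiply>(t0, c);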
Once the graph is built, we need to package it in a ``Function``:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 35
The first argument to the constructor specifies the nodes that the function will
return; in this case, the product. A ``NodeVector`` is a vector of shared
pointers of ``op::Node``. The second argument specifies the parameters of the
function, in the order they are to be passed to the compiled function. A
``ParameterVector`` is a vector of shared pointers to ``op::Parameter``.
.. important:: The parameter vector must include **every** parameter used in
the computation of the results.
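For reference, the corresponding line from :file:`abc.cpp` is:

.. code-block:: cpp

   auto f = std::make_shared<Function>(NodeVector{t1}, op::ParameterVector{a, b, c});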
.. _specify_bkd:
Specify the backend upon which to run the computation
=====================================================
For a framework bridge, a *backend* is the environment that can perform the
computations; it can be done with a CPU, GPU, or an Intel Nervana NNP. A
*transformer* can compile computations for a backend, allocate and deallocate
tensors, and invoke computations.
Factory-like managers for classes of backends can compile a ``Function``
and allocate backends. A backend is somewhat analogous to a multi-threaded
process.
There are two backends for the CPU: the optimized ``"CPU"`` backend, which uses
the `Intel MKL-DNN`_, and the ``"INTERPRETER"`` backend, which runs reference
versions of kernels that favor implementation clarity over speed. The
``"INTERPRETER"`` backend can be slow, and is primarily intended for testing.
To select the ``"CPU"`` backend,
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 38-39
.. _compile_cmp:
Compile the computation
=======================
Compilation produces an ``ExternalFunction``, which can be used as a factory for
producing a ``CallFrame``: a *function* and its associated *state* that can run
in a single thread at a time. A ``CallFrame`` may be reused, but any particular
``CallFrame`` must only be running in one thread at any time. If more than one
thread needs to execute the function at the same time, create multiple
``CallFrame`` objects from the ``ExternalFunction``.
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 42-43
.. _allocate_bkd_storage:
Allocate backend storage for the inputs and outputs
===================================================
At the graph level, functions are stateless. They do have internal state related
to execution, but there is no user-visible state. Variables must be passed as
arguments. If the function updates variables, it must return the updated
variables.
To invoke a function, tensors must be provided for every input and every output.
At this time, a tensor used as an input cannot also be used as an output. If
variables are being updated, you should use a double-buffering approach where
you switch between odd/even generations of variables on each update.
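A minimal sketch of the double-buffering idea, assuming a call frame ``cf``
whose compiled function takes the current value of a single variable and
returns its updated value; ``backend``, ``s``, and ``cf`` are analogous to the
example above, while ``n_steps`` and the two-tensor layout are purely
illustrative, not part of the nGraph API:

.. code-block:: cpp

   // Two generations of the same variable; "even" and "odd" alternate roles.
   auto var_even = backend->make_primary_tensor_view(element::f32, s);
   auto var_odd = backend->make_primary_tensor_view(element::f32, s);

   for (size_t step = 0; step < n_steps; ++step)
   {
       // On even steps read from var_even and write to var_odd; on odd steps
       // the roles swap, so a tensor is never both an input and an output.
       auto& input = (step % 2 == 0) ? var_even : var_odd;
       auto& output = (step % 2 == 0) ? var_odd : var_even;
       cf->call({input}, {output});
   }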
Backends are responsible for managing storage. If the storage is off-CPU, caches
are used to minimize copying between device and CPU. We can allocate storage for
the three parameters and return value as follows:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 30-33
Each tensor is a shared pointer to a ``runtime::TensorView``, the interface
backends implement for tensor use. When there are no more references to the
tensor view, it will be freed when convenient for the backend.
.. _initialize_inputs:
Initialize the inputs
=====================
Next we need to copy some data into the tensors.
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 45-58
The ``runtime::TensorView`` interface has ``write`` and ``read`` methods for
copying data to/from the tensor.
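For reference, the calls from :file:`abc.cpp` look like this:

.. code-block:: cpp

   t_a->write(&v_a, 0, sizeof(v_a));   // copy v_a into the tensor
   t_result->read(&r, 0, sizeof(r));   // copy the tensor out into r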
.. _invoke_cmp:
Invoke the computation
======================
To invoke the function, we simply pass argument and resultant tensors to the
call frame:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 61
.. _access_outputs:
Access the outputs
==================
We can use the ``read`` method to access the result:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 64-65
.. _all_together:
Put it all together
===================
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:caption: "The (a + b) * c example for executing a computation on nGraph"
.. _Intel MKL-DNN: https://01.org/mkl-dnn
.. _Intel Nervana NNP: https://ai.intel.com/intel-nervana-neural-network-processors-nnp-redefine-ai-silicon/
\ No newline at end of file
.. howto/index:
How to
======
.. toctree::
:maxdepth: 1
:caption: How to
execute.rst
The "How to" articles in this section explain how to do specific tasks with the
Intel nGraph++ library. The recipes are all framework agnostic; in other words,
if an entity (framework or user) wishes to make use of target-based computational
resources, it can either:
* Do the tasks programmatically through the framework, or
* Provide a clear model definition with documentation for the computational
resources needed.
.. note:: This section is aimed at intermediate-level developers working with
the nGraph++ library. It assumes a developer has an understanding of the concepts
in the previous sections. It does not assume knowledge of any particular
frontend framework.
Since our primary audience is developers who are pushing the boundaries of deep
learning systems, we go beyond the use of deep learning primitives, and include
APIs and documentation for developers who want the ability to write programs
that use custom backends. For example, we know that GPU resources can be useful
backends for *some* kinds of algorithmic operations while they impose inherent
limitations and slow down others. We are barely scraping the surface of what is
possible for a hybridized approach to many kinds of training and inference-based
computational tasks.
One of our goals with the nGraph project is to enable developers with tools to
build programs that quickly access and process data with or from a breadth of
edge and network devices. Furthermore, we want them to be able to make use of
the best kind of computational resources for the kind of data they are processing,
after it has been gathered.
To get started, we've provided a basic example for how to execute a computation
that can run on an nGraph backend; this is analogous to a framework bridge.
This section is under development; it will eventually be populated with more
articles geared toward data scientists, algorithm designers, framework developers,
backend engineers, and others. We welcome contributions from the community and
invite you to experiment with the variety of hybridization and performance
extractions available through the nGraph library.
...@@ -13,26 +13,29 @@
.. limitations under the License.
.. ---------------------------------------------------------------------------

########################
Intel nGraph++ library
########################

Welcome to Intel® nGraph™, an open source C++ library for developers of
:abbr:`Deep Learning (DL)` (DL) systems. Here you will find a suite of
components, APIs, and documentation that can be used to compile and run
:abbr:`Deep Neural Network (DNN)` (DNN) models defined in a variety of
frameworks.

.. figure:: graphics/ngraph-hub.png

For this early release, we've provided :doc:`framework-integration-guides` to
compile and run MXNet\* and TensorFlow\*-based projects.

.. note:: The library code is under active development as we're continually
   adding support for more ops, more frameworks, and more backends.

The nGraph++ library translates a framework's representation of computations
into an :abbr:`Intermediate Representation (IR)` that promotes computational
efficiency on target hardware. Initially-supported backends include Intel
Architecture CPUs (``CPU``), the Intel® Nervana Neural Network Processor™ (NNP),
and NVIDIA\* GPUs. Currently-supported compiler optimizations include efficient
memory management and data layout abstraction.

...@@ -54,6 +57,7 @@ Sections

   testing-libngraph.rst
   framework-integration-guides.rst
   graph-basics.rst
   howto/index.rst
   ops/index.rst
   project/index.rst
......
.. installation:

########
Install
########

Build Environments
==================

The |release| version of |project| supports Linux\*-based systems which have
recent updates of the following packages and prerequisites:

.. csv-table::
   :header: "Operating System", "Compiler", "Build System", "Status", "Additional Packages"

...@@ -25,15 +25,15 @@ Other configurations may work, but aren't tested; on Ubuntu 16.04 with
below. This gets a pre-built tarball of LLVM+Clang from `llvm.org`_, and will
substantially reduce build time.

If using ``gcc-4.8``, it may be necessary to add symlinks from ``gcc`` to
``gcc-4.8``, and from ``g++`` to ``g++-4.8``, in your :envvar:`PATH`, even
if you explicitly specify the ``CMAKE_C_COMPILER`` and ``CMAKE_CXX_COMPILER``
flags when building. (You **should NOT** supply the ``-DNGRAPH_USE_PREBUILT_LLVM``
flag in this case, because the prebuilt tarball supplied on llvm.org is not
compatible with a gcc-4.8 based build.)

Support for macOS is limited; see the `macOS development`_ section at the end of
this page for details.

Installation Steps
...@@ -44,11 +44,10 @@ install ``ngraph_dist`` to the installing user's ``$HOME`` directory as
the default location. See the :file:`CMakeLists.txt` file for more
information about how to change or customize this location.

#. (Optional) Create something like ``/opt/local`` and (with sudo permissions),
   give ownership of that directory to your user. Under this directory, you can
   add a ``libraries`` subdirectory as a placeholder for the documented source
   cloned from the repo:

   .. code-block:: console

...@@ -62,49 +61,56 @@ information about how to change or customize this location.

   .. code-block:: console

      $ cd /opt/local/libraries
      $ git clone git@github.com:NervanaSystems/ngraph-cpp.git
      $ cd ngraph-cpp

#. Create a build directory outside of the ``ngraph-cpp/src`` directory
   tree; somewhere like ``ngraph-cpp/build``, for example.

   .. code-block:: console

      $ mkdir build

#. ``$ cd`` to the build directory and generate the GNUMakefiles in the
   customary manner from within your ``build`` directory (remember to append the
   command with the prebuilt option, if needed):

   .. code-block:: console

      $ cd build && cmake ../ [-DNGRAPH_USE_PREBUILT_LLVM=TRUE]

#. (Optional) Run ``$ make [-jN]`` where ``-jN`` specifies the number of
   parallel build jobs. The example here uses ``-j8``, which is good for a
   system install using an Intel® Xeon® CPU processor. This step is
   **not recommended** with Docker / VM installs.

   .. code-block:: console

      $ make -j8

#. Run ``make install`` to install ``libngraph.so`` and the header files to the
   default location of ``$HOME/ngraph_dist``.

   .. code-block:: console

      $ make install

#. (Optional, requires `doxygen`_, `Sphinx`_, and `breathe`_.) Run ``make html``
   inside the ``doc/sphinx`` directory of the cloned source to build a copy of
   the `website docs`_ locally. The low-level API docs with inheritance diagrams
   and collaboration diagrams can be found inside the ``/docs/doxygen/``
   directory.

.. macos_development:

macOS development
-----------------

.. note:: The macOS*\ platform is officially unsupported.

The repository includes two scripts (``maint/check-code-format.sh`` and
``maint/apply-code-format.sh``) that are used respectively to check adherence
to ``libngraph`` code formatting conventions, and to automatically reformat code
according to those conventions. These scripts require the command
``clang-format-3.9`` to be in your ``PATH``. Run the following commands
(you will need to adjust them if you are not using bash):

...@@ -118,6 +124,7 @@ according to those conventions. These scripts require the command
.. _doxygen: https://www.stack.nl/~dimitri/doxygen/
.. _Sphinx: http://www.sphinx-doc.org/en/stable/
.. _breathe: https://breathe.readthedocs.io/en/latest/
.. _llvm.org: https://www.llvm.org
.. _NervanaSystems: https://github.com/NervanaSystems/ngraph-cpp/blob/master/README.md
.. _website docs: http://ngraph.nervanasys.com/index.html/index.html
.. allreduce.rst:

##########
AllReduce
##########

.. code-block:: cpp
......
...@@ -82,3 +82,6 @@ Not currently a comprehensive list.

   negative.rst
   not_equal.rst
   not.rst
   softmax.rst
.. softmax.rst:
#######
Softmax
#######
.. code-block:: cpp
Softmax // Softmax operation
Description
===========
Produces a tensor of the same element type and shape as ``arg``,
where the value at each coordinate of ``output`` is the exponential of the
value of the corresponding coordinate of ``arg`` divided by the sum of the
exponentials of all coordinates of ``arg`` in the specified ``axes``.
Inputs
------
+-----------------+-------------------------+--------------------------------+
| Name | Element Type | Shape |
+=================+=========================+================================+
| ``arg`` | Any | Any |
+-----------------+-------------------------+--------------------------------+
Parameters
----------
+-----------------+----------------------------------------------------------------+
| Name | Description |
+=================+================================================================+
| ``axes`` | The axis positions (0-based) on which to calculate the softmax |
+-----------------+----------------------------------------------------------------+
Outputs
-------
+-----------------+-------------------------+--------------------------------+
| Name | Element Type | Shape |
+=================+=========================+================================+
| ``output`` | Same as ``arg`` | Same as ``arg`` |
+-----------------+-------------------------+--------------------------------+
Mathematical Definition
=======================
.. math::

   \texttt{output}_{i} = \frac{\exp(\texttt{arg}_{i})}{\sum_{j} \exp(\texttt{arg}_{j})}

where the summation index :math:`j` ranges over the specified ``axes``, with all
other coordinates held equal to those of :math:`i`.
C++ Interface
=============
.. doxygenclass:: ngraph::op::Softmax
:project: ngraph
:members: m_axes
\ No newline at end of file
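A brief usage sketch (hypothetical shapes; based on the
``Softmax(const std::shared_ptr<Node>& arg, const AxisSet& axes)`` constructor):

.. code-block:: cpp

   // Softmax over axis 1 of a 2x3 float tensor.
   auto arg = std::make_shared<op::Parameter>(element::f32, Shape{2, 3});
   auto sm = std::make_shared<op::Softmax>(arg, AxisSet{1});
   auto f = std::make_shared<Function>(NodeVector{sm}, op::ParameterVector{arg});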
...@@ -32,7 +32,7 @@ For this early |release| release, we're providing :doc:`framework-integration-gu
for:

* :doc:`MXNet<framework-integration-guides>` framework,
* :doc:`TensorFlow<framework-integration-guides>` framework, and
* neon™ `frontend framework`_.

Integration guides for other frameworks are tentatively forthcoming.
......
...@@ -72,6 +72,7 @@ set (SRC ...@@ -72,6 +72,7 @@ set (SRC
ops/sin.cpp ops/sin.cpp
ops/sinh.cpp ops/sinh.cpp
ops/slice.cpp ops/slice.cpp
ops/softmax.cpp
ops/sqrt.cpp ops/sqrt.cpp
ops/subtract.cpp ops/subtract.cpp
ops/sum.cpp ops/sum.cpp
...@@ -313,7 +314,7 @@ endif() ...@@ -313,7 +314,7 @@ endif()
# Nvidia # Nvidia
if(NGRAPH_GPU_ENABLE AND CUDA_LIBRARIES) if(NGRAPH_GPU_ENABLE AND CUDA_LIBRARIES)
find_library(CUDA_nvrtc_LIBRARY nvrtc /usr/local/cuda/lib64) find_library(CUDA_nvrtc_LIBRARY nvrtc /usr/local/cuda/lib64)
find_library(CUDA_cuda_LIBRARY cuda /usr/local/cuda/lib64) find_library(CUDA_cuda_LIBRARY cuda /usr/local/cuda/lib64/stubs)
target_link_libraries(ngraph PUBLIC ${CUDA_cuda_LIBRARY} ${CUDA_nvrtc_LIBRARY} ${CUDA_LIBRARIES} ${CUDA_CUBLAS_LIBRARIES} ${CUDNN_LIBRARIES}) target_link_libraries(ngraph PUBLIC ${CUDA_cuda_LIBRARY} ${CUDA_nvrtc_LIBRARY} ${CUDA_LIBRARIES} ${CUDA_CUBLAS_LIBRARIES} ${CUDNN_LIBRARIES})
endif() endif()
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "ngraph/builder/numpy_transpose.hpp" #include "ngraph/builder/numpy_transpose.hpp"
#include "ngraph/except.hpp" #include "ngraph/except.hpp"
#include "ngraph/ops/reshape.hpp" #include "ngraph/ops/reshape.hpp"
#include "ngraph/util.hpp"
namespace ngraph namespace ngraph
{ {
......
...@@ -17,12 +17,7 @@ ...@@ -17,12 +17,7 @@
#pragma once #pragma once
#include "ngraph/axis_vector.hpp" #include "ngraph/axis_vector.hpp"
#include "ngraph/function.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/types/type.hpp"
namespace ngraph namespace ngraph
{ {
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include "ngraph/axis_set.hpp" #include "ngraph/axis_set.hpp"
#include "ngraph/builder/autobroadcast.hpp" #include "ngraph/builder/autobroadcast.hpp"
#include "ngraph/builder/reduce_ops.hpp" #include "ngraph/builder/reduce_ops.hpp"
#include "ngraph/ops/add.hpp" #include "ngraph/ops/constant.hpp"
#include "ngraph/ops/divide.hpp" #include "ngraph/ops/divide.hpp"
#include "ngraph/ops/multiply.hpp" #include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/power.hpp" #include "ngraph/ops/power.hpp"
......
...@@ -17,12 +17,7 @@ ...@@ -17,12 +17,7 @@
#pragma once #pragma once
#include "ngraph/axis_set.hpp" #include "ngraph/axis_set.hpp"
#include "ngraph/function.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/types/type.hpp"
namespace ngraph namespace ngraph
{ {
......
...@@ -19,8 +19,6 @@ ...@@ -19,8 +19,6 @@
#include <sstream> #include <sstream>
#include <string> #include <string>
#include "ngraph/log.hpp"
namespace ngraph namespace ngraph
{ {
namespace codegen namespace codegen
......
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
#pragma once #pragma once
#include <functional>
#include <memory> #include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#pragma once #pragma once
#include <functional>
#include <memory> #include <memory>
#include "ngraph/codegen/compiler.hpp" #include "ngraph/codegen/compiler.hpp"
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#include <algorithm>
#include <cstdio> #include <cstdio>
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
...@@ -89,8 +88,10 @@ CoordinateTransform::CoordinateTransform(const Shape& source_shape, ...@@ -89,8 +88,10 @@ CoordinateTransform::CoordinateTransform(const Shape& source_shape,
} }
AxisVector all_axes(m_n_axes); AxisVector all_axes(m_n_axes);
size_t n = 0; for (size_t i = 0; i < all_axes.size(); i++)
std::generate(all_axes.begin(), all_axes.end(), [&n]() -> size_t { return n++; }); {
all_axes[i] = i;
}
if (!std::is_permutation(all_axes.begin(), all_axes.end(), source_axis_order.begin())) if (!std::is_permutation(all_axes.begin(), all_axes.end(), source_axis_order.begin()))
{ {
......
...@@ -16,11 +16,6 @@ ...@@ -16,11 +16,6 @@
#pragma once #pragma once
#include <cassert>
#include <cstdio>
#include <iostream>
#include <vector>
#include "ngraph/axis_vector.hpp" #include "ngraph/axis_vector.hpp"
#include "ngraph/coordinate.hpp" #include "ngraph/coordinate.hpp"
#include "ngraph/coordinate_diff.hpp" #include "ngraph/coordinate_diff.hpp"
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
#pragma once #pragma once
#include <cstddef>
namespace ngraph namespace ngraph
{ {
namespace descriptor namespace descriptor
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#pragma once #pragma once
#include <cassert> #include <cassert>
#include <memory>
#include "ngraph/descriptor/buffer.hpp" #include "ngraph/descriptor/buffer.hpp"
......
...@@ -21,9 +21,6 @@ ...@@ -21,9 +21,6 @@
#include "ngraph/types/type.hpp" #include "ngraph/types/type.hpp"
using namespace ngraph; using namespace ngraph;
using ngraph::Shape;
using ngraph::descriptor::TensorView;
using ngraph::TensorViewType;
descriptor::layout::DenseTensorViewLayout::DenseTensorViewLayout(const TensorView& tensor_view) descriptor::layout::DenseTensorViewLayout::DenseTensorViewLayout(const TensorView& tensor_view)
: TensorViewLayout(tensor_view) : TensorViewLayout(tensor_view)
......
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#pragma once #pragma once
#include <memory> #include <memory>
#include <tuple>
#include <vector> #include <vector>
#include "ngraph/descriptor/buffer_pos.hpp" #include "ngraph/descriptor/buffer_pos.hpp"
......
...@@ -20,9 +20,6 @@ ...@@ -20,9 +20,6 @@
#include "ngraph/descriptor/tensor.hpp" #include "ngraph/descriptor/tensor.hpp"
#include "ngraph/descriptor/tensor_view.hpp" #include "ngraph/descriptor/tensor_view.hpp"
#include "ngraph/log.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/types/type.hpp"
namespace ngraph namespace ngraph
{ {
...@@ -30,9 +27,6 @@ namespace ngraph ...@@ -30,9 +27,6 @@ namespace ngraph
namespace descriptor namespace descriptor
{ {
class Tensor;
class TensorViewLayout;
/// @brief A PrimaryTensorView owns the tensor. All other views are the result /// @brief A PrimaryTensorView owns the tensor. All other views are the result
/// of some index operation on the primary view. /// of some index operation on the primary view.
class PrimaryTensorView : public TensorView class PrimaryTensorView : public TensorView
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#pragma once #pragma once
#include <memory> #include <memory>
#include <string>
#include "ngraph/shape.hpp" #include "ngraph/shape.hpp"
......
...@@ -23,12 +23,7 @@ ...@@ -23,12 +23,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "ngraph/descriptor/output.hpp"
#include "ngraph/descriptor/tensor_view.hpp"
#include "ngraph/log.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "ngraph/ops/op.hpp"
#include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/parameter_vector.hpp" #include "ngraph/ops/parameter_vector.hpp"
#include "ngraph/types/type.hpp" #include "ngraph/types/type.hpp"
......
...@@ -14,13 +14,9 @@ ...@@ -14,13 +14,9 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#include <algorithm>
#include <cassert> #include <cassert>
#include <deque> #include <deque>
#include <forward_list> #include <unordered_map>
#include <iomanip>
#include <iterator>
#include <map>
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
......
...@@ -16,27 +16,18 @@ ...@@ -16,27 +16,18 @@
#pragma once #pragma once
#include <algorithm>
#include <chrono>
#include <deque>
#include <functional>
#include <iostream>
#include <list> #include <list>
#include <map>
#include <memory> #include <memory>
#include <sstream>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <unordered_set>
#include <vector> #include <vector>
#include "ngraph/function.hpp"
#include "ngraph/node.hpp"
#include "ngraph/placement.hpp" #include "ngraph/placement.hpp"
namespace ngraph namespace ngraph
{ {
class Node;
class Function;
namespace descriptor namespace descriptor
{ {
class Input; class Input;
......
...@@ -118,6 +118,7 @@ ...@@ -118,6 +118,7 @@
#include "ngraph/ops/sin.hpp" #include "ngraph/ops/sin.hpp"
#include "ngraph/ops/sinh.hpp" #include "ngraph/ops/sinh.hpp"
#include "ngraph/ops/slice.hpp" #include "ngraph/ops/slice.hpp"
#include "ngraph/ops/softmax.hpp"
#include "ngraph/ops/sqrt.hpp" #include "ngraph/ops/sqrt.hpp"
#include "ngraph/ops/subtract.hpp" #include "ngraph/ops/subtract.hpp"
#include "ngraph/ops/sum.hpp" #include "ngraph/ops/sum.hpp"
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "ngraph/ops/batch_norm.hpp" #include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/constant.hpp" #include "ngraph/ops/constant.hpp"
#include "ngraph/ops/get_output_element.hpp"
ngraph::op::BatchNorm::BatchNorm(double eps, ngraph::op::BatchNorm::BatchNorm(double eps,
std::shared_ptr<ngraph::Node> gamma, std::shared_ptr<ngraph::Node> gamma,
...@@ -94,3 +95,94 @@ std::shared_ptr<ngraph::Node> ...@@ -94,3 +95,94 @@ std::shared_ptr<ngraph::Node>
return std::make_shared<BatchNorm>( return std::make_shared<BatchNorm>(
m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), new_args.at(4)); m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), new_args.at(4));
} }
ngraph::op::BatchNormBackprop::BatchNormBackprop(double eps,
std::shared_ptr<ngraph::Node> gamma,
std::shared_ptr<ngraph::Node> beta,
std::shared_ptr<ngraph::Node> input,
std::shared_ptr<ngraph::Node> mean,
std::shared_ptr<ngraph::Node> variance,
std::shared_ptr<ngraph::Node> delta)
: RequiresTensorViewArgs("BatchNormBackprop", {gamma, beta, input, mean, variance, delta})
, epsilon(eps)
{
if (input->get_shape().size() != 4)
{
throw ngraph_error("Input expected to be a 4D tensor");
}
auto et = input->get_element_type();
const char* input_names[] = {"gamma", "beta", "input", "mean", "variance", "delta"};
for (size_t i = 0; i < get_input_size(); i++)
{
if (get_input_op(i)->get_element_type() != et)
{
auto err_msg = std::string("The element type of ") + input_names[i] +
" isn't equal to input data's type";
throw ngraph_error(err_msg.c_str());
}
}
Shape channel_shape{input->get_shape().at(1)};
for (size_t i = 0; i < get_input_size(); i++)
{
if (i == 2 || i == 5) //don't check input and delta
{
continue;
}
if (get_input_op(i)->get_shape() != channel_shape)
{
auto err_msg = std::string("The shape of ") + input_names[i] +
" isn't equal to input channel's shape";
throw ngraph_error(err_msg.c_str());
}
}
if (delta->get_shape() != input->get_shape())
{
throw ngraph_error("delta shape is expected to be equal to input shape");
}
add_output(input->get_element_type(), input->get_shape());
add_output(gamma->get_element_type(), gamma->get_shape());
add_output(beta->get_element_type(), beta->get_shape());
}
std::shared_ptr<ngraph::Node>
ngraph::op::BatchNormBackprop::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 6)
{
throw ngraph_error("Incorrect number of new arguments");
}
return std::make_shared<op::BatchNormBackprop>(epsilon,
new_args.at(0),
new_args.at(1),
new_args.at(2),
new_args.at(3),
new_args.at(4),
new_args.at(5));
}
void ngraph::op::BatchNorm::generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta)
{
auto gamma = get_input_op(0);
auto beta = get_input_op(1);
auto input = get_input_op(2);
auto mean = get_input_op(3);
auto variance = get_input_op(4);
auto bbn = std::make_shared<op::BatchNormBackprop>(
get_eps_value(), gamma, beta, input, mean, variance, delta);
auto dinput = std::make_shared<op::GetOutputElement>(bbn, 0);
auto dgamma = std::make_shared<op::GetOutputElement>(bbn, 1);
auto dbeta = std::make_shared<op::GetOutputElement>(bbn, 2);
adjoints.add_delta(input, dinput);
adjoints.add_delta(gamma, dgamma);
adjoints.add_delta(beta, dbeta);
}
...@@ -44,11 +44,34 @@ namespace ngraph ...@@ -44,11 +44,34 @@ namespace ngraph
virtual std::shared_ptr<Node> virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override; copy_with_new_args(const NodeVector& new_args) const override;
protected:
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta) override;
private: private:
Shape m_bn_input_shape; Shape m_bn_input_shape;
Shape m_bn_variance_shape; Shape m_bn_variance_shape;
Shape m_bn_mean_shape; Shape m_bn_mean_shape;
double m_epsilon; double m_epsilon;
}; };
class BatchNormBackprop : public util::RequiresTensorViewArgs
{
public:
BatchNormBackprop(double eps,
std::shared_ptr<Node> gamma,
std::shared_ptr<Node> beta,
std::shared_ptr<Node> input,
std::shared_ptr<Node> mean,
std::shared_ptr<Node> variance,
std::shared_ptr<Node> delta);
double get_eps_value() const { return epsilon; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
private:
double epsilon;
};
} }
} }
...@@ -62,6 +62,11 @@ namespace ngraph ...@@ -62,6 +62,11 @@ namespace ngraph
/// \return The index of the tuple element to get. /// \return The index of the tuple element to get.
size_t get_n() const { return m_n; } size_t get_n() const { return m_n; }
virtual NodeVector get_input_ops() override
{
return NodeVector{get_inputs().at(0).get_output().get_node()};
}
protected: protected:
size_t m_n; size_t m_n;
}; };
......
...@@ -16,14 +16,15 @@ ...@@ -16,14 +16,15 @@
#pragma once #pragma once
#include <memory>
#include <vector> #include <vector>
#include "ngraph/ops/parameter.hpp"
namespace ngraph namespace ngraph
{ {
namespace op namespace op
{ {
class Parameter;
/// \brief Zero or more nodes. /// \brief Zero or more nodes.
class ParameterVector : public std::vector<std::shared_ptr<op::Parameter>> class ParameterVector : public std::vector<std::shared_ptr<op::Parameter>>
{ {
......
...@@ -14,10 +14,11 @@ ...@@ -14,10 +14,11 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#include "ngraph/ops/reverse.hpp"
#include "ngraph/function.hpp"
#include <algorithm> #include <algorithm>
#include <sstream>
#include "ngraph/function.hpp"
#include "ngraph/ops/reverse.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "ngraph/ops/select_and_scatter.hpp" #include "ngraph/ops/select_and_scatter.hpp"
#include "ngraph/function.hpp" #include "ngraph/function.hpp"
#include "ngraph/ops/parameter.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
using namespace std; using namespace std;
......
...@@ -14,44 +14,42 @@ ...@@ -14,44 +14,42 @@
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#include <memory> #include "ngraph/ops/softmax.hpp"
#include <sstream>
#include "ngraph/ops/xla_get_tuple_element.hpp" #include <algorithm>
#include "ngraph/ops/xla_tuple.hpp" #include <numeric>
using namespace std; #include "ngraph/builder/autobroadcast.hpp"
using namespace ngraph; #include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/subtract.hpp"
#include "ngraph/ops/sum.hpp"
op::XLAGetTupleElement::XLAGetTupleElement(const std::shared_ptr<Node>& arg, size_t n) void ngraph::op::Softmax::generate_adjoints(autodiff::Adjoints& adjoints,
: XLANode("XLAGetTupleElement", {arg}) const std::shared_ptr<Node>& delta)
, m_n{n}
{ {
m_arg = dynamic_pointer_cast<XLANode>(arg); auto z = delta * shared_from_this();
if (m_arg == nullptr || m_arg->get_tuple_value() == nullptr) auto zsum = std::make_shared<op::Sum>(z, m_axes);
Shape shape;
for (size_t i = 0; i < get_shape().size(); ++i)
{
if (m_axes.find(i) == m_axes.end())
{ {
throw ngraph_error("Argument must be a tuple view"); shape.push_back(get_shape()[i]);
} }
else
const Nodes& elements = m_arg->get_tuple_elements();
if (m_n >= elements.size())
{ {
throw ngraph_error("Indexing tuple beyond its size"); shape.push_back(1);
} }
} }
AxisVector order(zsum->get_shape().size());
Nodes op::XLAGetTupleElement::get_input_ops() //const std::iota(order.begin(), order.end(), 0);
{ auto zreshape = std::make_shared<op::Reshape>(zsum, order, shape);
return Nodes{m_arg};
}
shared_ptr<const op::XLATuple> op::XLAGetTupleElement::get_tuple_value() const auto adjoint =
{ z - builder::make_with_numpy_broadcast<op::Multiply>(shared_from_this(), zreshape);
return dynamic_pointer_cast<const op::XLATuple>(m_arg->get_tuple_elements().at(m_n));
}
const Nodes& op::XLAGetTupleElement::get_tuple_elements() const auto x = get_input_op(0);
{ adjoints.add_delta(x, adjoint);
return get_tuple_value()->get_tuple_elements();
} }
@@ -16,61 +16,64 @@
 #pragma once
 
-#include "ngraph/node.hpp"
-#include "ngraph/ops/xla_node.hpp"
+#include "ngraph/ops/util/unary_elementwise_arithmetic.hpp"
 
 namespace ngraph
 {
     namespace op
     {
-        /// \brief Operation to get an element from a tuple.
-        ///
-        /// ## Parameters
-        ///
-        /// |     | Description                                                         |
-        /// | --- | ------------------------------------------------------------------- |
-        /// | `n` | The position of the element (0-based) to get from the input tuple.  |
-        ///
-        /// ## Inputs
-        ///
-        /// |        | Type                                                        | Description                                |
-        /// | ------ | ----------------------------------------------------------- | ------------------------------------------ |
-        /// | `arg`  | \f$(T_1,\dots,T_{n-1},T_n,T_{n+1},\dots,T_m)~(m \geq 1)\f$  | An input tuple with at least `n` elements. |
-        ///
-        /// ## Output
-        ///
-        /// | Type      | Description                           |
-        /// | --------- | ------------------------------------- |
-        /// | \f$T_n\f$ | The `n`th element of the input tuple. |
-        class XLAGetTupleElement : public XLANode
+        /// \brief Softmax operation.
+        ///
+        class Softmax : public util::UnaryElementwiseArithmetic
         {
         public:
-            /// \brief Constructs a get-tuple-element operation.
+            /// \brief Constructs a softmax operation.
             ///
-            /// \param arg The input tuple.
-            /// \param n The index of the tuple element to get.
-            XLAGetTupleElement(const std::shared_ptr<Node>& arg, size_t n);
-
-            virtual std::shared_ptr<Node> copy_with_new_args(
-                const std::vector<std::shared_ptr<Node>>& new_args) const override
+            /// \param arg Node that produces the first input tensor.<br>
+            /// `[d0, ...]`
+            /// \param axes The axis positions (0-based) on which to calculate the softmax.
+            ///
+            /// Output `[d0, ...]`
+            ///
+            Softmax(const std::shared_ptr<Node>& arg, const AxisSet& axes)
+                : UnaryElementwiseArithmetic("Softmax", arg)
+                , m_axes(axes)
+            {
+                for (auto axis : m_axes)
+                {
+                    if (axis >= get_shape().size())
+                    {
+                        throw ngraph_error("Axis for softmax reduction operator is out of bounds");
+                    }
+                }
+
+                // empty axes == all axes
+                if (m_axes.size() == 0)
+                {
+                    for (size_t i = 0; i < get_shape().size(); ++i)
+                    {
+                        m_axes.insert(i);
+                    }
+                }
+            }
+
+            virtual std::shared_ptr<Node>
+                copy_with_new_args(const NodeVector& new_args) const override
             {
                 if (new_args.size() != 1)
                 {
                     throw ngraph_error("Incorrect number of new arguments");
                 }
-                return std::make_shared<XLAGetTupleElement>(new_args.at(0), m_n);
+                return std::make_shared<Softmax>(new_args.at(0), m_axes);
             }
 
-            virtual Nodes get_input_ops() override; //const;
-            virtual std::shared_ptr<const XLATuple> get_tuple_value() const override;
-            virtual const Nodes& get_tuple_elements() const override;
-
-            /// \return The index of the tuple element to get.
-            size_t get_n() const { return m_n; }
+            const AxisSet& get_axes() const { return m_axes; }
+
         protected:
-            std::shared_ptr<XLANode> m_arg;
-            size_t m_n;
+            virtual void generate_adjoints(autodiff::Adjoints& adjoints,
+                                           const std::shared_ptr<Node>& delta) override;
+
+        private:
+            AxisSet m_axes;
         };
     }
 }
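A minimal usage sketch for the new op (the shapes and axis here are hypothetical, and the sketch assumes the op::Parameter / AxisSet API used elsewhere in this codebase):

    // Hypothetical example: softmax over the class axis of a [batch, classes] tensor.
    auto data = std::make_shared<op::Parameter>(element::f32, Shape{8, 10});
    auto sm = std::make_shared<op::Softmax>(data, AxisSet{1});
    // Passing an empty AxisSet would normalize over every axis, per the constructor above.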
@@ -15,16 +15,13 @@
  *******************************************************************************/
 #include "ngraph/ops/tanh.hpp"
 
-#include "ngraph/ops/cosh.hpp"
-#include "ngraph/ops/divide.hpp"
 #include "ngraph/ops/multiply.hpp"
+#include "ngraph/ops/subtract.hpp"
 
 void ngraph::op::Tanh::generate_adjoints(autodiff::Adjoints& adjoints,
                                          const std::shared_ptr<Node>& delta)
 {
     auto x = get_input_op(0);
 
-    auto c = std::make_shared<op::Cosh>(x);
-    adjoints.add_delta(x, delta / (c * c));
+    adjoints.add_delta(x, delta - (delta * (shared_from_this() * shared_from_this())));
 }
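The rewritten Tanh adjoint is algebraically the same as the old one: since

    \frac{d}{dx}\tanh(x) = \frac{1}{\cosh^2(x)} = 1 - \tanh^2(x),

delta / cosh^2(x) equals delta - delta * tanh^2(x). Reusing the node's own output via shared_from_this() avoids building a separate Cosh subgraph in the backward pass.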
@@ -16,6 +16,7 @@
 #include "inliner.hpp"
 
 #include "ngraph/graph_util.hpp"
+#include "ngraph/log.hpp"
 #include "ngraph/ops/function_call.hpp"
 
 std::vector<std::shared_ptr<ngraph::op::FunctionCall>>
......
@@ -17,7 +17,6 @@
 #pragma once
 
 #include "ngraph/node.hpp"
-#include "ngraph/pattern/matcher.hpp"
 #include "ngraph/pattern/op/pattern.hpp"
 
 namespace ngraph
......
@@ -17,7 +17,6 @@
 #pragma once
 
 #include "ngraph/node.hpp"
-#include "ngraph/pattern/matcher.hpp"
 #include "ngraph/pattern/op/pattern.hpp"
 
 namespace ngraph
......
@@ -20,7 +20,6 @@
 #include "ngraph/node.hpp"
 #include "ngraph/pass/graph_rewrite.hpp"
-#include "ngraph/pattern/matcher.hpp"
 
 namespace ngraph
 {
......
@@ -14,6 +14,8 @@
  * limitations under the License.
  *******************************************************************************/
 
+#include <memory>
+
 #include "ngraph/runtime/aligned_buffer.hpp"
 
 using namespace ngraph;
......
@@ -17,7 +17,6 @@
 #pragma once
 
 #include <cstddef>
-#include <memory>
 
 namespace ngraph
 {
......
@@ -18,7 +18,6 @@
 #include <memory>
 
-#include "ngraph/log.hpp"
 #include "ngraph/shape.hpp"
 #include "ngraph/types/element_type.hpp"
......
@@ -19,7 +19,6 @@
 #include <memory>
 #include <vector>
 
-#include "ngraph/function.hpp"
 #include "ngraph/runtime/tensor_view.hpp"
 
 namespace ngraph
......
@@ -89,6 +89,7 @@
 #include "ngraph/ops/sin.hpp"
 #include "ngraph/ops/sinh.hpp"
 #include "ngraph/ops/slice.hpp"
+#include "ngraph/ops/softmax.hpp"
 #include "ngraph/ops/sqrt.hpp"
 #include "ngraph/ops/subtract.hpp"
 #include "ngraph/ops/sum.hpp"
@@ -179,6 +180,7 @@ static const runtime::cpu::OpMap dispatcher{
     {TI(ngraph::op::Concat), &runtime::cpu::CPU_Emitter::emit<op::Concat>},
     {TI(ngraph::op::Divide), &runtime::cpu::CPU_Emitter::emit<op::Divide>},
     {TI(ngraph::op::Equal), &runtime::cpu::CPU_Emitter::emit<op::Equal>},
+    {TI(ngraph::op::GetOutputElement), &runtime::cpu::CPU_Emitter::emit<op::GetOutputElement>},
     {TI(ngraph::op::Greater), &runtime::cpu::CPU_Emitter::emit<op::Greater>},
     {TI(ngraph::op::GreaterEq), &runtime::cpu::CPU_Emitter::emit<op::GreaterEq>},
     {TI(ngraph::op::Less), &runtime::cpu::CPU_Emitter::emit<op::Less>},
@@ -231,12 +233,14 @@ static const runtime::cpu::OpMap dispatcher{
     {TI(ngraph::op::AvgPoolBackprop), &runtime::cpu::CPU_Emitter::emit<op::AvgPoolBackprop>},
     {TI(ngraph::op::Pad), &runtime::cpu::CPU_Emitter::emit<op::Pad>},
     {TI(ngraph::op::BatchNorm), &runtime::cpu::CPU_Emitter::emit<op::BatchNorm>},
+    {TI(ngraph::op::BatchNormBackprop), &runtime::cpu::CPU_Emitter::emit<op::BatchNormBackprop>},
     {TI(ngraph::op::MaxPoolBackprop), &runtime::cpu::CPU_Emitter::emit<op::MaxPoolBackprop>},
     {TI(ngraph::op::Product), &runtime::cpu::CPU_Emitter::emit<op::Product>},
     {TI(ngraph::op::Max), &runtime::cpu::CPU_Emitter::emit<op::Max>},
     {TI(ngraph::op::Min), &runtime::cpu::CPU_Emitter::emit<op::Min>},
     {TI(ngraph::op::Relu), &runtime::cpu::CPU_Emitter::emit<op::Relu>},
     {TI(ngraph::op::ReluBackprop), &runtime::cpu::CPU_Emitter::emit<op::ReluBackprop>},
+    {TI(ngraph::op::Softmax), &runtime::cpu::CPU_Emitter::emit<op::Softmax>},
 };
 
 runtime::cpu::CPU_ExternalFunction::CPU_ExternalFunction(
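Registering an op with the CPU backend follows the pattern of the entries added here: a hypothetical op Foo would presumably need a dispatcher row such as

    {TI(ngraph::op::Foo), &runtime::cpu::CPU_Emitter::emit<op::Foo>},

together with the corresponding CPU_Emitter::emit<op::Foo> specialization (the emitter changes themselves are in a part of the diff not shown in this excerpt).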
......
@@ -17,7 +17,6 @@
 #pragma once
 
 #include <cstdint>
-#include <list>
 #include <string>
 #include <vector>
......
@@ -43,6 +43,7 @@ static const std::unordered_set<std::type_index> s_op_registry{
     TI(ngraph::op::AvgPool),
     TI(ngraph::op::AvgPoolBackprop),
     TI(ngraph::op::BatchNorm),
+    TI(ngraph::op::BatchNormBackprop),
     TI(ngraph::op::Convolution),
     TI(ngraph::op::ConvolutionBackpropData),
     TI(ngraph::op::ConvolutionBackpropFilters),
......
@@ -15,6 +15,8 @@
  *******************************************************************************/
 #include "matmul_bias.hpp"
+#include "ngraph/log.hpp"
+#include "ngraph/util.hpp"
 
 std::shared_ptr<ngraph::Node>
     ngraph::op::MatmulBias::copy_with_new_args(const NodeVector& new_args) const
......
@@ -16,11 +16,7 @@
 #pragma once
 
-#include "ngraph/node.hpp"
 #include "ngraph/ops/util/requires_tensor_view_args.hpp"
-#include "ngraph/util.hpp"
-
-#include <memory>
 
 namespace ngraph
 {
......
@@ -198,7 +198,8 @@ namespace ngraph
                 auto arg0_rank = arg0_shape.size();
                 auto result_shape = node->get_output_shape(0);
 
-                if (arg0_rank == 4 && node->get_input_element_type(0) == element::f32)
+                if ((arg0_rank == 4 || arg0_rank == 2) &&
+                    node->get_input_element_type(0) == element::f32)
                 {
                     auto op_annotations =
                         std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
......
@@ -25,6 +25,7 @@
 #include "cpu_layout.hpp"
 #include "ngraph/descriptor/output.hpp"
 #include "ngraph/graph_util.hpp"
+#include "ngraph/log.hpp"
 #include "ngraph/ops/add.hpp"
 #include "ngraph/ops/avg_pool.hpp"
 #include "ngraph/ops/convolution.hpp"
@@ -75,7 +76,7 @@ shared_ptr<Node> runtime::cpu::pass::CPULayout::insert_input_conversions(
         }
         else
         {
-            new_args.push_back(node->get_input_op(index));
+            new_args.push_back(output.get_node());
         }
         index++;
     }
@@ -163,7 +164,7 @@ void runtime::cpu::pass::CPULayout::set_default_layouts(
         }
         else
         {
-            new_args.push_back(node->get_input_op(index));
+            new_args.push_back(output.get_node());
         }
         index++;
     }
......
@@ -17,9 +17,6 @@
 #pragma once
 
 #include <memory>
-#include <typeindex>
-#include <typeinfo>
-#include <unordered_map>
 
 #include "ngraph/function.hpp"
......
@@ -42,6 +42,7 @@
 #include "ngraph/ops/reverse.hpp"
 #include "ngraph/ops/select_and_scatter.hpp"
 #include "ngraph/ops/slice.hpp"
+#include "ngraph/ops/softmax.hpp"
 #include "ngraph/ops/sum.hpp"
 #include "ngraph/runtime/call_frame.hpp"
 #include "ngraph/runtime/host_tensor_view.hpp"
@@ -95,6 +96,7 @@
 #include "ngraph/runtime/kernel/sin.hpp"
 #include "ngraph/runtime/kernel/sinh.hpp"
 #include "ngraph/runtime/kernel/slice.hpp"
+#include "ngraph/runtime/kernel/softmax.hpp"
 #include "ngraph/runtime/kernel/sqrt.hpp"
 #include "ngraph/runtime/kernel/subtract.hpp"
 #include "ngraph/runtime/kernel/sum.hpp"
@@ -812,6 +814,14 @@ private:
                               slice->get_strides(),
                               out[0]->get_shape());
         }
+        else if (node_op == "Softmax")
+        {
+            const op::Softmax* softmax = static_cast<const op::Softmax*>(&node);
+            kernel::softmax<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
+                               reinterpret_cast<T*>(out[0]->get_data_ptr()),
+                               out[0]->get_shape(),
+                               softmax->get_axes());
+        }
         else if (node_op == "Sqrt")
         {
             kernel::sqrt<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
......
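The reference kernel behind kernel::softmax<T>(arg, out, shape, axes) is not shown in this excerpt. As a rough sketch of what such a kernel computes, simplified here to the case where axes covers every dimension (so the whole buffer is normalized at once and none of the CoordinateTransform machinery for arbitrary axis sets is needed):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>

    // Simplified, assumption-laden sketch; not the actual ngraph kernel.
    // Assumes count >= 1.
    template <typename T>
    void softmax_all_axes(const T* arg, T* out, size_t count)
    {
        // Subtract the maximum before exponentiating for numerical stability.
        T max_value = *std::max_element(arg, arg + count);
        T sum = 0;
        for (size_t i = 0; i < count; i++)
        {
            out[i] = static_cast<T>(std::exp(arg[i] - max_value));
            sum += out[i];
        }
        for (size_t i = 0; i < count; i++)
        {
            out[i] /= sum;
        }
    }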
@@ -16,16 +16,11 @@
 #pragma once
 
-#include <functional>
 #include <memory>
-#include <typeindex>
-#include <typeinfo>
-#include <unordered_map>
 
 #include "ngraph/function.hpp"
-#include "ngraph/runtime/call_frame.hpp"
 #include "ngraph/runtime/external_function.hpp"
-#include "ngraph/runtime/interpreter/int_backend.hpp"
-#include "ngraph/runtime/interpreter/int_call_frame.hpp"
 
 namespace ngraph
 {
......
@@ -16,7 +16,7 @@
 #pragma once
 
-#include <cmath>
+#include <cstddef>
 
 namespace ngraph
 {
......
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cmath>
+#include <cstddef>
 
 namespace ngraph
 {
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cmath>
+#include <cstddef>
 
 namespace ngraph
 {
......
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cmath>
+#include <cstddef>
 
 namespace ngraph
 {
......
@@ -16,7 +16,6 @@
 #pragma once
 
-#include <algorithm>
 #include <cmath>
 #include <numeric>
 #include <vector>
......
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cmath>
+#include <cstddef>
 
 namespace ngraph
 {
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -119,10 +119,10 @@ namespace ngraph
                 }
 
                 AxisVector input_batch_transform_axis_order(2 + n_spatial_dimensions);
-                size_t n = 0;
-                std::generate(input_batch_transform_axis_order.begin(),
-                              input_batch_transform_axis_order.end(),
-                              [&n]() -> size_t { return n++; });
+                for (size_t i = 0; i < input_batch_transform_axis_order.size(); i++)
+                {
+                    input_batch_transform_axis_order[i] = i;
+                }
 
                 CoordinateTransform input_batch_transform(
                     arg0_shape,
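The replacement loop fills input_batch_transform_axis_order with the identity permutation 0, 1, 2, ..., which is what the removed std::generate call produced; it is equivalent to std::iota(order.begin(), order.end(), 0) without the lambda.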
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cmath>
+#include <cstddef>
 
 namespace ngraph
 {
......
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cmath>
+#include <cstddef>
 
 namespace ngraph
 {
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+#include <stdexcept>
 #include <type_traits>
 
 namespace ngraph
......
@@ -19,6 +19,8 @@
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wfloat-equal"
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cmath>
+#include <cstddef>
 
 namespace ngraph
 {
......
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cmath>
+#include <cstddef>
 
 namespace ngraph
 {
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cmath>
+#include <cstddef>
 
 namespace ngraph
 {
......
@@ -16,7 +16,6 @@
 #pragma once
 
-#include <algorithm>
 #include <cmath>
 #include <numeric>
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -16,6 +16,8 @@
 #pragma once
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -19,6 +19,8 @@
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wfloat-equal"
 
+#include <cstddef>
+
 namespace ngraph
 {
     namespace runtime
......
@@ -20,7 +20,6 @@
 #include "ngraph/axis_vector.hpp"
 #include "ngraph/coordinate_transform.hpp"
-#include "ngraph/util.hpp"
 
 namespace ngraph
 {
......