Unverified Commit d991861d authored by L.S. Cook's avatar L.S. Cook Committed by GitHub

Merge pull request #556 from NervanaSystems/cyphers/dochow

Cyphers/dochow
parents 8e5c9404 174402a4
......@@ -13,7 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
add_subdirectory(examples)
if ("${NGRAPH_BUILD_DOCS}" MATCHES "^ON$")
add_custom_target( docs
COMMENT "Build all of the documentation types selected during CMake configuration."
......
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
# Link against a prebuilt MKL-DNN when the parent project located one.
# NOTE(review): the condition checks MKLDNN_INCLUDE_DIR but consumes
# MKLDNN_LIB_DIR — presumably both are set together by the discovery logic;
# confirm in the top-level CMakeLists.
if(MKLDNN_INCLUDE_DIR)
link_directories(${MKLDNN_LIB_DIR})
endif()
# Build the "abc" documentation example only when the CPU backend is enabled
# (the example requests the "CPU" runtime backend when it runs).
if (NGRAPH_CPU_ENABLE)
set (SRC
abc.cpp
# NOTE(review): if this CMakeLists lives in doc/examples, the entry below
# resolves to the same file as `abc.cpp` above — confirm both are intended.
${PROJECT_SOURCE_DIR}/doc/examples/abc.cpp
)
add_executable(abc ${SRC})
# Ensure the ngraph library is built before this example links against it.
add_dependencies(abc ngraph)
# Compile definition telling the example where the nGraph headers live.
set(HEADER_SEARCH_DEFINES
"NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\""
)
target_link_libraries(abc ngraph)
include_directories(SYSTEM ${JSON_INCLUDE_DIR})
# Attach the header-path define to abc.cpp only, not globally.
set_source_files_properties(abc.cpp PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
endif()
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <iostream>
#include <ngraph/ngraph.hpp> // nGraph public API (everything under namespace ngraph)
using namespace ngraph;
int main() // NOTE(review): execute.rst includes this file by line number (literalinclude :lines:) — keep the line layout stable when editing
{
// Build the graph — the computation is (a + b) * c over 2x3 float32 tensors
Shape s{2, 3}; // shared shape for every tensor in this example
auto a = std::make_shared<op::Parameter>(element::f32, s); // parameters are graph inputs:
auto b = std::make_shared<op::Parameter>(element::f32, s); // they have no graph inputs of
auto c = std::make_shared<op::Parameter>(element::f32, s); // their own; values arrive at call time
auto t0 = std::make_shared<op::Add>(a, b); // t0 = a + b
auto t1 = std::make_shared<op::Multiply>(t0, c); // t1 = (a + b) * c
// Make the function
auto f = std::make_shared<Function>(NodeVector{t1}, op::ParameterVector{a, b, c}); // results first, then every parameter the computation uses
// Get the backend
auto manager = runtime::Manager::get("CPU"); // "CPU" = optimized MKL-DNN backend; "INTERPRETER" is the slow reference backend
auto backend = manager->allocate_backend();
// Compile the function
auto external = manager->compile(f);
auto cf = backend->make_call_frame(external); // a CallFrame runs in one thread at a time; make one per concurrent caller
// Allocate tensors
auto t_a = backend->make_primary_tensor_view(element::f32, s);
auto t_b = backend->make_primary_tensor_view(element::f32, s);
auto t_c = backend->make_primary_tensor_view(element::f32, s);
auto t_result = backend->make_primary_tensor_view(element::f32, s); // output storage is caller-allocated too
// Initialize tensors
float v_a[2][3] = {{1, 2, 3}, {4, 5, 6}};
float v_b[2][3] = {{7, 8, 9}, {10, 11, 12}};
float v_c[2][3] = {{1, 0, -1}, {-1, 1, 2}};
t_a->write(&v_a, 0, sizeof(v_a)); // copy host data into backend storage (offset 0, whole buffer)
t_b->write(&v_b, 0, sizeof(v_b));
t_c->write(&v_c, 0, sizeof(v_c));
// Invoke the function
cf->call({t_a, t_b, t_c}, {t_result}); // arguments in ParameterVector order, then the result tensors
// Get the result
float r[2][3];
t_result->read(&r, 0, sizeof(r)); // copy the output back to host memory
std::cout << "[" << std::endl;
for (size_t i = 0; i < s[0]; ++i) // rows (s[0] == 2)
{
std::cout << " [";
for (size_t j = 0; j < s[1]; ++j) // columns (s[1] == 3)
{
std::cout << r[i][j] << ' ';
}
std::cout << ']' << std::endl;
}
std::cout << ']' << std::endl;
return 0; // expected output: [[8 10 8][-14 16 36]] given the inputs above — verify by running
}
......@@ -1837,18 +1837,19 @@ div[class^='highlight'] td.code {
}
code, p.caption, caption-text {
font-family: RobotoSlab, sans, monospace;
font-family: Inconsolata, sans, monospace;
color: #A79992;
font-size: 0.95em;
line-height: 1.11em;
font-size: 0.99em;
line-height: 1.39em;
}
.code-block-caption {
font-variant: small-caps;
font-size: 0.88em;
background-color: #c3d5d5;
background-color: #d0dfdf;
padding-right: 0.43em;
padding-top: 0.23em;
padding-left: 0.11em;
padding-bottom: 0.23em;
text-align: right;
}
......
......@@ -18,6 +18,7 @@ Compile MXNet\* with ``libngraph``
If the |nGl| code has not yet been installed to your system, please go back
and return here to finish compiling MXNet with ``libngraph``.
#. Set the ``LD_LIBRARY_PATH`` path to the location where we built the nGraph
libraries:
......
......@@ -6,36 +6,89 @@ Glossary
========
.. glossary::
:sorted:
backend
A component that can execute computations.
bridge
A component of nGraph that acts as a backend for a framework,
allowing the framework to define and execute computations.
framework
A machine learning environment, such as TensorFlow, MXNet, or
neon.
function graph
The Intel nGraph library uses a function graph to represent an ``op``'s
parameters and results.
The Intel nGraph library uses a function graph to represent an
``op``'s parameters and results.
op
An op represents an operation. Ops are stateless and have zero or more
inputs and zero or more outputs. Some ops have additional constant
attributes. Every output of an op corresponds to a tensor and has an
element type and a shape. The element types and shapes of the outputs of
an op are determined by the inputs and attributes of the op.
tensors
Tensors are maps from *coordinates* to scalar values, all of the same type,
called the *element type* of the tensor.
An op represents an operation. Ops are stateless and have zero
or more inputs and zero or more outputs. Some ops have
additional constant attributes. Every output of an op
corresponds to a tensor and has an element type and a shape. The
element types and shapes of the outputs of an op are determined
by the inputs and attributes of the op.
parameter
In the context of a function graph, a "parameter" refers to what "stands
in" for an argument in an ``op`` definition.
In the context of a function graph, a "parameter" refers to what
"stands in" for an argument in an ``op`` definition.
result
In the context of a function graph, the term "result" refers to what
stands in for the returned value.
In the context of a function graph, the term "result" refers to
what stands in for the returned value.
shape
The shape of a tensor is a tuple of non-negative integers that represents an
exclusive upper bound for coordinate values.
The shape of a tensor is a tuple of non-negative integers that
represents an exclusive upper bound for coordinate values.
shared pointer
The C++ standard template library has the template
``std::shared_ptr<X>``. A shared pointer is used like an ``X*``
pointer, but maintains a reference count to the underlying
object. Each new shared pointer to the object increases the
count. When a shared pointer goes out of scope, the reference
count is decremented, and, when the count reaches 0, the
underlying object is deleted. The function template
``std::make_shared<X>(...)`` can be used similarly to ``new
X(...)``, except it returns a ``std::shared_ptr<X>`` instead of
an ``X*``.
If there is a chain of shared pointers from an object back to
itself, every object in the chain is referenced, so the
reference counts will never reach 0 and the objects will never
be deleted.
If ``a`` referenced ``b`` and ``b`` wanted to track all
references to itself and shared pointers were used both
directions, there would be a chain of pointers from ``a`` to
itself. We avoid this by using shared pointers in only one
direction, and raw pointers for the inverse
direction. ``std::enable_shared_from_this`` is a class template
that defines a method ``shared_from_this`` that provides a
shared pointer from a raw pointer.
nGraph makes use of shared pointers for objects whose lifetime
is hard to determine when they are allocated.
step
An abstract "action" that produces zero or more tensor outputs from zero or more tensor
inputs. Steps correspond to *ops* that connect *nodes*.
An abstract "action" that produces zero or more tensor outputs
from zero or more tensor inputs. Steps correspond to *ops* that
connect *nodes*.
tensors
Tensors are maps from *coordinates* to scalar values, all of the
same type, called the *element type* of the tensor.
.. execute.rst
######################
Execute a Computation
######################
This section explains how to manually perform the steps that would normally be
performed by a framework :term:`bridge` to execute a computation. Intel® nGraph
library is targeted toward automatic construction; it is far easier for a
processing unit (GPU, CPU, or NNP) to run a computation than it is for a user
to map out how that computation happens. Unfortunately, things that make by-hand
graph construction simpler tend to make automatic construction more difficult,
and vice versa.
Here we will do all the bridge steps manually. The :term:`model description`
we're explaining is based on the :file:`abc.cpp` file in the ``/doc/examples/``
directory. We'll be deconstructing the steps that an entity (framework or
user) must be able to carry out in order to successfully execute a computation:
* :ref:`define_cmp`
* :ref:`specify_bkd`
* :ref:`compile_cmp`
* :ref:`allocate_bkd_storage`
* :ref:`initialize_inputs`
* :ref:`invoke_cmp`
* :ref:`access_outputs`
The final code is at the end of this page, in :ref:`all_together`.
.. _define_cmp:
Define the computation
======================
To a :term:`framework`, a computation is simply a transformation of inputs to
outputs. While a *framework bridge* can programmatically construct the graph
from a framework's representation of the computation, graph construction can be
somewhat more tedious for users. To a user, who is usually interested in
specific nodes (vertices) or edges of a computation that reveal "what is
happening where", it can be helpful to think of a computation as a zoomed-out
and *stateless* dataflow graph where all of the nodes are well-defined tensor
operations and all of the edges denote use of an output from one operation as
an input for another operation.
.. TODO
.. image for representing nodes and edges of (a+b)*c
Most of the public portion of the nGraph API is in the ``ngraph`` namespace, so
we will omit the namespace. Use of namespaces other than ``std`` will be
namespaces in ``ngraph``. For example, the ``op::Add`` is assumed to refer to
``ngraph::op::Add``.
A computation's graph is constructed from ops; each is a member of a subclass of
``op::Op``, which, in turn, is a subclass of ``Node``. Not all graphs are
computations, but all graphs are composed entirely of instances of ``Node``.
Computation graphs contain only ``op::Op`` nodes.
We mostly use :term:`shared pointers<shared pointer>` for nodes, i.e.
``std::shared_ptr<Node>`` so that they will be automatically
deallocated when they are no longer needed. A brief summary of shared
pointers is given in the glossary.
Every node has zero or more *inputs*, zero or more *outputs*, and zero or more
*attributes*. The specifics permitted for each type of node on a core
``Op``-specific basis can be discovered in :doc:`ops` docs. For our
purpose to :ref:`define a computation <define_cmp>`, nodes should be thought of
as essentially immutable; that is, when constructing a node, we need to supply
all of its inputs. We get this process started with ops that have no inputs,
since any op that has inputs requires those inputs to be constructed first.
``op::Parameter`` specifies the tensors that will be passed to the computation.
They receive their values from outside of the graph, so they have no inputs.
They have attributes for the element type and the shape of the tensor that will
be passed to them.
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 26-29
Here we have made three parameter nodes, each a 32-bit float of shape ``(2, 3)``
using a row-major element layout.
We can create a graph for ``(a+b)*c`` by creating an ``op::Add`` node with inputs
from ``a`` and ``b``, and an ``op::Multiply`` node from the add node and ``c``:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 31-32
When the ``op::Add`` op is constructed, it will check that the element types and
shapes of its inputs match; to support multiple frameworks, ngraph does not do
automatic type conversion or broadcasting. In this case, they match, and the
shape of the unique output of ``t0`` will be a 32-bit float with shape ``(2, 3)``.
Similarly, ``op::Multiply`` checks that its inputs match and sets the element
type and shape of its unique output.
Once the graph is built, we need to package it in a ``Function``:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 35
The first argument to the constructor specifies the nodes that the function will
return; in this case, the product. A ``NodeVector`` is a vector of shared
pointers of ``op::Node``. The second argument specifies the parameters of the
function, in the order they are to be passed to the compiled function. A
``ParameterVector`` is a vector of shared pointers to ``op::Parameter``.
.. important:: The parameter vector must include **every** parameter used in
the computation of the results.
.. _specify_bkd:
Specify the backend upon which to run the computation
=====================================================
For a framework bridge, a *backend* is the environment that can perform the
computations; it can be done with a CPU, GPU, or an Intel Nervana NNP. A
*transformer* can compile computations for a backend, allocate and deallocate
tensors, and invoke computations.
Factory-like managers for classes of backend managers can compile a ``Function``
and allocate backends. A backend is somewhat analogous to a multi-threaded
process.
There are two backends for the CPU: the optimized ``"CPU"`` backend, which uses
the `Intel MKL-DNN`_, and the ``"INTERPRETER"`` backend, which runs reference
versions of kernels that favor implementation clarity over speed. The
``"INTERPRETER"`` backend can be slow, and is primarily intended for testing.
To select the ``"CPU"`` backend,
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 38-39
.. _compile_cmp:
Compile the computation
=======================
Compilation triggers something that can be used as a factory for producing a
``CallFrame`` which is a *function* and its associated *state* that can run
in a single thread at a time. A ``CallFrame`` may be reused, but any particular
``CallFrame`` must only be running in one thread at any time. If more than one
thread needs to execute the function at the same time, create multiple
``CallFrame`` objects from the ``ExternalFunction``.
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 42-43
.. _allocate_bkd_storage:
Allocate backend storage for the inputs and outputs
===================================================
At the graph level, functions are stateless. They do have internal state related
to execution, but there is no user-visible state. Variables must be passed as
arguments. If the function updates variables, it must return the updated
variables.
To invoke a function, tensors must be provided for every input and every output.
At this time, a tensor used as an input cannot also be used as an output. If
variables are being updated, you should use a double-buffering approach where
you switch between odd/even generations of variables on each update.
Backends are responsible for managing storage. If the storage is off-CPU, caches
are used to minimize copying between device and CPU. We can allocate storage for
the three parameters and return value as follows:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 30-33
Each tensor is a shared pointer to a ``runtime::TensorView``, the interface
backends implement for tensor use. When there are no more references to the
tensor view, it will be freed when convenient for the backend.
.. _initialize_inputs:
Initialize the inputs
=====================
Next we need to copy some data into the tensors.
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 45-58
The ``runtime::TensorView`` interface has ``write`` and ``read`` methods for
copying data to/from the tensor.
.. _invoke_cmp:
Invoke the computation
======================
To invoke the function, we simply pass argument and resultant tensors to the
call frame:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 61
.. _access_outputs:
Access the outputs
==================
We can use the ``read`` method to access the result:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 64-65
.. _all_together:
Put it all together
===================
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:caption: "The (a + b) * c example for executing a computation on nGraph"
.. _Intel MKL-DNN: https://01.org/mkl-dnn
\ No newline at end of file
.. howto/index:
How to
======
.. note:: This section is aimed at intermediate users of Intel nGraph library.
It assumes a developer has understanding of the concepts in the previous
sections. It does not assume knowledge of any particular frontend framework.
The "How to" articles in this section explain how to do specific tasks with
Intel nGraph. The recipes are all framework agnostic; in other words, any
frontend framework that wishes to access the optimizations inherent in nGraph
will either need to do these things programmatically through the framework, or to
provide documentation for the user. Our primary audience is users who have
already decided that they want the performance optimizations available through
the nGraph library's management of custom backends.
To get started, we've provided a basic example for how to execute a computation
that can run on an nGraph backend; this is analogous to a framework bridge.
This section is under development; it will eventually contain articles targeted
toward data scientists, algorithm designers, framework developers, and backend
engineers -- anyone who wants to pivot on our examples and experiment with the
variety of hybridization and performance extractions available through the
nGraph library.
.. toctree::
:maxdepth: 1
:caption: How-to
execute.rst
\ No newline at end of file
......@@ -54,6 +54,7 @@ Sections
testing-libngraph.rst
framework-integration-guides.rst
graph-basics.rst
howto/index.rst
ops/index.rst
project/index.rst
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment