Unverified Commit d92a5103 authored by Scott Cyphers's avatar Scott Cyphers Committed by GitHub

More extensive examples of graph building. (#798)

* More extensive examples of graph building.

* Review comments

* Review comments

* Review comments
parent 8686eb7a
BasedOnStyle: LLVM
IndentWidth: 4
UseTab: Never
Language: Cpp
Standard: Cpp11
AccessModifierOffset: -4
AlignConsecutiveDeclarations: false
AlignConsecutiveAssignments: false
AlignTrailingComments: true
AllowShortBlocksOnASingleLine: true
AllowShortCaseLabelsOnASingleLine: true
AllowShortFunctionsOnASingleLine: Inline
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
BreakBeforeBraces: Allman
BreakConstructorInitializersBeforeComma: true
ColumnLimit: 75
CommentPragmas: '.*'
IndentCaseLabels: false
IndentWrappedFunctionNames: true
KeepEmptyLinesAtTheStartOfBlocks: false
NamespaceIndentation: All
PointerAlignment: Left
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
SortIncludes: false
ReflowComments: true
IncludeCategories:
- Regex: '^".*'
Priority: 3
- Regex: '^<.*'
Priority: 2
SortIncludes: true
......@@ -19,21 +19,50 @@ if(MKLDNN_INCLUDE_DIR)
endif()
if (NGRAPH_CPU_ENABLE)
set (SRC
abc.cpp
set(HEADER_SEARCH_DEFINES
"NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\""
)
# abc
set (ABC_SRC
${PROJECT_SOURCE_DIR}/doc/examples/abc.cpp
)
add_executable(abc ${SRC})
set_source_files_properties(${ABC_SRC} PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
add_executable(abc ${ABC_SRC})
add_dependencies(abc ngraph)
target_link_libraries(abc ngraph)
set(HEADER_SEARCH_DEFINES
"NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\""
# abc_op
set (ABC_OP_SRC
${PROJECT_SOURCE_DIR}/doc/examples/abc_operator.cpp
)
target_link_libraries(abc ngraph)
include_directories(SYSTEM ${JSON_INCLUDE_DIR})
set_source_files_properties(${ABC_OP_SRC} PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
add_executable(abc_op ${ABC_OP_SRC})
add_dependencies(abc_op ngraph)
target_link_libraries(abc_op ngraph)
# update
set (UPDATE_SRC
${PROJECT_SOURCE_DIR}/doc/examples/update.cpp
)
set_source_files_properties(${UPDATE_SRC} PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
add_executable(update ${UPDATE_SRC})
add_dependencies(update ngraph)
target_link_libraries(update ngraph)
# mnist_mlp
set(MNIST_SRC
${PROJECT_SOURCE_DIR}/doc/examples/mnist_loader.cpp
${PROJECT_SOURCE_DIR}/doc/examples/mnist_mlp.cpp
)
set_source_files_properties(abc.cpp PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
set_source_files_properties(${MNIST_SRC} PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
add_executable(mnist_mlp ${MNIST_SRC})
add_dependencies(mnist_mlp ngraph)
target_link_libraries(mnist_mlp ngraph)
endif()
......@@ -32,7 +32,8 @@ int main()
auto t1 = std::make_shared<op::Multiply>(t0, c);
// Make the function
auto f = std::make_shared<Function>(NodeVector{t1}, op::ParameterVector{a, b, c});
auto f = std::make_shared<Function>(NodeVector{t1},
op::ParameterVector{a, b, c});
// Get the backend
auto manager = runtime::Manager::get("CPU");
......@@ -42,10 +43,11 @@ int main()
auto external = manager->compile(f);
auto cf = backend->make_call_frame(external);
// Allocate tensors
// Allocate tensors for arguments a, b, c
auto t_a = backend->make_primary_tensor_view(element::f32, s);
auto t_b = backend->make_primary_tensor_view(element::f32, s);
auto t_c = backend->make_primary_tensor_view(element::f32, s);
// Allocate tensor for the result
auto t_result = backend->make_primary_tensor_view(element::f32, s);
// Initialize tensors
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <iostream>
#include <ngraph/ngraph.hpp>
using namespace ngraph;
int main()
{
// Build the graph
Shape s{2, 3};
auto a = std::make_shared<op::Parameter>(element::f32, s);
auto b = std::make_shared<op::Parameter>(element::f32, s);
auto c = std::make_shared<op::Parameter>(element::f32, s);
auto t1 = (a + b) * c;
// Make the function
auto f = std::make_shared<Function>(NodeVector{t1},
op::ParameterVector{a, b, c});
// Get the backend
auto manager = runtime::Manager::get("CPU");
auto backend = manager->allocate_backend();
// Compile the function
auto external = manager->compile(f);
auto cf = backend->make_call_frame(external);
// Allocate tensors for arguments a, b, c
auto t_a = backend->make_primary_tensor_view(element::f32, s);
auto t_b = backend->make_primary_tensor_view(element::f32, s);
auto t_c = backend->make_primary_tensor_view(element::f32, s);
// Allocate tensor for the result
auto t_result = backend->make_primary_tensor_view(element::f32, s);
// Initialize tensors
float v_a[2][3] = {{1, 2, 3}, {4, 5, 6}};
float v_b[2][3] = {{7, 8, 9}, {10, 11, 12}};
float v_c[2][3] = {{1, 0, -1}, {-1, 1, 2}};
t_a->write(&v_a, 0, sizeof(v_a));
t_b->write(&v_b, 0, sizeof(v_b));
t_c->write(&v_c, 0, sizeof(v_c));
// Invoke the function
cf->call({t_result}, {t_a, t_b, t_c});
// Get the result
float r[2][3];
t_result->read(&r, 0, sizeof(r));
std::cout << "[" << std::endl;
for (size_t i = 0; i < s[0]; ++i)
{
std::cout << " [";
for (size_t j = 0; j < s[1]; ++j)
{
std::cout << r[i][j] << ' ';
}
std::cout << ']' << std::endl;
}
std::cout << ']' << std::endl;
return 0;
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <algorithm>
#include <iostream>
#include <stdexcept>
#include "mnist_loader.hpp"
// Store the file name and the magic number expected at the head of
// the file; the file itself is not opened until open().
MNistLoader::MNistLoader(const std::string& filename, uint32_t magic)
    : m_filename(filename)
    , m_magic(magic)
{
}
// Read n big-endian 32-bit unsigned integers from the file into loc.
// NOTE(review): like the original, this does not detect EOF — fgetc's
// -1 return would be folded into the value; callers are expected to
// have validated the file via read_header().
template <>
void MNistLoader::read<std::uint32_t>(std::uint32_t* loc, size_t n)
{
    for (size_t i = 0; i < n; ++i)
    {
        // static_cast replaces the original's reinterpret_cast pun of
        // an int lvalue; the assembled value is identical.
        std::uint32_t result = static_cast<std::uint32_t>(fgetc(m_file)) << 24;
        result |= fgetc(m_file) << 16;
        result |= fgetc(m_file) << 8;
        result |= fgetc(m_file);
        loc[i] = result;
    }
}
// Read n bytes from the file, storing each one unscaled as a float.
template <>
void MNistLoader::read<float>(float* loc, size_t n)
{
    float* const end = loc + n;
    while (loc != end)
    {
        *loc++ = static_cast<float>(fgetc(m_file));
    }
}
namespace
{
    // Scale factor 1/256, used to map a byte in [0, 255] into [0, 1).
    float inv_2_8 = 1.0f / 256.0f;
}
// Read n bytes from the file, scaling each into [0, 1) via inv_2_8.
void MNistLoader::read_scaled(float* loc, size_t n)
{
    float* const end = loc + n;
    while (loc != end)
    {
        *loc++ = static_cast<float>(fgetc(m_file)) * inv_2_8;
    }
}
// Read and validate the magic number, then read the item count into
// m_items. Throws std::invalid_argument on a magic-number mismatch.
void MNistLoader::read_header()
{
    std::uint32_t m;
    read<std::uint32_t>(&m);
    if (m != m_magic)
    {
        throw std::invalid_argument("Incorrect magic");
    }
    read<std::uint32_t>(&m_items);
}
// Open the data file, validate its header, and remember where the
// payload starts so reset() can rewind to it.
// Throws std::logic_error if already open, std::runtime_error if the
// file cannot be opened, std::invalid_argument on a bad magic number.
void MNistLoader::open()
{
    if (m_file != nullptr)
    {
        throw std::logic_error("Loader already open");
    }
    m_file = fopen(m_filename.c_str(), "rb");
    // Bug fix: the original never checked fopen's result, so a missing
    // file led to fgetc(nullptr) in read_header().
    if (m_file == nullptr)
    {
        throw std::runtime_error("Could not open file: " + m_filename);
    }
    read_header();
    fgetpos(m_file, &m_data_pos);
}
// Reposition the file to the start of the payload (just after the
// header), as recorded by open().
void MNistLoader::reset()
{
    fsetpos(m_file, &m_data_pos);
}
// Close the underlying file if it is open; safe to call repeatedly.
void MNistLoader::close()
{
    if (m_file)
    {
        fclose(m_file);
        m_file = nullptr;
    }
}
// RAII: ensure the underlying file is closed on destruction.
MNistLoader::~MNistLoader()
{
    close();
}
// Standard MNIST image data set file names.
const char* const MNistImageLoader::TRAIN{"train-images-idx3-ubyte"};
const char* const MNistImageLoader::TEST{"t10k-images-idx3-ubyte"};
// Image files are validated against the image-file magic number.
MNistImageLoader::MNistImageLoader(const std::string& file_name)
    : MNistLoader(file_name, magic_value)
{
}
// Read the common header, then the per-image row and column counts.
void MNistImageLoader::read_header()
{
    MNistLoader::read_header();
    read<uint32_t>(&m_rows);
    read<uint32_t>(&m_columns);
}
// Standard MNIST label data set file names.
const char* MNistLabelLoader::TEST{"t10k-labels-idx1-ubyte"};
const char* MNistLabelLoader::TRAIN{"train-labels-idx1-ubyte"};
// Label files are validated against the label-file magic number.
MNistLabelLoader::MNistLabelLoader(const std::string& file_name)
    : MNistLoader(file_name, magic_value)
{
}
// Remember the batch size and construct (but do not open) the paired
// image and label loaders.
MNistDataLoader::MNistDataLoader(size_t batch_size,
                                 const std::string& image,
                                 const std::string& label)
    : m_batch_size(batch_size)
    , m_image_loader(image)
    , m_label_loader(label)
{
}
// RAII: close both loaders on destruction.
MNistDataLoader::~MNistDataLoader()
{
    close();
}
// Open both data files, check they describe the same number of
// samples, and allocate the batch buffers for images and labels.
void MNistDataLoader::open()
{
    m_image_loader.open();
    m_label_loader.open();
    if (m_label_loader.get_items() != m_image_loader.get_items())
    {
        throw std::invalid_argument(
            "Mismatch between image and label items");
    }
    m_items = m_image_loader.get_items();
    // Floats per image: one per pixel.
    m_image_sample_size = get_rows() * get_columns();
    m_image_floats.reset(new float[m_batch_size * m_image_sample_size]);
    m_label_floats.reset(new float[m_batch_size]);
    m_pos = 0;
    m_epoch = 0;
}
// Close both underlying loaders; safe to call repeatedly.
void MNistDataLoader::close()
{
    m_image_loader.close();
    m_label_loader.close();
}
// Return both loaders to the start of their data and count the
// completed pass through the data set.
void MNistDataLoader::rewind()
{
    m_image_loader.reset();
    m_label_loader.reset();
    m_pos = 0;
    ++m_epoch;
}
// Like rewind(), but also clears the epoch counter. The order matters:
// rewind() increments m_epoch, so it is zeroed afterwards.
void MNistDataLoader::reset()
{
    rewind();
    m_epoch = 0;
}
// Fill the batch buffers with the next m_batch_size samples, wrapping
// around to the start of the data (via rewind(), which also bumps the
// epoch counter) when the end of the data set is reached mid-batch.
void MNistDataLoader::load()
{
    size_t batch_remaining = m_batch_size;
    float* image_pos = m_image_floats.get();
    float* label_pos = m_label_floats.get();
    while (batch_remaining > 0)
    {
        // Samples left before the end of the current epoch.
        size_t epoch_remaining = get_items() - m_pos;
        // How many samples we can copy in one go.
        size_t whack = std::min(epoch_remaining, batch_remaining);
        if (whack == 0)
        {
            // Data exhausted: rewind and keep filling the same batch
            // from the start of the next epoch.
            rewind();
            continue;
        }
        // Images are scaled into [0, 1); labels are read as raw floats.
        m_image_loader.read_scaled(image_pos, whack * m_image_sample_size);
        m_label_loader.read<float>(label_pos, whack);
        image_pos += whack * m_image_sample_size;
        label_pos += whack;
        m_pos += whack;
        batch_remaining -= whack;
    }
// Disabled debug dump: prints each image as ASCII art with its label.
#if 0
    size_t pos = 0;
    size_t lpos = 0;
    for (size_t i = 0; i < m_batch_size; ++i)
    {
        for (size_t j = 0; j < get_rows(); ++j)
        {
            for (size_t k = 0; k < get_columns(); ++k){
                std::cout << (m_image_floats.get()[pos++] > 10 ? '+' : ' ');
            }
            std::cout << std::endl;
        }
        std::cout << "---" << m_label_floats.get()[lpos++] << std::endl;
    }
    std::cout << "===" << std::endl;
#endif
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <cstdint>
#include <cstdio>
#include <memory>
#include <string>
// Base loader for MNIST-format data files: a big-endian magic number
// and item count, followed by the payload bytes.
class MNistLoader
{
protected:
    // filename: path of the data file; magic: expected magic number,
    // checked by read_header() when the file is opened.
    MNistLoader(const std::string& filename, std::uint32_t magic);
    virtual ~MNistLoader();
    // Reads and validates the magic number, then the item count.
    virtual void read_header();

public:
    void open();
    void close();
    // Repositions the file to the start of the payload.
    void reset();
    // Read n values of type T from the file (n defaults to 1).
    template <typename T>
    void read(T* loc, size_t n = 1);
    // Read n bytes scaled into [0, 1) floats.
    void read_scaled(float* loc, size_t n);
    template <typename T>
    size_t file_read(T* loc, size_t n)
    {
        return fread(loc, sizeof(T), n, m_file);
    }
    std::uint32_t get_items() { return m_items; }

protected:
    std::string m_filename;
    FILE* m_file{nullptr};
    std::uint32_t m_magic;
    // Bug fix: was uninitialized; 0 until read_header() has run.
    std::uint32_t m_items{0};
    fpos_t m_data_pos;
};
// Loader for MNIST image files (items of rows x columns pixels).
class MNistImageLoader : public MNistLoader
{
    // Magic number identifying an MNIST image file.
    static const std::uint32_t magic_value = 0x00000803;
    // Reads the base header, then the row and column counts.
    virtual void read_header() override;

public:
    MNistImageLoader(const std::string& file);
    // Standard MNIST image file names.
    static const char* const TEST;
    static const char* const TRAIN;
    std::uint32_t get_rows() { return m_rows; }
    std::uint32_t get_columns() { return m_columns; }

protected:
    // Bug fix: were uninitialized; 0 until read_header() has run.
    std::uint32_t m_rows{0};
    std::uint32_t m_columns{0};
};
// Loader for MNIST label files (one label byte per item).
class MNistLabelLoader : public MNistLoader
{
    // Magic number identifying an MNIST label file.
    static const std::uint32_t magic_value = 0x00000801;

public:
    MNistLabelLoader(const std::string& file);
    // Standard MNIST label file names.
    static const char* TEST;
    static const char* TRAIN;
};
// Pairs an image loader and a label loader and serves fixed-size
// batches of (image, label) data as float buffers.
class MNistDataLoader
{
public:
    // batch_size: samples per batch; image/label: data file paths.
    MNistDataLoader(size_t batch_size,
                    const std::string& image,
                    const std::string& label);
    ~MNistDataLoader();
    void open();
    void close();
    std::uint32_t get_rows() { return m_image_loader.get_rows(); }
    std::uint32_t get_columns() { return m_image_loader.get_columns(); }
    size_t get_batch_size() { return m_batch_size; }
    size_t get_items() { return m_items; }
    size_t get_epoch() { return m_epoch; }
    size_t get_pos() { return m_pos; }
    // Fill the batch buffers with the next batch of samples.
    void load();
    void rewind();
    void reset();
    const float* get_image_floats() const { return m_image_floats.get(); }
    const float* get_label_floats() const { return m_label_floats.get(); }
    // Floats in one image batch (sample size * batch size).
    size_t get_image_batch_size() const
    {
        return m_image_sample_size * m_batch_size;
    }
    size_t get_label_batch_size() const { return m_batch_size; }

protected:
    size_t m_batch_size;
    MNistImageLoader m_image_loader;
    MNistLabelLoader m_label_loader;
    // Bug fix: was an uninitialized std::int32_t, but it is assigned
    // from the loaders' uint32_t item counts and returned as size_t;
    // size_t avoids the signed/unsigned mismatch.
    size_t m_items{0};
    size_t m_pos{0};
    size_t m_epoch{0};
    std::unique_ptr<float[]> m_image_floats;
    std::unique_ptr<float[]> m_label_floats;
    // Floats per image (rows * columns); set in open().
    size_t m_image_sample_size{0};
};
This diff is collapsed.
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <iostream>
#include <stdexcept>
#include <ngraph/ngraph.hpp>
// Make a runtime tensor matching the element type and shape of the
// given output of a node.
// Bug fix: marked `inline` — this is a free function defined in a
// `#pragma once` header, so without `inline` including the header in
// more than one translation unit violates the ODR (duplicate symbol).
inline std::shared_ptr<ngraph::runtime::TensorView> make_output_tensor(
    const std::shared_ptr<ngraph::runtime::Backend>& backend,
    const std::shared_ptr<ngraph::Node>& node,
    size_t output_pos)
{
    return backend->make_primary_tensor_view(
        node->get_output_element_type(output_pos),
        node->get_output_shape(output_pos));
}
// Initialize a tensor from a random generator.
// rand: generator returning one element per call; t: tensor to fill.
// Throws std::invalid_argument if sizeof(T) does not match the
// tensor's element size.
template <typename T>
void randomize(std::function<T()> rand,
               const std::shared_ptr<ngraph::runtime::TensorView>& t)
{
    if (t->get_tensor().get_element_type().bitwidth() != 8 * sizeof(T))
    {
        throw std::invalid_argument(
            "Randomize generator size is not the same as tensor "
            "element size");
    }
    size_t element_count = t->get_element_count();
    std::vector<T> temp;
    temp.reserve(element_count); // one allocation instead of regrowths
    for (size_t i = 0; i < element_count; ++i)
    {
        temp.push_back(rand());
    }
    // Bug fix: data() is well-defined for an empty vector, whereas the
    // original's &temp[0] is undefined behavior when element_count == 0.
    t->write(temp.data(), 0, element_count * sizeof(T));
}
// Get a scalar value from a tensor, optionally at an element offset
template <typename T>
T get_scalar(const std::shared_ptr<ngraph::runtime::TensorView>& t,
             size_t element_offset = 0)
{
    T result;
    // read() takes a byte offset, hence the sizeof(T) scaling.
    t->read(&result, element_offset * sizeof(T), sizeof(T));
    return result;
}
// Set a scalar value in a tensor, optionally at an element offset
template <typename T>
void set_scalar(const std::shared_ptr<ngraph::runtime::TensorView>& t,
                T value,
                size_t element_offset = 0)
{
    // write() takes a byte offset, hence the sizeof(T) scaling.
    t->write(&value, element_offset * sizeof(T), sizeof(T));
}
// Show a shape as "Shape{d0, d1, ...}".
// Bug fix: marked `inline` — a non-template free function defined in
// a header violates the ODR when included in multiple TUs.
inline std::ostream& operator<<(std::ostream& s, const ngraph::Shape& shape)
{
    s << "Shape{";
    for (size_t i = 0; i < shape.size(); ++i)
    {
        s << shape.at(i);
        // comma-separate all but the last dimension
        if (i + 1 < shape.size())
        {
            s << ", ";
        }
    }
    s << "}";
    return s;
}
// A debug class that supports various ways to dump information about a tensor.
// Subclasses implement dump() to define what gets printed.
class TensorDumper
{
protected:
    // name: label used in the dump output; tensor: tensor to describe.
    TensorDumper(
        const std::string& name,
        const std::shared_ptr<ngraph::runtime::TensorView>& tensor)
        : m_name(name)
        , m_tensor(tensor)
    {
    }

public:
    virtual ~TensorDumper() {}
    const std::string& get_name() const { return m_name; }
    std::shared_ptr<ngraph::runtime::TensorView> get_tensor() const
    {
        return m_tensor;
    }
    // Write this dumper's representation of the tensor to s.
    virtual std::ostream& dump(std::ostream& s) const = 0;

protected:
    std::string m_name;
    std::shared_ptr<ngraph::runtime::TensorView> m_tensor;
};
// Stream a TensorDumper by delegating to its virtual dump().
// Bug fix: marked `inline` — header-defined free function (ODR).
inline std::ostream& operator<<(std::ostream& s, const TensorDumper& td)
{
    return td.dump(s);
}
// Show the min and max values of a tensor
class MinMax : public TensorDumper
{
public:
    // Scans the whole tensor once at construction, tracking extrema.
    MinMax(const std::string& name,
           const std::shared_ptr<ngraph::runtime::TensorView>& tensor)
        : TensorDumper(name, tensor)
    {
        size_t n = m_tensor->get_element_count();
        for (size_t i = 0; i < n; ++i)
        {
            float s = get_scalar<float>(m_tensor, i);
            m_max = std::max(m_max, s);
            m_min = std::min(m_min, s);
        }
    }
    float get_min() const { return m_min; }
    float get_max() const { return m_max; }
    std::ostream& dump(std::ostream& s) const override
    {
        return s << "MinMax[" << get_name() << ":" << get_min() << ", "
                 << get_max() << "]";
    }

protected:
    float m_min{std::numeric_limits<float>::max()};
    // Bug fix: was numeric_limits<float>::min(), which for floating
    // point is the smallest POSITIVE normal value (~1.2e-38), so a
    // tensor of all-negative values reported a wrong maximum.
    // lowest() is the correct identity for a running maximum.
    float m_max{std::numeric_limits<float>::lowest()};
};
// Show the elements of a tensor
// Prints elements for rank 0 (scalar), 1 (vector), and 2 (matrix);
// for higher ranks only the shape header is printed (rank <= 2 guard).
class DumpTensor : public TensorDumper
{
public:
    DumpTensor(const std::string& name,
               const std::shared_ptr<ngraph::runtime::TensorView>& tensor)
        : TensorDumper(name, tensor)
    {
    }
    std::ostream& dump(std::ostream& s) const override
    {
        std::shared_ptr<ngraph::runtime::TensorView> t{get_tensor()};
        const ngraph::Shape& shape = t->get_shape();
        // Header: name and comma-separated dimensions
        s << "Tensor<" << get_name() << ": ";
        for (size_t i = 0; i < shape.size(); ++i)
        {
            s << shape.at(i);
            if (i + 1 < shape.size())
            {
                s << ", ";
            }
        }
        // pos walks the tensor elements in linear order
        size_t pos = 0;
        s << ">{";
        size_t rank = shape.size();
        if (rank == 0)
        {
            // Scalar: a single element
            s << get_scalar<float>(t, pos++);
        }
        else if (rank <= 2)
        {
            s << "[";
            for (size_t i = 0; i < shape.at(0); ++i)
            {
                if (rank == 1)
                {
                    s << get_scalar<float>(t, pos++);
                }
                else if (rank == 2)
                {
                    // One row of the matrix
                    s << "[";
                    for (size_t j = 0; j < shape.at(1); ++j)
                    {
                        s << get_scalar<float>(t, pos++);
                        if (j + 1 < shape.at(1))
                        {
                            s << ", ";
                        }
                    }
                    s << "]";
                }
                if (i + 1 < shape.at(0))
                {
                    s << ", ";
                }
            }
            s << "]";
        }
        s << "}";
        return s;
    }
};
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <iostream>
#include <tuple>
// Return how many times this function has been called before.
// The static local is the (hidden) state of the computation.
int count()
{
    static int calls = 0;
    int current = calls;
    ++calls;
    return current;
}
// Stateless counterpart of count(): given the current counter value,
// return it together with the incremented value for the next call.
std::tuple<int, int> stateless_count(int counter)
{
    return std::make_tuple(counter, counter + 1);
}
int main()
{
    // Stateful counting: each call advances the hidden counter.
    std::cout << "State: " << count() << ", " << count() << std::endl;
    // Stateless counting: the caller threads the counter value through.
    std::cout << "Stateless: ";
    int counter = 0;
    for (int i = 0; i < 2; ++i)
    {
        auto r = stateless_count(counter);
        counter = std::get<1>(r);
        std::cout << std::get<0>(r);
        if (i == 0)
        {
            std::cout << ", ";
        }
    }
    std::cout << std::endl;
}
.. derive-for-training.rst
#########################
Derive a trainable model
#########################
Documentation in this section describes one of the ways to derive a trainable
model from an inference model.
We can derive a trainable model from any graph that has been constructed with
weight-based updates. For this example named ``mnist_mlp.cpp``, we start with a hand-designed inference model and convert it to a model that can be trained
with nGraph.
Additionally, and to provide a more complete walk-through that *also* trains the
trainable model, our example includes the use of a simple data loader for the
MNIST data.
* :ref:`model_overview`
* :ref:`code_structure`
- :ref:`inference`
- :ref:`loss`
- :ref:`backprop`
- :ref:`update`
.. _model_overview:
Model overview
==============
The nGraph API was designed for automatic graph construction under direction of
a framework. Without a framework, the process is somewhat tedious, so the example
selected is a relatively simple model: a fully-connected topology with one hidden
layer followed by ``Softmax``.
Since the graph is stateless there are parameters for both the inputs
and the weights. We will construct the graph for inference and use
nGraph to create a graph for training. The training function will
return tensors for the updated weights. Note that this is not the same
as *constructing* the training model directly, which would be
significantly more work.
.. _code_structure:
Code Structure
==============
.. _inference:
Inference
---------
We begin by building the graph, starting with the input parameter
``X``. We define a fully-connected layer, including a parameter for
weights and bias.
.. literalinclude:: ../../../examples/mnist_mlp.cpp
:language: cpp
:lines: 124-136
We repeat the process for the next layer, which we
normalize with a ``softmax``.
.. literalinclude:: ../../../examples/mnist_mlp.cpp
:language: cpp
:lines: 138-149
.. _loss:
Loss
----
We use cross-entropy to compute the loss. nGraph does not currently
have a cross-entropy op, so we implement it directly, adding clipping
to prevent underflow.
.. literalinclude:: ../../../examples/mnist_mlp.cpp
:language: cpp
:lines: 151-166
.. _backprop:
Backprop
--------
We want to reduce the loss by adjusting the weights. We compute the
adjustments using the reverse-mode autodiff algorithm, commonly
referred to as "backprop" because of the way it is implemented in
interpreted frameworks. In nGraph, we augment the loss computation
with computations for the weight adjustments. This allows the
calculations for the adjustments to be further optimized.
.. literalinclude:: ../../../examples/mnist_mlp.cpp
:language: cpp
:lines: 172
For any node ``N``, if the update for ``loss`` is ``delta``, the
update computation for ``N`` will be given by the node
.. code-block:: cpp
auto update = loss->backprop_node(N, delta);
The different update nodes will share intermediate computations. So to
get the updated value for the weights we just say:
.. literalinclude:: ../../../examples/mnist_mlp.cpp
:language: cpp
:lines: 168-178
.. _update:
Update
------
Since nGraph is stateless, we train by making a function that has the
original weights among its inputs and the updated weights among the
results. For training, we'll also need the labeled training data as
inputs, and we'll return the loss as an additional result. We'll also
want to track how well we are doing; this is a function that returns
the loss and has the labeled testing data as input. Although we can
use the same nodes in different functions, nGraph currently does not
allow the same nodes to be compiled in different functions, so we
compile clones of the nodes.
.. literalinclude:: ../../../examples/mnist_mlp.cpp
:language: cpp
:lines: 248-260
......@@ -101,7 +101,7 @@ Once the graph is built, we need to package it in a ``Function``:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 35
:lines: 35-36
The first argument to the constructor specifies the nodes that the function will
return; in this case, the product. A ``NodeVector`` is a vector of shared
......@@ -136,7 +136,7 @@ To select the ``"CPU"`` backend,
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 38-39
:lines: 39-40
.. _compile_cmp:
......@@ -153,7 +153,7 @@ thread needs to execute the function at the same time, create multiple
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 42-43
:lines: 43-44
.. _allocate_bkd_storage:
......@@ -173,11 +173,11 @@ you switch between odd/even generations of variables on each update.
Backends are responsible for managing storage. If the storage is off-CPU, caches
are used to minimize copying between device and CPU. We can allocate storage for
the three parameters and return value as follows:
the three parameters and the return value as follows:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 30-33
:lines: 46-51
Each tensor is a shared pointer to a ``runtime::TensorView``, the interface
backends implement for tensor use. When there are no more references to the
......@@ -192,7 +192,7 @@ Next we need to copy some data into the tensors.
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 45-58
:lines: 53-60
The ``runtime::TensorView`` interface has ``write`` and ``read`` methods for
copying data to/from the tensor.
......@@ -207,7 +207,7 @@ call frame:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 61
:lines: 63
.. _access_outputs:
......@@ -219,7 +219,7 @@ We can use the ``read`` method to access the result:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 64-65
:lines: 65-67
.. _all_together:
......@@ -234,4 +234,4 @@ Put it all together
.. _Intel MKL-DNN: https://01.org/mkl-dnn
.. _Intel Nervana NNP: https://ai.intel.com/intel-nervana-neural-network-processors-nnp-redefine-ai-silicon/
\ No newline at end of file
.. _Intel Nervana NNP: https://ai.intel.com/intel-nervana-neural-network-processors-nnp-redefine-ai-silicon/
......@@ -8,11 +8,13 @@ How to
:caption: How to
execute.rst
import.rst
operator.rst
update.rst
derive-for-training.rst
import.rst
The "How to" articles in this section explain how to do specific tasks with the
Intel nGraph++ library. The recipes are all framework agnostic; in other words,
The "How to" articles in this section explain how to do specific tasks with
nGraph components. The recipes are all framework agnostic; in other words,
if an entity (framework or user) wishes to make use of target-based computational
resources, it can either:
devices, or it might mean programmatically adjusting a model or the compute
resources it requires, at an unknown or arbitrary time after it has been deemed
to be trained well enough.
To get started, we've provided a basic example for how to :doc:`execute` with
an nGraph backend; this is analogous to a framework bridge.
To get started, we've provided a basic example for how to execute a
computation that can run on an nGraph backend; this is analogous to a
framework bridge. We also provide a larger example for training and
evaluating a simple MNIST MLP model.
For data scientists or algorithm developers who are trying to extract specifics
about the state of a model at a certain node, or who want to optimize a model
......
.. operator.rst
############################
Build a graph with operators
############################
This section illustrates the use of C++ operators to simplify the
building of graphs.
Several C++ operators are overloaded to simplify graph construction.
For example, the following:
.. literalinclude:: ../../../examples/abc.cpp
:language: cpp
:lines: 32-32
can be simplified to:
.. literalinclude:: ../../../examples/abc_operator.cpp
:language: cpp
:lines: 31
The expression ``a + b`` is equivalent to
``std::make_shared<op::Add>(a, b)``, and the ``*`` operator similarly
applies ``std::make_shared<op::Multiply>`` to its arguments.
.. update.rst
###########################
Make a stateful computation
###########################
In this section, we show how to make a stateful computation from
nGraph's stateless operations. The basic idea is that any computation
with side-effects can be factored into a stateless function that
transforms the old state into the new state.
An Example from C++
===================
Let's start with a simple C++ example, a function ``count`` that
returns how many times it has already been called:
.. literalinclude:: ../../../examples/update.cpp
:language: cpp
:lines: 20-24
The static variable ``counter`` provides state for this function. The
state is initialized to 0. Every time ``count`` is called, the current
value of ``counter`` is returned and ``counter`` is incremented. To
convert this to use a stateless function, we make a function that
takes the current value of ``counter`` as an argument and returns the
updated value.
.. literalinclude:: ../../../examples/update.cpp
:language: cpp
:lines: 26-29
To use this version of counting,
.. literalinclude:: ../../../examples/update.cpp
:language: cpp
:lines: 36-48
Update in nGraph
================
We use the same approach with nGraph. During training, we include all
the weights as arguments to the training function and return the
updated weights along with any other results. If we are doing a more
complex form of training, such as using momentum, we would add the
momentum tensors as additional arguments and add their updated
values as additional results. The simple case is illustrated in the
trainable model how to.
......@@ -47,7 +47,7 @@ declare NNP_TEST_DIR="build/third-party/nnp_transformer/src/ext_nnp_transformer/
declare PYBIND_WRAPPER="python/pyngraph"
declare ROOT_SUBDIR
for ROOT_SUBDIR in src test ${NNP_SRC_DIR} ${NNP_TEST_DIR} ${PYBIND_WRAPPER}; do
for ROOT_SUBDIR in src doc/examples test ${NNP_SRC_DIR} ${NNP_TEST_DIR} ${PYBIND_WRAPPER}; do
if ! [[ -d "${ROOT_SUBDIR}" ]]; then
bash_lib_status "In directory '$(pwd)', no subdirectory named '${ROOT_SUBDIR}' was found."
else
......
......@@ -50,7 +50,7 @@ declare NNP_TEST_DIR="build/third-party/nnp_transformer/src/ext_nnp_transformer/
declare PYBIND_WRAPPER="python/pyngraph"
declare ROOT_SUBDIR
for ROOT_SUBDIR in src test ${NNP_SRC_DIR} ${NNP_TEST_DIR} ${PYBIND_WRAPPER}; do
for ROOT_SUBDIR in src doc/examples test ${NNP_SRC_DIR} ${NNP_TEST_DIR} ${PYBIND_WRAPPER}; do
if ! [[ -d "${ROOT_SUBDIR}" ]]; then
bash_lib_status "In directory '$(pwd)', no subdirectory named '${ROOT_SUBDIR}' was found."
else
......
......@@ -72,6 +72,12 @@ namespace ngraph
// throws ngraph_error if key does not exist
std::shared_ptr<ngraph::Node> get(std::shared_ptr<ngraph::Node> orig) const;
// Like get(), but preserves the caller's static pointer type: the
// mapped node is dynamic_pointer_cast back to T's pointee type.
// T is expected to be a shared_ptr-like type exposing element_type;
// the result is null if the mapped node is not of that type.
template <typename T>
T dynamic_get(const T& orig)
{
    return std::dynamic_pointer_cast<typename T::element_type>(get(orig));
}
// returns true if original node is already mapped
bool exists(std::shared_ptr<ngraph::Node> orig) const
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment