Commit f0acb7da authored by Rob Earhart, committed by Robert Kimball

Add PlaidML backend (#1888)

* Add PlaidML backend

* CR comments

Used m_ prefix for members; removed trailing underscores
Updated license headers
Moved associated header inclusions to project blocks
Wrapped comments to 100 chars
Added missing newlines between functions
Removed nested namespaces in operation implementations

* Add earhart to CODEOWNERS

* Rebase updates

* style
parent d901446d
@@ -104,6 +104,8 @@ if (NGRAPH_ONNX_IMPORT_ENABLE)
option(NGRAPH_ONNXIFI_ENABLE "Enable ONNX Interface for Framework Integration" TRUE)
endif()
option(NGRAPH_PLAIDML_ENABLE "Enable the PlaidML backend" FALSE)
#-----------------------------------------------------------------------------------------------
# Installation logic...
#-----------------------------------------------------------------------------------------------
@@ -206,6 +208,10 @@ if (NGRAPH_CPU_ENABLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNGRAPH_CPU_ENABLE")
endif()
if (NGRAPH_PLAIDML_ENABLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNGRAPH_PlaidML_ENABLE")
endif()
if (NOT DEFINED NGRAPH_TBB_ENABLE)
set(NGRAPH_TBB_ENABLE ${NGRAPH_CPU_ENABLE})
endif()
@@ -43,6 +43,7 @@
/src/ngraph/runtime/hybrid/ @sasadep
/src/ngraph/runtime/intelgpu/ @shssf
/src/ngraph/runtime/interpreter/ @rkimballn1
/src/ngraph/runtime/plaidml/ @earhart
/src/ngraph/runtime/reference/ @aprocter
/src/ngraph/type/ @diyessi
/src/ngraph/serializer.*pp @rkimballn1
@@ -18,7 +18,7 @@ import pytest
def pytest_addoption(parser):
parser.addoption('--backend', default='INTERPRETER',
choices=['INTERPRETER', 'CPU', 'GPU', 'NNP'],
choices=['INTERPRETER', 'CPU', 'GPU', 'NNP', 'PlaidML'],
help='Select from available backends')
@@ -31,3 +31,4 @@ def pytest_configure(config):
config.cpu_skip = pytest.mark.skipif(config.getvalue('backend') == 'CPU')
config.nnp_skip = pytest.mark.skipif(config.getvalue('backend') == 'NNP')
config.interpreter_skip = pytest.mark.skipif(config.getvalue('backend') == 'INTERPRETER')
config.plaidml_skip = pytest.mark.skipif(config.getvalue('backend') == 'PlaidML')
@@ -35,6 +35,7 @@ std::string ngraph::placement_to_string(Placement placement)
case Placement::CPU: return "CPU";
case Placement::GPU: return "GPU";
case Placement::NNP: return "NNP";
case Placement::PLAIDML: return "PlaidML";
}
throw runtime_error("unhandled placement type");
}
@@ -42,6 +42,7 @@ namespace ngraph
CPU,
GPU,
NNP,
PLAIDML,
};
std::string placement_to_string(Placement placement);
@@ -32,3 +32,5 @@ endif()
if (NGRAPH_GPU_ENABLE)
add_subdirectory(gpu)
endif()
add_subdirectory(plaidml)
# ******************************************************************************
# Copyright 2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
set(SRC
plaidml_backend.cpp
plaidml_builder.cpp
plaidml_compilation_cache.cpp
plaidml_compiled_function.cpp
plaidml_compiler.cpp
plaidml_config.cpp
plaidml_convpool_formatter.cpp
plaidml_impl.cpp
plaidml_logger.cpp
plaidml_ops_arithmetic.cpp
plaidml_ops_batch_norm.cpp
plaidml_ops_comparison.cpp
plaidml_ops_concat.cpp
plaidml_ops_convert.cpp
plaidml_ops_convolution.cpp
plaidml_ops_dot.cpp
plaidml_ops_function.cpp
plaidml_ops_general.cpp
plaidml_ops_index_reduction.cpp
plaidml_ops_io.cpp
plaidml_ops_local_response_norm.cpp
plaidml_ops_logical.cpp
plaidml_ops_one_hot.cpp
plaidml_ops_pool.cpp
plaidml_ops_reduce.cpp
plaidml_ops_replace_slice.cpp
plaidml_ops_reverse.cpp
plaidml_ops_slice.cpp
plaidml_ops_softmax.cpp
plaidml_ops_transcendental.cpp
plaidml_tensor.cpp
plaidml_translate.cpp
)
if (NGRAPH_PLAIDML_ENABLE)
find_package(PlaidML CONFIG REQUIRED)
message(STATUS "PlaidML enabled")
add_library(libplaidml INTERFACE)
target_link_libraries(libplaidml INTERFACE ${PLAIDML_LIBRARIES})
install(FILES ${PLAIDML_LIBRARIES} DESTINATION ${NGRAPH_INSTALL_LIB})
add_library(plaidml_backend SHARED ${SRC})
set_target_properties(plaidml_backend PROPERTIES VERSION ${NGRAPH_VERSION} SOVERSION ${NGRAPH_API_VERSION})
target_include_directories(plaidml_backend SYSTEM PUBLIC ${PLAIDML_INCLUDE_DIRS})
target_link_libraries(plaidml_backend PUBLIC ngraph libplaidml)
set_target_properties(plaidml_backend PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${NGRAPH_BUILD_DIR}")
install(TARGETS plaidml_backend LIBRARY DESTINATION ${NGRAPH_INSTALL_LIB})
else()
message(STATUS "PlaidML not enabled")
endif()
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/plaidml/plaidml_backend.hpp"
#include "ngraph/node.hpp"
#include "ngraph/runtime/plaidml/plaidml_compiled_function.hpp"
#include "ngraph/runtime/plaidml/plaidml_tensor.hpp"
#include "ngraph/util.hpp"
namespace vp = vertexai::plaidml;
ngraph::runtime::plaidml::PlaidML_Backend::PlaidML_Backend(const char* configuration_string)
: m_config{parse_config_string(configuration_string)}
, m_compiler{&m_config}
{
}
std::shared_ptr<ngraph::runtime::Tensor> ngraph::runtime::plaidml::PlaidML_Backend::create_tensor(
const ngraph::element::Type& element_type, const ngraph::Shape& shape)
{
return std::make_shared<PlaidML_Tensor>(&m_config, element_type, shape, "direct_data", nullptr);
}
std::shared_ptr<ngraph::runtime::Tensor> ngraph::runtime::plaidml::PlaidML_Backend::create_tensor(
const ngraph::element::Type& element_type, const Shape& shape, void* memory_pointer)
{
return std::make_shared<PlaidML_Tensor>(
&m_config, element_type, shape, "direct_data", memory_pointer);
}
bool ngraph::runtime::plaidml::PlaidML_Backend::compile(std::shared_ptr<Function> func)
{
m_cache.compile(func, &m_compiler);
return true;
}
bool ngraph::runtime::plaidml::PlaidML_Backend::call(
std::shared_ptr<Function> func,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs)
{
auto cfunc = m_cache.try_lookup(func);
if (!cfunc)
{
cfunc = m_compiler.compile(func);
}
cfunc->schedule_invocation(inputs, outputs);
return true;
}
void ngraph::runtime::plaidml::PlaidML_Backend::remove_compiled_function(
std::shared_ptr<Function> func)
{
m_cache.forget(func);
}
void ngraph::runtime::plaidml::PlaidML_Backend::save(std::shared_ptr<Function> func,
const std::string& filename,
plaidml_file_format format)
{
auto cfunc = m_cache.try_lookup(func);
if (!cfunc)
{
cfunc = m_compiler.compile(func);
}
cfunc->save(filename, format);
}
extern "C" const char* get_ngraph_version_string()
{
return NGRAPH_VERSION;
}
extern "C" ngraph::runtime::Backend* new_backend(const char* configuration_string)
{
return new ngraph::runtime::plaidml::PlaidML_Backend{configuration_string};
}
extern "C" void delete_backend(ngraph::runtime::Backend* backend)
{
delete backend;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <plaidml/plaidml++.h>
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/plaidml/plaidml_compilation_cache.hpp"
#include "ngraph/runtime/plaidml/plaidml_compiler.hpp"
#include "ngraph/runtime/plaidml/plaidml_config.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
class PlaidML_Backend;
}
}
}
// Implements the runtime::Backend interface for the PlaidML nGraph backend.
class ngraph::runtime::plaidml::PlaidML_Backend final : public runtime::Backend
{
public:
PlaidML_Backend(const char* configuration_string);
~PlaidML_Backend() final {}
std::shared_ptr<ngraph::runtime::Tensor>
create_tensor(const ngraph::element::Type& element_type, const Shape& shape) final;
std::shared_ptr<ngraph::runtime::Tensor> create_tensor(
const ngraph::element::Type& element_type, const Shape& shape, void* memory_pointer) final;
bool compile(std::shared_ptr<Function> func) final;
bool call(std::shared_ptr<Function> func,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) final;
void remove_compiled_function(std::shared_ptr<Function> func) final;
void save(std::shared_ptr<Function> func,
const std::string& filename,
plaidml_file_format format);
private:
Config m_config;
Compiler m_compiler;
CompilationCache m_cache;
};
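// Illustrative usage (a sketch, not part of this commit): driving this backend through the
// generic runtime::Backend interface, assuming it is registered under the name "PlaidML"
// and that the usual Backend::create() factory is available:
//
//   auto backend = ngraph::runtime::Backend::create("PlaidML");
//   auto a = backend->create_tensor(ngraph::element::f32, ngraph::Shape{2, 2});
//   auto b = backend->create_tensor(ngraph::element::f32, ngraph::Shape{2, 2});
//   auto r = backend->create_tensor(ngraph::element::f32, ngraph::Shape{2, 2});
//   backend->compile(func);           // func: a std::shared_ptr<ngraph::Function>
//   backend->call(func, {r}, {a, b}); // outputs first, then inputs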
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <plaidml/plaidml++.h>
#include <string>
#include <unordered_map>
#include "ngraph/function.hpp"
#include "ngraph/runtime/plaidml/plaidml_config.hpp"
#include "ngraph/runtime/tensor.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
struct Build;
class Compiler;
struct TensorInfo;
enum class TensorContents
{
DATA = 0,
LOGICAL = 1
};
}
}
}
// Holds information about a particular tensor.
struct ngraph::runtime::plaidml::TensorInfo final
{
TensorInfo(vertexai::plaidml::variable _var, TensorContents _contents)
: var{std::move(_var)}
, contents{_contents}
{
}
vertexai::plaidml::variable var;
TensorContents contents;
};
// Holds the intermediate state of a function compilation.
struct ngraph::runtime::plaidml::Build final
{
Config* config = nullptr;
Compiler* compiler = nullptr;
std::shared_ptr<Function> func;
std::unordered_map<descriptor::Tensor*, std::string> input_names;
std::unordered_map<descriptor::Tensor*, std::string> output_names;
vertexai::plaidml::compose composer;
std::unordered_map<descriptor::Tensor*, TensorInfo> bindings;
bool io_dim_override = false;
std::size_t io_dim_override_count = 0;
};
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <sstream>
#include <stdexcept>
#include <utility>
#include "ngraph/runtime/plaidml/plaidml_builder.hpp"
#include "ngraph/runtime/plaidml/plaidml_logger.hpp"
namespace vp = vertexai::plaidml;
ngraph::runtime::plaidml::builder::Function::Function(const std::string& name, bool debug)
: m_name{name}
, m_debug{debug}
{
}
std::string ngraph::runtime::plaidml::builder::Function::to_string() const
{
std::ostringstream s;
s << "function (";
bool first = true;
for (const auto& input : m_inputs)
{
if (!first)
{
s << ", ";
}
first = false;
s << input.m_name;
if (input.m_dims.size())
{
s << "[";
bool first_dim = true;
for (const auto& dim : input.m_dims)
{
if (!first_dim)
{
s << ", ";
}
first_dim = false;
s << dim;
}
s << "]";
}
}
s << ") -> (";
first = true;
for (const auto& output : m_outputs)
{
if (!first)
{
s << ", ";
}
first = false;
s << output.m_name;
}
s << ") {\n";
std::string name_annotation;
if (m_name.size())
{
name_annotation = "[[name(op" + m_name + ")]]\n ";
}
for (const std::unique_ptr<Statement>& stmt : m_stmts)
{
s << " " << name_annotation;
{
const TernaryContraction* tc = dynamic_cast<const TernaryContraction*>(stmt.get());
if (tc)
{
if (!tc->m_output || !tc->m_first || !tc->m_second || !tc->m_third)
{
throw std::logic_error{"Incomplete contraction"};
}
if (tc->m_output->m_indices.size() != tc->m_output->m_dims.size())
{
throw std::logic_error{"Contraction index count != dimension count"};
}
s << tc->m_output->m_name << "[";
first = true;
for (const auto& idx : tc->m_output->m_indices)
{
if (!first)
{
s << ", ";
}
first = false;
s << idx;
}
if (tc->m_output->m_indices.size())
{
s << " : ";
}
first = true;
for (const auto& dim : tc->m_output->m_dims)
{
if (!first)
{
s << ", ";
}
first = false;
s << dim;
}
s << "] = " << tc->m_agg_op << "(" << tc->m_first->m_name << "[";
first = true;
for (const auto& idx : tc->m_first->m_indices)
{
if (!first)
{
s << ", ";
}
first = false;
s << idx;
}
s << "] == " << tc->m_second->m_name << "[";
first = true;
for (const auto& idx : tc->m_second->m_indices)
{
if (!first)
{
s << ", ";
}
first = false;
s << idx;
}
s << "] " << tc->m_comb_op << " " << tc->m_third->m_name << "[";
first = true;
for (const auto& idx : tc->m_third->m_indices)
{
if (!first)
{
s << ", ";
}
first = false;
s << idx;
}
s << "])";
for (const auto& constraint : tc->m_constraints)
{
s << ", " << constraint;
}
s << ";\n";
continue;
}
const BinaryContraction* bc = dynamic_cast<const BinaryContraction*>(stmt.get());
if (bc)
{
if (!bc->m_output || !bc->m_lhs || !bc->m_rhs)
{
throw std::logic_error{"Incomplete contraction"};
}
if (bc->m_output->m_indices.size() != bc->m_output->m_dims.size())
{
throw std::logic_error{"Contraction index count != dimension count"};
}
s << bc->m_output->m_name << "[";
first = true;
for (const auto& idx : bc->m_output->m_indices)
{
if (!first)
{
s << ", ";
}
first = false;
s << idx;
}
if (bc->m_output->m_indices.size())
{
s << " : ";
}
first = true;
for (const auto& dim : bc->m_output->m_dims)
{
if (!first)
{
s << ", ";
}
first = false;
s << dim;
}
s << "] = " << bc->m_agg_op << "(" << bc->m_lhs->m_name << "[";
first = true;
for (const auto& idx : bc->m_lhs->m_indices)
{
if (!first)
{
s << ", ";
}
first = false;
s << idx;
}
s << "] " << bc->m_comb_op << " " << bc->m_rhs->m_name << "[";
first = true;
for (const auto& idx : bc->m_rhs->m_indices)
{
if (!first)
{
s << ", ";
}
first = false;
s << idx;
}
s << "])";
for (const auto& constraint : bc->m_constraints)
{
s << ", " << constraint;
}
s << ";\n";
if (bc->m_default.length())
{
s << " default " << bc->m_default;
}
continue;
}
}
{
const UnaryContraction* uc = dynamic_cast<const UnaryContraction*>(stmt.get());
if (uc)
{
if (!uc->m_output || !uc->m_input)
{
throw std::logic_error{"Incomplete contraction"};
}
if (uc->m_output->m_indices.size() != uc->m_output->m_dims.size())
{
throw std::logic_error{"Contraction index count != dimension count"};
}
s << uc->m_output->m_name << "[";
first = true;
for (const auto& idx : uc->m_output->m_indices)
{
if (!first)
{
s << ", ";
}
first = false;
s << idx;
}
if (uc->m_output->m_indices.size())
{
s << " : ";
}
first = true;
for (const auto& dim : uc->m_output->m_dims)
{
if (!first)
{
s << ", ";
}
first = false;
s << dim;
}
s << "] = " << uc->m_agg_op << "(" << uc->m_input->m_name << "[";
first = true;
for (const auto& idx : uc->m_input->m_indices)
{
if (!first)
{
s << ", ";
}
first = false;
s << idx;
}
s << "])";
for (const auto& constraint : uc->m_constraints)
{
s << ", " << constraint;
}
if (uc->m_default.length())
{
s << " default " << uc->m_default;
}
s << ";\n";
continue;
}
}
{
const Elementwise* e = dynamic_cast<const Elementwise*>(stmt.get());
if (e)
{
s << e->m_lhs << " = " << e->m_rhs << ";\n";
continue;
}
}
throw std::logic_error{"Failed to determine dynamic operation class"};
}
s << "}";
return s.str();
}
vp::application ngraph::runtime::plaidml::builder::Function::finalize() const
{
std::vector<vp::variable> params;
for (auto& input : m_inputs)
{
params.emplace_back(input.m_var);
}
auto str = to_string();
if (m_debug)
{
PLAIDML_DEBUG << "Built Tile code:\n" << str;
}
return vp::function{str}.apply(params);
}
ngraph::runtime::plaidml::builder::Function&
ngraph::runtime::plaidml::builder::Function::add(Input input) &
{
m_inputs.emplace_back(std::move(input));
return *this;
}
ngraph::runtime::plaidml::builder::Function&&
ngraph::runtime::plaidml::builder::Function::add(Input input) &&
{
m_inputs.emplace_back(std::move(input));
return std::move(*this);
}
ngraph::runtime::plaidml::builder::Function&
ngraph::runtime::plaidml::builder::Function::add(Output output) &
{
m_outputs.emplace_back(std::move(output));
return *this;
}
ngraph::runtime::plaidml::builder::Function&&
ngraph::runtime::plaidml::builder::Function::add(Output output) &&
{
m_outputs.emplace_back(std::move(output));
return std::move(*this);
}
ngraph::runtime::plaidml::builder::Function&
ngraph::runtime::plaidml::builder::Function::add(UnaryContraction contraction) &
{
m_stmts.emplace_back(
std::unique_ptr<UnaryContraction>{new UnaryContraction(std::move(contraction))});
return *this;
}
ngraph::runtime::plaidml::builder::Function&&
ngraph::runtime::plaidml::builder::Function::add(UnaryContraction contraction) &&
{
m_stmts.emplace_back(
std::unique_ptr<UnaryContraction>{new UnaryContraction(std::move(contraction))});
return std::move(*this);
}
ngraph::runtime::plaidml::builder::Function&
ngraph::runtime::plaidml::builder::Function::add(BinaryContraction contraction) &
{
m_stmts.emplace_back(
std::unique_ptr<BinaryContraction>{new BinaryContraction(std::move(contraction))});
return *this;
}
ngraph::runtime::plaidml::builder::Function&&
ngraph::runtime::plaidml::builder::Function::add(BinaryContraction contraction) &&
{
m_stmts.emplace_back(
std::unique_ptr<BinaryContraction>{new BinaryContraction(std::move(contraction))});
return std::move(*this);
}
ngraph::runtime::plaidml::builder::Function&
ngraph::runtime::plaidml::builder::Function::add(TernaryContraction contraction) &
{
m_stmts.emplace_back(
std::unique_ptr<TernaryContraction>{new TernaryContraction(std::move(contraction))});
return *this;
}
ngraph::runtime::plaidml::builder::Function&&
ngraph::runtime::plaidml::builder::Function::add(TernaryContraction contraction) &&
{
m_stmts.emplace_back(
std::unique_ptr<TernaryContraction>{new TernaryContraction(std::move(contraction))});
return std::move(*this);
}
ngraph::runtime::plaidml::builder::Function&
ngraph::runtime::plaidml::builder::Function::add(Elementwise elementwise) &
{
m_stmts.emplace_back(std::unique_ptr<Elementwise>{new Elementwise(std::move(elementwise))});
return *this;
}
ngraph::runtime::plaidml::builder::Function&&
ngraph::runtime::plaidml::builder::Function::add(Elementwise elementwise) &&
{
m_stmts.emplace_back(std::unique_ptr<Elementwise>{new Elementwise(std::move(elementwise))});
return std::move(*this);
}
ngraph::runtime::plaidml::builder::Input::Input(vp::variable var, std::string name)
: m_var{std::move(var)}
, m_name{std::move(name)}
{
}
ngraph::runtime::plaidml::builder::Input& ngraph::runtime::plaidml::builder::Input::add_dims(
std::string prefix, std::size_t first, std::size_t limit) &
{
for (std::size_t idx = first; idx < limit; ++idx)
{
m_dims.emplace_back(prefix + std::to_string(idx));
}
return *this;
}
ngraph::runtime::plaidml::builder::Input&& ngraph::runtime::plaidml::builder::Input::add_dims(
std::string prefix, std::size_t first, std::size_t limit) &&
{
for (std::size_t idx = first; idx < limit; ++idx)
{
m_dims.emplace_back(prefix + std::to_string(idx));
}
return std::move(*this);
}
ngraph::runtime::plaidml::builder::Input& ngraph::runtime::plaidml::builder::Input::add_rdims(
std::string prefix, std::size_t limit, std::size_t first) &
{
for (std::size_t idx = limit; first < idx;)
{
m_dims.emplace_back(prefix + std::to_string(--idx));
}
return *this;
}
ngraph::runtime::plaidml::builder::Input&& ngraph::runtime::plaidml::builder::Input::add_rdims(
std::string prefix, std::size_t limit, std::size_t first) &&
{
for (std::size_t idx = limit; first < idx;)
{
m_dims.emplace_back(prefix + std::to_string(--idx));
}
return std::move(*this);
}
ngraph::runtime::plaidml::builder::Input&
ngraph::runtime::plaidml::builder::Input::add_dims(std::initializer_list<std::string> s) &
{
m_dims.insert(m_dims.end(), s.begin(), s.end());
return *this;
}
ngraph::runtime::plaidml::builder::Input&&
ngraph::runtime::plaidml::builder::Input::add_dims(std::initializer_list<std::string> s) &&
{
m_dims.insert(m_dims.end(), s.begin(), s.end());
return std::move(*this);
}
ngraph::runtime::plaidml::builder::Output::Output(std::string name)
: m_name{std::move(name)}
{
}
ngraph::runtime::plaidml::builder::Elementwise::Elementwise(std::string lhs, std::string rhs)
: m_lhs{std::move(lhs)}
, m_rhs{std::move(rhs)}
{
}
ngraph::runtime::plaidml::builder::ContractionOutput::ContractionOutput(std::string name)
: m_name{std::move(name)}
{
}
ngraph::runtime::plaidml::builder::ContractionOutput&
ngraph::runtime::plaidml::builder::ContractionOutput::add_indices(std::string prefix,
std::size_t first,
std::size_t limit) &
{
for (std::size_t idx = first; idx < limit; ++idx)
{
m_indices.emplace_back(prefix + std::to_string(idx));
}
return *this;
}
ngraph::runtime::plaidml::builder::ContractionOutput&&
ngraph::runtime::plaidml::builder::ContractionOutput::add_indices(std::string prefix,
std::size_t first,
std::size_t limit) &&
{
for (std::size_t idx = first; idx < limit; ++idx)
{
m_indices.emplace_back(prefix + std::to_string(idx));
}
return std::move(*this);
}
ngraph::runtime::plaidml::builder::ContractionOutput&
ngraph::runtime::plaidml::builder::ContractionOutput::add_rindices(std::string prefix,
std::size_t limit,
std::size_t first) &
{
for (std::size_t idx = limit; first < idx;)
{
m_indices.emplace_back(prefix + std::to_string(--idx));
}
return *this;
}
ngraph::runtime::plaidml::builder::ContractionOutput&&
ngraph::runtime::plaidml::builder::ContractionOutput::add_rindices(std::string prefix,
std::size_t limit,
std::size_t first) &&
{
for (std::size_t idx = limit; first < idx;)
{
m_indices.emplace_back(prefix + std::to_string(--idx));
}
return std::move(*this);
}
ngraph::runtime::plaidml::builder::ContractionOutput&
ngraph::runtime::plaidml::builder::ContractionOutput::add_indices(
std::initializer_list<std::string> s) &
{
m_indices.insert(m_indices.end(), s.begin(), s.end());
return *this;
}
ngraph::runtime::plaidml::builder::ContractionOutput&&
ngraph::runtime::plaidml::builder::ContractionOutput::add_indices(
std::initializer_list<std::string> s) &&
{
m_indices.insert(m_indices.end(), s.begin(), s.end());
return std::move(*this);
}
ngraph::runtime::plaidml::builder::ContractionOutput&
ngraph::runtime::plaidml::builder::ContractionOutput::add_dims(std::string prefix,
std::size_t first,
std::size_t limit) &
{
for (std::size_t idx = first; idx < limit; ++idx)
{
m_dims.emplace_back(prefix + std::to_string(idx));
}
return *this;
}
ngraph::runtime::plaidml::builder::ContractionOutput&&
ngraph::runtime::plaidml::builder::ContractionOutput::add_dims(std::string prefix,
std::size_t first,
std::size_t limit) &&
{
for (std::size_t idx = first; idx < limit; ++idx)
{
m_dims.emplace_back(prefix + std::to_string(idx));
}
return std::move(*this);
}
ngraph::runtime::plaidml::builder::ContractionOutput&
ngraph::runtime::plaidml::builder::ContractionOutput::add_rdims(std::string prefix,
std::size_t limit,
std::size_t first) &
{
for (std::size_t idx = limit; first < idx;)
{
m_dims.emplace_back(prefix + std::to_string(--idx));
}
return *this;
}
ngraph::runtime::plaidml::builder::ContractionOutput&&
ngraph::runtime::plaidml::builder::ContractionOutput::add_rdims(std::string prefix,
std::size_t limit,
std::size_t first) &&
{
for (std::size_t idx = limit; first < idx;)
{
m_dims.emplace_back(prefix + std::to_string(--idx));
}
return std::move(*this);
}
ngraph::runtime::plaidml::builder::ContractionOutput&
ngraph::runtime::plaidml::builder::ContractionOutput::add_dims(
std::initializer_list<std::string> s) &
{
m_dims.insert(m_dims.end(), s.begin(), s.end());
return *this;
}
ngraph::runtime::plaidml::builder::ContractionOutput&&
ngraph::runtime::plaidml::builder::ContractionOutput::add_dims(
std::initializer_list<std::string> s) &&
{
m_dims.insert(m_dims.end(), s.begin(), s.end());
return std::move(*this);
}
ngraph::runtime::plaidml::builder::ContractionInput&
ngraph::runtime::plaidml::builder::ContractionInput::add_indices(std::string prefix,
std::size_t first,
std::size_t limit) &
{
for (std::size_t idx = first; idx < limit; ++idx)
{
m_indices.emplace_back(prefix + std::to_string(idx));
}
return *this;
}
ngraph::runtime::plaidml::builder::ContractionInput&&
ngraph::runtime::plaidml::builder::ContractionInput::add_indices(std::string prefix,
std::size_t first,
std::size_t limit) &&
{
for (std::size_t idx = first; idx < limit; ++idx)
{
m_indices.emplace_back(prefix + std::to_string(idx));
}
return std::move(*this);
}
ngraph::runtime::plaidml::builder::ContractionInput&
ngraph::runtime::plaidml::builder::ContractionInput::add_rindices(std::string prefix,
std::size_t limit,
std::size_t first) &
{
for (std::size_t idx = limit; first < idx;)
{
m_indices.emplace_back(prefix + std::to_string(--idx));
}
return *this;
}
ngraph::runtime::plaidml::builder::ContractionInput&&
ngraph::runtime::plaidml::builder::ContractionInput::add_rindices(std::string prefix,
std::size_t limit,
std::size_t first) &&
{
for (std::size_t idx = limit; first < idx;)
{
m_indices.emplace_back(prefix + std::to_string(--idx));
}
return std::move(*this);
}
ngraph::runtime::plaidml::builder::ContractionInput&
ngraph::runtime::plaidml::builder::ContractionInput::add_indices(
std::initializer_list<std::string> s) &
{
m_indices.insert(m_indices.end(), s.begin(), s.end());
return *this;
}
ngraph::runtime::plaidml::builder::ContractionInput&&
ngraph::runtime::plaidml::builder::ContractionInput::add_indices(
std::initializer_list<std::string> s) &&
{
m_indices.insert(m_indices.end(), s.begin(), s.end());
return std::move(*this);
}
ngraph::runtime::plaidml::builder::UnaryContraction::UnaryContraction(std::string agg_op)
: m_agg_op{std::move(agg_op)}
{
}
ngraph::runtime::plaidml::builder::UnaryContraction&
ngraph::runtime::plaidml::builder::UnaryContraction::set(ContractionInput input) &
{
m_input = std::unique_ptr<ContractionInput>{new ContractionInput(std::move(input))};
return *this;
}
ngraph::runtime::plaidml::builder::UnaryContraction&&
ngraph::runtime::plaidml::builder::UnaryContraction::set(ContractionInput input) &&
{
m_input = std::unique_ptr<ContractionInput>{new ContractionInput(std::move(input))};
return std::move(*this);
}
ngraph::runtime::plaidml::builder::UnaryContraction&
ngraph::runtime::plaidml::builder::UnaryContraction::set(ContractionOutput output) &
{
m_output = std::unique_ptr<ContractionOutput>{new ContractionOutput(std::move(output))};
return *this;
}
ngraph::runtime::plaidml::builder::UnaryContraction&&
ngraph::runtime::plaidml::builder::UnaryContraction::set(ContractionOutput output) &&
{
m_output = std::unique_ptr<ContractionOutput>{new ContractionOutput(std::move(output))};
return std::move(*this);
}
ngraph::runtime::plaidml::builder::UnaryContraction&
ngraph::runtime::plaidml::builder::UnaryContraction::set_default(std::string tensor) &
{
m_default = std::move(tensor);
return *this;
}
ngraph::runtime::plaidml::builder::UnaryContraction&&
ngraph::runtime::plaidml::builder::UnaryContraction::set_default(std::string tensor) &&
{
m_default = std::move(tensor);
return std::move(*this);
}
ngraph::runtime::plaidml::builder::BinaryContraction::BinaryContraction(std::string agg_op,
std::string comb_op)
: m_agg_op{std::move(agg_op)}
, m_comb_op{std::move(comb_op)}
{
}
ngraph::runtime::plaidml::builder::BinaryContraction&
ngraph::runtime::plaidml::builder::BinaryContraction::set_lhs(ContractionInput input) &
{
m_lhs = std::unique_ptr<ContractionInput>{new ContractionInput(std::move(input))};
return *this;
}
ngraph::runtime::plaidml::builder::BinaryContraction&&
ngraph::runtime::plaidml::builder::BinaryContraction::set_lhs(ContractionInput input) &&
{
m_lhs = std::unique_ptr<ContractionInput>{new ContractionInput(std::move(input))};
return std::move(*this);
}
ngraph::runtime::plaidml::builder::BinaryContraction&
ngraph::runtime::plaidml::builder::BinaryContraction::set_rhs(ContractionInput input) &
{
m_rhs = std::unique_ptr<ContractionInput>{new ContractionInput(std::move(input))};
return *this;
}
ngraph::runtime::plaidml::builder::BinaryContraction&&
ngraph::runtime::plaidml::builder::BinaryContraction::set_rhs(ContractionInput input) &&
{
m_rhs = std::unique_ptr<ContractionInput>{new ContractionInput(std::move(input))};
return std::move(*this);
}
ngraph::runtime::plaidml::builder::BinaryContraction&
ngraph::runtime::plaidml::builder::BinaryContraction::set(ContractionOutput output) &
{
m_output = std::unique_ptr<ContractionOutput>{new ContractionOutput(std::move(output))};
return *this;
}
ngraph::runtime::plaidml::builder::BinaryContraction&&
ngraph::runtime::plaidml::builder::BinaryContraction::set(ContractionOutput output) &&
{
m_output = std::unique_ptr<ContractionOutput>{new ContractionOutput(std::move(output))};
return std::move(*this);
}
ngraph::runtime::plaidml::builder::BinaryContraction&
ngraph::runtime::plaidml::builder::BinaryContraction::set_default(std::string tensor) &
{
m_default = std::move(tensor);
return *this;
}
ngraph::runtime::plaidml::builder::BinaryContraction&&
ngraph::runtime::plaidml::builder::BinaryContraction::set_default(std::string tensor) &&
{
m_default = std::move(tensor);
return std::move(*this);
}
ngraph::runtime::plaidml::builder::TernaryContraction::TernaryContraction(std::string agg_op,
std::string comb_op)
: m_agg_op{std::move(agg_op)}
, m_comb_op{std::move(comb_op)}
{
}
ngraph::runtime::plaidml::builder::TernaryContraction&
ngraph::runtime::plaidml::builder::TernaryContraction::set_first(ContractionInput input) &
{
m_first = std::unique_ptr<ContractionInput>{new ContractionInput(std::move(input))};
return *this;
}
ngraph::runtime::plaidml::builder::TernaryContraction&&
ngraph::runtime::plaidml::builder::TernaryContraction::set_first(ContractionInput input) &&
{
m_first = std::unique_ptr<ContractionInput>{new ContractionInput(std::move(input))};
return std::move(*this);
}
ngraph::runtime::plaidml::builder::TernaryContraction&
ngraph::runtime::plaidml::builder::TernaryContraction::set_second(ContractionInput input) &
{
m_second = std::unique_ptr<ContractionInput>{new ContractionInput(std::move(input))};
return *this;
}
ngraph::runtime::plaidml::builder::TernaryContraction&&
ngraph::runtime::plaidml::builder::TernaryContraction::set_second(ContractionInput input) &&
{
m_second = std::unique_ptr<ContractionInput>{new ContractionInput(std::move(input))};
return std::move(*this);
}
ngraph::runtime::plaidml::builder::TernaryContraction&
ngraph::runtime::plaidml::builder::TernaryContraction::set_third(ContractionInput input) &
{
m_third = std::unique_ptr<ContractionInput>{new ContractionInput(std::move(input))};
return *this;
}
ngraph::runtime::plaidml::builder::TernaryContraction&&
ngraph::runtime::plaidml::builder::TernaryContraction::set_third(ContractionInput input) &&
{
m_third = std::unique_ptr<ContractionInput>{new ContractionInput(std::move(input))};
return std::move(*this);
}
ngraph::runtime::plaidml::builder::TernaryContraction&
ngraph::runtime::plaidml::builder::TernaryContraction::set(ContractionOutput output) &
{
m_output = std::unique_ptr<ContractionOutput>{new ContractionOutput(std::move(output))};
return *this;
}
ngraph::runtime::plaidml::builder::TernaryContraction&&
ngraph::runtime::plaidml::builder::TernaryContraction::set(ContractionOutput output) &&
{
m_output = std::unique_ptr<ContractionOutput>{new ContractionOutput(std::move(output))};
return std::move(*this);
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <plaidml/plaidml++.h>
#include <list>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include "ngraph/runtime/plaidml/plaidml_config.hpp"
// Utilities for constructing PlaidML functions.
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
namespace builder
{
class BinaryContraction;
class ContractionInput;
class ContractionOutput;
class Elementwise;
class Function;
class Input;
class Output;
class Statement;
class TernaryContraction;
class UnaryContraction;
}
}
}
}
// Function provides a fluent interface for the construction of the text form of PlaidML functions.
// It's intended to be simpler to use and produce simpler code than using direct string
// construction.
//
// N.B. It's possible to capture the intermediate pieces as they're being added to a function
// (e.g. in order to directly code loops which call methods on them), but it's important to remember
// that what's returned are references, not objects; the caller is responsible for instantiating a
// Function instance and keeping it alive as long as there are any outstanding references to its
// constituent pieces.
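//
// A minimal sketch (illustrative, not code from this commit) of the fluent style, together
// with the approximate Tile text that Function::to_string() would emit for it (here `var`
// stands for a vertexai::plaidml::variable bound to the input tensor):
//
//   builder::Function f{"", /*debug=*/false};
//   f.add(builder::Input{var, "I"}.add_dims({"N0", "N1"}))
//       .add(builder::Output{"O"})
//       .add(builder::Elementwise{"O", "-I"});
//   // f.to_string() yields roughly:
//   //   function (I[N0, N1]) -> (O) {
//   //     O = -I;
//   //   }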
class ngraph::runtime::plaidml::builder::Function final
{
public:
Function(const std::string& name, bool debug);
Function(const Function&) = delete;
Function& operator=(const Function&) = delete;
Function(Function&&) = default;
Function& operator=(Function&&) = default;
// Builds the final string form of the function.
std::string to_string() const;
// Finalizes a function, transforming it into a PlaidML function application object.
vertexai::plaidml::application finalize() const;
// Adds an input to the function.
Function& add(Input input) &;
Function&& add(Input input) &&;
// Adds an output to the function.
Function& add(Output output) &;
Function&& add(Output output) &&;
// Adds a contraction to the function.
Function& add(TernaryContraction contraction) &;
Function&& add(TernaryContraction contraction) &&;
Function& add(BinaryContraction contraction) &;
Function&& add(BinaryContraction contraction) &&;
Function& add(UnaryContraction contraction) &;
Function&& add(UnaryContraction contraction) &&;
// Adds an elementwise mapping to the function.
Function& add(Elementwise elementwise) &;
Function&& add(Elementwise elementwise) &&;
private:
std::string m_name;
bool m_debug;
std::list<Input> m_inputs;
std::list<Output> m_outputs;
std::list<std::unique_ptr<Statement>> m_stmts;
};
// Input represents an input being added to a function.
class ngraph::runtime::plaidml::builder::Input final
{
public:
Input(vertexai::plaidml::variable var, std::string name);
// Adds a list of dimensions to the input, [first..limit).
Input& add_dims(std::string prefix, std::size_t first, std::size_t limit) &;
Input&& add_dims(std::string prefix, std::size_t first, std::size_t limit) &&;
// Adds a list of dimensions to the input, [first..limit), in reverse order.
Input& add_rdims(std::string prefix, std::size_t limit, std::size_t first) &;
Input&& add_rdims(std::string prefix, std::size_t limit, std::size_t first) &&;
// Adds a fixed list of dimensions to the input.
Input& add_dims(std::initializer_list<std::string> s) &;
Input&& add_dims(std::initializer_list<std::string> s) &&;
// Adds dimensions by passing an insert iterator to a lambda.
template <typename L>
Input& add_dims(L lambda) &
{
lambda(std::back_inserter(m_dims));
return *this;
}
template <typename L>
Input&& add_dims(L lambda) &&
{
lambda(std::back_inserter(m_dims));
return std::move(*this);
}
private:
friend class Function;
vertexai::plaidml::variable m_var;
std::string m_name;
std::list<std::string> m_dims;
};
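// A brief sketch (an assumption, not from this commit) of the lambda-based add_dims()
// overload above: the lambda receives a back-insert iterator into the dimension list, so
// dimension names can be generated programmatically:
//
//   input.add_dims([](std::back_insert_iterator<std::list<std::string>> out) {
//       for (std::size_t i = 0; i < 3; ++i)
//       {
//           *out++ = "D" + std::to_string(i);
//       }
//   });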
// Output represents an output being added to a function.
class ngraph::runtime::plaidml::builder::Output final
{
public:
Output(std::string name);
private:
friend class Function;
std::string m_name;
};
// Statement is the abstract base class for UnaryContraction, BinaryContraction,
// TernaryContraction, and Elementwise objects.
class ngraph::runtime::plaidml::builder::Statement
{
public:
virtual ~Statement() = default;
protected:
Statement() = default;
Statement(const Statement&) = default;
Statement(Statement&&) = default;
};
// Elementwise represents an elementwise mapping being added to a function.
class ngraph::runtime::plaidml::builder::Elementwise final : public Statement
{
public:
Elementwise(std::string lhs, std::string rhs);
private:
friend class Function;
std::string m_lhs;
std::string m_rhs;
};
// The output of a contraction
class ngraph::runtime::plaidml::builder::ContractionOutput final
{
public:
explicit ContractionOutput(std::string name);
ContractionOutput& add_indices(std::string prefix, std::size_t first, std::size_t limit) &;
ContractionOutput&& add_indices(std::string prefix, std::size_t first, std::size_t limit) &&;
ContractionOutput& add_rindices(std::string prefix, std::size_t limit, std::size_t first) &;
ContractionOutput&& add_rindices(std::string prefix, std::size_t limit, std::size_t first) &&;
ContractionOutput& add_indices(std::initializer_list<std::string> s) &;
ContractionOutput&& add_indices(std::initializer_list<std::string> s) &&;
template <typename L>
ContractionOutput& add_indices(L lambda) &
{
lambda(std::back_inserter(m_indices));
return *this;
}
template <typename L>
ContractionOutput&& add_indices(L lambda) &&
{
lambda(std::back_inserter(m_indices));
return std::move(*this);
}
ContractionOutput& add_dims(std::string prefix, std::size_t first, std::size_t limit) &;
ContractionOutput&& add_dims(std::string prefix, std::size_t first, std::size_t limit) &&;
ContractionOutput& add_rdims(std::string prefix, std::size_t limit, std::size_t first) &;
ContractionOutput&& add_rdims(std::string prefix, std::size_t limit, std::size_t first) &&;
ContractionOutput& add_dims(std::initializer_list<std::string> s) &;
ContractionOutput&& add_dims(std::initializer_list<std::string> s) &&;
template <typename L>
ContractionOutput& add_dims(L lambda) &
{
lambda(std::back_inserter(m_dims));
return *this;
}
template <typename L>
ContractionOutput&& add_dims(L lambda) &&
{
lambda(std::back_inserter(m_dims));
return std::move(*this);
}
private:
friend class Function;
std::string m_name;
std::list<std::string> m_indices;
std::list<std::string> m_dims;
};
// An input to a contraction
class ngraph::runtime::plaidml::builder::ContractionInput final
{
public:
explicit ContractionInput(std::string name)
: m_name{std::move(name)}
{
}
ContractionInput& add_indices(std::string prefix, std::size_t first, std::size_t limit) &;
ContractionInput&& add_indices(std::string prefix, std::size_t first, std::size_t limit) &&;
ContractionInput& add_rindices(std::string prefix, std::size_t limit, std::size_t first) &;
ContractionInput&& add_rindices(std::string prefix, std::size_t limit, std::size_t first) &&;
ContractionInput& add_indices(std::initializer_list<std::string> s) &;
ContractionInput&& add_indices(std::initializer_list<std::string> s) &&;
template <typename L>
ContractionInput& add_indices(L lambda) &
{
lambda(std::back_inserter(m_indices));
return *this;
}
template <typename L>
ContractionInput&& add_indices(L lambda) &&
{
lambda(std::back_inserter(m_indices));
return std::move(*this);
}
private:
friend class Function;
std::string m_name;
std::list<std::string> m_indices;
};
// UnaryContraction represents a unary contraction being added to a function.
class ngraph::runtime::plaidml::builder::UnaryContraction final : public Statement
{
public:
explicit UnaryContraction(std::string agg_op);
UnaryContraction& set(ContractionInput input) &;
UnaryContraction&& set(ContractionInput input) &&;
UnaryContraction& set(ContractionOutput output) &;
UnaryContraction&& set(ContractionOutput output) &&;
UnaryContraction& set_default(std::string tensor) &;
UnaryContraction&& set_default(std::string tensor) &&;
template <typename L>
UnaryContraction& add_constraints(L lambda) &
{
lambda(std::back_inserter(m_constraints));
return *this;
}
template <typename L>
UnaryContraction&& add_constraints(L lambda) &&
{
lambda(std::back_inserter(m_constraints));
return std::move(*this);
}
private:
friend class Function;
std::string m_agg_op;
std::list<std::string> m_constraints;
std::unique_ptr<ContractionOutput> m_output;
std::unique_ptr<ContractionInput> m_input;
std::string m_default;
};
// BinaryContraction represents a binary contraction being added to a function.
class ngraph::runtime::plaidml::builder::BinaryContraction final : public Statement
{
public:
BinaryContraction(std::string agg_op, std::string comb_op);
BinaryContraction& set_lhs(ContractionInput input) &;
BinaryContraction&& set_lhs(ContractionInput input) &&;
BinaryContraction& set_rhs(ContractionInput input) &;
BinaryContraction&& set_rhs(ContractionInput input) &&;
BinaryContraction& set(ContractionOutput output) &;
BinaryContraction&& set(ContractionOutput output) &&;
BinaryContraction& set_default(std::string tensor) &;
BinaryContraction&& set_default(std::string tensor) &&;
template <typename L>
BinaryContraction& add_constraints(L lambda) &
{
lambda(std::back_inserter(m_constraints));
return *this;
}
template <typename L>
BinaryContraction&& add_constraints(L lambda) &&
{
lambda(std::back_inserter(m_constraints));
return std::move(*this);
}
private:
friend class Function;
std::string m_agg_op;
std::string m_comb_op;
std::list<std::string> m_constraints;
std::unique_ptr<ContractionOutput> m_output;
std::unique_ptr<ContractionInput> m_lhs;
std::unique_ptr<ContractionInput> m_rhs;
std::string m_default;
};
// TernaryContraction represents a ternary contraction being added to a function
class ngraph::runtime::plaidml::builder::TernaryContraction final : public Statement
{
public:
TernaryContraction(std::string agg_op, std::string comb_op);
TernaryContraction& set_first(ContractionInput input) &;
TernaryContraction&& set_first(ContractionInput input) &&;
TernaryContraction& set_second(ContractionInput input) &;
TernaryContraction&& set_second(ContractionInput input) &&;
TernaryContraction& set_third(ContractionInput input) &;
TernaryContraction&& set_third(ContractionInput input) &&;
TernaryContraction& set(ContractionOutput output) &;
TernaryContraction&& set(ContractionOutput output) &&;
template <typename L>
TernaryContraction& add_constraints(L lambda) &
{
lambda(std::back_inserter(m_constraints));
return *this;
}
template <typename L>
TernaryContraction&& add_constraints(L lambda) &&
{
lambda(std::back_inserter(m_constraints));
return std::move(*this);
}
private:
friend class Function;
std::string m_agg_op;
std::string m_comb_op;
std::list<std::string> m_constraints;
std::unique_ptr<ContractionOutput> m_output;
std::unique_ptr<ContractionInput> m_first;
std::unique_ptr<ContractionInput> m_second;
std::unique_ptr<ContractionInput> m_third;
};
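// Illustrative sketch (not code from this commit): a unary sum contraction that reduces the
// second axis of a rank-2 input, and the Tile statement Function::to_string() would emit
// for it:
//
//   builder::UnaryContraction{"+"}
//       .set(builder::ContractionOutput{"O"}.add_indices({"i"}).add_dims({"N0"}))
//       .set(builder::ContractionInput{"I"}.add_indices({"i", "j"}));
//   // Emitted as:  O[i : N0] = +(I[i, j]);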
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/plaidml/plaidml_compilation_cache.hpp"
std::shared_ptr<ngraph::runtime::plaidml::CompiledFunction>
ngraph::runtime::plaidml::CompilationCache::try_lookup(std::shared_ptr<Function> func)
{
std::lock_guard<std::mutex> lock{m_mu};
auto it = m_cache.find(func);
if (it != m_cache.end())
{
return it->second;
}
return std::shared_ptr<CompiledFunction>{};
}
std::shared_ptr<ngraph::runtime::plaidml::CompiledFunction>
ngraph::runtime::plaidml::CompilationCache::compile(std::shared_ptr<Function> func,
Compiler* compiler)
{
std::lock_guard<std::mutex> lock{m_mu};
auto it_inserted = m_cache.insert(std::make_pair(func, std::shared_ptr<CompiledFunction>{}));
if (it_inserted.second)
{
try
{
it_inserted.first->second = compiler->compile(func);
}
catch (...)
{
m_cache.erase(it_inserted.first);
throw;
}
}
return it_inserted.first->second;
}
void ngraph::runtime::plaidml::CompilationCache::forget(std::shared_ptr<Function> func)
{
std::lock_guard<std::mutex> lock{m_mu};
m_cache.erase(func);
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <memory>
#include <mutex>
#include <unordered_map>
#include "ngraph/function.hpp"
#include "ngraph/runtime/plaidml/plaidml_compiled_function.hpp"
#include "ngraph/runtime/plaidml/plaidml_compiler.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
class CompilationCache;
}
}
}
// A cache of compiled functions.
class ngraph::runtime::plaidml::CompilationCache final
{
public:
// Looks up the supplied function in the compilation cache. If the function is not in the
// cache, returns an empty pointer.
std::shared_ptr<CompiledFunction> try_lookup(std::shared_ptr<Function> func);
// Looks up the supplied function in the compilation cache. If the function is not in the
// cache, compiles it using the specified compiler (which must not be nullptr), adds the
// compiled function to the cache, and returns the compiled function.
std::shared_ptr<CompiledFunction> compile(std::shared_ptr<Function> func, Compiler* compiler);
// Drops the supplied function's compiled function from the compilation cache.
void forget(std::shared_ptr<Function> func);
private:
std::mutex m_mu;
std::unordered_map<std::shared_ptr<Function>, std::shared_ptr<CompiledFunction>> m_cache;
};
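// Typical use (a sketch mirroring PlaidML_Backend::call() earlier in this commit): consult
// the cache first, and fall back to compiling directly when the function is not yet cached:
//
//   auto cfunc = m_cache.try_lookup(func);
//   if (!cfunc)
//   {
//       cfunc = m_compiler.compile(func);
//   }
//   cfunc->schedule_invocation(inputs, outputs);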
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <utility>
#include "ngraph/log.hpp"
#include "ngraph/runtime/plaidml/plaidml_build.hpp"
#include "ngraph/runtime/plaidml/plaidml_compiled_function.hpp"
#include "ngraph/runtime/plaidml/plaidml_tensor.hpp"
#include "ngraph/runtime/plaidml/plaidml_translate.hpp"
namespace vp = vertexai::plaidml;
ngraph::runtime::plaidml::CompiledFunction::CompiledFunction(Build build)
: m_config{build.config}
, m_func{std::move(build.func)}
, m_input_names{std::move(build.input_names)}
, m_output_names{std::move(build.output_names)}
, m_invoker{build.config->ctx, std::move(build.composer)}
{
NGRAPH_DEBUG << "Compiled PlaidML function " << this;
}
bool ngraph::runtime::plaidml::CompiledFunction::schedule_invocation(
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs) const
{
std::lock_guard<std::mutex> lock{m_mu};
NGRAPH_DEBUG << "Binding PlaidML function " << this;
std::size_t input_count = 0;
for (const auto& param : m_func->get_parameters())
{
for (std::size_t idx = 0; idx < param->get_output_size(); ++idx)
{
descriptor::Tensor* tv = param->get_output_tensor_ptr(idx).get();
auto rtv = dynamic_cast<PlaidML_Tensor*>(inputs[input_count++].get());
if (!rtv)
{
throw std::runtime_error{
"The PlaidML backend only operations on PlaidML tensor views"};
}
rtv->sync_input();
NGRAPH_DEBUG << "Binding input " << m_input_names.at(tv) << " to tensor " << rtv;
m_invoker.set_input(m_input_names.at(tv), rtv->tensor());
}
}
std::size_t output_count = 0;
for (const auto& result : m_func->get_results())
{
for (std::size_t idx = 0; idx < result->get_output_size(); ++idx)
{
descriptor::Tensor* tv = result->get_output_tensor_ptr(idx).get();
auto rtv = dynamic_cast<PlaidML_Tensor*>(outputs[output_count++].get());
if (!rtv)
{
throw std::runtime_error{
"The PlaidML backend only operations on PlaidML tensor views"};
}
NGRAPH_DEBUG << "Binding output " << m_output_names.at(tv) << " to tensor " << rtv;
m_invoker.set_output(m_output_names.at(tv), rtv->tensor());
}
}
NGRAPH_DEBUG << "Invoking PlaidML function " << this;
m_invoker.invoke();
m_bound = true;
output_count = 0;
for (const auto& result : m_func->get_results())
{
for (std::size_t idx = 0; idx < result->get_output_size(); ++idx)
{
auto rtv = dynamic_cast<PlaidML_Tensor*>(outputs[output_count++].get());
if (!rtv)
{
throw std::runtime_error{
"The PlaidML backend only operations on PlaidML tensor views"};
}
rtv->sync_output();
}
}
return true;
}
void ngraph::runtime::plaidml::CompiledFunction::save(const std::string& filename,
plaidml_file_format format) const
{
std::lock_guard<std::mutex> lock{m_mu};
if (!m_bound)
{
for (const auto& param : m_func->get_parameters())
{
for (std::size_t idx = 0; idx < param->get_output_size(); ++idx)
{
descriptor::Tensor* tv = param->get_output_tensor_ptr(idx).get();
auto tensor = m_config->dev->allocate(
to_plaidml(m_config->ctx, tv->get_element_type(), tv->get_shape()));
m_invoker.set_input(m_input_names.at(tv), tensor);
}
}
for (const auto& result : m_func->get_results())
{
for (std::size_t idx = 0; idx < result->get_output_size(); ++idx)
{
descriptor::Tensor* tv = result->get_output_tensor_ptr(idx).get();
auto tensor = m_config->dev->allocate(
to_plaidml(m_config->ctx, tv->get_element_type(), tv->get_shape()));
m_invoker.set_output(m_output_names.at(tv), tensor);
}
}
m_bound = true;
}
m_invoker.save(filename, format);
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>
#include <plaidml/plaidml++.h>
#include "ngraph/function.hpp"
#include "ngraph/runtime/plaidml/plaidml_config.hpp"
#include "ngraph/runtime/tensor.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
struct Build;
class CompiledFunction;
}
}
}
// A PlaidML compiled function object produced by compiling an nGraph function.
class ngraph::runtime::plaidml::CompiledFunction final
{
public:
CompiledFunction(Build build);
bool schedule_invocation(const std::vector<std::shared_ptr<runtime::Tensor>>& inputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs) const;
void save(const std::string& filename, plaidml_file_format format) const;
private:
mutable std::mutex m_mu; // Locks the invoker while scheduling invocations.
mutable bool m_bound = false;
Config* m_config;
std::shared_ptr<Function> m_func;
std::unordered_map<descriptor::Tensor*, std::string> m_input_names;
std::unordered_map<descriptor::Tensor*, std::string> m_output_names;
mutable vertexai::plaidml::invoker m_invoker;
};
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/plaidml/plaidml_compiler.hpp"
#include "ngraph/log.hpp"
#include "ngraph/pass/algebraic_simplification.hpp"
#include "ngraph/pass/core_fusion.hpp"
#include "ngraph/pass/cse.hpp"
#include "ngraph/pass/get_output_element_elimination.hpp"
#include "ngraph/pass/like_replacement.hpp"
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/nop_elimination.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
#include "ngraph/runtime/plaidml/plaidml_logger.hpp"
namespace
{
void write_debug(const ngraph::Node& op)
{
PLAIDML_DEBUG << "Node: name=\"" << op.get_name() << "\" desc=\"" << op.description()
<< "\"";
for (const auto& op_input : op.get_inputs())
{
ngraph::descriptor::Tensor* tensor = op_input.get_output().get_tensor_ptr().get();
PLAIDML_DEBUG << "Input: descriptor::Tensor " << tensor << " "
<< op.get_input_shape(op_input.get_index());
}
for (std::size_t out_idx = 0; out_idx < op.get_output_size(); ++out_idx)
{
ngraph::descriptor::Tensor* tensor = op.get_output_tensor_ptr(out_idx).get();
PLAIDML_DEBUG << "Output: descriptor::Tensor " << tensor << " "
<< op.get_output_shape(out_idx);
}
for (auto* t : op.liveness_new_list)
{
PLAIDML_DEBUG << "New tensor: " << t;
}
for (auto* t : op.liveness_free_list)
{
PLAIDML_DEBUG << "Retire tensor: " << t;
}
}
}
ngraph::runtime::plaidml::Compiler::Compiler(Config* config)
: m_config{config}
{
// We apply the same general-purpose passes as the CPU backend.
m_pass_manager.register_pass<ngraph::pass::LikeReplacement>();
m_pass_manager.register_pass<ngraph::pass::NopElimination>();
m_pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
m_pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>();
m_pass_manager.register_pass<ngraph::pass::CoreFusion>();
// N.B. We'd like to register ngraph::pass::GetOutputElementElimination, but it breaks BatchNorm
// backprop
m_pass_manager.register_pass<ngraph::pass::Liveness>();
}
std::shared_ptr<ngraph::runtime::plaidml::CompiledFunction>
ngraph::runtime::plaidml::Compiler::compile(std::shared_ptr<Function> func)
{
m_pass_manager.run_passes(func);
Build b;
build(std::move(func), &b);
return std::make_shared<CompiledFunction>(std::move(b));
}
void ngraph::runtime::plaidml::Compiler::build(std::shared_ptr<Function> func, Build* b)
{
b->compiler = this;
b->config = m_config;
b->func = func;
const auto* op_map = OpImplMap();
for (const auto& op_ptr : func->get_ordered_ops())
{
const ngraph::Node* op = op_ptr.get();
if (m_config->debug)
{
write_debug(*op);
}
auto it = op_map->find(std::type_index(typeid(*op)));
if (it == op_map->end())
{
throw unsupported_op{
std::string{"The PlaidML backend doesn't currently implement the '"} +
op->description() + "' operation"};
}
it->second(b, *op);
}
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <memory>
#include <plaidml/plaidml++.h>
#include "ngraph/function.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/runtime/plaidml/plaidml_compiled_function.hpp"
#include "ngraph/runtime/plaidml/plaidml_config.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
struct Build;
class Compiler;
}
}
}
// Compiles nGraph operation graphs (functions).
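//
// Typical usage is a two-step compile-and-invoke flow (a sketch; assumes a Config* obtained
// from parse_config_string()):
//
//   Compiler compiler{config};
//   std::shared_ptr<CompiledFunction> cfunc = compiler.compile(func);
//   cfunc->schedule_invocation(inputs, outputs);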
class ngraph::runtime::plaidml::Compiler final
{
public:
Compiler(Config* config);
std::shared_ptr<CompiledFunction> compile(std::shared_ptr<Function> func);
void build(std::shared_ptr<Function> func, Build* build);
private:
Config* m_config;
ngraph::pass::Manager m_pass_manager;
};
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <sstream>
#include "ngraph/log.hpp"
#include "ngraph/runtime/plaidml/plaidml_config.hpp"
#include "ngraph/runtime/plaidml/plaidml_logger.hpp"
namespace v = vertexai;
namespace vp = vertexai::plaidml;
extern "C" void vai_internal_set_vlog(std::size_t num);
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
namespace
{
vp::device get_device(const std::shared_ptr<vertexai::ctx>& ctx,
std::size_t device_idx)
{
auto dev_configs = vp::enumerate_devices(ctx);
if (!dev_configs.size())
{
throw std::runtime_error{"Unable to find any PlaidML devices"};
}
if (dev_configs.size() <= device_idx)
{
throw std::runtime_error{"Device index out of range"};
}
return dev_configs[device_idx].open();
}
void list_devices(const std::shared_ptr<vertexai::ctx>& ctx)
{
auto dev_configs = vp::enumerate_devices(ctx);
if (!dev_configs.size())
{
NGRAPH_WARN << "No PlaidML devices found";
return;
}
NGRAPH_INFO << "PlaidML Devices:";
for (std::size_t idx = 0; idx < dev_configs.size(); ++idx)
{
const auto& config = dev_configs[idx];
NGRAPH_INFO << "\t" << idx << ": " << config.id() << ": "
<< config.description();
}
}
}
}
}
}
ngraph::runtime::plaidml::Config
ngraph::runtime::plaidml::parse_config_string(const char* configuration_string)
{
bool err = false;
bool help = false;
bool list = false;
bool debug = false;
std::size_t device_idx = 0;
std::string eventlog_config;
#ifdef NGRAPH_DEBUG_ENABLE
debug = true;
#endif
// To visualize what's going on here, here's a configuration string fragment:
//
// ,option_name=option_value,
// ^ ^^ ^
// oname_begin || |
// oname_end| |
// oval_begin |
// oval_end
//
// When there is no option value, here's where the pointers go:
//
// ,option_name,
// ^ ^
// oname_begin |
// oname_end
// oval_begin
// oval_end
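//
// For example (illustrative values only), "PlaidML:0,debug,eventlog=/tmp/plaidml.log"
// selects device 0, enables debug logging, and records an event log to /tmp/plaidml.log.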
const char* c = configuration_string;
while (*c && *c != ':')
{
++c;
}
// Before the options, we have an optional device index.
if (*c)
{
char* dev_end;
std::size_t explicit_idx = std::strtoul(c + 1, &dev_end, 10);
if (dev_end != c + 1)
{
device_idx = explicit_idx;
c = dev_end;
}
}
while (*c)
{
// Invariant: c points to the character introducing the current option.
const char* oname_begin = c + 1;
// Invariant: oname_begin points to the first character of the option name.
const char* oname_end = oname_begin;
while (*oname_end && *oname_end != '=' && *oname_end != ',')
{
++oname_end;
}
// Invariant: [oname_begin, oname_end) is the option name.
const char* oval_begin = oname_end;
if (*oval_begin == '=')
{
++oval_begin;
}
const char* oval_end = oval_begin;
while (*oval_end && *oval_end != ',')
{
++oval_end;
}
// Invariant: [oval_begin, oval_end) is the option value.
// Re-establish initial invariant, allowing "continue" to resume the loop.
c = oval_end;
// Readability definitions
auto is_opt = [=](const char* opt) {
auto len = strlen(opt);
return (oname_end - oname_begin == len) && !strncmp(oname_begin, opt, len);
};
auto oval_len = oval_end - oval_begin;
bool has_oval = oval_begin != oname_end;
// N.B. oval_len != 0 => has_oval, but there's no other relationship.
// So to verify that there is a non-zero-length option value, test oval_len;
// to verify that there is no option value, test has_oval.
// Check for verbosity
if (is_opt("v"))
{
if (!oval_len)
{
throw std::invalid_argument{"PlaidML verbosity level requires a value"};
}
char* val_end;
std::size_t vlog = std::strtoul(oval_begin, &val_end, 10);
if (oval_end != val_end)
{
throw std::invalid_argument{"Invalid PlaidML verbosity level"};
}
debug = true;
vai_internal_set_vlog(vlog);
continue;
}
// Check for help
if (is_opt("help"))
{
help = true;
continue;
}
// Check for PlaidML debugging
if (is_opt("debug"))
{
debug = true;
continue;
}
// Check for list_devices
if (is_opt("list_devices"))
{
if (has_oval)
{
throw std::invalid_argument{"PlaidML list_devices does not take a value"};
}
list = true;
continue;
}
// Check for eventlog
if (is_opt("eventlog"))
{
if (!oval_len)
{
throw std::invalid_argument{"PlaidML eventlog requires a value"};
}
std::ostringstream e;
e << "{\"@type\": "
"\"type.vertex.ai/vertexai.eventing.file.proto.EventLog\", "
"\"filename\": \"";
for (const char* oc = oval_begin; oc < oval_end; ++oc)
{
if (!isalnum(*oc))
{
e << '\\';
}
e << *oc;
}
e << "\"}";
eventlog_config = e.str();
continue;
}
// Reject unknown options
err = true;
}
constexpr char help_text[] =
"PlaidML Backend Specification: \""
"PlaidML[:[device_index][,debug][,help][,list_devices][,"
"eventlog=<filename>]]\". For example: \"PlaidML\", \""
"PlaidML:0,list_devices\"";
if (err)
{
NGRAPH_ERR << help_text;
throw std::invalid_argument{"Invalid parameter supplied to PlaidML backend"};
}
if (help)
{
NGRAPH_INFO << help_text;
}
// Ensure process-level logging callbacks are in place.
configure_plaidml_logger(debug);
// Build the PlaidML configuration.
Config result;
result.ctx = std::make_shared<vertexai::ctx>();
if (eventlog_config.length())
{
v::vai_exception::check_and_throw(
vai_set_eventlog(result.ctx->get_ctx(), eventlog_config.c_str()));
}
if (list)
{
list_devices(result.ctx);
}
result.dev = std::make_shared<vertexai::plaidml::device>(get_device(result.ctx, device_idx));
result.debug = debug;
return result;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <plaidml/plaidml++.h>
#include <memory>
#include <string>
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
struct Config;
Config parse_config_string(const char* configuration_string);
}
}
}
struct ngraph::runtime::plaidml::Config
{
std::shared_ptr<vertexai::ctx> ctx;
std::shared_ptr<vertexai::plaidml::device> dev;
bool debug;
};
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/plaidml/plaidml_convpool_formatter.hpp"
ngraph::runtime::plaidml::ConvPoolFormatter::ConvPoolFormatter(
std::size_t rank,
const ngraph::CoordinateDiff& pad_below,
const ngraph::CoordinateDiff& pad_above,
const ngraph::Strides& strides,
const ngraph::Strides& filter_dilation,
const ngraph::Strides& data_dilation,
ConvPoolFormatter::OpType op,
ConvPoolFormatter::DerivType deriv,
const ngraph::Shape& deriv_output_shape)
: m_rank{rank}
, m_pad_below{pad_below}
, m_pad_above{pad_above}
, m_strides{strides}
, m_filter_dilation{filter_dilation}
, m_data_dilation{data_dilation}
, m_op{op}
, m_deriv{deriv}
{
m_window_shape = Shape(rank, 0); // Not used for convolutions
if (m_op != OpType::Conv)
{
throw std::runtime_error{"Using conv-style ctor for pool"};
}
if (m_pad_below.size() != rank)
{
std::ostringstream msg;
msg << "Rank mismatch in pad_below ";
msg << "(expected length " << rank << " to match rank " << rank;
msg << " but received length " << m_pad_below.size() << ")";
throw std::runtime_error{msg.str()};
}
if (m_pad_above.size() != rank)
{
std::ostringstream msg;
msg << "Rank mismatch in pad_above ";
msg << "(expected length " << rank << " to match rank " << rank;
msg << " but received length " << m_pad_above.size() << ")";
throw std::runtime_error{msg.str()};
}
if (m_strides.size() != rank)
{
std::ostringstream msg;
msg << "Rank mismatch in strides ";
msg << "(expected length " << rank << " to match rank " << rank;
msg << " but received length " << m_strides.size() << ")";
throw std::runtime_error{msg.str()};
}
if (m_filter_dilation.size() != rank)
{
std::ostringstream msg;
msg << "Rank mismatch in filter dilation ";
msg << "(expected length " << rank << " to match rank " << rank;
msg << " but received length " << m_filter_dilation.size() << ")";
throw std::runtime_error{msg.str()};
}
if (m_data_dilation.size() != rank)
{
std::ostringstream msg;
msg << "Rank mismatch in data dilation ";
msg << "(expected length " << rank << " to match rank " << rank;
msg << " but received length " << m_data_dilation.size() << ")";
throw std::runtime_error{msg.str()};
}
if (m_deriv == DerivType::None && !deriv_output_shape.empty())
{
throw std::runtime_error{"Forward pass given derivative shape"};
}
if (m_deriv == DerivType::Filter)
{
m_filters_shape = deriv_output_shape;
if (m_filters_shape.size() != rank + 2)
{
std::ostringstream msg;
msg << "Rank mismatch in filter shape ";
msg << "(expected length " << rank + 2 << " to match rank " << rank;
msg << " but received length " << m_filters_shape.size() << ")";
throw std::runtime_error{msg.str()};
}
}
if (m_deriv == DerivType::Data)
{
m_data_batch_shape = deriv_output_shape;
if (m_data_batch_shape.size() != rank + 2)
{
std::ostringstream msg;
msg << "Rank mismatch in data batch shape ";
msg << "(expected length " << rank + 2 << " to match rank " << rank;
msg << " but received length " << m_data_batch_shape.size() << ")";
throw std::runtime_error{msg.str()};
}
}
}
ngraph::runtime::plaidml::ConvPoolFormatter::ConvPoolFormatter(
std::size_t rank,
const ngraph::CoordinateDiff& pad_below,
const ngraph::CoordinateDiff& pad_above,
const ngraph::Strides& strides,
const ngraph::Shape& window_shape,
ConvPoolFormatter::OpType op,
ConvPoolFormatter::DerivType deriv)
: m_rank{rank}
, m_pad_below{pad_below}
, m_pad_above{pad_above}
, m_strides{strides}
, m_window_shape{window_shape}
, m_op{op}
, m_deriv{deriv}
{
m_filter_dilation = ngraph::Strides(rank, 1); // Not used for pools
m_data_dilation = ngraph::Strides(rank, 1); // Not used for pools
if (m_op == OpType::Conv)
{
throw std::runtime_error{"Using pool-style ctor for conv"};
}
if (m_deriv == DerivType::Filter)
{
throw std::runtime_error{"Asking for filter deriv for pool"};
}
if (m_pad_below.size() != rank)
{
std::ostringstream msg;
msg << "Rank mismatch in pad_below ";
msg << "(expected length " << rank << " to match rank " << rank;
msg << " but received length " << m_pad_below.size() << ")";
throw std::runtime_error{msg.str()};
}
if (m_pad_above.size() != rank)
{
std::ostringstream msg;
msg << "Rank mismatch in pad_above ";
msg << "(expected length " << rank << " to match rank " << rank;
msg << " but received length " << m_pad_above.size() << ")";
throw std::runtime_error{msg.str()};
}
if (m_strides.size() != rank)
{
std::ostringstream msg;
msg << "Rank mismatch in strides ";
msg << "(expected length " << rank << " to match rank " << rank;
msg << " but received length " << m_strides.size() << ")";
throw std::runtime_error{msg.str()};
}
if (m_window_shape.size() != rank)
{
std::ostringstream msg;
msg << "Rank mismatch in window shape ";
msg << "(expected length " << rank << " to match rank " << rank;
msg << " but received length " << m_filter_dilation.size() << ")";
throw std::runtime_error{msg.str()};
}
}
ngraph::runtime::plaidml::builder::Input
ngraph::runtime::plaidml::ConvPoolFormatter::F_in_header(vertexai::plaidml::variable var)
{
if (m_op != OpType::Conv)
{
throw std::runtime_error{"Asked to construct filter F for pooling operation"};
}
if (m_deriv == DerivType::Filter)
{
throw std::runtime_error{"Asked to construct F as input when computing its gradient"};
}
builder::Input ret{var, F()};
ret.add_dims({CO(), CI()});
for (const auto& XFi : XFs())
{
ret.add_dims({XFi});
}
return ret;
}
ngraph::runtime::plaidml::builder::Input
ngraph::runtime::plaidml::ConvPoolFormatter::I_in_header(vertexai::plaidml::variable var)
{
if (m_deriv == DerivType::Data && m_op == OpType::Conv)
{
throw std::runtime_error{
"Asked to construct I as input to convolution when computing its gradient"};
}
builder::Input ret{var, "I"};
ret.add_dims({N()});
if (m_op == OpType::Conv)
{
ret.add_dims({CI()});
}
else
{
ret.add_dims({C()});
}
for (const auto& XIi : XIs())
{
ret.add_dims({XIi});
}
return ret;
}
ngraph::runtime::plaidml::builder::Input
ngraph::runtime::plaidml::ConvPoolFormatter::O_in_header(vertexai::plaidml::variable var)
{
if (m_deriv == DerivType::None)
{
throw std::runtime_error{"Asked to construct O as input in forward pass"};
}
builder::Input ret{var, O()};
ret.add_dims({N()});
if (m_op == OpType::Conv)
{
ret.add_dims({CO()});
}
else
{
ret.add_dims({C()});
}
for (const auto& XOi : XOs())
{
ret.add_dims({XOi});
}
return ret;
}
ngraph::runtime::plaidml::builder::Output
ngraph::runtime::plaidml::ConvPoolFormatter::F_out_header()
{
if (m_op != OpType::Conv)
{
throw std::runtime_error{"Asked to construct filter F for pooling operation"};
}
if (m_deriv != DerivType::Filter)
{
throw std::runtime_error{"Asked for output F when not finding gradient w.r.t. F"};
}
return builder::Output{F()};
}
ngraph::runtime::plaidml::builder::Output
ngraph::runtime::plaidml::ConvPoolFormatter::I_out_header()
{
if (m_deriv != DerivType::Data)
{
throw std::runtime_error{"Asked to construct I as output in forward pass"};
}
if (m_op == OpType::Conv)
{
return builder::Output{"DI"};
}
else
{
// TODO: Confirm correct for AvgPool as well
return builder::Output{"I"};
}
}
ngraph::runtime::plaidml::builder::Output
ngraph::runtime::plaidml::ConvPoolFormatter::O_out_header()
{
if (m_deriv != DerivType::None)
{
throw std::runtime_error{"Asked to construct O as output in gradient pass"};
}
return builder::Output{O()};
}
ngraph::runtime::plaidml::builder::ContractionOutput
ngraph::runtime::plaidml::ConvPoolFormatter::F_out_body()
{
if (m_op != OpType::Conv)
{
throw std::runtime_error{"Asked to construct filter F for pooling operation"};
}
if (m_deriv != DerivType::Filter)
{
throw std::runtime_error{"Asked for output F when not finding gradient w.r.t. F"};
}
builder::ContractionOutput ret{F()};
ret.add_indices({co(), ci()});
for (const auto& xfi : xfs())
{
ret.add_indices({xfi});
}
ret.add_dims({CO(), CI()});
for (const auto& XFi : XFs())
{
ret.add_dims({XFi});
}
return ret;
}
ngraph::runtime::plaidml::builder::ContractionOutput
ngraph::runtime::plaidml::ConvPoolFormatter::I_out_body()
{
if (m_deriv != DerivType::Data)
{
throw std::runtime_error{"Asked to construct I as output in forward pass"};
}
std::string result_name;
if (m_op == OpType::AvgPool)
{
result_name = "DI";
}
else
{
result_name = I();
}
builder::ContractionOutput ret{result_name};
ret.add_indices({n()});
if (m_op == OpType::Conv)
{
ret.add_indices({ci()});
}
else
{
ret.add_indices({c()});
}
for (const auto& xii : xis())
{
ret.add_indices({xii});
}
ret.add_dims({N()});
if (m_op == OpType::Conv)
{
ret.add_dims({CI()});
}
else
{
ret.add_dims({C()});
}
for (const auto& XIi : XIs())
{
ret.add_dims({XIi});
}
return ret;
}
ngraph::runtime::plaidml::builder::ContractionOutput
ngraph::runtime::plaidml::ConvPoolFormatter::O_out_body()
{
if (m_deriv != DerivType::None && m_op == OpType::Conv)
{
throw std::runtime_error{"Asked to construct O as output in gradient pass"};
}
std::string name;
if (m_op == OpType::AvgPool)
{
// Special name to allow final division for AvgPool
name = "S";
}
else if (m_op == OpType::MaxPool && m_deriv == DerivType::Data)
{
// Special name since forward output is intermediate
name = "Y";
}
else
{
name = O();
}
builder::ContractionOutput ret{name};
ret.add_indices({n()});
if (m_op == OpType::Conv)
{
ret.add_indices({co()});
}
else
{
ret.add_indices({c()});
}
for (const auto& xoi : xos())
{
ret.add_indices({xoi});
}
ret.add_dims({N()});
if (m_op == OpType::Conv)
{
ret.add_dims({CO()});
}
else
{
ret.add_dims({C()});
}
for (const auto& XOi : XOs())
{
ret.add_dims({XOi});
}
return ret;
}
ngraph::runtime::plaidml::builder::ContractionInput
ngraph::runtime::plaidml::ConvPoolFormatter::F_in_body()
{
if (m_op != OpType::Conv)
{
throw std::runtime_error{"Asked to construct filter F for pooling operation"};
}
if (m_deriv == DerivType::Filter)
{
throw std::runtime_error{"Asked to construct F as input when computing its gradient"};
}
builder::ContractionInput ret{F()};
ret.add_indices({co(), ci()});
for (const auto& xfi : xfs())
{
ret.add_indices({xfi});
}
return ret;
}
ngraph::runtime::plaidml::builder::ContractionInput
ngraph::runtime::plaidml::ConvPoolFormatter::I_in_body()
{
if (m_deriv == DerivType::Data && m_op == OpType::Conv)
{
throw std::runtime_error{"Asked to construct I as input when computing its gradient"};
}
builder::ContractionInput ret{"I"};
ret.add_indices({n()});
if (m_op == OpType::Conv)
{
ret.add_indices({ci()});
}
else
{
ret.add_indices({c()});
}
for (const auto& xii : xis())
{
ret.add_indices({xii});
}
return ret;
}
ngraph::runtime::plaidml::builder::ContractionInput
ngraph::runtime::plaidml::ConvPoolFormatter::O_in_body()
{
if (m_deriv == DerivType::None)
{
throw std::runtime_error{"Asked to construct O as input in forward pass"};
}
std::string result_name;
if (m_op == OpType::AvgPool)
{
result_name = "S";
}
else
{
result_name = O();
}
builder::ContractionInput ret{result_name};
ret.add_indices({n()});
if (m_op == OpType::Conv)
{
ret.add_indices({co()});
}
else
{
ret.add_indices({c()});
}
for (const auto& xoi : xos())
{
ret.add_indices({xoi});
}
return ret;
}
ngraph::runtime::plaidml::builder::UnaryContraction
ngraph::runtime::plaidml::ConvPoolFormatter::Broadcast_Ones()
{
if (m_op != OpType::AvgPool)
{
throw std::runtime_error{"Broadcast_Ones should only be used for AvgPool"};
}
builder::UnaryContraction ret{"="};
builder::ContractionOutput ones{"Ones"};
ones.add_indices("o", 0, m_rank);
for (const auto& XIi : XIs())
{
ones.add_dims({XIi});
}
ret.set(ones);
ret.set(builder::ContractionInput{"One"});
return ret;
}
ngraph::runtime::plaidml::builder::UnaryContraction
ngraph::runtime::plaidml::ConvPoolFormatter::Count()
{
if (m_op != OpType::AvgPool)
{
throw std::runtime_error{"Count should only be used for AvgPool"};
}
builder::UnaryContraction ret{"+"};
builder::ContractionOutput count{"Count"};
for (const auto& xoi : xos())
{
count.add_indices({xoi});
}
for (const auto& XOi : XOs())
{
count.add_dims({XOi});
}
builder::ContractionInput ones{"Ones"};
for (const auto& xii : xis())
{
ones.add_indices({xii});
}
ret.set(count).set(ones).add_constraints(
[&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < m_rank; ++idx)
{
std::ostringstream s;
s << "xf" << idx << " < " << m_window_shape[idx];
out = s.str();
}
});
return ret;
}
ngraph::runtime::plaidml::builder::UnaryContraction
ngraph::runtime::plaidml::ConvPoolFormatter::PoolContraction()
{
std::string agg_op;
switch (m_op)
{
case OpType::AvgPool: agg_op = "+"; break;
case OpType::MaxPool: agg_op = ">"; break;
default: throw std::runtime_error("Asked for pool contraction for non-pool op");
}
return builder::UnaryContraction{agg_op}
.set((m_op == OpType::AvgPool && m_deriv == DerivType::Data) ? I_out_body() : O_out_body())
.set((m_op == OpType::AvgPool && m_deriv == DerivType::Data) ? O_in_body() : I_in_body())
.add_constraints([&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < m_rank; ++idx)
{
std::ostringstream s;
s << "xf" << idx << " < " << m_window_shape[idx];
out = s.str();
}
});
}
ngraph::runtime::plaidml::builder::TernaryContraction
ngraph::runtime::plaidml::ConvPoolFormatter::PoolDerivContraction()
{
builder::ContractionOutput output{"DI"};
output.add_indices({n(), c()}).add_dims({N(), C()});
for (const auto& xii : xis())
{
output.add_indices({xii});
}
for (const auto& XIi : XIs())
{
output.add_dims({XIi});
}
builder::ContractionInput input{"I"};
input.add_indices({n(), c()});
for (const auto& xii : xis())
{
input.add_indices({xii});
}
builder::ContractionInput forward_output{"Y"};
forward_output.add_indices({n(), c()});
for (const auto& xoi : xos())
{
forward_output.add_indices({xoi});
}
builder::ContractionInput incoming_deriv{"DO"};
incoming_deriv.add_indices({n(), c()});
for (const auto& xoi : xos())
{
incoming_deriv.add_indices({xoi});
}
return builder::TernaryContraction{"+", "?"}
.set(output)
.set_first(input)
.set_second(forward_output)
.set_third(incoming_deriv);
}
std::string ngraph::runtime::plaidml::ConvPoolFormatter::c()
{
return "c";
}
std::string ngraph::runtime::plaidml::ConvPoolFormatter::ci()
{
return "ci";
}
std::string ngraph::runtime::plaidml::ConvPoolFormatter::co()
{
return "co";
}
std::string ngraph::runtime::plaidml::ConvPoolFormatter::n()
{
return "n";
}
std::vector<std::string> ngraph::runtime::plaidml::ConvPoolFormatter::xfs()
{
if (m_xfs.empty())
{
for (int i = 0; i < m_rank; ++i)
{
std::ostringstream s;
s << "xf" << i;
m_xfs.push_back(s.str());
}
}
return m_xfs;
}
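// Input spatial index formulas: each xi is derived from the corresponding output and filter
// indices as (stride * xo + filter_dilation * xf - pad_below) / data_dilation.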
std::vector<std::string> ngraph::runtime::plaidml::ConvPoolFormatter::xis()
{
if (m_xis.empty())
{
for (int i = 0; i < m_rank; ++i)
{
std::ostringstream s;
s << "(";
s << m_strides[i] << "*xo" << i;
s << " + ";
s << m_filter_dilation[i] << "*xf" << i;
s << " - " << m_pad_below[i];
s << ")";
if (m_data_dilation[i] != 1)
{
s << " / " << m_data_dilation[i];
}
m_xis.push_back(s.str());
}
}
return m_xis;
}
std::vector<std::string> ngraph::runtime::plaidml::ConvPoolFormatter::xos()
{
if (m_xos.empty())
{
for (int i = 0; i < m_rank; ++i)
{
std::ostringstream s;
s << "xo" << i;
m_xos.push_back(s.str());
}
}
return m_xos;
}
std::string ngraph::runtime::plaidml::ConvPoolFormatter::C()
{
return "C";
}
std::string ngraph::runtime::plaidml::ConvPoolFormatter::CI()
{
return "CI";
}
std::string ngraph::runtime::plaidml::ConvPoolFormatter::CO()
{
return "CO";
}
std::string ngraph::runtime::plaidml::ConvPoolFormatter::N()
{
return "N";
}
std::vector<std::string> ngraph::runtime::plaidml::ConvPoolFormatter::XFs()
{
if (m_XFs.empty())
{
for (int i = 0; i < m_rank; ++i)
{
std::ostringstream s;
if (m_deriv == DerivType::Filter)
{
s << m_filters_shape[i + 2];
}
else
{
s << "XF" << i;
}
m_XFs.push_back(s.str());
}
}
return m_XFs;
}
std::vector<std::string> ngraph::runtime::plaidml::ConvPoolFormatter::XIs()
{
if (m_XIs.empty())
{
for (int i = 0; i < m_rank; ++i)
{
std::ostringstream s;
if (m_deriv == DerivType::Data && m_op == OpType::Conv)
{
s << m_data_batch_shape[i + 2];
}
else
{
s << "XI" << i;
}
m_XIs.push_back(s.str());
}
}
return m_XIs;
}
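// Forward-pass output spatial extents: each XO is derived from the corresponding input extent as
// (data_dilation * (XI - 1) + 1 + pad_below + pad_above - window/filter extent + stride) / stride.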
std::vector<std::string> ngraph::runtime::plaidml::ConvPoolFormatter::XOs()
{
if (m_XOs.empty())
{
// TODO: Assumes explicit padding...
for (int i = 0; i < m_rank; ++i)
{
std::ostringstream s;
if (m_deriv == DerivType::None)
{
s << "(";
s << m_data_dilation[i] << " * ";
s << "(XI" << i << " - 1) + 1 + ";
s << m_pad_below[i] + m_pad_above[i];
if (m_window_shape[i] != 0)
{
s << " - " << m_window_shape[i];
}
if (m_op == OpType::Conv)
{
s << " - ";
s << "(" << m_filter_dilation[i];
s << " * (XF" << i << " - 1) + 1)";
}
s << " + " << m_strides[i] << ")";
s << " / " << m_strides[i];
}
else
{
s << "XO" << i;
}
m_XOs.push_back(s.str());
}
}
return m_XOs;
}
std::string ngraph::runtime::plaidml::ConvPoolFormatter::F()
{
if (m_deriv == DerivType::Filter)
{
return "DF";
}
return "F";
}
std::string ngraph::runtime::plaidml::ConvPoolFormatter::I()
{
if (m_deriv == DerivType::Data && m_op == OpType::Conv)
{
return "DI";
}
return "I";
}
std::string ngraph::runtime::plaidml::ConvPoolFormatter::O()
{
if (m_deriv != DerivType::None)
{
return "DO";
}
return "O";
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/runtime/plaidml/plaidml_builder.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/strides.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
class ConvPoolFormatter;
}
}
}
class ngraph::runtime::plaidml::ConvPoolFormatter
{
public:
enum class OpType
{
Conv,
MaxPool,
AvgPool
};
enum class DerivType
{
None,
Data,
Filter
};
// TODO: Data dilation?
// TODO: Types for the dimensional data?
// Convolution-style constructor
ConvPoolFormatter(std::size_t rank,
const ngraph::CoordinateDiff& pad_below,
const ngraph::CoordinateDiff& pad_above,
const ngraph::Strides& strides,
const ngraph::Strides& filter_dilation,
const ngraph::Strides& data_dilation,
ConvPoolFormatter::OpType op,
ConvPoolFormatter::DerivType deriv,
const ngraph::Shape& deriv_output_shape = Shape());
// Pool-style constructor
ConvPoolFormatter(std::size_t rank,
const ngraph::CoordinateDiff& pad_below,
const ngraph::CoordinateDiff& pad_above,
const ngraph::Strides& strides,
const ngraph::Shape& window_shape,
ConvPoolFormatter::OpType op,
ConvPoolFormatter::DerivType deriv);
// Formatted tensors
builder::Input F_in_header(vertexai::plaidml::variable var);
builder::Input I_in_header(vertexai::plaidml::variable var);
builder::Input O_in_header(vertexai::plaidml::variable var);
builder::Output F_out_header();
builder::Output I_out_header();
builder::Output O_out_header();
builder::ContractionOutput F_out_body();
builder::ContractionOutput I_out_body();
builder::ContractionOutput O_out_body();
builder::ContractionInput F_in_body();
builder::ContractionInput I_in_body();
builder::ContractionInput O_in_body();
// Special Operations
builder::UnaryContraction Broadcast_Ones();
builder::UnaryContraction Count();
builder::UnaryContraction PoolContraction();
builder::TernaryContraction PoolDerivContraction();
// Index names / formulas
std::string c();
std::string ci();
std::string co();
std::string n();
std::vector<std::string> xfs();
std::vector<std::string> xis();
std::vector<std::string> xos();
// Dimension names / formulas
std::string C();
std::string CI();
std::string CO();
std::string N();
std::vector<std::string> XFs();
std::vector<std::string> XIs();
std::vector<std::string> XOs();
// Tensor names
std::string F();
std::string I();
std::string O();
private:
std::size_t m_rank;
ngraph::CoordinateDiff m_pad_below;
ngraph::CoordinateDiff m_pad_above;
ngraph::Strides m_strides;
ngraph::Strides m_filter_dilation;
ngraph::Strides m_data_dilation;
ngraph::Shape m_window_shape;
OpType m_op = OpType::Conv;
DerivType m_deriv = DerivType::None;
ngraph::Shape m_filters_shape;
ngraph::Shape m_data_batch_shape;
std::vector<std::string> m_xfs;
std::vector<std::string> m_xis;
std::vector<std::string> m_xos;
std::vector<std::string> m_XFs;
std::vector<std::string> m_XIs;
std::vector<std::string> m_XOs;
};
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
std::unordered_map<std::type_index, std::function<void(Build*, const ngraph::Node&)>>*
OpImplMap()
{
static std::unordered_map<std::type_index,
std::function<void(Build*, const ngraph::Node&)>>
op_impl_map;
return &op_impl_map;
}
}
}
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <sstream>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
#include <utility>
#include "ngraph/runtime/plaidml/plaidml_build.hpp"
#include "ngraph/runtime/plaidml/plaidml_builder.hpp"
#include "ngraph/runtime/plaidml/plaidml_translate.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
// PlaidML Operation implementation support.
//
// To add a new operation:
// 1) Include the operation header
// 2) Write the per-operation implementation definition
// 3) Register the operation type by instantiating Impl<op::OpClass>::Registration at
// global scope.
//
// Operation implementation definitions have access to all methods and member variables
// of the general Impl template.
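//
// For example, an implementation and registration for a hypothetical elementwise
// operation op::Foo (an illustrative sketch, not an actual nGraph op) would look like:
//
//   template <>
//   void ngraph::runtime::plaidml::Impl<ngraph::op::Foo>::operator()()
//   {
//       check_inputs(1);
//       check_outputs(1);
//       set_output(start_tile_function()
//                      .add(builder::Input{op_input(0), "I"})
//                      .add(builder::Output{"O"})
//                      .add(builder::Elementwise{"O", "foo(I)"})
//                      .finalize());
//   }
//
//   namespace
//   {
//       ngraph::runtime::plaidml::Impl<ngraph::op::Foo>::Registration register_foo;
//   }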
// The accessor for the global operation handler map.
std::unordered_map<std::type_index, std::function<void(Build*, const ngraph::Node&)>>*
OpImplMap();
// BaseImpl provides a context for operation interpretation, and provides a few useful
// utility methods.
template <typename O>
class BaseImpl
{
public:
BaseImpl(Build* build, const O& op)
: m_build{build}
, m_op{op}
{
}
protected:
Build* build() { return m_build; }
const O& op() { return m_op; }
// Returns the indicated operation input as a PlaidML variable.
vertexai::plaidml::variable
op_input(std::size_t idx,
TensorContents as_contents = TensorContents::DATA) const
{
const auto& ti = m_build->bindings.at(
m_op.get_inputs()[idx].get_output().get_tensor_ptr().get());
if (as_contents == TensorContents::DATA &&
ti.contents == TensorContents::LOGICAL)
{
return plaidml_logical_to_data(ti.var, m_build->config->debug);
}
return ti.var;
}
// Returns the 0th operation input as a PlaidML variable.
vertexai::plaidml::variable op_input() const
{
return op_input(0, TensorContents::DATA);
}
// Validates that the number of operation inputs matches the expected operation
// input count.
void check_inputs(std::size_t expected_input_count) const
{
if (m_op.get_input_size() != expected_input_count)
{
std::ostringstream os;
os << "The PlaidML nGraph backend only supports " << m_op.description()
<< " operations with an input count == " << expected_input_count
<< " (got " << m_op.get_input_size() << " inputs)";
throw std::runtime_error{os.str()};
}
}
// Validates that the number of operation inputs is greater than or equal to the
// expected operation input count.
void check_inputs_ge(std::size_t minimum_input_count) const
{
if (m_op.get_input_size() < minimum_input_count)
{
std::ostringstream os;
os << "The PlaidML nGraph backend only supports " << m_op.description()
<< " operations with an input count >= " << minimum_input_count
<< " (got " << m_op.get_input_size() << " inputs)";
throw std::runtime_error{os.str()};
}
}
// Validates that the number of operation outputs matches the expected operation
// output count.
void check_outputs(std::size_t expected_output_count) const
{
if (m_op.get_output_size() != expected_output_count)
{
std::ostringstream os;
os << "The PlaidML nGraph backend only supports " << m_op.description()
<< " operations with an output count == " << expected_output_count
<< " (got " << m_op.get_output_size() << " outputs)";
throw std::runtime_error{os.str()};
}
}
// Binds the indicated operation output to the supplied PlaidML variable.
void set_output(std::size_t idx,
vertexai::plaidml::variable var,
TensorContents contents = TensorContents::DATA)
{
m_build->bindings.emplace(m_op.get_output_tensor_ptr(idx).get(),
TensorInfo{std::move(var), contents});
}
// Binds the 0th operation output to the supplied PlaidML variable.
void set_output(vertexai::plaidml::variable var,
TensorContents contents = TensorContents::DATA)
{
m_build->bindings.emplace(m_op.get_output_tensor_ptr().get(),
TensorInfo{std::move(var), contents});
}
// Gets a useful name for the current op.
std::string get_op_name() const { return this->m_op.description(); }
// Starts a Tile function builder.
builder::Function start_tile_function() const
{
return builder::Function{get_op_name(), m_build->config->debug};
}
private:
Build* m_build;
const O& m_op;
};
// ParentImpl sets the base implementation class for a particular operation class; the
// Impl template uses this to figure out which class to derive from when implementing a
// particular operation. This is meant to be specialized as needed.
template <typename O>
struct ParentImpl
{
using Type = BaseImpl<O>;
};
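//
// For example (an illustrative sketch; FooImplBase is a hypothetical op-specific base class
// derived from BaseImpl<op::Foo>):
//
//   template <>
//   struct ParentImpl<op::Foo>
//   {
//       using Type = FooImplBase;
//   };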
// Impl is the common operation implementation class. It declares an operator(), to be
// subsequently defined with the implementation for the particular operation.
//
// Operations that require extensions may derive their common class from BaseImpl,
// and pass it to the Impl template. Alternatively, they may specialize the Impl
// template, replacing it with their own implementation.
template <typename O>
class Impl : public ParentImpl<O>::Type
{
public:
Impl(Build* build, const O& op)
: ParentImpl<O>::Type{build, op}
{
}
void operator()();
static void handler(Build* build, const ngraph::Node& node)
{
Impl<O>(build, dynamic_cast<const O&>(node))();
}
// Registration handles the registration of a particular operation implementation.
// To use it, instantiate a variable of type Impl<op::OpClass>::Registration at
// global scope.
class Registration
{
public:
Registration()
{
OpImplMap()->emplace(std::type_index{typeid(O)}, &Impl<O>::handler);
}
};
};
}
}
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <plaidml/plaidml++.h>
#include "ngraph/log.hpp"
#include "ngraph/runtime/plaidml/plaidml_logger.hpp"
namespace
{
void logger(void* debug, vai_log_severity severity, const char* message)
{
switch (severity)
{
case VAI_LOG_SEVERITY_VERBOSE:
case VAI_LOG_SEVERITY_TRACE:
case VAI_LOG_SEVERITY_DEBUG:
if (debug)
{
PLAIDML_DEBUG << message;
}
return;
case VAI_LOG_SEVERITY_INFO:
// We treat PlaidML info-level logs as nGraph debug-level logs, since we expect that
// most nGraph users think of PlaidML details as debugging information.
if (debug)
{
PLAIDML_DEBUG << message;
}
return;
case VAI_LOG_SEVERITY_WARNING: NGRAPH_WARN << message; return;
case VAI_LOG_SEVERITY_ERROR:
default: NGRAPH_ERR << message; return;
}
}
}
void ngraph::runtime::plaidml::configure_plaidml_logger(bool debug)
{
vai_set_logger(&logger, reinterpret_cast<void*>(debug ? 1 : 0));
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/log.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
void configure_plaidml_logger(bool debug);
// N.B. This is an unconditional write to the debug log, used when PlaidML debugging is enabled.
#define PLAIDML_DEBUG \
ngraph::LogHelper(ngraph::LOG_TYPE::_LOG_TYPE_DEBUG, \
ngraph::get_file_name(__FILE__), \
__LINE__, \
ngraph::default_logger_handler_func) \
.stream()
}
}
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/abs.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/ceiling.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/floor.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/negative.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/sigmoid.hpp"
#include "ngraph/op/sign.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
#include "ngraph/runtime/plaidml/plaidml_translate.hpp"
// Abs performs a simple elementwise absolute value.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Abs>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "abs(I)"})
.finalize());
}
// Add performs a simple elementwise addition.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Add>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "A"})
.add(builder::Input{op_input(1), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "A + B"})
.finalize());
}
// Ceiling performs a simple elementwise ceiling.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Ceiling>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "ceil(I)"})
.finalize());
}
// Divide performs a simple elementwise division.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Divide>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "A"})
.add(builder::Input{op_input(1), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "A / B"})
.finalize());
}
// Floor performs a simple elementwise floor.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Floor>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "floor(I)"})
.finalize());
}
// Multiply performs a simple elementwise multiplication.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Multiply>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "A"})
.add(builder::Input{op_input(1), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "A * B"})
.finalize());
}
// Negative performs a simple elementwise negation.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Negative>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "-I"})
.finalize());
}
// Relu implements a simple elementwise rectified linear unit.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Relu>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "relu(I)"})
.finalize());
}
// ReluBackprop computes the derivative of Relu.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::ReluBackprop>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Input{op_input(1), "DO"})
.add(builder::Output{"DI"})
.add(builder::Elementwise{"DI", "I > 0 ? DO : 0"})
.finalize());
}
// Sigmoid computes a standard ML sigmoid: 1/(1+exp(-X))
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Sigmoid>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "1/(1+exp(-I))"})
.finalize());
}
// SigmoidBackprop computes the derivative of a standard ML
// sigmoid: dOutput * sigmoid(X) * (1-sigmoid(X))
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::SigmoidBackprop>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Input{op_input(1), "DO"})
.add(builder::Output{"DI"})
.add(builder::Elementwise{"O", "1/(1+exp(-I))"})
.add(builder::Elementwise{"DI", "DO * O * (1-O)"})
.finalize());
}
// Sign returns the sign of an element.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Sign>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"S", "(I < 0) ? -1 : ((I > 0) ? 1 : 0)"})
.add(builder::Elementwise{"O", tile_converter("S", op().get_element_type())})
.finalize());
}
// Subtract performs a simple elementwise subtraction.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Subtract>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "A"})
.add(builder::Input{op_input(1), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "A - B"})
.finalize());
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::Abs>::Registration register_abs;
ngraph::runtime::plaidml::Impl<ngraph::op::Add>::Registration register_add;
ngraph::runtime::plaidml::Impl<ngraph::op::Ceiling>::Registration register_ceiling;
ngraph::runtime::plaidml::Impl<ngraph::op::Divide>::Registration register_divide;
ngraph::runtime::plaidml::Impl<ngraph::op::Floor>::Registration register_floor;
ngraph::runtime::plaidml::Impl<ngraph::op::Multiply>::Registration register_multiply;
ngraph::runtime::plaidml::Impl<ngraph::op::Negative>::Registration register_negative;
ngraph::runtime::plaidml::Impl<ngraph::op::Relu>::Registration register_relu;
ngraph::runtime::plaidml::Impl<ngraph::op::ReluBackprop>::Registration register_relu_backprop;
ngraph::runtime::plaidml::Impl<ngraph::op::Sigmoid>::Registration register_sigmoid;
ngraph::runtime::plaidml::Impl<ngraph::op::SigmoidBackprop>::Registration
register_sigmoid_backprop;
ngraph::runtime::plaidml::Impl<ngraph::op::Sign>::Registration register_sign;
ngraph::runtime::plaidml::Impl<ngraph::op::Subtract>::Registration register_subtract;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/log.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
// BatchNormInference implements batch normalization for inference, in
// which the mean and variance to use are supplied.
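// In terms of the Tile code below:
//   Normalized = ((Input - MeanP) / sqrt(VarianceP + eps)) * GammaP + BetaP
// where GammaP/BetaP/MeanP/VarianceP are the per-channel parameters reshaped for broadcast
// over the non-channel dimensions.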
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::BatchNormInference>::operator()()
{
auto& input_shape = op().get_input_shape(2);
check_inputs(5);
check_outputs(1);
auto f = start_tile_function();
f.add(builder::Input{op_input(0), "Gamma"}.add_dims({"C"}))
.add(builder::Input{op_input(1), "Beta"}.add_dims({"C"}))
.add(builder::Input{op_input(2), "Input"}
.add_dims({"B", "C"})
.add_dims("DI", 3, input_shape.size() + 1))
.add(builder::Output{"Normalized"})
.add(builder::Input{op_input(3), "Mean"}.add_dims({"C"}))
.add(builder::Input{op_input(4), "Variance"}.add_dims({"C"}));
std::string ones;
for (auto idx = 2; idx < input_shape.size(); ++idx)
{
ones += ", 1";
}
if (input_shape.size() <= 2)
{
f.add(builder::Elementwise{"GammaP", "Gamma"}).add(builder::Elementwise{"BetaP", "Beta"});
}
else
{
f.add(builder::Elementwise{"GammaP", std::string{"reshape(Gamma, C"} + ones + ")"})
.add(builder::Elementwise{"BetaP", std::string{"reshape(Beta, C"} + ones + ")"});
}
if (input_shape.size() <= 2)
{
f.add(builder::Elementwise{"MeanP", "Mean"});
}
else
{
f.add(builder::Elementwise{"MeanP", std::string{"reshape(Mean, C"} + ones + ")"});
}
if (input_shape.size() <= 2)
{
f.add(builder::Elementwise{"VarianceP", "Variance"});
}
else
{
f.add(builder::Elementwise{"VarianceP", std::string{"reshape(Variance, C"} + ones + ")"});
}
f.add(builder::Elementwise{"Normalized",
"(((Input-MeanP) / sqrt(VarianceP + " +
std::to_string(op().get_eps_value()) + ")) * GammaP) + BetaP"});
auto app = f.finalize();
set_output(app);
}
// BatchNormTraining implements batch normalization for training, in
// which the mean and variance are to be computed from the supplied
// input.
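// In terms of the Tile code below:
//   Mean = sum(Input) / EltCount
//   Variance = sum((Input - Mean)^2) / EltCount
// with the sums reduced over the batch and spatial dimensions, followed by the same
// normalization formula used for inference.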
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::BatchNormTraining>::operator()()
{
auto& input_shape = op().get_input_shape(2);
check_inputs(3);
check_outputs(3);
auto f = start_tile_function();
f.add(builder::Input{op_input(0), "Gamma"}.add_dims({"C"}))
.add(builder::Input{op_input(1), "Beta"}.add_dims({"C"}))
.add(builder::Input{op_input(2), "Input"}
.add_dims({"B", "C"})
.add_dims("DI", 3, input_shape.size() + 1))
.add(builder::Output{"Normalized"})
.add(builder::Output{"Mean"})
.add(builder::Output{"Variance"});
std::string ones;
for (auto idx = 2; idx < input_shape.size(); ++idx)
{
ones += ", 1";
}
if (input_shape.size() <= 2)
{
f.add(builder::Elementwise{"GammaP", "Gamma"}).add(builder::Elementwise{"BetaP", "Beta"});
}
else
{
f.add(builder::Elementwise{"GammaP", std::string{"reshape(Gamma, C"} + ones + ")"})
.add(builder::Elementwise{"BetaP", std::string{"reshape(Beta, C"} + ones + ")"});
}
if (input_shape.size() <= 2)
{
f.add(builder::Elementwise{"EltCount", "B"});
}
else
{
std::string elts{"B"};
for (auto idx = 2; idx < input_shape.size(); ++idx)
{
elts += " * DI" + std::to_string(idx + 1);
}
f.add(builder::Elementwise{"EltCount", std::move(elts)});
}
f.add(builder::UnaryContraction{"+"}
.set(builder::ContractionOutput{"SumInput"}.add_indices({"c"}).add_dims({"C"}))
.set(builder::ContractionInput{"Input"}
.add_indices({"b", "c"})
.add_indices("di", 3, input_shape.size() + 1)));
f.add(builder::Elementwise{"Mean", "SumInput / EltCount"});
if (input_shape.size() <= 2)
{
f.add(builder::Elementwise{"MeanP", "Mean"});
}
else
{
f.add(builder::Elementwise{"MeanP", std::string{"reshape(Mean, C"} + ones + ")"});
}
f.add(builder::Elementwise{"DiffV", "(Input - MeanP)"})
.add(builder::Elementwise{"SqDiffV", "DiffV*DiffV"})
.add(builder::UnaryContraction{"+"}
.set(builder::ContractionOutput{"SumSqDiffV"}.add_indices({"c"}).add_dims({"C"}))
.set(builder::ContractionInput{"SqDiffV"}
.add_indices({"b", "c"})
.add_indices("di", 3, input_shape.size() + 1)))
.add(builder::Elementwise{"Variance", "SumSqDiffV / EltCount"});
if (input_shape.size() <= 2)
{
f.add(builder::Elementwise{"VarianceP", "Variance"});
}
else
{
f.add(builder::Elementwise{"VarianceP", std::string{"reshape(Variance, C"} + ones + ")"});
}
f.add(builder::Elementwise{"Normalized",
"(((Input-MeanP) / sqrt(VarianceP + " +
std::to_string(op().get_eps_value()) + ")) * GammaP) + BetaP"});
auto app = f.finalize();
set_output(0, app.get_output(0));
set_output(1, app.get_output(1));
set_output(2, app.get_output(2));
}
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::BatchNormTrainingBackprop>::operator()()
{
// WARNING: I'm unconvinced that we have sufficient test coverage for BatchNorm
// backprop, and in particular I'm concerned that Gamma/Beta and Mean/Var could be
// swapped without the tests catching it.
check_inputs(6);
check_outputs(3);
auto& input_shape = op().get_input_shape(2);
std::string epsilon = std::to_string(op().get_eps_value());
auto f = start_tile_function();
// Header
f.add(builder::Input{op_input(0), "Gamma"}.add_dims({"C"}))
.add(builder::Input{op_input(1), "Beta"}.add_dims({"C"}))
.add(builder::Input{op_input(2), "Input"}
.add_dims({"N", "C"})
.add_dims("X", 3, input_shape.size() + 1))
.add(builder::Input{op_input(3), "Mean"}.add_dims({"C"}))
.add(builder::Input{op_input(4), "Var"}.add_dims({"C"}))
.add(builder::Input{op_input(5), "DOutput"}
.add_dims({"N", "C"})
.add_dims("X", 3, input_shape.size() + 1));
f.add(builder::Output{"DInput"});
f.add(builder::Output{"DGamma"});
f.add(builder::Output{"DBeta"});
// Prep for body
builder::ContractionOutput broadcast_gamma{"BroadcastGamma"};
builder::ContractionOutput broadcast_dgamma{"BroadcastDGamma"};
builder::ContractionOutput broadcast_dbeta{"BroadcastDBeta"};
broadcast_gamma.add_indices({"0", "c"}).add_dims({"1", "C"});
broadcast_dgamma.add_indices({"0", "c"}).add_dims({"1", "C"});
broadcast_dbeta.add_indices({"0", "c"}).add_dims({"1", "C"});
for (std::size_t i = 0; i < input_shape.size() - 2; ++i)
{
broadcast_gamma.add_indices({"0"}).add_dims({"1"});
broadcast_dgamma.add_indices({"0"}).add_dims({"1"});
broadcast_dbeta.add_indices({"0"}).add_dims({"1"});
}
std::ostringstream reduction_dims;
reduction_dims << "("
<< "N";
for (std::size_t i = 3; i < input_shape.size() + 1; ++i)
{
reduction_dims << " * X" << i;
}
reduction_dims << ")";
// Body
f.add(builder::UnaryContraction{"+"}
.set(builder::ContractionOutput{"BatchMeanNumerator"}
.add_indices({"0", "c", "0", "0"})
.add_dims({"1", "C", "1", "1"}))
.set(builder::ContractionInput{"Input"}
.add_indices({"n", "c"})
.add_indices("x", 3, input_shape.size() + 1)));
f.add(builder::Elementwise{"BatchMean", "BatchMeanNumerator / " + reduction_dims.str()});
f.add(builder::Elementwise{"NegBatchMean", "-BatchMean"});
f.add(
builder::BinaryContraction{"=", "+"}
.set(builder::ContractionOutput{"Deviation"}
.add_indices({"n", "c"})
.add_indices("x", 3, input_shape.size() + 1)
.add_dims({"N", "C"})
.add_dims("X", 3, input_shape.size() + 1))
.set_lhs(builder::ContractionInput{"Input"}
.add_indices({"n", "c"})
.add_indices("x", 3, input_shape.size() + 1))
.set_rhs(builder::ContractionInput{"NegBatchMean"}.add_indices({"0", "c", "0", "0"})));
f.add(builder::BinaryContraction{"+", "*"}
.set(builder::ContractionOutput{"BatchVarNumerator"}
.add_indices({"0", "c", "0", "0"})
.add_dims({"1", "C", "1", "1"}))
.set_lhs(builder::ContractionInput{"Deviation"}
.add_indices({"n", "c"})
.add_indices("x", 3, input_shape.size() + 1))
.set_rhs(builder::ContractionInput{"Deviation"}
.add_indices({"n", "c"})
.add_indices("x", 3, input_shape.size() + 1)));
f.add(builder::Elementwise{"BatchVar", "BatchVarNumerator / " + reduction_dims.str()});
f.add(builder::Elementwise{"BatchStdDev", "sqrt(BatchVar + " + epsilon + ")"});
f.add(builder::Elementwise{"NormedInput", "(Input - BatchMean) / BatchStdDev"});
f.add(builder::Elementwise{"ZeroedInput", "Input - BatchMean"});
f.add(builder::UnaryContraction{"="}
.set(broadcast_gamma)
.set(builder::ContractionInput{"Gamma"}.add_indices({"c"})));
f.add(builder::Elementwise{"DNormedInput", "DOutput * BroadcastGamma"});
f.add(builder::UnaryContraction{"+"}
.set(builder::ContractionOutput{"SumDOutput"}.add_indices({"c"}).add_dims({"C"}))
.set(builder::ContractionInput{"DOutput"}
.add_indices({"n", "c"})
.add_indices("x", 3, input_shape.size() + 1)));
f.add(builder::BinaryContraction{"+", "*"}
.set(builder::ContractionOutput{"DGamma"}.add_indices({"c"}).add_dims({"C"}))
.set_lhs(builder::ContractionInput{"DOutput"}
.add_indices({"n", "c"})
.add_indices("x", 3, input_shape.size() + 1))
.set_rhs(builder::ContractionInput{"NormedInput"}
.add_indices({"n", "c"})
.add_indices("x", 3, input_shape.size() + 1)));
f.add(builder::Elementwise{"DBeta", "SumDOutput"});
f.add(builder::UnaryContraction{"="}
.set(broadcast_dgamma)
.set(builder::ContractionInput{"DGamma"}.add_indices({"c"})));
f.add(builder::UnaryContraction{"="}
.set(broadcast_dbeta)
.set(builder::ContractionInput{"DBeta"}.add_indices({"c"})));
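// In conventional notation, the elementwise expression below is the standard batch-norm
// input gradient (a sketch, with m denoting the per-channel element count built in
// reduction_dims above):
//   dL/dx = (gamma / stddev) * (dL/dy - (x_hat * dL/dgamma + dL/dbeta) / m)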
f.add(builder::Elementwise{"DInput",
"(BroadcastGamma / BatchStdDev) * (DOutput - "
"(NormedInput * BroadcastDGamma + BroadcastDBeta) / (" +
reduction_dims.str() + "))"});
// Return results
auto app = f.finalize();
set_output(0, app.get_output(0));
set_output(1, app.get_output(1));
set_output(2, app.get_output(2));
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::BatchNormInference>::Registration
register_batch_norm_inference;
ngraph::runtime::plaidml::Impl<ngraph::op::BatchNormTraining>::Registration
register_batch_norm_training;
ngraph::runtime::plaidml::Impl<ngraph::op::BatchNormTrainingBackprop>::Registration
register_batch_norm_training_backprop;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/equal.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp"
#include "ngraph/op/less.hpp"
#include "ngraph/op/less_eq.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/not_equal.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
// Equal performs a simple elementwise equality comparison.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Equal>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0, TensorContents::LOGICAL), "A"})
.add(builder::Input{op_input(1, TensorContents::LOGICAL), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "A == B"})
.finalize(),
TensorContents::LOGICAL);
}
// Greater performs a simple elementwise greater-than comparison.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Greater>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "A"})
.add(builder::Input{op_input(1), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "A > B"})
.finalize(),
TensorContents::LOGICAL);
}
// GreaterEq performs a simple elementwise greater-than-or-equal-to comparison.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::GreaterEq>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "A"})
.add(builder::Input{op_input(1), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "A >= B"})
.finalize(),
TensorContents::LOGICAL);
}
// Less performs a simple elementwise less-than comparison.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Less>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "A"})
.add(builder::Input{op_input(1), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "A < B"})
.finalize(),
TensorContents::LOGICAL);
}
// LessEq performs a simple elementwise less-than-or-equal-to comparison.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::LessEq>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "A"})
.add(builder::Input{op_input(1), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "A <= B"})
.finalize(),
TensorContents::LOGICAL);
}
// Maximum performs a simple elementwise maximum.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Maximum>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "A"})
.add(builder::Input{op_input(1), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "max(A, B)"})
.finalize());
}
// Minimum performs a simple elementwise minimum.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Minimum>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "A"})
.add(builder::Input{op_input(1), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "min(A, B)"})
.finalize());
}
// NotEqual performs a simple elementwise inequality comparison.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::NotEqual>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0, TensorContents::LOGICAL), "A"})
.add(builder::Input{op_input(1, TensorContents::LOGICAL), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "A != B"})
.finalize(),
TensorContents::LOGICAL);
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::Equal>::Registration register_equal;
ngraph::runtime::plaidml::Impl<ngraph::op::Greater>::Registration register_greater;
ngraph::runtime::plaidml::Impl<ngraph::op::GreaterEq>::Registration register_greater_eq;
ngraph::runtime::plaidml::Impl<ngraph::op::Less>::Registration register_less;
ngraph::runtime::plaidml::Impl<ngraph::op::LessEq>::Registration register_less_eq;
ngraph::runtime::plaidml::Impl<ngraph::op::Maximum>::Registration register_maximum;
ngraph::runtime::plaidml::Impl<ngraph::op::Minimum>::Registration register_minimum;
ngraph::runtime::plaidml::Impl<ngraph::op::NotEqual>::Registration register_not_equal;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/concat.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
// Concat concatenates its input tensors along a specified axis.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Concat>::operator()()
{
check_outputs(1);
auto f = start_tile_function();
f.add(builder::Output{"O"});
std::size_t dim_count = op().get_shape().size();
std::ostringstream offset;
std::ostringstream oexpr;
std::ostringstream concat_dsize;
bool saw_non_zero_tensor = false;
for (std::size_t iidx = 0; iidx < op().get_inputs().size(); ++iidx)
{
if (!shape_size(op().get_input_shape(iidx)))
{
continue;
}
if (saw_non_zero_tensor)
{
concat_dsize << "+";
}
saw_non_zero_tensor = true;
concat_dsize << "I" << iidx << "_D" << op().get_concatenation_axis();
}
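// At this point concat_dsize holds the symbolic output size along the concatenation axis,
// e.g. "I0_D1+I2_D1" when inputs 0 and 2 are non-empty and the axis is 1.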
saw_non_zero_tensor = false;
for (std::size_t iidx = 0; iidx < op().get_inputs().size(); ++iidx)
{
if (!shape_size(op().get_input_shape(iidx)))
{
continue;
}
std::string sidx{std::to_string(iidx)};
f.add(builder::Input{op_input(iidx), "I" + sidx}.add_dims("I" + sidx + "_D", 0, dim_count));
f.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"E" + sidx}
.add_dims([&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < dim_count; ++idx)
{
std::ostringstream s;
if (idx == op().get_concatenation_axis())
{
out = concat_dsize.str();
}
else
{
s << "I" << iidx << "_D" << idx;
out = s.str();
}
}
})
.add_indices([&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < dim_count; ++idx)
{
std::ostringstream s;
s << "d" << idx;
if (saw_non_zero_tensor && idx == op().get_concatenation_axis())
{
s << " + " << offset.str();
}
out = s.str();
}
}))
.set(builder::ContractionInput{"I" + sidx}.add_indices("d", 0, dim_count)));
if (saw_non_zero_tensor)
{
oexpr << " + ";
offset << " + ";
}
oexpr << "E" << sidx;
offset << "I" << iidx << "_D" << op().get_concatenation_axis();
saw_non_zero_tensor = true;
}
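// Each Ei built above is the i'th input placed into the full output shape at its offset along
// the concatenation axis (with untouched positions left at zero by the contraction), so summing
// them elementwise below yields the concatenated result.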
f.add(builder::Elementwise{"O", oexpr.str()});
set_output(f.finalize());
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::Concat>::Registration register_concat;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/convert.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
#include "ngraph/runtime/plaidml/plaidml_translate.hpp"
// Convert casts a tensor's elements to a new element type.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Convert>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{
"O", tile_converter("I", to_plaidml(op().get_convert_element_type()))})
.finalize());
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::Convert>::Registration register_convert;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/log.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/runtime/plaidml/plaidml_convpool_formatter.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
template <typename O>
class ConvolutionImpl : public BaseImpl<O>
{
public:
ConvolutionImpl(Build* build, const O& op)
: BaseImpl<O>{build, op}
{
}
void LogConvolution(vertexai::plaidml::variable image,
vertexai::plaidml::variable filter,
std::size_t image_dims,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
std::size_t batch_axis_data,
std::size_t input_channel_axis_data,
std::size_t input_channel_axis_filters,
std::size_t output_channel_axis_filters,
std::size_t batch_axis_result,
std::size_t output_channel_axis_result,
bool rotate_filter);
};
}
}
}
template <>
struct ngraph::runtime::plaidml::ParentImpl<ngraph::op::Convolution>
{
using Type = ngraph::runtime::plaidml::ConvolutionImpl<ngraph::op::Convolution>;
};
template <>
struct ngraph::runtime::plaidml::ParentImpl<ngraph::op::ConvolutionBackpropFilters>
{
using Type = ngraph::runtime::plaidml::ConvolutionImpl<ngraph::op::ConvolutionBackpropFilters>;
};
template <>
struct ngraph::runtime::plaidml::ParentImpl<ngraph::op::ConvolutionBackpropData>
{
using Type = ngraph::runtime::plaidml::ConvolutionImpl<ngraph::op::ConvolutionBackpropData>;
};
// Convolution implements a standard ML convolution, with optional striding, padding, and dilation.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Convolution>::operator()()
{
this->check_inputs(2);
this->check_outputs(1);
LogConvolution(op_input(0),
op_input(1),
op().get_inputs()[0].get_shape().size() - 2,
op().get_window_movement_strides(),
op().get_window_dilation_strides(),
op().get_padding_below(),
op().get_padding_above(),
op().get_data_dilation_strides(),
0,
1,
1,
0,
0,
1,
false);
const auto& image = op_input(0);
const auto& filter = op_input(1);
auto image_dims = op().get_inputs()[0].get_shape().size() - 2;
const auto& padding_above = op().get_padding_above();
const auto& padding_below = op().get_padding_below();
const auto& strides = op().get_window_movement_strides();
const auto& filter_dilation = op().get_window_dilation_strides();
const auto& data_dilation = op().get_data_dilation_strides();
ConvPoolFormatter cpf(image_dims,
padding_below,
padding_above,
strides,
filter_dilation,
data_dilation,
ConvPoolFormatter::OpType::Conv,
ConvPoolFormatter::DerivType::None);
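// The contraction below expresses the forward convolution roughly as
//   O[n, co, x...] += I[n, ci, stride*x + dilation*k - pad...] * F[co, ci, k...]
// (ignoring data dilation for brevity), with the exact index expressions supplied by the
// ConvPoolFormatter.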
this->set_output(start_tile_function()
.add(cpf.I_in_header(image))
.add(cpf.F_in_header(filter))
.add(cpf.O_out_header())
.add(builder::BinaryContraction{"+", "*"}
.set(cpf.O_out_body())
.set_lhs(cpf.I_in_body())
.set_rhs(cpf.F_in_body()))
.finalize());
}
// ConvolutionBackpropFilters implements the derivative of a convolution with respect to its filter
// input.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::ConvolutionBackpropFilters>::operator()()
{
this->check_inputs(2);
this->check_outputs(1);
LogConvolution(op_input(0),
op_input(1),
op().get_inputs()[0].get_shape().size() - 2,
op().get_window_movement_strides_backward(),
op().get_window_dilation_strides_backward(),
op().get_padding_below_backward(),
op().get_padding_above_backward(),
op().get_data_dilation_strides_backward(),
1,
0,
0,
1,
1,
0,
false);
const auto& image = op_input(0);
const auto& output = op_input(1);
auto image_dims = op().get_inputs()[0].get_shape().size() - 2;
const auto& padding_above = op().get_padding_above_forward();
const auto& padding_below = op().get_padding_below_forward();
const auto& strides = op().get_window_movement_strides_forward();
const auto& filter_dilation = op().get_window_dilation_strides_forward();
const auto& data_dilation = op().get_data_dilation_strides_forward();
const auto& filters_shape = op().get_filters_shape();
ConvPoolFormatter cpf(image_dims,
padding_below,
padding_above,
strides,
filter_dilation,
data_dilation,
ConvPoolFormatter::OpType::Conv,
ConvPoolFormatter::DerivType::Filter,
filters_shape);
this->set_output(start_tile_function()
.add(cpf.I_in_header(image))
.add(cpf.O_in_header(output))
.add(cpf.F_out_header())
.add(builder::BinaryContraction{"+", "*"}
.set(cpf.F_out_body())
.set_lhs(cpf.O_in_body())
.set_rhs(cpf.I_in_body()))
.finalize());
}
// ConvolutionBackpropData implements the derivative of a convolution with respect to its data
// input.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::ConvolutionBackpropData>::operator()()
{
this->check_inputs(2);
this->check_outputs(1);
LogConvolution(op_input(0),
op_input(1),
op().get_inputs()[1].get_shape().size() - 2,
op().get_window_movement_strides_backward(),
op().get_window_dilation_strides_backward(),
op().get_padding_below_backward(),
op().get_padding_above_backward(),
op().get_data_dilation_strides_backward(),
0,
1,
0,
1,
0,
1,
true);
auto image_dims = op().get_inputs()[0].get_shape().size() - 2;
const auto& filter = op_input(0);
const auto& output = op_input(1);
const auto& padding_above = op().get_padding_above_forward();
const auto& padding_below = op().get_padding_below_forward();
const auto& strides = op().get_window_movement_strides_forward();
const auto& filter_dilation = op().get_window_dilation_strides_forward();
const auto& data_dilation = op().get_data_dilation_strides_forward();
const auto& data_batch_shape = op().get_data_batch_shape();
ConvPoolFormatter cpf(image_dims,
padding_below,
padding_above,
strides,
filter_dilation,
data_dilation,
ConvPoolFormatter::OpType::Conv,
ConvPoolFormatter::DerivType::Data,
data_batch_shape);
this->set_output(start_tile_function()
.add(cpf.F_in_header(filter))
.add(cpf.O_in_header(output))
.add(cpf.I_out_header())
.add(builder::BinaryContraction{"+", "*"}
.set(cpf.I_out_body())
.set_lhs(cpf.O_in_body())
.set_rhs(cpf.F_in_body()))
.finalize());
}
template <typename O>
inline void ngraph::runtime::plaidml::ConvolutionImpl<O>::LogConvolution(
vertexai::plaidml::variable image,
vertexai::plaidml::variable filter,
std::size_t image_dims,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
std::size_t batch_axis_data,
std::size_t input_channel_axis_data,
std::size_t input_channel_axis_filters,
std::size_t output_channel_axis_filters,
std::size_t batch_axis_result,
std::size_t output_channel_axis_result,
bool rotate_filter)
{
this->check_inputs(2);
this->check_outputs(1);
NGRAPH_DEBUG << "image_dims: " << image_dims;
NGRAPH_DEBUG << "first_dims: " << this->op().get_inputs()[0].get_shape();
NGRAPH_DEBUG << "second_dims: " << this->op().get_inputs()[1].get_shape();
NGRAPH_DEBUG << "output_dims: " << this->op().get_outputs()[0].get_shape();
NGRAPH_DEBUG << "padding_below: " << padding_below;
NGRAPH_DEBUG << "padding_above: " << padding_above;
NGRAPH_DEBUG << "window_movement_strides: " << window_movement_strides;
NGRAPH_DEBUG << "window_dilation_strides: " << window_dilation_strides;
NGRAPH_DEBUG << "data_dilation_strides: " << data_dilation_strides;
NGRAPH_DEBUG << "batch_axis_data: " << batch_axis_data;
NGRAPH_DEBUG << "input_channel_axis_data: " << input_channel_axis_data;
NGRAPH_DEBUG << "input_channel_axis_filters: " << input_channel_axis_filters;
NGRAPH_DEBUG << "output_channel_axis_filters: " << output_channel_axis_filters;
NGRAPH_DEBUG << "batch_axis_result: " << batch_axis_result;
NGRAPH_DEBUG << "output_channel_axis_result: " << output_channel_axis_result;
NGRAPH_DEBUG << "rotate_filter: " << rotate_filter;
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::Convolution>::Registration register_convolution;
ngraph::runtime::plaidml::Impl<ngraph::op::ConvolutionBackpropFilters>::Registration
register_convolution_backprop_filters;
ngraph::runtime::plaidml::Impl<ngraph::op::ConvolutionBackpropData>::Registration
register_convolution_backprop_data;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <sstream>
#include "ngraph/log.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
// Dot is a generalized dot product operation -- scalar-tensor,
// matrix-vector, and matrix multiplication.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Dot>::operator()()
{
check_inputs(2);
check_outputs(1);
auto l_dim_limit = op().get_inputs()[0].get_shape().size();
auto r_dim_limit = op().get_inputs()[1].get_shape().size();
auto reduce_limit = op().get_reduction_axes_count();
auto l_dim_mac = l_dim_limit - reduce_limit;
auto r_dim_mic = reduce_limit;
NGRAPH_DEBUG << "l_dim_limit=" << l_dim_limit;
NGRAPH_DEBUG << "r_dim_limit=" << r_dim_limit;
NGRAPH_DEBUG << "reduce_limit=" << reduce_limit;
NGRAPH_DEBUG << "l_dim_mac=" << l_dim_mac;
NGRAPH_DEBUG << "r_dim_mic=" << r_dim_mic;
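// As a sketch: for a plain matrix multiply (2-d inputs, reduce_limit == 1) this builds
//   O[dl1, dr2] += L[dl1, dc1] * R[dc1, dr2]
// i.e. the usual sum over the shared contraction dimension dc1.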
set_output(start_tile_function()
.add(builder::Input{op_input(0), "L"}
.add_dims("DL", 1, l_dim_mac + 1)
.add_dims("DC", 1, reduce_limit + 1))
.add(builder::Input{op_input(1), "R"}
.add_dims("DC", 1, reduce_limit + 1)
.add_dims("DR", r_dim_mic + 1, r_dim_limit + 1))
.add(builder::Output{"O"})
.add(builder::BinaryContraction{"+", "*"}
.set(builder::ContractionOutput{"O"}
.add_indices("dl", 1, l_dim_mac + 1)
.add_indices("dr", r_dim_mic + 1, r_dim_limit + 1)
.add_dims("DL", 1, l_dim_mac + 1)
.add_dims("DR", r_dim_mic + 1, r_dim_limit + 1))
.set_lhs(builder::ContractionInput{"L"}
.add_indices("dl", 1, l_dim_mac + 1)
.add_indices("dc", 1, reduce_limit + 1))
.set_rhs(builder::ContractionInput{"R"}
.add_indices("dc", 1, reduce_limit + 1)
.add_indices("dr", r_dim_mic + 1, r_dim_limit + 1)))
.finalize());
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::Dot>::Registration register_dot;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/function_call.hpp"
#include "ngraph/runtime/plaidml/plaidml_build.hpp"
#include "ngraph/runtime/plaidml/plaidml_compiler.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
// FunctionCall invokes a sub-function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::FunctionCall>::operator()()
{
Build b;
build()->compiler->build(op().get_functions()[0], &b);
vertexai::plaidml::function f{b.composer};
vertexai::plaidml::function::parameters_t inputs;
for (std::size_t idx = 0; idx < op().get_input_size(); ++idx)
{
auto* oitv = op().get_inputs()[idx].get_output().get_tensor_ptr().get();
auto* iitv = b.func->get_parameters()[idx]->get_outputs()[0].get_tensor_ptr().get();
inputs.emplace_back(b.input_names.at(iitv), build()->bindings.at(oitv).var);
}
vertexai::plaidml::application app{f.apply(inputs)};
for (std::size_t idx = 0; idx < op().get_output_size(); ++idx)
{
auto* iotv = b.func->get_results()[idx]->get_output_tensor_ptr().get();
set_output(idx, app.get_output(b.output_names[iotv]));
}
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::FunctionCall>::Registration register_function_call;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/log.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/pad.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/result.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/op/stop_gradient.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
#include "ngraph/runtime/plaidml/plaidml_translate.hpp"
namespace vp = vertexai::plaidml;
// Broadcast broadcasts a tensor to a wider shape.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Broadcast>::operator()()
{
check_inputs(1);
check_outputs(1);
auto in_dim_limit = op().get_inputs()[0].get_shape().size();
auto out_dim_limit = op().get_broadcast_shape().size();
NGRAPH_DEBUG << "Broadcast in_dim_limit: " << in_dim_limit
<< " out_dim_limit: " << out_dim_limit;
NGRAPH_DEBUG << "Broadcast axes: " << op().get_broadcast_axes();
NGRAPH_DEBUG << "Broadcast input shape: " << op().get_input_shape(0);
NGRAPH_DEBUG << "Broadcast output shape: " << op().get_broadcast_shape();
auto input_didx = in_dim_limit;
std::vector<std::size_t> out_didxs;
for (std::size_t idx = 0; idx < out_dim_limit; ++idx)
{
if (!op().get_broadcast_axes().count(idx))
{
out_didxs.push_back(out_dim_limit - idx - 1);
}
}
set_output(
start_tile_function()
.add(builder::Input{op_input(0), "I"}.add_rdims("D", in_dim_limit, 0))
.add(builder::Output{"O"})
.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"O"}
.add_rindices("o", out_dim_limit, 0)
.add_dims([&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < out_dim_limit; ++idx)
{
if (op().get_broadcast_axes().count(idx))
{
out = std::to_string(op().get_broadcast_shape()[idx]);
}
else
{
out = "D" + std::to_string(--input_didx);
}
}
}))
.set(builder::ContractionInput{"I"}.add_indices(
[&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < in_dim_limit; ++idx)
{
out = "o" + std::to_string(out_didxs[idx]);
}
})))
.finalize());
}
// Constant emits a constant value: as a literal for scalars where possible, otherwise as a device
// tensor filled with the constant data.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Constant>::operator()()
{
check_inputs(0);
check_outputs(1);
bool output_to_result = false;
for (const std::shared_ptr<Node>& node : op().get_users())
{
if (dynamic_cast<op::Result*>(node.get()))
{
output_to_result = true;
break;
}
}
if (!op().get_shape().size() && !output_to_result)
{
switch (to_plaidml(op().get_element_type()))
{
case PLAIDML_DATA_BOOLEAN:
set_output(static_cast<std::int64_t>(*static_cast<const char*>(op().get_data_ptr())));
return;
case PLAIDML_DATA_INT8:
set_output(
static_cast<std::int64_t>(*static_cast<const std::int8_t*>(op().get_data_ptr())));
return;
case PLAIDML_DATA_INT16:
set_output(
static_cast<std::int64_t>(*static_cast<const std::int16_t*>(op().get_data_ptr())));
return;
case PLAIDML_DATA_INT32:
set_output(
static_cast<std::int64_t>(*static_cast<const std::int32_t*>(op().get_data_ptr())));
return;
case PLAIDML_DATA_INT64:
set_output(*static_cast<const std::int64_t*>(op().get_data_ptr()));
return;
case PLAIDML_DATA_UINT8:
set_output(
static_cast<std::int64_t>(*static_cast<const std::uint8_t*>(op().get_data_ptr())));
return;
case PLAIDML_DATA_UINT16:
set_output(
static_cast<std::int64_t>(*static_cast<const std::uint16_t*>(op().get_data_ptr())));
return;
case PLAIDML_DATA_UINT32:
set_output(
static_cast<std::int64_t>(*static_cast<const std::uint32_t*>(op().get_data_ptr())));
return;
case PLAIDML_DATA_UINT64:
set_output(
static_cast<std::int64_t>(*static_cast<const std::uint64_t*>(op().get_data_ptr())));
return;
case PLAIDML_DATA_FLOAT16:
set_output(static_cast<double>(
static_cast<float>(*static_cast<const half*>(op().get_data_ptr()))));
return;
case PLAIDML_DATA_FLOAT32:
set_output(static_cast<double>(*static_cast<const float*>(op().get_data_ptr())));
return;
case PLAIDML_DATA_FLOAT64:
set_output(static_cast<double>(*static_cast<const double*>(op().get_data_ptr())));
return;
default: break;
}
}
auto tensor = build()->config->dev->allocate(
to_plaidml(build()->config->ctx, op().get_element_type(), op().get_shape()));
{
vp::mapping<char> mp = tensor.map(vp::map_for_write);
const char* src = static_cast<const char*>(op().get_data_ptr());
char* dest = mp.raw();
std::copy(src, src + tensor.get_shape().buffer_size(), dest);
}
set_output(tensor);
}
// GetOutputElement pipes one of its N inputs to its output.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::GetOutputElement>::operator()()
{
check_inputs_ge(op().get_n() + 1);
check_outputs(1);
set_output(op_input(op().get_n()));
}
// Pad adds interior and exterior padding to a tensor.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Pad>::operator()()
{
check_inputs(2);
check_outputs(1);
auto tensor = op_input(0);
auto value = op_input(1);
// For padding, we construct two intermediate tensors: the first is the input tensor expanded by
// the requisite padding (with zeros in all padded locations), and the second is a boolean
// tensor expanded the same way, but with true at the source locations and false at the padded
// locations. We then combine these elementwise using a ternary condition, with the pad value
// being used everywhere the boolean intermediate is false.
// It's a little wasteful, but it expresses the logic correctly, and doesn't take long to run;
// the runtime is also free to optimize it through combining the intermediate contractions.
NGRAPH_DEBUG << "Pad below: " << op().get_padding_below();
NGRAPH_DEBUG << "Pad above: " << op().get_padding_above();
NGRAPH_DEBUG << "Pad interior: " << op().get_padding_interior();
NGRAPH_DEBUG << "Pad input dims: " << op().get_input_shape(0);
NGRAPH_DEBUG << "Pad output dims: " << op().get_shape();
auto dim_limit = op().get_shape().size();
bool any_zero_dims = false;
for (auto sz : op().get_input_shape(0))
{
if (!sz)
{
any_zero_dims = true;
break;
}
}
auto out_dsize = [&](std::size_t idx) {
std::ostringstream s;
std::size_t total_pad = op().get_padding_below().at(idx) + op().get_padding_above().at(idx);
std::size_t in_dsize = op().get_input_shape(0).at(idx);
if (in_dsize)
{
total_pad += op().get_padding_interior().at(idx) * (in_dsize - 1);
}
if (!any_zero_dims)
{
s << "DI" << idx + 1;
if (total_pad)
{
s << " + " << total_pad;
}
}
else
{
s << total_pad + in_dsize;
}
return s.str();
};
auto out_didx = [&](std::size_t idx) {
std::ostringstream s;
auto below = op().get_padding_below().at(idx);
if (below)
{
s << below << " + ";
}
auto interior = op().get_padding_interior().at(idx) + 1;
if (interior != 1)
{
s << "(d" << idx + 1 << " * " << interior << ")";
}
else
{
s << "d" << idx + 1;
}
return s.str();
};
auto flag_constraints = [&](std::size_t idx) {
std::ostringstream s;
s << "d" << idx + 1 << " < DI" << idx + 1;
return s.str();
};
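// A sketch of the expressions above with hypothetical values (dimension 0, padding_below=1,
// padding_above=1, interior=1, input extent 4): out_dsize yields "DI1 + 5" and out_didx yields
// "1 + (d1 * 2)", i.e. input element d1 lands at output position 1 + 2*d1.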
auto f = start_tile_function();
f.add(builder::Input{op_input(1), "V"}).add(builder::Output{"O"});
if (!any_zero_dims)
{
f.add(builder::Input{op_input(0), "I"}.add_dims("DI", 1, dim_limit + 1))
.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"P"}
.add_indices(
[&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < dim_limit; ++idx)
{
out = out_didx(idx);
}
})
.add_dims([&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < dim_limit; ++idx)
{
out = out_dsize(idx);
}
}))
.set(builder::ContractionInput{"I"}.add_indices("d", 1, dim_limit + 1)))
.add(builder::Elementwise{"T", "1"})
.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"F"}
.add_indices(
[&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < dim_limit; ++idx)
{
out = out_didx(idx);
}
})
.add_dims([&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < dim_limit; ++idx)
{
out = out_dsize(idx);
}
}))
.set(builder::ContractionInput{"T"})
.add_constraints([&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < dim_limit; ++idx)
{
out = flag_constraints(idx);
}
}))
.add(builder::Elementwise{"O", "F ? P : V"});
}
else
{
f.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"O"}
.add_indices("d", 0, dim_limit)
.add_dims([&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < dim_limit; ++idx)
{
out = out_dsize(idx);
}
}))
.set(builder::ContractionInput{"V"}));
}
set_output(f.finalize());
}
// Reshape reshapes an input tensor.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Reshape>::operator()()
{
check_inputs(1);
check_outputs(1);
// The reshape operation doesn't just describe a new way of looking at an input tensor; it can
// optionally rearrange the elements of the input tensor.
auto src = op_input(0);
auto dim_limit = op().get_inputs()[0].get_shape().size();
if (!dim_limit)
{
// This reshape is being used to create a tensor from a scalar. PlaidML's reshape()
// operation requires a tensor input (as of this writing), so instead of a reshape(), we'll
// just use a contraction to build the tensor.
auto& out_shape = op().get_shape();
set_output(
start_tile_function()
.add(builder::Input{src, "I"})
.add(builder::Output{"O"})
.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"O"}
.add_indices("d", 0, out_shape.size())
.add_dims(
[&](std::back_insert_iterator<std::list<std::string>> out) {
std::transform(
out_shape.begin(),
out_shape.end(),
out,
[](std::size_t sz) { return std::to_string(sz); });
}))
.set(builder::ContractionInput{"I"}))
.finalize());
return;
}
std::size_t dim_idx = 0;
auto input_order = op().get_input_order();
for (std::size_t src_idx : op().get_input_order())
{
if (src_idx != dim_idx++)
{
// This reshape operation doesn't just describe a new way of looking at an input tensor;
// it's also rearranging the elements of the input tensor. This is pretty easy to
// handle with a contraction.
src =
start_tile_function()
.add(builder::Input{src, "I"}.add_dims("D", 1, dim_limit + 1))
.add(builder::Output{"O"})
.add(
builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"O"}
.add_indices([&](
std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < dim_limit; ++idx)
{
out = "d" + std::to_string(input_order[idx] + 1);
}
})
.add_dims([&](
std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < dim_limit; ++idx)
{
out = "D" + std::to_string(input_order[idx] + 1);
}
}))
.set(builder::ContractionInput{"I"}.add_indices("d", 1, dim_limit + 1)))
.finalize();
break;
}
}
std::ostringstream reshape_expr;
reshape_expr << "reshape(I";
for (std::size_t dsize : op().get_output_shape())
{
reshape_expr << ", " << dsize;
}
reshape_expr << ")";
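// For example, an output shape of {6, 4} produces the Tile expression "reshape(I, 6, 4)".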
set_output(start_tile_function()
.add(builder::Input{src, "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise("O", reshape_expr.str()))
.finalize());
}
// Select conditionally selects elements from input tensors.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Select>::operator()()
{
check_inputs(3);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "C"})
.add(builder::Input{op_input(1), "T"})
.add(builder::Input{op_input(2), "F"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "C ? T : F"})
.finalize());
}
// StopGradient is used by nGraph for bprop graph generation; as a kernel it simply emits a zero.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::StopGradient>::operator()()
{
set_output(start_tile_function()
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "0"})
.finalize());
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::Broadcast>::Registration register_broadcast;
ngraph::runtime::plaidml::Impl<ngraph::op::Constant>::Registration register_constant;
ngraph::runtime::plaidml::Impl<ngraph::op::GetOutputElement>::Registration
register_get_output_element;
ngraph::runtime::plaidml::Impl<ngraph::op::Pad>::Registration register_pad;
ngraph::runtime::plaidml::Impl<ngraph::op::Reshape>::Registration register_reshape;
ngraph::runtime::plaidml::Impl<ngraph::op::Select>::Registration register_select;
ngraph::runtime::plaidml::Impl<ngraph::op::StopGradient>::Registration register_stop_gradient;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/argmax.hpp"
#include "ngraph/op/argmin.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
#include "ngraph/runtime/plaidml/plaidml_translate.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
template <typename O>
class IndexReductionImpl : public BaseImpl<O>
{
public:
IndexReductionImpl(Build* build, const O& op)
: BaseImpl<O>{build, op}
{
}
void build_index_reduction(const char* agg_op);
};
}
}
}
template <typename O>
void ngraph::runtime::plaidml::IndexReductionImpl<O>::build_index_reduction(const char* agg_op)
{
this->check_inputs(1);
this->check_outputs(1);
auto dim_limit = this->op().get_inputs()[0].get_shape().size();
auto reduction_axis_str = std::to_string(this->op().get_reduction_axis());
this->set_output(
this->start_tile_function()
.add(builder::Input{this->op_input(), "I"}.add_dims("D", 0, dim_limit))
.add(builder::Output{"O"})
.add( // Compute the maxes along the specified axis in the input
builder::UnaryContraction{agg_op}
.set(builder::ContractionOutput{"SelVal"}
.add_indices([&](
std::back_insert_iterator<std::list<std::string>> out) {
for (auto idx = 0; idx < dim_limit; ++idx)
{
out = (idx == this->op().get_reduction_axis() ? "rd" : "d") +
std::to_string(idx);
}
})
.add_dims([&](std::back_insert_iterator<std::list<std::string>> out) {
for (auto idx = 0; idx < dim_limit; ++idx)
{
if (idx == this->op().get_reduction_axis())
{
out = "1";
}
else
{
out = "D" + std::to_string(idx);
}
}
}))
.set(builder::ContractionInput{"I"}.add_indices("d", 0, dim_limit)))
.add( // Compare the input against the (broadcasted) max values, and select the indices
// where the max val occurs
builder::Elementwise{"SelValIdxs",
"I == SelVal ? index(I, " + reduction_axis_str + ") : D" +
reduction_axis_str})
.add( // Select the maximum index
builder::UnaryContraction{"<"}
.set(builder::ContractionOutput{"SelIdx"}
.add_indices(
[&](std::back_insert_iterator<std::list<std::string>> out) {
for (auto idx = 0; idx < dim_limit; ++idx)
{
if (idx != this->op().get_reduction_axis())
{
out = "d" + std::to_string(idx);
}
}
})
.add_dims([&](std::back_insert_iterator<std::list<std::string>> out) {
for (auto idx = 0; idx < dim_limit; ++idx)
{
if (idx != this->op().get_reduction_axis())
{
out = "D" + std::to_string(idx);
}
}
}))
.set(builder::ContractionInput{"SelValIdxs"}.add_indices("d", 0, dim_limit)))
.add( // Convert to the requested output element type (if any)
builder::Elementwise{"O",
tile_converter("SelIdx", this->op().get_index_element_type())})
.finalize());
}
template <>
struct ngraph::runtime::plaidml::ParentImpl<ngraph::op::ArgMax>
{
using Type = IndexReductionImpl<ngraph::op::ArgMax>;
};
template <>
struct ngraph::runtime::plaidml::ParentImpl<ngraph::op::ArgMin>
{
using Type = IndexReductionImpl<ngraph::op::ArgMin>;
};
// ArgMax computes the maximum index along a tensor axis.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::ArgMax>::operator()()
{
build_index_reduction(">");
}
// ArgMin computes the minimum index along a tensor axis.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::ArgMin>::operator()()
{
build_index_reduction("<");
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::ArgMax>::Registration register_argmax;
ngraph::runtime::plaidml::Impl<ngraph::op::ArgMin>::Registration register_argmin;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/parameter.hpp"
#include "ngraph/op/result.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
namespace vp = vertexai::plaidml;
// Parameter binds a descriptor::Tensor to a PlaidML Placeholder.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Parameter>::operator()()
{
check_inputs(0);
check_outputs(1);
vp::placeholder ph{build()->io_dim_override ? build()->io_dim_override_count
: op().get_output_shape(0).size()};
std::string name = std::string{"I"} + std::to_string(build()->input_names.size());
descriptor::Tensor* tv = op().get_output_tensor_ptr().get();
build()->bindings.emplace(tv, TensorInfo{ph, TensorContents::DATA});
build()->composer.input(name, ph);
build()->input_names.emplace(tv, std::move(name));
}
// Result binds a PlaidML variable to a composed function output.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Result>::operator()()
{
check_inputs(1);
check_outputs(1);
std::string name = std::string{"O"} + std::to_string(build()->output_names.size());
descriptor::Tensor* tv = op().get_output_tensor_ptr().get();
build()->composer.output(name, op_input());
build()->output_names.emplace(tv, std::move(name));
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::Parameter>::Registration register_parameter;
ngraph::runtime::plaidml::Impl<ngraph::op::Result>::Registration register_result;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/lrn.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
// LRN implements Local Response Normalization
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::LRN>::operator()()
{
check_inputs(1);
check_outputs(1);
auto dim_limit = op().get_inputs()[0].get_shape().size();
auto rank = dim_limit - 2;
auto distance = op().get_nsize() / 2;
std::ostringstream div_expr;
div_expr << "I / pow(" << op().get_bias() << ".0 + ((" << op().get_alpha() << ".0 / "
<< op().get_nsize() << ".0) * S), " << op().get_beta() << ".0)";
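// In conventional terms this computes, per element,
//   O = I / (bias + (alpha / nsize) * S)^beta
// where S is the windowed sum of squares over the channel axis built by the contraction below.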
set_output(
start_tile_function()
.add(builder::Input{op_input(), "I"}.add_dims({"N", "C"}).add_dims("D", 0, rank))
.add(builder::Output{"O"})
.add(builder::Elementwise{"ISQ", "I * I"})
.add(builder::UnaryContraction{"+"}
.set(builder::ContractionOutput{"S"}
.add_indices({"n", "c"})
.add_indices("d", 0, rank)
.add_dims({"N", "C"})
.add_dims("D", 0, rank))
.set(builder::ContractionInput{"ISQ"}
.add_indices({"n", "c + z - " + std::to_string(distance)})
.add_indices("d", 0, rank))
.add_constraints([&](std::back_insert_iterator<std::list<std::string>> out) {
out = "z < " + std::to_string(op().get_nsize());
}))
.add(builder::Elementwise{"O", div_expr.str()})
.finalize());
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::LRN>::Registration register_local_response_norm;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/and.hpp"
#include "ngraph/op/not.hpp"
#include "ngraph/op/or.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
// And performs a simple elementwise logical and.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::And>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0, TensorContents::LOGICAL), "A"})
.add(builder::Input{op_input(1, TensorContents::LOGICAL), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "A ? B : A"})
.finalize(),
TensorContents::LOGICAL);
}
// Not performs a simple elementwise logical not.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Not>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0, TensorContents::LOGICAL), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "cmp_eq(I, 0)"})
.finalize(),
TensorContents::LOGICAL);
}
// Or performs a simple elementwise logical or.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Or>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0, TensorContents::LOGICAL), "A"})
.add(builder::Input{op_input(1, TensorContents::LOGICAL), "B"})
.add(builder::Output{"C"})
.add(builder::Elementwise{"C", "A ? A : B"})
.finalize(),
TensorContents::LOGICAL);
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::And>::Registration register_and;
ngraph::runtime::plaidml::Impl<ngraph::op::Not>::Registration register_not;
ngraph::runtime::plaidml::Impl<ngraph::op::Or>::Registration register_or;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <sstream>
#include "ngraph/op/one_hot.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
#include "ngraph/runtime/plaidml/plaidml_translate.hpp"
// OneHot performs one-hot encoding along the requested axis.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::OneHot>::operator()()
{
check_inputs(1);
check_outputs(1);
// Here's what's going on to implement OneHot:
//
// * We reshape the input tensor to add a size=1 dimension where we want the one-hot axis to be,
//
// * We create an index tensor that's size=1 on every dimension except the one-hot dimension,
//
// * We perform an elementwise conditional across them to assign the one-hot values.
//
// The broadcast rules will expand the index tensor on all non-one-hot dimensions to match the
// input, and will expand the input tensor on the one-hot dimension to match the index.
//
// In theory, it'd be pretty easy to implement all this with purely elementwise operations. The
// current definition of index() requires an input tensor of the index() output shape, and it's
// a little tricky to fix that, so we generate a zero tensor of the correct shape using a
// contraction. TODO: Optimize out the zero tensor contraction.
const auto& in_shape = op().get_inputs()[0].get_shape();
const auto& out_shape = op().get_shape();
std::ostringstream in_reshape;
for (std::size_t idx = 0; idx < out_shape.size(); ++idx)
{
if (idx)
{
in_reshape << ", ";
}
if (idx == op().get_one_hot_axis())
{
in_reshape << 1;
}
else
{
in_reshape << out_shape[idx];
}
}
set_output(
start_tile_function()
.add(builder::Input{op_input(), "I"}.add_dims("D", 0, in_shape.size()))
.add(builder::Input{static_cast<std::int64_t>(0), "Zero"})
.add(builder::Output{"O"})
.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"ZS"}
.add_dims([&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < out_shape.size(); ++idx)
{
if (idx == op().get_one_hot_axis())
{
out = std::to_string(out_shape[idx]);
}
else
{
out = "1";
}
}
})
.add_indices("d", 0, out_shape.size()))
.set(builder::ContractionInput{"Zero"}))
.add(builder::Elementwise{"Idx",
"index(ZS, " + std::to_string(op().get_one_hot_axis()) + ")"})
.add(builder::Elementwise{"IS", "reshape(I, " + in_reshape.str() + ")"})
.add(builder::Elementwise{"OV", "IS == Idx ? 1 : 0"})
.add(builder::Elementwise{"O", tile_converter("OV", op().get_element_type())})
.finalize());
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::OneHot>::Registration register_one_hot;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/log.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/runtime/plaidml/plaidml_convpool_formatter.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
// AvgPool implements a batch average pooling operation.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::AvgPool>::operator()()
{
check_inputs(1);
check_outputs(1);
auto src_dims = op().get_inputs()[0].get_shape().size() - 2;
const auto& padding_above = op().get_padding_above();
const auto& padding_below = op().get_padding_below();
const auto& window_shape = op().get_window_shape();
const auto& strides = op().get_window_movement_strides();
const auto& include_padding = op().get_include_padding_in_avg_computation();
ngraph::CoordinateDiff pad_above;
ngraph::CoordinateDiff pad_below;
for (const auto& pad : padding_above)
{
pad_above.push_back(pad);
}
for (const auto& pad : padding_below)
{
pad_below.push_back(pad);
}
// Overpadding occurs iff any padding value is >= its corresponding window shape. If this
// happens, we need to conditionally set the padded values to the operation default.
bool overpad = false;
for (std::size_t idx = 0; idx < src_dims; ++idx)
{
auto shape = window_shape[idx];
if (shape <= padding_below[idx] || shape <= padding_above[idx])
{
overpad = true;
break;
}
}
if (overpad)
{
throw std::runtime_error{
"The PlaidML nGraph backend does not support over-padded AvgPool "
"operations"};
}
ConvPoolFormatter cpf(src_dims,
pad_below,
pad_above,
strides,
window_shape,
ConvPoolFormatter::OpType::AvgPool,
ConvPoolFormatter::DerivType::None);
vertexai::plaidml::variable one{static_cast<std::int64_t>(1)};
auto f = start_tile_function();
f.add(cpf.I_in_header(op_input()))
.add(builder::Input{one, "One"})
.add(cpf.O_out_header())
.add(cpf.Broadcast_Ones());
if (include_padding)
{
f.add(builder::Elementwise{"Count", std::to_string(shape_size(window_shape))});
}
else
{
f.add(cpf.Count());
}
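// The pooling contraction produces the windowed sum S; dividing by Count (the full window size
// when padding is included in the average, otherwise the per-position element count built by
// cpf.Count()) yields the average.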
f.add(cpf.PoolContraction()).add(builder::Elementwise{"O", "S / Count"});
set_output(f.finalize());
}
// MaxPool implements a batch max pooling operation.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::MaxPool>::operator()()
{
check_inputs(1);
check_outputs(1);
auto src_dims = op().get_inputs()[0].get_shape().size() - 2;
const auto& padding_above = op().get_padding_above();
const auto& padding_below = op().get_padding_below();
const auto& window_shape = op().get_window_shape();
const auto& strides = op().get_window_movement_strides();
ngraph::CoordinateDiff pad_above;
ngraph::CoordinateDiff pad_below;
for (const auto& pad : padding_above)
{
pad_above.push_back(pad);
}
for (const auto& pad : padding_below)
{
pad_below.push_back(pad);
}
NGRAPH_DEBUG << "MaxPool padding_below: " << padding_below;
NGRAPH_DEBUG << "MaxPool padding_above: " << padding_above;
NGRAPH_DEBUG << "MaxPool window_shape: " << window_shape;
NGRAPH_DEBUG << "MaxPool window_movement_strides: " << strides;
// Overpadding occurs iff any padding value is >= its corresponding window shape. If this
// happens, we need to conditionally set the padded values to the operation default.
bool overpad = false;
for (std::size_t idx = 0; idx < src_dims; ++idx)
{
auto shape = window_shape[idx];
if (shape <= padding_below[idx] || shape <= padding_above[idx])
{
overpad = true;
break;
}
}
if (overpad)
{
throw std::runtime_error{
"The PlaidML nGraph backend does not support over-padded MaxPool "
"operations"};
}
ConvPoolFormatter cpf(src_dims,
pad_below,
pad_above,
strides,
window_shape,
ConvPoolFormatter::OpType::MaxPool,
ConvPoolFormatter::DerivType::None);
set_output(start_tile_function()
.add(cpf.I_in_header(op_input()))
.add(cpf.O_out_header())
.add(cpf.PoolContraction())
.finalize());
}
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::AvgPoolBackprop>::operator()()
{
check_inputs(1);
check_outputs(1);
auto src_dims = op().get_inputs()[0].get_shape().size() - 2;
const auto& forward_arg_shape = op().get_forward_arg_shape();
const auto& padding_above = op().get_padding_above();
const auto& padding_below = op().get_padding_below();
const auto& window_shape = op().get_window_shape();
const auto& strides = op().get_window_movement_strides();
const auto& include_padding = op().get_include_padding_in_avg_computation();
if (include_padding)
{
throw std::runtime_error("Include padding in average not yet implemented in PlaidML");
}
ngraph::CoordinateDiff pad_above;
ngraph::CoordinateDiff pad_below;
for (const auto& pad : padding_above)
{
pad_above.push_back(pad);
}
for (const auto& pad : padding_below)
{
pad_below.push_back(pad);
}
// Overpadding occurs iff any padding value is >= its corresponding window shape. If this
// happens, we need to conditionally set the padded values to the operation default.
bool overpad = false;
for (std::size_t idx = 0; idx < src_dims; ++idx)
{
auto shape = window_shape[idx];
if (shape <= padding_below[idx] || shape <= padding_above[idx])
{
overpad = true;
break;
}
}
if (overpad)
{
throw std::runtime_error{
"The PlaidML nGraph backend does not support over-padded AvgPool "
"operations"};
}
ConvPoolFormatter cpf(src_dims,
pad_below,
pad_above,
strides,
window_shape,
ConvPoolFormatter::OpType::AvgPool,
ConvPoolFormatter::DerivType::Data);
const auto& incoming_deriv = op_input();
vertexai::plaidml::variable one{static_cast<std::int64_t>(1)};
auto ret = start_tile_function();
ret.add(cpf.O_in_header(incoming_deriv))
.add(builder::Input{one, "One"})
.add(builder::Output{"DI"});
for (std::size_t i = 2; i < forward_arg_shape.size(); ++i)
{
std::ostringstream s;
s << "XI" << i - 2;
ret.add(builder::Input{static_cast<std::int64_t>(forward_arg_shape[i]), s.str()});
}
set_output(ret.add(cpf.Broadcast_Ones())
.add(cpf.Count())
.add(builder::Elementwise{"S", "DO / Count"})
.add(cpf.PoolContraction())
.finalize());
}
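// MaxPoolBackprop computes the derivative of a batch max pooling operation, routing the incoming
// derivative back to the input locations that produced each pooled maximum.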
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::MaxPoolBackprop>::operator()()
{
check_inputs(2);
check_outputs(1);
auto src_dims = op().get_inputs()[0].get_shape().size() - 2;
const auto& padding_above = op().get_padding_above();
const auto& padding_below = op().get_padding_below();
const auto& window_shape = op().get_window_shape();
const auto& strides = op().get_window_movement_strides();
ngraph::CoordinateDiff pad_above;
ngraph::CoordinateDiff pad_below;
for (const auto& pad : padding_above)
{
pad_above.push_back(pad);
}
for (const auto& pad : padding_below)
{
pad_below.push_back(pad);
}
// Overpadding occurs iff any padding value is >= its corresponding window shape. If this
// happens, we need to conditionally set the padded values to the operation default.
bool overpad = false;
for (std::size_t idx = 0; idx < src_dims; ++idx)
{
auto shape = window_shape[idx];
if (shape <= padding_below[idx] || shape <= padding_above[idx])
{
overpad = true;
break;
}
}
if (overpad)
{
throw std::runtime_error{
"The PlaidML nGraph backend does not support over-padded MaxPool "
"operations"};
}
ConvPoolFormatter cpf(src_dims,
pad_below,
pad_above,
strides,
window_shape,
ConvPoolFormatter::OpType::MaxPool,
ConvPoolFormatter::DerivType::Data);
const auto& input = op_input(0);
const auto& incoming_deriv = op_input(1);
set_output(start_tile_function()
.add(cpf.I_in_header(input))
.add(cpf.O_in_header(incoming_deriv))
.add(builder::Output{"DI"})
.add(cpf.PoolContraction())
.add(cpf.PoolDerivContraction())
.finalize());
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::AvgPool>::Registration register_avg_pool;
ngraph::runtime::plaidml::Impl<ngraph::op::MaxPool>::Registration register_max_pool;
ngraph::runtime::plaidml::Impl<ngraph::op::AvgPoolBackprop>::Registration
register_avg_pool_backprop;
ngraph::runtime::plaidml::Impl<ngraph::op::MaxPoolBackprop>::Registration
register_max_pool_backprop;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/product.hpp"
#include "ngraph/op/reduce.hpp"
#include "ngraph/op/sum.hpp"
#include "ngraph/runtime/plaidml/plaidml_compiler.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
namespace vp = vertexai::plaidml;
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
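// ReductionImpl provides the shared logic used by the simple reduction operations; build_reduction()
// emits a unary contraction that applies the supplied aggregation operator over the reduction axes.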
template <typename O>
class ReductionImpl : public BaseImpl<O>
{
public:
ReductionImpl(Build* build, const O& op)
: BaseImpl<O>{build, op}
{
}
void build_reduction(const char* agg_op);
};
}
}
}
template <typename O>
void ngraph::runtime::plaidml::ReductionImpl<O>::build_reduction(const char* agg_op)
{
this->check_inputs(1);
this->check_outputs(1);
auto in_shape = this->op().get_input_shape(0);
auto in_dim_limit = in_shape.size();
std::vector<std::size_t> out_idxs;
for (std::size_t in_idx = 0; in_idx < in_dim_limit; ++in_idx)
{
if (!this->op().get_reduction_axes().count(in_idx))
{
out_idxs.push_back(in_idx);
}
}
this->set_output(
this->start_tile_function()
.add(builder::Output{"O"})
.add(builder::Input{this->op_input(0), "I"}.add_dims("D", 1, in_dim_limit + 1))
.add(builder::UnaryContraction{agg_op}
.set(builder::ContractionOutput{"O"}
.add_indices(
[&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < out_idxs.size(); ++idx)
{
out = "d" + std::to_string(out_idxs[idx] + 1);
}
})
.add_dims([&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < out_idxs.size(); ++idx)
{
out = "D" + std::to_string(out_idxs[idx] + 1);
}
}))
.set(builder::ContractionInput{"I"}.add_indices("d", 1, in_dim_limit + 1)))
.finalize());
}
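// The ParentImpl specializations route each reduction op through ReductionImpl, so the
// corresponding Impl<> classes inherit build_reduction().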
template <>
struct ngraph::runtime::plaidml::ParentImpl<ngraph::op::Max>
{
using Type = ngraph::runtime::plaidml::ReductionImpl<ngraph::op::Max>;
};
template <>
struct ngraph::runtime::plaidml::ParentImpl<ngraph::op::Min>
{
using Type = ngraph::runtime::plaidml::ReductionImpl<ngraph::op::Min>;
};
template <>
struct ngraph::runtime::plaidml::ParentImpl<ngraph::op::Product>
{
using Type = ngraph::runtime::plaidml::ReductionImpl<ngraph::op::Product>;
};
template <>
struct ngraph::runtime::plaidml::ParentImpl<ngraph::op::Reduce>
{
using Type = ngraph::runtime::plaidml::ReductionImpl<ngraph::op::Reduce>;
};
template <>
struct ngraph::runtime::plaidml::ParentImpl<ngraph::op::Sum>
{
using Type = ngraph::runtime::plaidml::ReductionImpl<ngraph::op::Sum>;
};
// Max reduces a tensor, taking the maximum along the specified axes.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Max>::operator()()
{
build_reduction(">");
}
// Min reduces a tensor, taking the minimum along the specified axes.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Min>::operator()()
{
build_reduction("<");
}
// Product reduces a tensor, taking the product along the specified axes.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Product>::operator()()
{
build_reduction("*");
}
// Reduce reduces a tensor with an arbitrary user-supplied reduction operation.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Reduce>::operator()()
{
check_inputs(2);
check_outputs(1);
// TODO: Special case known-easy reductions.
// To support arbitrary reduction operations, we take advantage of the fact that in nGraph, we
// have concrete dimension sizes. We start with the initial tensor (argument 1), construct N
// slices of tensor 0 (where N == the product of the sizes of the axes to reduce), and
// repeatedly apply the supplied aggregation function to them.
//
// This is somewhat inefficient, but works.
const Shape& input_shape = op().get_input_shape(0);
auto dim_limit = input_shape.size();
Shape reduction_shape;
for (std::size_t axis_idx = 0; axis_idx < input_shape.size(); ++axis_idx)
{
if (op().get_reduction_axes().count(axis_idx))
{
reduction_shape.emplace_back(input_shape[axis_idx]);
}
}
std::size_t agg_dim_limit = dim_limit - reduction_shape.size();
vp::function agg_fn;
{
Build b;
b.io_dim_override = true;
b.io_dim_override_count = agg_dim_limit;
build()->compiler->build(op().get_functions()[0], &b);
agg_fn = b.composer;
}
vp::variable input = op_input(0);
// Note that we need to explicitly broadcast the 0-dimensional base result to match the
// aggregation dimension count.
vp::variable result =
start_tile_function()
.add(builder::Input{op_input(1), "I"})
.add(builder::Output{"O"})
.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"O"}
.add_indices("d", 0, agg_dim_limit)
.add_dims([&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < agg_dim_limit; ++idx)
{
out = "1";
}
}))
.set(builder::ContractionInput{"I"}))
.finalize();
CoordinateTransform reduction_coords{reduction_shape};
for (const Coordinate& coordinate : reduction_coords)
{
result = agg_fn(
result,
start_tile_function()
.add(builder::Input{input, "I"}.add_dims("D", 0, dim_limit))
.add(builder::Output{"O"})
.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"O"}
.add_indices(
[&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < input_shape.size(); ++idx)
{
if (!op().get_reduction_axes().count(idx))
{
out = "d" + std::to_string(idx);
}
}
})
.add_dims(
[&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < input_shape.size(); ++idx)
{
if (!op().get_reduction_axes().count(idx))
{
out = "D" + std::to_string(idx);
}
}
}))
.set(builder::ContractionInput{"I"}.add_indices(
[&](std::back_insert_iterator<std::list<std::string>> out) {
// N.B. cidx indexes into the current reduction coordinate; it must persist
// across dimensions, so it is declared outside the loop.
std::size_t cidx = 0;
for (std::size_t idx = 0; idx < input_shape.size(); ++idx)
{
if (!op().get_reduction_axes().count(idx))
{
out = "d" + std::to_string(idx);
}
else
{
out = std::to_string(coordinate[cidx++]);
}
}
})))
.finalize());
}
set_output(result);
}
// Sum reduces a tensor, summing the specified axes.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Sum>::operator()()
{
build_reduction("+");
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::Max>::Registration register_max;
ngraph::runtime::plaidml::Impl<ngraph::op::Min>::Registration register_min;
ngraph::runtime::plaidml::Impl<ngraph::op::Product>::Registration register_product;
ngraph::runtime::plaidml::Impl<ngraph::op::Reduce>::Registration register_reduce;
ngraph::runtime::plaidml::Impl<ngraph::op::Sum>::Registration register_sum;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <sstream>
#include "ngraph/op/replace_slice.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
// ReplaceSlice replaces part of a tensor with another tensor.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::ReplaceSlice>::operator()()
{
check_inputs(2);
check_outputs(1);
// For ReplaceSlice:
//
// * Pad the second tensor to match the first (same-size dimensions and offset according to the
//   lower bounds of the replacement, with the desired stridings).
//
// * Generate a boolean tensor of the same shape as the first, where true == "Do the
//   replacement".
//
// * Use a trinary to do the replacement.
const auto& shape = op().get_shape();
set_output(
start_tile_function()
.add(builder::Input{op_input(0), "L"}.add_dims("D", 0, shape.size()))
.add(builder::Input{op_input(1), "S"}.add_dims("SD", 0, shape.size()))
.add(builder::Output{"O"})
.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"O"}
.add_dims("D", 0, shape.size())
.add_indices(
[&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < shape.size(); ++idx)
{
auto stride = op().get_strides()[idx];
auto lower_bound = op().get_lower_bounds()[idx];
std::ostringstream didx;
if ((stride != 1) && lower_bound)
{
didx << "(";
}
didx << "d" << idx;
if (stride != 1)
{
didx << "*" << stride;
}
if ((stride != 1) && lower_bound)
{
didx << ")";
}
if (lower_bound)
{
didx << "+" << lower_bound;
}
out = didx.str();
}
}))
.set(builder::ContractionInput{"S"}.add_indices("d", 0, shape.size()))
.add_constraints([&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < shape.size(); ++idx)
{
out = "d" + std::to_string(idx) + " < " +
std::to_string(op().get_upper_bounds()[idx] -
op().get_lower_bounds()[idx]);
}
})
.set_default("L"))
.finalize());
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::ReplaceSlice>::Registration register_replace_slice;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <sstream>
#include "ngraph/op/reverse.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
// Reverse reverses the selected axes within a tensor.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Reverse>::operator()()
{
check_inputs(1);
check_outputs(1);
const auto& shape = op().get_shape();
set_output(start_tile_function()
.add(builder::Input{op_input(), "I"}.add_dims("D", 0, shape.size()))
.add(builder::Output{"O"})
.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"O"}
.add_indices("d", 0, shape.size())
.add_dims("D", 0, shape.size()))
.set(builder::ContractionInput{"I"}.add_indices(
[&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < shape.size(); ++idx)
{
auto sidx = std::to_string(idx);
if (op().get_reversed_axes().count(idx))
{
out = "D" + sidx + "-d" + sidx + "-1";
}
else
{
out = "d" + sidx;
}
}
})))
.finalize());
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::Reverse>::Registration register_reverse;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/log.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
// Slice takes a sub-slice of a tensor.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Slice>::operator()()
{
check_inputs(1);
check_outputs(1);
NGRAPH_DEBUG << "Slice: low: " << op().get_lower_bounds();
NGRAPH_DEBUG << "Slice high: " << op().get_upper_bounds();
NGRAPH_DEBUG << "Slice stride: " << op().get_strides();
const auto& shape = op().get_inputs()[0].get_shape();
auto dim_limit = shape.size();
set_output(
start_tile_function()
.add(builder::Input{op_input(), "I"}.add_dims("ID", 0, dim_limit))
.add(builder::Output{"O"})
.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"O"}
.add_indices("od", 0, dim_limit)
.add_dims([&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < dim_limit; ++idx)
{
std::ostringstream s;
std::size_t stride = op().get_strides()[idx];
std::ptrdiff_t trim_count =
op().get_lower_bounds()[idx] +
(shape[idx] - op().get_upper_bounds()[idx]) + 1 - stride;
if ((stride != 1) && trim_count)
{
s << "(";
}
s << "ID" << idx;
if (0 < trim_count)
{
s << " - " << trim_count;
}
if (trim_count < 0)
{
s << " + " << -trim_count;
}
if ((stride != 1) && trim_count)
{
s << ")";
}
if (stride != 1)
{
s << " / " << stride;
}
out = s.str();
}
}))
.set(builder::ContractionInput{"I"}.add_indices(
[&](std::back_insert_iterator<std::list<std::string>> out) {
for (std::size_t idx = 0; idx < dim_limit; ++idx)
{
std::ostringstream s;
std::size_t stride = op().get_strides()[idx];
std::size_t offset = op().get_lower_bounds()[idx];
if ((stride != 1) && offset)
{
s << "(";
}
s << "od" << idx;
if (stride != 1)
{
s << " * " << stride;
}
if ((stride != 1) && offset)
{
s << ")";
}
if (offset)
{
s << " + " << offset;
}
out = s.str();
}
})))
.finalize());
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::Slice>::Registration register_slice;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <sstream>
#include "ngraph/op/softmax.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
// Softmax implements a standard ML softmax operation.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Softmax>::operator()()
{
check_inputs(1);
check_outputs(1);
const auto& shape = op().get_inputs()[0].get_shape();
auto dim_limit = shape.size();
auto f = start_tile_function();
f.add(builder::Input{op_input(0), "I"}.add_dims("D", 0, dim_limit)).add(builder::Output{"O"});
bool reorder_needed = false;
bool saw_element = false;
std::size_t groups = 1;
std::size_t elements = 1;
std::vector<std::size_t> group_idxs;
std::vector<std::size_t> element_idxs;
for (std::size_t didx = 0; didx < shape.size(); ++didx)
{
if (op().get_axes().count(didx))
{
elements *= shape[didx];
element_idxs.push_back(didx);
saw_element = true;
}
else
{
groups *= shape[didx];
group_idxs.push_back(didx);
if (saw_element)
{
reorder_needed = true;
}
}
}
const char* input = "I";
const char* output = "O";
const char* reshape_output = output;
bool reshape_needed = dim_limit != 2;
if (!reorder_needed)
{
reshape_needed |= shape[0] != groups;
}
else
{
f.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"RI"}
.add_dims([&](std::back_insert_iterator<std::list<std::string>> out) {
for (auto idx : group_idxs)
{
out = "D" + std::to_string(idx);
}
for (auto idx : element_idxs)
{
out = "D" + std::to_string(idx);
}
})
.add_indices([&](std::back_insert_iterator<std::list<std::string>> out) {
for (auto idx : group_idxs)
{
out = "d" + std::to_string(idx);
}
for (auto idx : element_idxs)
{
out = "d" + std::to_string(idx);
}
}))
.set(builder::ContractionInput{"I"}.add_indices("d", 0, dim_limit)));
input = "RI";
output = "RO";
if (group_idxs.size())
{
reshape_needed |= shape[group_idxs[0]] != groups;
}
else
{
reshape_needed |= shape[element_idxs[0]] != groups;
}
}
if (reshape_needed)
{
std::ostringstream reshape;
reshape << "reshape(" << input << ", " << groups << ", " << elements << ")";
f.add(builder::Elementwise{"GI", reshape.str()});
input = "GI";
reshape_output = output;
output = "GO";
}
{
// Take the softmax.
std::ostringstream softmax;
softmax << "builtin_softmax(" << input << ", " << groups << ", " << elements << ")";
f.add(builder::Elementwise{output, softmax.str()});
}
if (reshape_needed)
{
// Unbundle the axes.
std::ostringstream reshape;
reshape << "reshape(GO";
for (auto didx : group_idxs)
{
reshape << ", " << shape[didx];
}
for (auto didx : element_idxs)
{
reshape << ", " << shape[didx];
}
reshape << ")";
f.add(builder::Elementwise{reshape_output, reshape.str()});
output = reshape_output;
}
if (reorder_needed)
{
f.add(builder::UnaryContraction{"="}
.set(builder::ContractionOutput{"O"}
.add_dims("D", 0, dim_limit)
.add_indices("d", 0, dim_limit))
.set(builder::ContractionInput{output}.add_indices(
[&](std::back_insert_iterator<std::list<std::string>> out) {
for (auto idx : group_idxs)
{
out = "d" + std::to_string(idx);
}
for (auto idx : element_idxs)
{
out = "d" + std::to_string(idx);
}
})));
}
set_output(f.finalize());
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::Softmax>::Registration register_softmax;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/acos.hpp"
#include "ngraph/op/asin.hpp"
#include "ngraph/op/atan.hpp"
#include "ngraph/op/cos.hpp"
#include "ngraph/op/cosh.hpp"
#include "ngraph/op/exp.hpp"
#include "ngraph/op/log.hpp"
#include "ngraph/op/power.hpp"
#include "ngraph/op/sin.hpp"
#include "ngraph/op/sinh.hpp"
#include "ngraph/op/sqrt.hpp"
#include "ngraph/op/tan.hpp"
#include "ngraph/op/tanh.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
// acos performs a simple elementwise arccos function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Acos>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "acos(I)"})
.finalize());
}
// asin performs a simple elementwise arcsin function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Asin>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "asin(I)"})
.finalize());
}
// atan performs a simple elementwise arctan function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Atan>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "atan(I)"})
.finalize());
}
// cos performs a simple elementwise cos function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Cos>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "cos(I)"})
.finalize());
}
// cosh performs a simple elementwise hyperbolic cos function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Cosh>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "cosh(I)"})
.finalize());
}
// exp performs a simple elementwise natural exponential function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Exp>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "exp(I)"})
.finalize());
}
// log performs a simple elementwise natural logarithm function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Log>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "log(I)"})
.finalize());
}
// power performs a simple elementwise power function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Power>::operator()()
{
check_inputs(2);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Input{op_input(1), "E"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "pow(I, E)"})
.finalize());
}
// sin performs a simple elementwise sin function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Sin>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "sin(I)"})
.finalize());
}
// sinh performs a simple elementwise hyperbolic sin function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Sinh>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "sinh(I)"})
.finalize());
}
// sqrt performs a simple elementwise square root function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Sqrt>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "sqrt(I)"})
.finalize());
}
// tan performs a simple elementwise tangent function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Tan>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "tan(I)"})
.finalize());
}
// tanh performs a simple elementwise hyperbolic tangent function.
template <>
void ngraph::runtime::plaidml::Impl<ngraph::op::Tanh>::operator()()
{
check_inputs(1);
check_outputs(1);
set_output(start_tile_function()
.add(builder::Input{op_input(0), "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "tanh(I)"})
.finalize());
}
namespace
{
ngraph::runtime::plaidml::Impl<ngraph::op::Acos>::Registration register_acos;
ngraph::runtime::plaidml::Impl<ngraph::op::Asin>::Registration register_asin;
ngraph::runtime::plaidml::Impl<ngraph::op::Atan>::Registration register_atan;
ngraph::runtime::plaidml::Impl<ngraph::op::Cos>::Registration register_cos;
ngraph::runtime::plaidml::Impl<ngraph::op::Cosh>::Registration register_cosh;
ngraph::runtime::plaidml::Impl<ngraph::op::Exp>::Registration register_exp;
ngraph::runtime::plaidml::Impl<ngraph::op::Log>::Registration register_log;
ngraph::runtime::plaidml::Impl<ngraph::op::Power>::Registration register_power;
ngraph::runtime::plaidml::Impl<ngraph::op::Sin>::Registration register_sin;
ngraph::runtime::plaidml::Impl<ngraph::op::Sinh>::Registration register_sinh;
ngraph::runtime::plaidml::Impl<ngraph::op::Sqrt>::Registration register_sqrt;
ngraph::runtime::plaidml::Impl<ngraph::op::Tan>::Registration register_tan;
ngraph::runtime::plaidml::Impl<ngraph::op::Tanh>::Registration register_tanh;
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/plaidml/plaidml_tensor.hpp"
#include "ngraph/descriptor/layout/dense_tensor_layout.hpp"
#include "ngraph/log.hpp"
#include "ngraph/runtime/plaidml/plaidml_translate.hpp"
#include "ngraph/runtime/tensor.hpp"
namespace vp = vertexai::plaidml;
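// A PlaidML_Tensor wraps a PlaidML device buffer, optionally backed by host memory. Tensors with
// no backing memory start out "logically zero"; physical zeros are written only when the buffer
// is actually read, partially written, or used as an input.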
ngraph::runtime::plaidml::PlaidML_Tensor::PlaidML_Tensor(Config* config,
const ngraph::element::Type& element_type,
const ngraph::Shape& shape,
const std::string& name,
void* memory)
: Tensor{std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, name)}
, m_tensor{config->dev->allocate(
to_plaidml(config->ctx, element_type, shape, ConversionUse::FOR_IO))}
, m_memory{memory}
, m_memory_size{memory ? m_tensor.get_shape().buffer_size() : 0}
, m_is_logically_zero{memory ? false : true}
{
m_descriptor->set_tensor_layout(
std::make_shared<ngraph::descriptor::layout::DenseTensorLayout>(*m_descriptor));
NGRAPH_DEBUG << "Built PlaidML_Tensor " << this << " memory=" << m_memory
<< " type=" << element_type << " shape=" << shape;
}
void ngraph::runtime::plaidml::PlaidML_Tensor::write(const void* p, size_t tensor_offset, size_t n)
{
NGRAPH_DEBUG << "Write " << this << " offset=" << tensor_offset << " n=" << n
<< " is_logically_zero=" << m_is_logically_zero;
// As a special case: if we get a zero-sized write to offset zero, fill the tensor with zero.
if (n == 0 && tensor_offset == 0)
{
NGRAPH_DEBUG << "Logically zeroing tensor " << this;
m_is_logically_zero = true;
return;
}
bool is_full_write = (tensor_offset == 0 && n == m_tensor.get_shape().buffer_size());
vp::mapping<char> mp;
if (m_is_logically_zero || is_full_write)
{
// In either of these cases, we're completely replacing the existing data.
mp = m_tensor.map(vp::map_for_write);
}
else
{
// There may be existing non-zero data, and this is a partial buffer write; we need to read
// the existing data.
mp = m_tensor.map(vp::map_for_update);
}
if (m_is_logically_zero && !is_full_write)
{
// It's a partial write of a logically-zero buffer, so first, fill the buffer with physical
// zeros.
std::fill_n(mp.raw(), m_tensor.get_shape().buffer_size(), 0);
}
m_is_logically_zero = false;
const char* src = static_cast<const char*>(p);
char* dest = mp.raw() + tensor_offset;
std::copy(src, src + n, dest);
}
void ngraph::runtime::plaidml::PlaidML_Tensor::read(void* p, size_t tensor_offset, size_t n) const
{
NGRAPH_DEBUG << "Read " << this << " offset=" << tensor_offset << " n=" << n
<< " is_logically_zero=" << m_is_logically_zero;
char* dest = static_cast<char*>(p);
if (m_is_logically_zero)
{
std::fill_n(dest, n, 0);
return;
}
vp::mapping<char> mp = m_tensor.map(vp::map_for_read);
const char* src = mp.raw() + tensor_offset;
std::copy(src, src + n, dest);
}
void ngraph::runtime::plaidml::PlaidML_Tensor::sync_input()
{
if (!m_memory)
{
if (m_is_logically_zero)
{
NGRAPH_DEBUG << "Flushing logically zero " << this << " to physical memory";
// The tensor's about to be used for an input, and it's logically zero; we need to write
// physical zeros to its buffer.
auto mp = m_tensor.map(vp::map_for_write);
std::fill_n(mp.raw(), m_tensor.get_shape().buffer_size(), 0);
}
m_is_logically_zero = false;
return;
}
NGRAPH_DEBUG << "Syncing input for tensor " << this;
write(m_memory, 0, m_memory_size);
}
void ngraph::runtime::plaidml::PlaidML_Tensor::sync_output()
{
// The tensor's been used for an output, so it's no longer logically zero.
m_is_logically_zero = false;
if (!m_memory)
{
return;
}
NGRAPH_DEBUG << "Syncing output for tensor " << this;
read(m_memory, 0, m_memory_size);
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <plaidml/plaidml++.h>
#include "ngraph/runtime/plaidml/plaidml_config.hpp"
#include "ngraph/runtime/tensor.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
class PlaidML_Tensor;
}
}
}
class ngraph::runtime::plaidml::PlaidML_Tensor final : public ngraph::runtime::Tensor
{
public:
PlaidML_Tensor(Config* config,
const ngraph::element::Type& element_type,
const ngraph::Shape& shape,
const std::string& name,
void* memory);
~PlaidML_Tensor() final {}
const vertexai::plaidml::tensor<char>& tensor() const { return m_tensor; }
void write(const void* p, size_t tensor_offset, size_t n) final;
void read(void* p, size_t tensor_offset, size_t n) const final;
// Copy the backing memory to the tensor, if needed.
void sync_input();
// Copy the tensor to the backing memory, if needed.
void sync_output();
private:
vertexai::plaidml::tensor<char> m_tensor;
void* m_memory;
size_t m_memory_size;
bool m_is_logically_zero;
};
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/plaidml/plaidml_translate.hpp"
#include "ngraph/runtime/plaidml/plaidml_builder.hpp"
namespace vp = vertexai::plaidml;
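// to_plaidml maps an nGraph element type onto the corresponding PlaidML datatype. For data
// (non-IO) conversions, nGraph's boolean type (reported as signed 8-bit "char") maps to
// PLAIDML_DATA_BOOLEAN.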
vp::datatype ngraph::runtime::plaidml::to_plaidml(const ngraph::element::Type& element_type,
ConversionUse use)
{
switch (element_type.bitwidth())
{
case 8:
if (element_type.is_signed())
{
// TODO: Extend nGraph's element::Type to distinguish between boolean and i8.
if (element_type.c_type_string() == "char" && use == ConversionUse::FOR_DATA)
{
return PLAIDML_DATA_BOOLEAN;
}
return PLAIDML_DATA_INT8;
}
return PLAIDML_DATA_UINT8;
case 16:
if (element_type.is_real())
{
return PLAIDML_DATA_FLOAT16;
}
if (element_type.is_signed())
{
return PLAIDML_DATA_INT16;
}
return PLAIDML_DATA_UINT16;
case 32:
if (element_type.is_real())
{
return PLAIDML_DATA_FLOAT32;
}
if (element_type.is_signed())
{
return PLAIDML_DATA_INT32;
}
return PLAIDML_DATA_UINT32;
case 64:
if (element_type.is_real())
{
return PLAIDML_DATA_FLOAT64;
}
if (element_type.is_signed())
{
return PLAIDML_DATA_INT64;
}
return PLAIDML_DATA_UINT64;
default: break;
}
throw ngraph::ngraph_error{
std::string{"The nGraph PlaidML backend doesn't support the requested element type ("} +
element_type.c_type_string() + ")"};
}
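// to_plaidml builds a PlaidML shape for an nGraph shape, computing dense row-major strides.
// For example, Shape{2, 3, 4} produces (size, stride) pairs (2, 12), (3, 4), (4, 1).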
vp::shape<char> ngraph::runtime::plaidml::to_plaidml(std::shared_ptr<vertexai::ctx>& ctx,
const ngraph::element::Type& element_type,
const ngraph::Shape& shape,
ConversionUse use)
{
vp::shape<char> ps{ctx, to_plaidml(element_type, use)};
std::ptrdiff_t stride = 1;
for (auto dit = shape.begin(); dit != shape.end(); ++dit)
{
stride *= *dit;
}
for (auto dit = shape.begin(); dit != shape.end(); ++dit)
{
if (*dit)
{
stride /= *dit;
}
ps.add_dimension(*dit, stride);
}
return ps;
}
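// tile_converter wraps a tensor name in the Tile cast expression for the requested datatype;
// e.g. tile_converter("I", PLAIDML_DATA_INT32) yields "as_int(I, 32)".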
std::string ngraph::runtime::plaidml::tile_converter(const std::string& tensor_name,
vp::datatype dt)
{
switch (dt)
{
case PLAIDML_DATA_BOOLEAN:
return "as_uint(" + tensor_name + ", 8)"; // N.B. nGraph boolean semantics
case PLAIDML_DATA_INT8: return "as_int(" + tensor_name + ", 8)";
case PLAIDML_DATA_INT16: return "as_int(" + tensor_name + ", 16)";
case PLAIDML_DATA_INT32: return "as_int(" + tensor_name + ", 32)";
case PLAIDML_DATA_INT64: return "as_int(" + tensor_name + ", 64)";
case PLAIDML_DATA_UINT8: return "as_uint(" + tensor_name + ", 8)";
case PLAIDML_DATA_UINT16: return "as_uint(" + tensor_name + ", 16)";
case PLAIDML_DATA_UINT32: return "as_uint(" + tensor_name + ", 32)";
case PLAIDML_DATA_UINT64: return "as_uint(" + tensor_name + ", 64)";
case PLAIDML_DATA_FLOAT16: return "as_float(" + tensor_name + ", 16)";
case PLAIDML_DATA_FLOAT32: return "as_float(" + tensor_name + ", 32)";
case PLAIDML_DATA_FLOAT64: return "as_float(" + tensor_name + ", 64)";
default: throw std::runtime_error{"Unsupported type conversion"};
}
}
std::string ngraph::runtime::plaidml::tile_converter(const std::string& tensor_name,
const ngraph::element::Type& element_type)
{
if (!element_type.bitwidth())
{
return tensor_name;
}
return tile_converter(tensor_name, to_plaidml(element_type));
}
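// plaidml_logical_to_data converts a logical (boolean) variable into nGraph's int8 data
// representation.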
vp::variable ngraph::runtime::plaidml::plaidml_logical_to_data(vp::variable var, bool debug)
{
return builder::Function{"logicalToData", debug}
.add(builder::Input{var, "I"})
.add(builder::Output{"O"})
.add(builder::Elementwise{"O", "as_int(I ? 1 : 0, 8)"})
.finalize();
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <plaidml/plaidml++.h>
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
enum class ConversionUse
{
FOR_DATA = 0,
FOR_IO = 1,
};
vertexai::plaidml::datatype to_plaidml(const ngraph::element::Type& element_type,
ConversionUse use = ConversionUse::FOR_DATA);
vertexai::plaidml::shape<char> to_plaidml(std::shared_ptr<vertexai::ctx>& ctx,
const ngraph::element::Type& element_type,
const ngraph::Shape& shape,
ConversionUse use = ConversionUse::FOR_DATA);
std::string tile_converter(const std::string& tensor_name,
vertexai::plaidml::datatype dt);
std::string tile_converter(const std::string& tensor_name,
const ngraph::element::Type& element_type);
vertexai::plaidml::variable plaidml_logical_to_data(vertexai::plaidml::variable var,
bool debug);
}
}
}
# Tests not supported by the PlaidML backend
backwards_reverse_sequence_n3_c2_h3 # No plans to implement ReverseSequence
backwards_reverse_sequence_n4d2c3h2w2 # No plans to implement ReverseSequence
divide_by_zero_int32 # PlaidML does not fault on integer division by zero.
product_matrix_rows_zero # Out-of-range for PlaidML
product_matrix_cols_zero # Out-of-range for PlaidML
product_vector_zero # Out-of-range for PlaidML
product_matrix_to_scalar_zero_by_zero # Out-of-range for PlaidML
product_3d_eliminate_zero_dim # Out-of-range for PlaidML
max_matrix_rows_zero # Out-of-range for PlaidML
max_matrix_cols_zero # Out-of-range for PlaidML
max_vector_zero # Out-of-range for PlaidML
max_matrix_to_scalar_zero_by_zero # Out-of-range for PlaidML
max_3d_eliminate_zero_dim # Out-of-range for PlaidML
min_matrix_rows_zero # Out-of-range for PlaidML
min_matrix_cols_zero # Out-of-range for PlaidML
min_vector_zero # Out-of-range for PlaidML
min_matrix_to_scalar_zero_by_zero # Out-of-range for PlaidML
min_3d_eliminate_zero_dim # Out-of-range for PlaidML
reverse_sequence_n2c3h4w2 # No plans to implement ReverseSequence
reverse_sequence_n4c3h2w2 # No plans to implement ReverseSequence
reverse_sequence_n4d2c3h2w2 # No plans to implement ReverseSequence
topk_1d_max_all # No plans to implement TopK
topk_1d_max_partial # No plans to implement TopK
topk_1d_max_one # No plans to implement TopK
topk_1d_min_all # No plans to implement TopK
topk_1d_min_partial # No plans to implement TopK
topk_1d_min_one # No plans to implement TopK
topk_3d_max_all # No plans to implement TopK
topk_3d_max_partial # No plans to implement TopK
topk_3d_max_one # No plans to implement TopK
topk_3d_min_all # No plans to implement TopK
topk_3d_min_partial # No plans to implement TopK
topk_3d_min_one # No plans to implement TopK
topk_2d_max_all # No plans to implement TopK
topk_2d_max_partial # No plans to implement TopK
topk_2d_max_one # No plans to implement TopK
topk_2d_min_all # No plans to implement TopK
topk_2d_min_partial # No plans to implement TopK
topk_2d_min_one # No plans to implement TopK
# Tests that PlaidML might be able to run at some point.
backwards_maxpool_n2_c1_hw5_3x3_str2_max_pad1x2_2x3
backwards_slice
batchnorm_fprop_bprop # To debug
batchnorm_fprop_bprop_2step # To debug
reduce_matrix_rows_zero # To debug: possible broadcasting error?
reduce_matrix_cols_zero # To debug: possible broadcasting error?
reduce_3d_to_vector # To debug: possible broadcasting error?
replace_slice_matrix_inplace
max_pool_2d_1channel_1image_overpadded
max_pool_3d
reduce_window_emulating_max_pool_1d_1channel_1image
reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image
reduce_window_emulating_max_pool_2d_2channel_2image
reduce_window_emulating_max_pool_2d_1channel_1image_strided
select_and_scatter_with_overlap
select_and_scatter_without_overlap
select_and_scatter_3d_without_overlap
avg_pool_3d
avg_pool_3d_uneven_strided_padded_include_in_computation
dequantize_zero_offset # Quantization/Dequantization is unimplemented
quantize_ROUND_NEAREST_TOWARD_ZERO # Quantization/Dequantization is unimplemented
quantize_ROUND_NEAREST_UPWARD # Quantization/Dequantization is unimplemented
quantize_ROUND_NEAREST_DOWNWARD # Quantization/Dequantization is unimplemented
quantize_ROUND_NEAREST_TOWARD_EVEN # Quantization/Dequantization is unimplemented
quantize_ROUND_TOWARD_INFINITY # Quantization/Dequantization is unimplemented
quantize_ROUND_TOWARD_ZERO # Quantization/Dequantization is unimplemented
quantize_ROUND_UP # Quantization/Dequantization is unimplemented
quantize_ROUND_DOWN # Quantization/Dequantization is unimplemented
quantize # Quantization/Dequantization is unimplemented
quantize_axes # Quantization/Dequantization is unimplemented
quantize_int8 # Quantization/Dequantization is unimplemented
quantize_clamp # Quantization/Dequantization is unimplemented
dequantize # Quantization/Dequantization is unimplemented
dequantize_axes # Quantization/Dequantization is unimplemented
dequantize_int8 # Quantization/Dequantization is unimplemented
sum_matrix_rows_zero # Empty dims apparently should produce shaped 0s
sum_matrix_cols_zero # Empty dims apparently should produce shaped 0s
sum_vector_zero # Empty dims apparently should produce shaped 0s
sum_matrix_to_scalar_zero_by_zero # Empty dims apparently should produce shaped 0s
sum_3d_eliminate_zero_dim # Empty dims apparently should produce shaped 0s
dot_0_0 # Empty dims apparently should produce shaped 0s
dot_matrix_2x0_0x2 # Empty dims apparently should produce shaped 0s
dot_2x0_0 # Empty dims apparently should produce shaped 0s
......@@ -15,6 +15,7 @@
# ******************************************************************************
add_subdirectory(nbench)
add_subdirectory(ngraph-to-plaidml)
add_subdirectory(reserialize)
if (NGRAPH_ONNX_IMPORT_ENABLE)
add_subdirectory(serialize_onnx)
......
......@@ -34,5 +34,8 @@ endif()
if (NGRAPH_INTERPRETER_ENABLE)
target_link_libraries(nbench interpreter_backend)
endif()
if (NGRAPH_PLAIDML_ENABLE)
target_link_libraries(nbench plaidml_backend)
endif()
install(TARGETS nbench RUNTIME DESTINATION ${NGRAPH_INSTALL_BIN})
# ******************************************************************************
# Copyright 2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
set (SRC
ngraph-to-plaidml.cpp
)
if (NGRAPH_PLAIDML_ENABLE)
add_executable(ngraph-to-plaidml ${SRC})
target_link_libraries(ngraph-to-plaidml ngraph plaidml_backend)
install(TARGETS ngraph-to-plaidml RUNTIME DESTINATION ${NGRAPH_INSTALL_BIN})
else()
message(STATUS "PlaidML not enabled; not compiling ngraph-to-plaidml")
endif()
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <getopt.h>
#include <iostream>
#include <memory>
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/plaidml/plaidml_backend.hpp"
#include "ngraph/serializer.hpp"
static const struct option opts[] = {{"backend", required_argument, nullptr, 'b'},
{"format", required_argument, nullptr, 'f'},
{"help", no_argument, nullptr, 'h'},
{nullptr, 0, nullptr, '\0'}};
int main(int argc, char** argv)
{
int opt;
bool err = false;
bool usage = false;
std::string model;
std::string output;
std::string backend_name = "PlaidML";
plaidml_file_format format = PLAIDML_FILE_FORMAT_TILE;
while ((opt = getopt_long(argc, argv, "f:b:h", opts, nullptr)) != -1)
{
switch (opt)
{
case 'b': backend_name = optarg; break;
case 'h': usage = true; break;
case 'f':
if (!strcmp(optarg, "tile"))
{
format = PLAIDML_FILE_FORMAT_TILE;
}
else if (!strcmp(optarg, "human"))
{
format = PLAIDML_FILE_FORMAT_STRIPE_HUMAN;
}
else if (!strcmp(optarg, "prototxt"))
{
format = PLAIDML_FILE_FORMAT_STRIPE_PROTOTXT;
}
else if (!strcmp(optarg, "binary"))
{
format = PLAIDML_FILE_FORMAT_STRIPE_BINARY;
}
else
{
err = true;
}
break;
case '?':
default: err = true; break;
}
}
if (optind + 2 != argc)
{
err = true;
}
else
{
model = argv[optind];
output = argv[optind + 1];
if (model.empty())
{
err = true;
}
else if (!ngraph::file_util::exists(model))
{
std::cerr << "File " << model << " not found\n";
err = true;
}
if (output.empty())
{
err = true;
}
else if (ngraph::file_util::exists(output))
{
std::cerr << "File " << output << " already exists; not overwriting\n";
err = true;
}
}
if (backend_name.substr(0, backend_name.find(':')) != "PlaidML")
{
std::cerr << "Unsupported backend: " << backend_name << "\n";
err = true;
}
if (err || usage)
{
std::cerr << R"###(
DESCRIPTION
Convert an ngraph JSON model to one of PlaidML's file formats.
SYNOPSIS
ngraph-to-plaidml [--backend|-b <backend>] MODEL OUTPUT
OPTIONS
-b|--backend Backend to use (default: PlaidML)
-f|--format Format to use (tile, human, prototxt, or binary; default: tile)
)###";
}
if (err)
{
return EXIT_FAILURE;
}
if (usage)
{
return EXIT_SUCCESS;
}
std::cerr << "Reading nGraph model from " << model << "\n";
std::shared_ptr<ngraph::Function> f = ngraph::deserialize(model);
std::shared_ptr<ngraph::runtime::Backend> base_backend =
ngraph::runtime::Backend::create(backend_name);
std::shared_ptr<ngraph::runtime::plaidml::PlaidML_Backend> backend =
std::dynamic_pointer_cast<ngraph::runtime::plaidml::PlaidML_Backend>(base_backend);
if (!backend)
{
std::cerr << "Failed to load PlaidML backend\n";
return EXIT_FAILURE;
}
backend->save(f, output, format);
std::cerr << "Wrote output to " << output << "\n";
return EXIT_SUCCESS;
}
......@@ -91,6 +91,10 @@ if (NGRAPH_HYBRID_ENABLE)
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} HYBRID)
endif()
if (NGRAPH_PLAIDML_ENABLE)
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} PlaidML)
endif()
add_subdirectory(models)
add_subdirectory(files)
add_subdirectory(util)
......@@ -180,6 +184,10 @@ if (NGRAPH_CPU_ENABLE)
target_link_libraries(unit-test PRIVATE libmkldnn)
endif()
if (NGRAPH_PLAIDML_ENABLE)
target_link_libraries(unit-test PRIVATE plaidml_backend)
endif()
if (NGRAPH_TBB_ENABLE)
target_compile_definitions(unit-test PRIVATE NGRAPH_TBB_ENABLE)
endif()
......