Convolution forward prop (#294)

* Test GitHub-JIRA integration, nothing useful in this commit NGTF-388 #comment Testing JIRA integration * WIP on convolution * Type checking for convolution * Docstrings for convolution * Add convolution reference kernel; it works on some unit tests copied and pasted from my old branch. * Bugfix for dilated conv, and improvement to conv test generation * Remove get_arguments calls from convolution stuff * Add convolution to CPU; also a few fixes to the test generation stuff * Add copyright header to convolution ref script * Move copyright header to the correct place * A few more tests * Remove fallback behavior of blanking out the convolution ref file, since we're not generating it from the build system anymore * Delete stale comment * Merge stuff for the convolution ref script * Clean up rebase mess * Review comments * Review comment (n_foo -> foo_count)

Convolution forward prop (#294)
* Test GitHub-JIRA integration, nothing useful in this commit NGTF-388 #comment Testing JIRA integration * WIP on convolution * Type checking for convolution * Docstrings for convolution * Add convolution reference kernel; it works on some unit tests copied and pasted from my old branch. * Bugfix for dilated conv, and improvement to conv test generation * Remove get_arguments calls from convolution stuff * Add convolution to CPU; also a few fixes to the test generation stuff * Add copyright header to convolution ref script * Move copyright header to the correct place * A few more tests * Remove fallback behavior of blanking out the convolution ref file, since we're not generating it from the build system anymore * Delete stale comment * Merge stuff for the convolution ref script * Clean up rebase mess * Review comments * Review comment (n_foo -> foo_count)
122db5ff · Adam Procter · Scott Cyphers · 3b84d91a · 122db5ff · 122db5ff
Commit 122db5ff authored Dec 18, 2017 by Adam Procter Committed by Scott Cyphers Dec 18, 2017
18 changed files
--- a/src/ngraph/CMakeLists.txt
+++ b/src/ngraph/CMakeLists.txt
@@ -39,6 +39,7 @@ set (SRC
    ops/concatenate.cpp
    ops/constant.cpp
    ops/convert.cpp
+    ops/convolution.cpp
    ops/cos.cpp
    ops/cosh.cpp
    ops/divide.cpp

--- a/src/ngraph/coordinate_transform.cpp
+++ b/src/ngraph/coordinate_transform.cpp
@@ -21,15 +21,10 @@
 #include "ngraph/common.hpp"
 #include "ngraph/coordinate_transform.hpp"
 #include "ngraph/except.hpp"
+#include "ngraph/util.hpp"
 using namespace ngraph;
-template <typename T>
-inline T ceil_div(T x, T y)
-{
-    return (x == 0 ? 0 : (1 + (x - 1) / y));
-}
 CoordinateTransform::CoordinateTransform(const Shape& source_shape,
                                         const Coordinate& source_start_corner,
                                         const Coordinate& source_end_corner,

--- a/src/ngraph/ngraph.hpp
+++ b/src/ngraph/ngraph.hpp
@@ -69,6 +69,7 @@
 #include "ngraph/ops/concatenate.hpp"
 #include "ngraph/ops/constant.hpp"
 #include "ngraph/ops/convert.hpp"
+#include "ngraph/ops/convolution.hpp"
 #include "ngraph/ops/cos.hpp"
 #include "ngraph/ops/cosh.hpp"
 #include "ngraph/ops/divide.hpp"

--- a/src/ngraph/ops/convolution.cpp
+++ b/src/ngraph/ops/convolution.cpp
+// ----------------------------------------------------------------------------
+// Copyright 2017 Nervana Systems Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// ----------------------------------------------------------------------------
+#include "ngraph/ops/convolution.hpp"
+#include "ngraph/util.hpp"
+using namespace std;
+using namespace ngraph;
+// Constructs a batched convolution node, validating its inputs and deriving its result type.
+//
+// Checks run in a fixed order and the first violated constraint is reported via ngraph_error:
+//   1. image batch rank >= 3, batch size != 0, input channel count != 0;
+//   2. filter rank == 2 + number of image dimensions, output channel count != 0, and the
+//      filters' input-channel axis matches the image batch's;
+//   3. movement and dilation stride ranks equal the number of image dimensions;
+//   4. every image dimension, window dimension and dilation stride is nonzero, and the dilated
+//      window is no larger than the image along each axis;
+//   5. every movement stride is nonzero.
+//
+// On success the value type is set to a tensor view with the image batch's element type and
+// shape (N, Co, Do...), where Do[i] = ceil_div(Di[i] - Wp[i] + 1, movement_stride[i]).
+op::Convolution::Convolution(const std::shared_ptr<Node>& image_batch,
+                             const std::shared_ptr<Node>& filters,
+                             const Strides& window_movement_strides,
+                             const Strides& window_dilation_strides)
+    : RequiresTensorViewArgs("Convolution", {image_batch, filters})
+    , m_window_movement_strides(window_movement_strides)
+    , m_window_dilation_strides(window_dilation_strides)
+{
+    // Both inputs lead with two non-image axes (batch/channel for the images, output/input
+    // channel for the filters); the image/window axes start right after them.
+    const size_t first_image_axis = 2;
+    auto image_batch_tensor_view_type = get_inputs().at(0).get_tensor_view_type();
+    auto& image_batch_shape = image_batch_tensor_view_type->get_shape();
+    auto filters_tensor_view_type = get_inputs().at(1).get_tensor_view_type();
+    auto& filters_shape = filters_tensor_view_type->get_shape();
+    //
+    // Make sure image_batch: NCiDi for some Di of rank>0, N != 0, Ci != 0.
+    //
+    if (image_batch_shape.size() < 3)
+    {
+        throw ngraph_error(
+            "Convolution image batch input must have rank of at least 3 (one batch axis, one "
+            "input-channel axis, at least one image dimension).");
+    }
+    m_batch_size = image_batch_shape[0];
+    if (m_batch_size == 0)
+    {
+        throw ngraph_error("Convolution image batch size is zero.");
+    }
+    m_input_channel_count = image_batch_shape[1];
+    if (m_input_channel_count == 0)
+    {
+        throw ngraph_error("Convolution requires at least one input channel.");
+    }
+    m_image_dimension_count = image_batch_shape.size() - first_image_axis;
+    //
+    // Make sure filters: CoCiWv for some Co>0, rank of W = rank of Di.
+    //
+    if (filters_shape.size() != first_image_axis + m_image_dimension_count)
+    {
+        throw ngraph_error("Convolution filter input must have rank of 2 + n_image_dimensions.");
+    }
+    m_output_channel_count = filters_shape[0];
+    if (m_output_channel_count == 0)
+    {
+        throw ngraph_error("Convolution requires at least one output channel.");
+    }
+    if (filters_shape[1] != m_input_channel_count)
+    {
+        throw ngraph_error("Convolution image batch and filter input channel counts do not match.");
+    }
+    //
+    // Make sure window movement strides and window dilation strides have same rank as Di.
+    //
+    if (m_window_movement_strides.size() != m_image_dimension_count)
+    {
+        throw ngraph_error(
+            "Convolution window movement stride rank does not match number of image dimensions.");
+    }
+    if (m_window_dilation_strides.size() != m_image_dimension_count)
+    {
+        throw ngraph_error(
+            "Convolution window dilation stride rank does not match number of image dimensions.");
+    }
+    //
+    // Extract input image shape Di and make sure all dimensions are larger than 0.
+    //
+    for (size_t i = 0; i < m_image_dimension_count; i++)
+    {
+        m_input_image_shape.push_back(image_batch_shape[first_image_axis + i]);
+        if (m_input_image_shape[i] == 0)
+        {
+            throw ngraph_error("Convolution input image dimension is zero.");
+        }
+    }
+    //
+    // Extract the virtual shape Wv of the convolution window, *not* including dilation, from the filter dimensions.
+    // At the same time, make sure window shape dimensions are all larger than 0.
+    //
+    for (size_t i = 0; i < m_image_dimension_count; i++)
+    {
+        m_window_virtual_shape.push_back(filters_shape[first_image_axis + i]);
+        if (m_window_virtual_shape[i] == 0)
+        {
+            throw ngraph_error("Convolution window shape has a zero-length axis.");
+        }
+    }
+    //
+    // Compute physical shape Wp of the convolution window, *including* dilation. At the same time, make sure all
+    // window dilation strides are larger than 0, and that the dilated filter fits within the image dimensions.
+    //
+    for (size_t i = 0; i < m_image_dimension_count; i++)
+    {
+        if (m_window_dilation_strides[i] == 0)
+        {
+            throw ngraph_error("Convolution window axis dilation stride is zero.");
+        }
+        m_window_physical_shape.push_back(
+            (m_window_virtual_shape[i] - 1) * m_window_dilation_strides[i] + 1);
+        if (m_window_physical_shape[i] > m_input_image_shape[i])
+        {
+            throw ngraph_error("Convolution window after dilation is larger than the image.");
+        }
+    }
+    //
+    // Compute image output shape Do, checking at the same time that all window movement strides are larger than 0.
+    // The subtraction below cannot wrap: the previous loop guarantees Wp[i] <= Di[i].
+    //
+    for (size_t i = 0; i < m_image_dimension_count; i++)
+    {
+        if (m_window_movement_strides[i] == 0)
+        {
+            throw ngraph_error("Convolution window axis movement stride is zero.");
+        }
+        m_output_image_shape.push_back(ceil_div(
+            m_input_image_shape[i] - m_window_physical_shape[i] + 1, m_window_movement_strides[i]));
+    }
+    //
+    // Construct result shape: NCoDo.
+    //
+    Shape result_shape(first_image_axis + m_image_dimension_count);
+    result_shape[0] = m_batch_size;
+    result_shape[1] = m_output_channel_count;
+    std::copy(m_output_image_shape.begin(),
+              m_output_image_shape.end(),
+              result_shape.begin() + first_image_axis);
+    set_value_type_checked(make_shared<TensorViewType>(
+        image_batch_tensor_view_type->get_element_type(), result_shape));
+}
+// Builds the all-ones stride vector used when a caller omits movement and/or dilation strides.
+//
+// The result has one entry per image dimension, i.e. (rank of the image batch) - 2.
+// Throws ngraph_error if image_batch's value type is not a TensorViewType or if its rank is
+// below 3.
+//
+// This helper is only used by the delegating constructors in this file, so it is given internal
+// linkage to keep the unqualified name out of the program-wide symbol table.
+static Strides default_strides(const std::shared_ptr<Node>& image_batch)
+{
+    auto image_batch_value_type = image_batch->get_value_type();
+    auto image_batch_tensor_view_type =
+        dynamic_pointer_cast<const TensorViewType>(image_batch_value_type);
+    if (image_batch_tensor_view_type == nullptr)
+    {
+        throw ngraph_error("Convolution image batch argument has non-tensor view type");
+    }
+    auto& image_batch_shape = image_batch_tensor_view_type->get_shape();
+    if (image_batch_shape.size() < 3)
+    {
+        // For consistency we should throw the same error message here that we throw in the constructor.
+        throw ngraph_error(
+            "Convolution image batch input must have rank of at least 3 (one batch axis, one "
+            "input-channel axis, at least one image dimension).");
+    }
+    // One unit stride per image axis (everything after the batch and channel axes).
+    return Strides(image_batch_shape.size() - 2, 1);
+}
+// Convenience overload: no window dilation. The dilation strides default to all ones, one per
+// image dimension, and everything else is handled by the four-argument constructor.
+op::Convolution::Convolution(const std::shared_ptr<Node>& image_batch,
+                             const std::shared_ptr<Node>& filters,
+                             const Strides& window_movement_strides)
+    : Convolution(image_batch, filters, window_movement_strides, default_strides(image_batch))
+{
+}
+// Convenience overload: neither dilation nor movement strides supplied. The movement strides
+// default to all ones here; the three-argument overload then supplies the unit dilation strides.
+op::Convolution::Convolution(const std::shared_ptr<Node>& image_batch,
+                             const std::shared_ptr<Node>& filters)
+    : Convolution(image_batch, filters, default_strides(image_batch))
+{
+}
+// Creates a new Convolution over a replacement pair of arguments, carrying over this node's
+// movement and dilation strides. Throws ngraph_error unless exactly two arguments are given.
+std::shared_ptr<Node>
+    op::Convolution::copy_with_new_args(const std::vector<std::shared_ptr<Node>>& new_args) const
+{
+    const bool has_two_args = (new_args.size() == 2);
+    if (!has_two_args)
+    {
+        throw ngraph_error("Incorrect number of new arguments");
+    }
+    // Argument order matches the constructor: image batch first, then filters.
+    const std::shared_ptr<Node>& replacement_image_batch = new_args[0];
+    const std::shared_ptr<Node>& replacement_filters = new_args[1];
+    return std::make_shared<Convolution>(replacement_image_batch,
+                                         replacement_filters,
+                                         m_window_movement_strides,
+                                         m_window_dilation_strides);
+}
+/*
+void op::Convolution::generate_adjoints(autodiff::Adjoints& adjoints, const std::shared_ptr<Node>& delta)
+{
+}
+*/
--- a/src/ngraph/ops/convolution.hpp
+++ b/src/ngraph/ops/convolution.hpp
+// ----------------------------------------------------------------------------
+// Copyright 2017 Nervana Systems Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// ----------------------------------------------------------------------------
+#pragma once
+#include "ngraph/ops/op.hpp"
+namespace ngraph
+{
+    namespace op
+    {
+        /// \brief Batched convolution operation, with optional window dilation and stride.
+        ///
+        /// Convolution takes two inputs:
+        ///
+        /// 1. <i>(the image batch)</i> a tensor of shape \f$(N,C_\textit{in},d_1,\dots,d_n)\f$ where \f$n > 0\f$, every \f$d_i > 0\f$, and where \f$N\f$ is the batch size
+        ///    (number of images) and \f$C_\textit{in} > 0\f$ is the number of input channels (sometimes called features); and
+        /// 2. <i>(the filters)</i> a tensor of shape \f$(C_\textit{out},C_\textit{in},d^f_1,\dots,d^f_n)\f$, where \f$C_\textit{out} > 0\f$ is the number of output channels
+        ///    (sometimes called features) and \f$(d^f_1,\dots,d^f_n)\f$ are the filter dimensions. It is required that for all \f$i\f$, \f$0 < l_i(d^f_i - 1) + 1 \le d_i\f$.
+        ///    (See below for the definition of the dilation \f$l_i\f$);
+        ///
+        /// and two optional parameters:
+        ///
+        /// 3. <i>(the window movement strides)</i> a vector of positive integers \f$(s_1,\dots,s_n)\f$, and
+        /// 4. <i>(the window dilation strides)</i> a vector of positive integers \f$(l_1,\dots,l_n)\f$.
+        ///
+        /// Define the <i>physical window size</i> as the vector \f$(p_1,\dots,p_n)\f$ where \f$p_i = l_i(d^f_i - 1) + 1\f$.
+        ///
+        /// The output has the shape \f$(N,C_\textit{out},d'_1,\dots,d'_n)\f$, where \f$d'_i = \lceil \frac{d_i - p_i + 1}{s_i} \rceil\f$.
+        ///
+        /// Given an input image batch tensor \f$T_\textit{in}\f$ and an input filter tensor \f$T_\textit{filt}\f$, the output tensor is defined by the equation (TODO: I'm sure
+        /// I messed something up here)
+        ///
+        /// \f[
+        ///      T_\textit{out}[a,c_\textit{out},i_1,\dots,i_n] = \sum_{c_\textit{in}=0,j_1=0,\dots,j_n=0}^{c_\textit{in}=C_\textit{in}-1,j_1=d^f_1-1,\dots,j_n=d^f_n-1} (T_\textit{filt}[c_\textit{out},c_\textit{in},j_1,\dots,j_n] \cdot T_\textit{in}[a,c_\textit{in},s_1i_1+l_1j_1,\dots,s_ni_n+l_nj_n])
+        /// \f]
+        ///
+        class Convolution : public RequiresTensorViewArgs
+        {
+        public:
+            /// \brief Constructs a batched convolution operation.
+            ///
+            /// \param image_batch The node producing the input image batch tensor.
+            /// \param filters The node producing the filters tensor.
+            /// \param window_movement_strides The window movement strides.
+            /// \param window_dilation_strides The window dilation strides.
+            Convolution(const std::shared_ptr<Node>& image_batch,
+                        const std::shared_ptr<Node>& filters,
+                        const Strides& window_movement_strides,
+                        const Strides& window_dilation_strides);
+            /// \brief Constructs a batched convolution operation with no window dilation (i.e., all dilation strides are 1).
+            ///
+            /// \param image_batch The node producing the input image batch tensor.
+            /// \param filters The node producing the filters tensor.
+            /// \param window_movement_strides The window movement strides.
+            Convolution(const std::shared_ptr<Node>& image_batch,
+                        const std::shared_ptr<Node>& filters,
+                        const Strides& window_movement_strides);
+            /// \brief Constructs a batched convolution operation with no window dilation or movement stride (i.e., all dilation and movement strides are 1).
+            ///
+            /// \param image_batch The node producing the input image batch tensor.
+            /// \param filters The node producing the filters tensor.
+            Convolution(const std::shared_ptr<Node>& image_batch,
+                        const std::shared_ptr<Node>& filters);
+            /// \brief Creates a copy of this operation applied to new arguments, keeping the same strides.
+            ///
+            /// \param new_args The replacement arguments: image batch first, then filters.
+            /// \return The newly constructed convolution node.
+            virtual std::shared_ptr<Node> copy_with_new_args(
+                const std::vector<std::shared_ptr<Node>>& new_args) const override;
+            /// \return The window movement strides.
+            const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
+            /// \return The window dilation strides.
+            const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
+            /// \return The number of input channels.
+            size_t get_input_channel_count() const { return m_input_channel_count; }
+            /// \return The number of output channels.
+            size_t get_output_channel_count() const { return m_output_channel_count; }
+            // NOTE(review): the four Shape accessors below return by value (a copy per call),
+            // unlike the stride accessors above, which return const references.
+            /// \return The input image shape.
+            Shape get_input_image_shape() const { return m_input_image_shape; }
+            /// \return The output image shape.
+            Shape get_output_image_shape() const { return m_output_image_shape; }
+            /// \return The physical window shape.
+            Shape get_window_physical_shape() const { return m_window_physical_shape; }
+            /// \return The virtual window shape.
+            Shape get_window_virtual_shape() const { return m_window_virtual_shape; }
+            /// \return The batch size.
+            size_t get_batch_size() const { return m_batch_size; }
+            /// \return The number of image dimensions.
+            size_t get_image_dimension_count() const { return m_image_dimension_count; }
+        protected:
+            // Stride parameters exactly as supplied to (or defaulted by) the constructor.
+            Strides m_window_movement_strides;
+            Strides m_window_dilation_strides;
+            // Everything below is derived from the input shapes during construction.
+            // Axis 1 of the image batch shape.
+            size_t m_input_channel_count;
+            // Axis 0 of the filters shape.
+            size_t m_output_channel_count;
+            // Image batch shape with the batch and channel axes removed.
+            Shape m_input_image_shape;
+            // Per-axis output extent computed from the input, physical window and movement stride.
+            Shape m_output_image_shape;
+            // Window extent per axis including dilation: (virtual - 1) * dilation + 1.
+            Shape m_window_physical_shape;
+            // Window extent per axis as given by the filters shape (dilation not applied).
+            Shape m_window_virtual_shape;
+            // Axis 0 of the image batch shape.
+            size_t m_batch_size;
+            // Rank of the image batch minus the batch and channel axes.
+            size_t m_image_dimension_count;
+        };
+    }
+}
--- a/src/ngraph/runtime/cpu/cpu_emitter.cpp
+++ b/src/ngraph/runtime/cpu/cpu_emitter.cpp
@@ -24,6 +24,7 @@
 #include "ngraph/ops/broadcast.hpp"
 #include "ngraph/ops/concatenate.hpp"
 #include "ngraph/ops/constant.hpp"
+#include "ngraph/ops/convolution.hpp"
 #include "ngraph/ops/dot.hpp"
 #include "ngraph/ops/function_call.hpp"
 #include "ngraph/ops/get_output_element.hpp"
@@ -1652,6 +1653,28 @@ void runtime::cpu::CPU_Emitter::EmitSqrt(const ngraph::Node* n,
    m_out << "}\n";
 }
+// Writes to m_out the source text of a call to kernel::convolution<ET>(...), where ET is the
+// result's element type. The emitted arguments are, in order: image batch buffer, filters
+// buffer, output buffer, the three shapes as braced lists, then the movement and dilation
+// strides as braced lists.
+void runtime::cpu::CPU_Emitter::EmitConvolution(const ngraph::Node* n,
+                                                const vector<runtime::cpu::TensorViewWrapper>& args,
+                                                const vector<runtime::cpu::TensorViewWrapper>& out)
+{
+    const auto* conv_op = static_cast<const op::Convolution*>(n);
+    const auto& image_batch = args[0];
+    const auto& filters = args[1];
+    const auto& result = out[0];
+    // Template instantiation followed by the three tensor buffers.
+    m_out << "kernel::convolution<" << result.get_type() << ">(" << image_batch.get_name()
+          << ",\n";
+    m_out << "                         " << filters.get_name() << ",\n";
+    m_out << "                         " << result.get_name() << ",\n";
+    // Shapes, in the same order as the buffers.
+    m_out << "                         {" << join(image_batch.get_shape()) << "},\n";
+    m_out << "                         {" << join(filters.get_shape()) << "},\n";
+    m_out << "                         {" << join(result.get_shape()) << "},\n";
+    // Stride parameters taken from the op itself; the last one closes the call.
+    m_out << "                         {" << join(conv_op->get_window_movement_strides())
+          << "},\n";
+    m_out << "                         {" << join(conv_op->get_window_dilation_strides())
+          << "});\n";
+}
 //------------------------------------------------------------------------------------------------
 // Utility methods
 //------------------------------------------------------------------------------------------------

--- a/src/ngraph/runtime/cpu/cpu_emitter.hpp
+++ b/src/ngraph/runtime/cpu/cpu_emitter.hpp
@@ -102,6 +102,7 @@ namespace ngraph
                void EMITTER_DECL(EmitFloor);
                void EMITTER_DECL(EmitCeiling);
                void EMITTER_DECL(EmitSqrt);
+                void EMITTER_DECL(EmitConvolution);
            private:
                void generate_call(const std::vector<TensorViewWrapper>& args,

--- a/src/ngraph/runtime/cpu/cpu_external_function.cpp
+++ b/src/ngraph/runtime/cpu/cpu_external_function.cpp
@@ -40,6 +40,7 @@
 #include "ngraph/ops/concatenate.hpp"
 #include "ngraph/ops/constant.hpp"
 #include "ngraph/ops/convert.hpp"
+#include "ngraph/ops/convolution.hpp"
 #include "ngraph/ops/cos.hpp"
 #include "ngraph/ops/cosh.hpp"
 #include "ngraph/ops/divide.hpp"
@@ -166,6 +167,7 @@ static const runtime::cpu::OpMap dispatcher{
    {TI(ngraph::op::Floor), &runtime::cpu::CPU_Emitter::EmitFloor},
    {TI(ngraph::op::Ceiling), &runtime::cpu::CPU_Emitter::EmitCeiling},
    {TI(ngraph::op::Sqrt), &runtime::cpu::CPU_Emitter::EmitSqrt},
+    {TI(ngraph::op::Convolution), &runtime::cpu::CPU_Emitter::EmitConvolution},
 };
 runtime::cpu::CPU_ExternalFunction::CPU_ExternalFunction(
@@ -209,6 +211,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
 #include "ngraph/runtime/cpu/cpu_kernels.hpp"
 #include "ngraph/runtime/kernel/broadcast.hpp"
 #include "ngraph/runtime/kernel/concat.hpp"
+#include "ngraph/runtime/kernel/convolution.hpp"
 #include "ngraph/runtime/kernel/dot.hpp"
 #include "ngraph/runtime/kernel/one_hot.hpp"
 #include "ngraph/runtime/kernel/reduce.hpp"

--- a/src/ngraph/runtime/interpreter/int_call_frame.hpp
+++ b/src/ngraph/runtime/interpreter/int_call_frame.hpp
@@ -23,6 +23,7 @@
 #include "ngraph/ops/broadcast.hpp"
 #include "ngraph/ops/concatenate.hpp"
 #include "ngraph/ops/constant.hpp"
+#include "ngraph/ops/convolution.hpp"
 #include "ngraph/ops/dot.hpp"
 #include "ngraph/ops/one_hot.hpp"
 #include "ngraph/ops/reduce.hpp"
@@ -44,6 +45,7 @@
 #include "ngraph/runtime/kernel/concat.hpp"
 #include "ngraph/runtime/kernel/constant.hpp"
 #include "ngraph/runtime/kernel/convert.hpp"
+#include "ngraph/runtime/kernel/convolution.hpp"
 #include "ngraph/runtime/kernel/copy.hpp"
 #include "ngraph/runtime/kernel/cos.hpp"
 #include "ngraph/runtime/kernel/cosh.hpp"
@@ -269,6 +271,18 @@ private:
                               reinterpret_cast<S*>(out[0]->get_data_ptr()),
                               out[0]->get_element_count());
        }
+        else if (node_op == "Convolution")
+        {
+            auto c = static_cast<const op::Convolution*>(&node);
+            kernel::convolution<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
+                                   reinterpret_cast<T*>(args[1]->get_data_ptr()),
+                                   reinterpret_cast<T*>(out[0]->get_data_ptr()),
+                                   args[0]->get_shape(),
+                                   args[1]->get_shape(),
+                                   out[0]->get_shape(),
+                                   c->get_window_movement_strides(),
+                                   c->get_window_dilation_strides());
+        }
        else if (node_op == "Cos")
        {
            kernel::cos<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),

--- a/src/ngraph/runtime/kernel/convolution.hpp
+++ b/src/ngraph/runtime/kernel/convolution.hpp
+// ----------------------------------------------------------------------------
+// Copyright 2017 Nervana Systems Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// ----------------------------------------------------------------------------
+#pragma once
+#include <cmath>
+#include "ngraph/common.hpp"
+#include "ngraph/coordinate_transform.hpp"
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace kernel
+        {
+            /// \brief Reference batched convolution forward kernel.
+            ///
+            /// For every output coordinate (img, chan_out, i_1, ..., i_n), walks an input window
+            /// and the filter slice for chan_out in lockstep and stores the sum of the
+            /// elementwise products into out.
+            ///
+            /// This function does not itself check that the shapes and strides are mutually
+            /// consistent; callers are expected to pass values already validated elsewhere.
+            ///
+            /// \param arg0 Image batch data, addressed through arg0_shape.
+            /// \param arg1 Filter data, addressed through arg1_shape.
+            /// \param out Output data, addressed through out_shape; every element is overwritten.
+            /// \param arg0_shape Image batch shape; axis 0 is the image index, axis 1 the input channel.
+            /// \param arg1_shape Filter shape; axis 0 is the output channel, axis 1 the input channel.
+            /// \param out_shape Output shape; axis 0 is the image index, axis 1 the output channel.
+            /// \param window_movement_strides Per-image-axis step of the window between outputs.
+            /// \param window_dilation_strides Per-image-axis spacing between window taps.
+            template <typename T>
+            void convolution(T* arg0,
+                             T* arg1,
+                             T* out,
+                             const Shape& arg0_shape,
+                             const Shape& arg1_shape,
+                             const Shape& out_shape,
+                             const Strides& window_movement_strides,
+                             const Strides& window_dilation_strides)
+            {
+                // These depend only on the argument shapes, so compute them once instead of
+                // once per output element.
+                const size_t n_image_dimensions = arg0_shape.size() - 2;
+                const size_t n_input_channels = arg0_shape[1];
+                // At the outermost level we will walk over every output coordinate O.
+                CoordinateTransform output_transform(out_shape);
+                for (const Coordinate& out_coord : output_transform)
+                {
+                    // Our output coordinate O will have the form:
+                    //
+                    //   (img,chan_out,i_1,...,i_n)
+                    const size_t img_index = out_coord[0];
+                    const size_t output_channel = out_coord[1];
+                    // For the input images we need to iterate the coordinate:
+                    //
+                    //   I:
+                    //
+                    // over the range (noninclusive on the right):
+                    //
+                    //   (img,0,s_1*i_1,s_2*i_2,...,s_n*i_n) ->
+                    //
+                    //     (img+1,chans_in_count,s_1*i_1 + l_1*(filter_dims_1 - 1) + 1,...,s_n*i_n + l_n*(filter_dims_n - 1) + 1)
+                    //
+                    // with strides:
+                    //
+                    //   (1,1,l_1,...,l_n).
+                    Shape input_batch_transform_start(2 + n_image_dimensions);
+                    Shape input_batch_transform_end(2 + n_image_dimensions);
+                    Shape input_batch_transform_strides(2 + n_image_dimensions, 1);
+                    input_batch_transform_start[0] = img_index;
+                    input_batch_transform_end[0] = img_index + 1;
+                    input_batch_transform_start[1] = 0;
+                    input_batch_transform_end[1] = n_input_channels;
+                    for (size_t i = 2; i < n_image_dimensions + 2; i++)
+                    {
+                        const size_t dilation_stride = window_dilation_strides[i - 2];
+                        const size_t movement_stride = window_movement_strides[i - 2];
+                        input_batch_transform_start[i] = movement_stride * out_coord[i];
+                        // The end is one past the last tap of the dilated window on this axis.
+                        input_batch_transform_end[i] = input_batch_transform_start[i] +
+                                                       (arg1_shape[i] - 1) * dilation_stride + 1;
+                        input_batch_transform_strides[i] = dilation_stride;
+                    }
+                    CoordinateTransform input_batch_transform(arg0_shape,
+                                                              input_batch_transform_start,
+                                                              input_batch_transform_end,
+                                                              input_batch_transform_strides);
+                    // Simultaneously with iterating I, for the filters we need to iterate the coordinate:
+                    //
+                    //   F
+                    //
+                    // over the range (noninclusive on the right):
+                    //
+                    //   (chan_out,0,0,...,0) -> (chan_out+1,chans_in_count,filter_dims_1,...,filter_dims_n)
+                    //
+                    // with unit stride.
+                    Shape filter_transform_start(2 + n_image_dimensions);
+                    Shape filter_transform_end(2 + n_image_dimensions);
+                    filter_transform_start[0] = output_channel;
+                    filter_transform_end[0] = output_channel + 1;
+                    filter_transform_start[1] = 0;
+                    filter_transform_end[1] = n_input_channels;
+                    for (size_t i = 2; i < n_image_dimensions + 2; i++)
+                    {
+                        filter_transform_start[i] = 0;
+                        filter_transform_end[i] = arg1_shape[i];
+                    }
+                    CoordinateTransform filter_transform(
+                        arg1_shape, filter_transform_start, filter_transform_end);
+                    // As we go, we sum up:
+                    //
+                    //   output[O] += arg0[I] * arg1[F].
+                    T result = 0;
+                    CoordinateTransform::Iterator input_it = input_batch_transform.begin();
+                    CoordinateTransform::Iterator filter_it = filter_transform.begin();
+                    // The loop stops as soon as either walk is exhausted.
+                    while (input_it != input_batch_transform.end() &&
+                           filter_it != filter_transform.end())
+                    {
+                        const Coordinate input_batch_coord = *input_it;
+                        const Coordinate filter_coord = *filter_it;
+                        result += arg0[input_batch_transform.index(input_batch_coord)] *
+                                  arg1[filter_transform.index(filter_coord)];
+                        ++input_it;
+                        ++filter_it;
+                    }
+                    out[output_transform.index(out_coord)] = result;
+                }
+            }
+        }
+    }
+}
--- a/src/ngraph/runtime/ngvm/external_function.cpp
+++ b/src/ngraph/runtime/ngvm/external_function.cpp
@@ -33,6 +33,7 @@
 #include "ngraph/ops/concatenate.hpp"
 #include "ngraph/ops/constant.hpp"
 #include "ngraph/ops/convert.hpp"
+#include "ngraph/ops/convolution.hpp"
 #include "ngraph/ops/cos.hpp"
 #include "ngraph/ops/cosh.hpp"
 #include "ngraph/ops/divide.hpp"
@@ -83,6 +84,7 @@
 #include "ngraph/runtime/ngvm/instruction/concat.hpp"
 #include "ngraph/runtime/ngvm/instruction/constant.hpp"
 #include "ngraph/runtime/ngvm/instruction/convert.hpp"
+#include "ngraph/runtime/ngvm/instruction/convolution.hpp"
 #include "ngraph/runtime/ngvm/instruction/copy.hpp"
 #include "ngraph/runtime/ngvm/instruction/copy_by_index.hpp"
 #include "ngraph/runtime/ngvm/instruction/cos.hpp"
@@ -540,6 +542,35 @@ ExternalFunction::OpMap& ExternalFunction::get_op_map()
 #undef REGISTER_CONVERT
        };
+        // Handler for op::Convolution nodes: collects the input/output shapes and
+        // the window strides from the node and pushes a ConvolutionInstruction.
+        REGISTER_TO_OP_MAP(op::Convolution)
+        {
+            // Unchecked downcast; presumably safe because this handler is only
+            // registered for op::Convolution -- confirm against REGISTER_TO_OP_MAP.
+            auto convolution = static_cast<const op::Convolution*>(n);
+            // Shapes of the two inputs, taken from their tensor view types.
+            auto arg0_tensor_type = n->get_inputs().at(0).get_tensor_view_type();
+            auto arg0_shape = arg0_tensor_type->get_shape();
+            auto arg1_tensor_type = n->get_inputs().at(1).get_tensor_view_type();
+            auto arg1_shape = arg1_tensor_type->get_shape();
+            // The node's value type must be a TensorViewType; the cast result is
+            // only checked by assert, so the check vanishes when NDEBUG is defined.
+            auto result_tensor_type =
+                dynamic_pointer_cast<const TensorViewType>(n->get_value_type());
+            assert(nullptr != result_tensor_type);
+            auto result_shape = result_tensor_type->get_shape();
+            auto& result_element_type = result_tensor_type->get_element_type();
+            // NOTE(review): the macro presumably instantiates ConvolutionInstruction
+            // for the element type named by result_element_type and reports the
+            // given message for unsupported types -- confirm against its definition.
+            // The trailing arguments match the ConvolutionInstruction constructor:
+            // arg0, arg1, out, the three shapes, then movement and dilation strides.
+            PUSH_POLYMORPHIC_INSTRUCTION(result_element_type,
+                                         "Convolution has unhandled element type",
+                                         instruction::ConvolutionInstruction,
+                                         in[0],
+                                         in[1],
+                                         out[0],
+                                         arg0_shape,
+                                         arg1_shape,
+                                         result_shape,
+                                         convolution->get_window_movement_strides(),
+                                         convolution->get_window_dilation_strides());
+        };
        REGISTER_TO_OP_MAP(op::Dot)
        {
            auto dot = static_cast<const op::Dot*>(n);

--- a/src/ngraph/runtime/ngvm/instruction/convolution.hpp
+++ b/src/ngraph/runtime/ngvm/instruction/convolution.hpp
+// ----------------------------------------------------------------------------
+// Copyright 2017 Nervana Systems Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// ----------------------------------------------------------------------------
+#pragma once
+#include "ngraph/runtime/kernel/convolution.hpp"
+#include "ngraph/runtime/ngvm/call_frame.hpp"
+#include "ngraph/runtime/ngvm/instruction.hpp"
+#include "ngraph/runtime/ngvm/utils.hpp"
+#include "ngraph/runtime/tensor_view.hpp"
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace ngvm
+        {
+            namespace instruction
+            {
+                /// NGVM instruction that forwards to kernel::convolution.
+                ///
+                /// The tensor view descriptors, the three shapes and both stride
+                /// vectors are copied at construction time; execute() resolves the
+                /// descriptors to data pointers in the given call frame and invokes
+                /// the kernel with the stored shapes and strides.
+                ///
+                /// \tparam ET Element type descriptor; ET::type is the data type the
+                ///            kernel is instantiated with.
+                template <typename ET>
+                class ConvolutionInstruction : public Instruction
+                {
+                public:
+                    /// \param arg0 Descriptor of the first input tensor view.
+                    /// \param arg1 Descriptor of the second input tensor view.
+                    /// \param out Descriptor of the output tensor view.
+                    /// \param arg0_shape Shape of the first input.
+                    /// \param arg1_shape Shape of the second input.
+                    /// \param out_shape Shape of the output.
+                    /// \param window_movement_strides Window movement strides passed
+                    ///        through to the kernel.
+                    /// \param window_dilation_strides Window dilation strides passed
+                    ///        through to the kernel.
+                    ConvolutionInstruction(const TensorViewInfo& arg0,
+                                           const TensorViewInfo& arg1,
+                                           const TensorViewInfo& out,
+                                           const Shape& arg0_shape,
+                                           const Shape& arg1_shape,
+                                           const Shape& out_shape,
+                                           const Strides& window_movement_strides,
+                                           const Strides& window_dilation_strides)
+                        : m_arg0(arg0)
+                        , m_arg1(arg1)
+                        , m_out(out)
+                        , m_arg0_shape(arg0_shape)
+                        , m_arg1_shape(arg1_shape)
+                        , m_out_shape(out_shape)
+                        , m_window_movement_strides(window_movement_strides)
+                        , m_window_dilation_strides(window_dilation_strides)
+                    {
+                    }
+
+                    /// Looks up the data pointers for both inputs and the output in
+                    /// \p call_frame and runs the convolution kernel on them.
+                    void execute(CallFrame& call_frame) const override
+                    {
+                        using value_type = typename ET::type;
+
+                        value_type* arg0_data = get_tensor_data_ptr<ET>(call_frame, m_arg0);
+                        value_type* arg1_data = get_tensor_data_ptr<ET>(call_frame, m_arg1);
+                        value_type* out_data = get_tensor_data_ptr<ET>(call_frame, m_out);
+
+                        kernel::convolution<value_type>(arg0_data,
+                                                        arg1_data,
+                                                        out_data,
+                                                        m_arg0_shape,
+                                                        m_arg1_shape,
+                                                        m_out_shape,
+                                                        m_window_movement_strides,
+                                                        m_window_dilation_strides);
+                    }
+
+                protected:
+                    // Tensor view descriptors, resolved against a call frame in execute().
+                    TensorViewInfo m_arg0;
+                    TensorViewInfo m_arg1;
+                    TensorViewInfo m_out;
+                    // Shapes handed to the kernel unchanged.
+                    Shape m_arg0_shape;
+                    Shape m_arg1_shape;
+                    Shape m_out_shape;
+                    // Strides handed to the kernel unchanged.
+                    Strides m_window_movement_strides;
+                    Strides m_window_dilation_strides;
+                };
+            }
+        }
+    }
+}
--- a/src/ngraph/util.hpp
+++ b/src/ngraph/util.hpp
@@ -223,6 +223,12 @@ namespace ngraph
        return a * b;
    }
+    /// Divides x by y and rounds the quotient up.
+    ///
+    /// For integer x > 0 and y > 0 the result is the smallest integer that is
+    /// >= x / y; x == 0 yields 0. y must be non-zero.
+    /// NOTE(review): for negative x the result is not the mathematical ceiling
+    /// in general (e.g. x = -4, y = 2 gives -1).
+    template <typename T>
+    T ceil_div(const T& x, const T& y)
+    {
+        // Zero is answered up front so that x - 1 is never evaluated for x == 0,
+        // which would wrap around when T is unsigned.
+        if (x == 0)
+        {
+            return 0;
+        }
+        return (x - 1) / y + 1;
+    }
    void traverse_nodes(Function* p, std::function<void(std::shared_ptr<Node>)> f);
    void traverse_nodes(std::shared_ptr<Function> p, std::function<void(std::shared_ptr<Node>)> f);
    void traverse_functions(std::shared_ptr<Function> p,

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -82,7 +82,9 @@ endif()
+# For every backend, instantiate the shared test templates with BACKEND_NAME
+# substituted and add the generated sources to the unit-test source list.
 foreach(BACKEND_NAME ${BACKEND_NAMES})
    configure_file(backend_test.in.cpp backend_test_${BACKEND_NAME}.cpp)
+    # The convolution tests get their own per-backend copy as well.
+    configure_file(convolution_test.in.cpp convolution_test_${BACKEND_NAME}.cpp)
    set(SRC ${SRC} ${CMAKE_CURRENT_BINARY_DIR}/backend_test_${BACKEND_NAME}.cpp)
+    set(SRC ${SRC} ${CMAKE_CURRENT_BINARY_DIR}/convolution_test_${BACKEND_NAME}.cpp)
    message(STATUS "Adding unit test for backend ${BACKEND_NAME}")
 endforeach()

--- a/test/convolution_test.in.cpp
+++ b/test/convolution_test.in.cpp
--- a/test/ref_generators/generate_convolution_ref.py
+++ b/test/ref_generators/generate_convolution_ref.py
--- a/test/type_prop.cpp
+++ b/test/type_prop.cpp
--- a/test/update_reference.sh
+++ b/test/update_reference.sh
+#!/bin/bash
+# Runs the convolution reference generator, passing it the path of
+# convolution_test.in.cpp next to this script (presumably the file it
+# rewrites), and then runs the repository's code formatter.
+
+# Stop at the first failing step so a generator error is not masked by the
+# exit status of the formatter.
+set -e
+
+# Directory containing this script. Plain assignment (not `declare x=$(...)`)
+# so that a failing command substitution is not hidden.
+THIS_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# All path expansions are quoted so a checkout path containing spaces or glob
+# characters is passed through as a single argument.
+python "${THIS_SCRIPT_DIR}/ref_generators/generate_convolution_ref.py" "${THIS_SCRIPT_DIR}/convolution_test.in.cpp"
+"${THIS_SCRIPT_DIR}/../maint/apply-code-format.sh"