Unverified commit d933d531, authored by Adam Procter, committed by GitHub

Convolution backprop ops (#416)

parent a87675fe
# API Changes
## Changes to convolution and pooling ops
* Backprop ops have been added for convolution ops.
* The convolution and pooling ops have had several methods/fields renamed, to reflect a shift
in terminology from "images" to "data". Generally this just means that you will have to
`s/image_batch/data_batch/` and `s/image_dilation_strides/data_dilation_strides/`.
* The following functions have been removed:
+ `AvgPool`: `get_channel_count`, `get_input_image_physical_shape`, `get_input_image_virtual_shape`, `get_output_image_shape`, `get_batch_size`, `get_image_dimension_count`
+ `MaxPool`: `get_channel_count`, `get_input_image_shape`, `get_output_image_shape`, `get_batch_size`, `get_image_dimension_count`
+ `Convolution`: `get_input_channel_count`, `get_output_channel_count`, `get_input_image_physical_shape`, `get_input_image_virtual_shape`, `get_output_image_shape`, `get_window_physical_shape`, `get_window_virtual_shape`, `get_batch_size`, `get_image_dimension_count`
  All of the above information can be inferred from the shapes and parameters of the op; the sketch below shows how.
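For reference, a minimal sketch of how the removed getters map onto plain shape queries (the helper names are hypothetical; the `(N, C, d_1, ..., d_n)` layout is as documented in the op headers):

```cpp
#include <cstddef>
#include <vector>

using Shape = std::vector<size_t>; // stand-in for ngraph::Shape

// Given an op's input data batch shape (N, C, d_1, ..., d_n):
size_t batch_size(const Shape& s) { return s[0]; }                // was get_batch_size
size_t channel_count(const Shape& s) { return s[1]; }             // was get_channel_count
size_t spatial_dim_count(const Shape& s) { return s.size() - 2; } // was get_image_dimension_count
Shape spatial_shape(const Shape& s) { return Shape(s.begin() + 2, s.end()); }
```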
## Negative convolution padding
`Convolution` now allows negative padding. This means that the `padding_below` and `padding_above`
parameters may now contain negative values; a negative padding value crops the data batch along the corresponding spatial dimension instead of padding it.
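As an illustration of the signed-padding arithmetic (a hypothetical helper, not the library API; note that the kernels below carry padding as `CoordinateDiff`, i.e. `std::ptrdiff_t`):

```cpp
#include <cstddef>

// Virtual (padded) extent of one spatial dimension: p + d + q, where the
// padding values p and q may now be negative (cropping). For example,
// d = 6 with p = q = -1 gives a virtual extent of 4.
std::ptrdiff_t padded_extent(size_t d, std::ptrdiff_t p, std::ptrdiff_t q)
{
    return p + static_cast<std::ptrdiff_t>(d) + q;
}
```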
@@ -37,109 +37,115 @@ op::AvgPool::AvgPool(const std::shared_ptr<Node>& arg,
if (arg_shape.size() < 3)
{
throw ngraph_error(
"Average-pool image batch input must have rank of at least 3 (one batch axis, one "
"channel axis, at least one image dimension).");
"Average-pool data batch input must have rank of at least 3 (one batch axis, one "
"channel axis, at least one spatial dimension).");
}
m_batch_size = arg_shape[0];
if (m_batch_size == 0)
size_t batch_size = arg_shape[0];
if (batch_size == 0)
{
throw ngraph_error("Average-pool image batch size is zero.");
throw ngraph_error("Average-pool data batch size is zero.");
}
m_channel_count = arg_shape[1];
if (m_channel_count == 0)
size_t channel_count = arg_shape[1];
if (channel_count == 0)
{
throw ngraph_error("Average-pool requires at least one image depth channel.");
throw ngraph_error("Average-pool requires at least one feature channel.");
}
m_image_dimension_count = arg_shape.size() - 2;
size_t spatial_dimension_count = arg_shape.size() - 2;
//
// Make sure window shape, window movement strides, and have same rank as Di.
//
if (m_window_shape.size() != m_image_dimension_count)
if (window_shape.size() != spatial_dimension_count)
{
throw ngraph_error(
"Average-pool window shape rank does not match number of image dimensions.");
"Average-pool window shape rank does not match number of spatial dimensions.");
}
if (m_window_movement_strides.size() != m_image_dimension_count)
if (window_movement_strides.size() != spatial_dimension_count)
{
throw ngraph_error(
"Average-pool window movement stride rank does not match number of image dimensions.");
"Average-pool window movement stride rank does not match number of spatial "
"dimensions.");
}
if (m_padding_below.size() != m_image_dimension_count)
if (padding_below.size() != spatial_dimension_count)
{
throw ngraph_error(
"Average-pool below-padding rank does not match number of image dimensions.");
"Average-pool below-padding rank does not match number of spatial dimensions.");
}
if (m_padding_above.size() != m_image_dimension_count)
if (padding_above.size() != spatial_dimension_count)
{
throw ngraph_error(
"Average-pool above-padding rank does not match number of image dimensions.");
"Average-pool above-padding rank does not match number of spatial dimensions.");
}
//
// Extract input image shape Di and make sure all dimensions are larger than 0.
// Extract input item shape Di and make sure all dimensions are larger than 0.
//
for (size_t i = 0; i < m_image_dimension_count; i++)
Shape input_item_virtual_shape;
for (size_t i = 0; i < spatial_dimension_count; i++)
{
size_t dim_size = arg_shape[1 + 1 + i];
m_input_image_physical_shape.push_back(dim_size);
m_input_image_virtual_shape.push_back(padding_below[i] + dim_size + padding_above[i]);
size_t virtual_dim_size = padding_below[i] + dim_size + padding_above[i];
input_item_virtual_shape.push_back(virtual_dim_size);
if (m_input_image_virtual_shape[i] == 0)
if (virtual_dim_size == 0)
{
throw ngraph_error("Average-pool input image dimension is zero even after padding.");
throw ngraph_error("Average-pool input spatial dimension is zero even after padding.");
}
}
//
// Make sure window shape dimensions are all larger than 0.
//
for (size_t i = 0; i < m_image_dimension_count; i++)
for (size_t i = 0; i < spatial_dimension_count; i++)
{
if (m_window_shape[i] == 0)
if (window_shape[i] == 0)
{
throw ngraph_error("Average-pool window shape has a zero-length axis.");
}
}
//
// Make the max pooling window fits within the image dimensions.
// Make the max pooling window fits within the spatial dimensions.
//
for (size_t i = 0; i < m_image_dimension_count; i++)
for (size_t i = 0; i < spatial_dimension_count; i++)
{
if (m_window_shape[i] > m_input_image_virtual_shape[i])
if (window_shape[i] > input_item_virtual_shape[i])
{
throw ngraph_error(
"Average-pool window shape is larger than the image even after padding.");
"Average-pool window shape is larger than the spatial dimensions even after "
"padding.");
}
}
//
// Compute image output shape Do, checking at the same time that all window movement strides are larger than 0.
// Compute output item shape Do, checking at the same time that all window movement strides are larger than 0.
//
for (size_t i = 0; i < m_image_dimension_count; i++)
Shape output_item_shape;
for (size_t i = 0; i < spatial_dimension_count; i++)
{
if (m_window_movement_strides[i] == 0)
if (window_movement_strides[i] == 0)
{
throw ngraph_error("Average-pool window axis movement stride is zero.");
}
m_output_image_shape.push_back(ceil_div(
m_input_image_virtual_shape[i] - m_window_shape[i] + 1, m_window_movement_strides[i]));
output_item_shape.push_back(ceil_div(input_item_virtual_shape[i] - window_shape[i] + 1,
window_movement_strides[i]));
}
//
// Construct result shape: NCDo.
//
Shape result_shape(1 + 1 + m_image_dimension_count);
result_shape[0] = m_batch_size;
result_shape[1] = m_channel_count;
std::copy(m_output_image_shape.begin(), m_output_image_shape.end(), result_shape.begin() + 2);
Shape result_shape(1 + 1 + spatial_dimension_count);
result_shape[0] = batch_size;
result_shape[1] = channel_count;
std::copy(output_item_shape.begin(), output_item_shape.end(), result_shape.begin() + 2);
set_value_type_checked(get_input_element_type(0), result_shape);
}
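The output extents computed above rely on `ceil_div`; a plausible definition (the library's own helper may differ) is:

```cpp
#include <cstddef>

// Ceiling division for positive extents, written to avoid overflow in x + y - 1.
size_t ceil_div(size_t x, size_t y)
{
    return (x == 0) ? 0 : 1 + ((x - 1) / y);
}
// Per spatial dimension: output extent = ceil_div(virtual_dim - window_dim + 1, stride).
```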
@@ -148,7 +154,7 @@ static Shape default_padding(const std::shared_ptr<Node>& arg)
{
if (arg->get_outputs().size() != 1)
{
-throw ngraph_error("Average-pool image batch argument must have exactly one output");
+throw ngraph_error("Average-pool data batch argument must have exactly one output");
}
auto& arg_shape = arg->get_outputs().at(0).get_shape();
@@ -156,8 +162,8 @@ static Shape default_padding(const std::shared_ptr<Node>& arg)
{
// For consistency we should throw the same error message here that we throw in the constructor.
throw ngraph_error(
-"Average-pool image batch input must have rank of at least 3 (one batch axis, one "
-"channel axis, at least one image dimension).");
+"Average-pool data batch input must have rank of at least 3 (one batch axis, one "
+"channel axis, at least one spatial dimension).");
}
return Shape(arg_shape.size() - 2, 0);
}
@@ -174,7 +180,7 @@ static Strides default_strides(const std::shared_ptr<Node>& arg)
{
if (arg->get_outputs().size() != 1)
{
-throw ngraph_error("Average-pool image batch argument must have exactly one output");
+throw ngraph_error("Average-pool data batch argument must have exactly one output");
}
auto& arg_shape = arg->get_outputs().at(0).get_shape();
@@ -182,8 +188,8 @@ static Strides default_strides(const std::shared_ptr<Node>& arg)
{
// For consistency we should throw the same error message here that we throw in the constructor.
throw ngraph_error(
-"Average-pool image batch input must have rank of at least 3 (one batch axis, one "
-"channel axis, at least one image dimension).");
+"Average-pool data batch input must have rank of at least 3 (one batch axis, one "
+"channel axis, at least one spatial dimension).");
}
return Strides(arg_shape.size() - 2, 1);
}
@@ -203,13 +209,6 @@ bool op::AvgPool::is_functionally_identical(const Node& other) const
rc &= m_window_movement_strides == rhs.m_window_movement_strides;
rc &= m_padding_below == rhs.m_padding_below;
rc &= m_padding_above == rhs.m_padding_above;
-rc &= m_window_movement_strides == rhs.m_window_movement_strides;
-rc &= m_channel_count == rhs.m_channel_count;
-rc &= m_input_image_physical_shape == rhs.m_input_image_physical_shape;
-rc &= m_input_image_virtual_shape == rhs.m_input_image_virtual_shape;
-rc &= m_output_image_shape == rhs.m_output_image_shape;
-rc &= m_batch_size == rhs.m_batch_size;
-rc &= m_image_dimension_count == rhs.m_image_dimension_count;
}
else
{
@@ -22,8 +22,9 @@ namespace ngraph
{
/// \brief Batched average pooling operation, with optional padding and window stride.
///
-/// Average pooling takes as its input an image batch tensor of shape \f$(N,C,d_1,\dots,d_n)\f$ where \f$n > 0\f$, every \f$d_i > 0\f$, and where \f$N\f$ is
-/// the batch size, and \f$C > 0\f$ is the number of channels (sometimes called features). It also takes four parameters:
+/// Average pooling takes as its input a data batch tensor of shape \f$(N,C,d_1,\dots,d_n)\f$ where \f$n > 0\f$, every \f$d_i > 0\f$, and where \f$N\f$ is
+/// the batch size, and \f$C > 0\f$ is the number of channels (sometimes called features). The dimensions \f$(d_1,\dots,d_n)\f$ correspond to the shape of
+/// an \f$n\f$-dimensional data item in a batch. For example, where \f$n=2\f$, the data may represent a two-dimensional image. It also takes four parameters:
///
/// 1. <i>(the window shape)</i> a size vector \f$(w_1,\dots,w_n)\f$ where every \f$w_i \le d_i\f$; and
/// 2. <i>(the window movement strides, optional)</i> a vector of positive integers \f$(s_1,\dots,s_n)\f$.
@@ -32,7 +33,7 @@ namespace ngraph
///
/// The output has the shape \f$(N,C,d'_1,\dots,d'_n)\f$, where \f$d'_i = \lceil \frac{p_i + d_i + q_i - w_i + 1}{s_i} \rceil\f$.
///
-/// *In the absence of padding*, given an input image batch tensor \f$T_\textit{in}\f$, the output tensor is defined by the equation
+/// *In the absence of padding*, given an input data batch tensor \f$T_\textit{in}\f$, the output tensor is defined by the equation
///
/// \f[
/// T_\textit{out}[a,c,i_1,\dots,i_n] = \frac{\sum_{j_1 = s_1 i_1, \dots, j_n = s_n i_n}^{j_1 = s_1 i_1 + w_1 - 1, \dots, j_n = s_n i_n + w_n - 1} T_\textit{in}[a,c,j_1,\dots,j_n]}{\prod_{i=1}^n{w_i}}
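A worked instance of the output-shape formula above (illustrative numbers, not from the source):

```cpp
#include <cstddef>
#include <iostream>

int main()
{
    // d' = ceil((p + d + q - w + 1) / s) with d = 10, p = q = 1, w = 3, s = 2.
    size_t d = 10, p = 1, q = 1, w = 3, s = 2;
    size_t numerator = p + d + q - w + 1;   // 10
    size_t d_out = 1 + (numerator - 1) / s; // ceiling division -> 5
    std::cout << d_out << "\n";             // a (2,3,10,10) input yields (2,3,5,5)
}
```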
@@ -65,7 +66,7 @@ namespace ngraph
public:
/// \brief Constructs a batched average pooling operation.
///
-/// \param arg The node producing the input image batch tensor.
+/// \param arg The node producing the input data batch tensor.
/// \param window_shape The window shape.
/// \param window_movement_strides The window movement strides.
/// \param padding_below The below-padding shape.
@@ -78,7 +79,7 @@ namespace ngraph
/// \brief Constructs a batched, unpadded average pooling operation (i.e., all padding shapes are set to 0).
///
-/// \param arg The node producing the input image batch tensor.
+/// \param arg The node producing the input data batch tensor.
/// \param window_shape The window shape.
/// \param window_movement_strides The window movement strides.
AvgPool(const std::shared_ptr<Node>& arg,
@@ -87,7 +88,7 @@ namespace ngraph
/// \brief Constructs an unstrided batched average pooling operation (i.e., all window movement strides are 1 and all padding shapes are set to 0).
///
-/// \param arg The node producing the input image batch tensor.
+/// \param arg The node producing the input data batch tensor.
/// \param window_shape The window shape.
AvgPool(const std::shared_ptr<Node>& arg, const Shape& window_shape);
@@ -102,6 +103,7 @@ namespace ngraph
m_padding_below,
m_padding_above);
}
+bool is_functionally_identical(const Node&) const override;
/// \return The window shape.
const Shape& get_window_shape() const { return m_window_shape; }
@@ -111,38 +113,11 @@ namespace ngraph
const Shape& get_padding_below() const { return m_padding_below; }
/// \return The above-padding shape.
const Shape& get_padding_above() const { return m_padding_above; }
-/// \return The number of image channels.
-size_t get_channel_count() const { return m_channel_count; }
-/// \return The input image physical shape, not including padding.
-const Shape& get_input_image_physical_shape() const
-{
-return m_input_image_physical_shape;
-}
-/// \return The input image virtual shape, including padding.
-const Shape& get_input_image_virtual_shape() const
-{
-return m_input_image_virtual_shape;
-}
-/// \return The output image shape.
-const Shape& get_output_image_shape() const { return m_output_image_shape; }
-/// \return The batch size.
-size_t get_batch_size() const { return m_batch_size; }
-/// \return The number of image dimensions.
-size_t get_image_dimension_count() const { return m_image_dimension_count; }
-bool is_functionally_identical(const Node&) const override;
protected:
Shape m_window_shape;
Strides m_window_movement_strides;
Shape m_padding_below;
Shape m_padding_above;
-size_t m_channel_count;
-Shape m_input_image_physical_shape;
-Shape m_input_image_virtual_shape;
-Shape m_output_image_shape;
-size_t m_batch_size;
-size_t m_image_dimension_count;
};
}
}
@@ -38,93 +38,98 @@ op::MaxPool::MaxPool(const std::shared_ptr<Node>& arg,
if (arg_shape.size() < 3)
{
throw ngraph_error(
-"Max pool image batch input must have rank of at least 3 (one batch axis, one "
-"channel axis, at least one image dimension).");
+"Max pool data batch input must have rank of at least 3 (one batch axis, one "
+"channel axis, at least one spatial dimension).");
}
-m_batch_size = arg_shape[0];
-if (m_batch_size == 0)
+size_t batch_size = arg_shape[0];
+if (batch_size == 0)
{
-throw ngraph_error("Max pool image batch size is zero.");
+throw ngraph_error("Max pool data batch size is zero.");
}
-m_channel_count = arg_shape[1];
-if (m_channel_count == 0)
+size_t channel_count = arg_shape[1];
+if (channel_count == 0)
{
-throw ngraph_error("Max pool requires at least one image depth channel.");
+throw ngraph_error("Max pool requires at least one feature channel.");
}
-m_image_dimension_count = arg_shape.size() - 2;
+size_t spatial_dimension_count = arg_shape.size() - 2;
//
// Make sure window shape and movement strides have the same rank as Di.
//
-if (m_window_shape.size() != m_image_dimension_count)
+if (window_shape.size() != spatial_dimension_count)
{
-throw ngraph_error("Max pool window shape rank does not match number of image dimensions.");
+throw ngraph_error(
+"Max pool window shape rank does not match number of spatial dimensions.");
}
-if (m_window_movement_strides.size() != m_image_dimension_count)
+if (window_movement_strides.size() != spatial_dimension_count)
{
throw ngraph_error(
-"Max pool window movement stride rank does not match number of image dimensions.");
+"Max pool window movement stride rank does not match number of spatial dimensions.");
}
//
-// Extract input image shape Di and make sure all dimensions are larger than 0.
+// Extract input item shape Di and make sure all dimensions are larger than 0.
//
-for (size_t i = 0; i < m_image_dimension_count; i++)
+Shape input_spatial_shape;
+for (size_t i = 0; i < spatial_dimension_count; i++)
{
-m_input_image_shape.push_back(arg_shape[1 + 1 + i]);
+input_spatial_shape.push_back(arg_shape[1 + 1 + i]);
-if (m_input_image_shape[i] == 0)
+if (input_spatial_shape[i] == 0)
{
-throw ngraph_error("Max pool input image dimension is zero.");
+throw ngraph_error("Max pool input spatial dimension is zero.");
}
}
//
// Make sure window shape dimensions are all larger than 0.
//
-for (size_t i = 0; i < m_image_dimension_count; i++)
+for (size_t i = 0; i < spatial_dimension_count; i++)
{
-if (m_window_shape[i] == 0)
+if (window_shape[i] == 0)
{
throw ngraph_error("Max pool window shape has a zero-length axis.");
}
}
//
-// Make the max pooling window fits within the image dimensions.
+// Make sure the max pooling window fits within the spatial dimensions.
//
-for (size_t i = 0; i < m_image_dimension_count; i++)
+for (size_t i = 0; i < spatial_dimension_count; i++)
{
-if (m_window_shape[i] > m_input_image_shape[i])
+if (window_shape[i] > input_spatial_shape[i])
{
-throw ngraph_error("Max pool window shape is larger than the image.");
+throw ngraph_error("Max pool window shape is larger than the spatial dimensions.");
}
}
//
-// Compute image output shape Do, checking at the same time that all window movement strides are larger than 0.
+// Compute output item shape Do, checking at the same time that all window movement strides are larger than 0.
//
-for (size_t i = 0; i < m_image_dimension_count; i++)
+Shape output_spatial_shape;
+for (size_t i = 0; i < spatial_dimension_count; i++)
{
-if (m_window_movement_strides[i] == 0)
+if (window_movement_strides[i] == 0)
{
throw ngraph_error("Max pool window axis movement stride is zero.");
}
-m_output_image_shape.push_back(
-ceil_div(m_input_image_shape[i] - m_window_shape[i] + 1, m_window_movement_strides[i]));
+output_spatial_shape.push_back(
+ceil_div(input_spatial_shape[i] - window_shape[i] + 1, window_movement_strides[i]));
}
//
// Construct result shape: NCDo.
//
-Shape result_shape(1 + 1 + m_image_dimension_count);
-result_shape[0] = m_batch_size;
-result_shape[1] = m_channel_count;
-std::copy(m_output_image_shape.begin(), m_output_image_shape.end(), result_shape.begin() + 2);
+Shape result_shape(1 + 1 + spatial_dimension_count);
+result_shape[0] = batch_size;
+result_shape[1] = channel_count;
+std::copy(output_spatial_shape.begin(), output_spatial_shape.end(), result_shape.begin() + 2);
set_value_type_checked(get_inputs().at(0).get_element_type(), result_shape);
}
@@ -133,7 +138,7 @@ static Strides default_strides(const std::shared_ptr<Node>& arg)
{
if (arg->get_outputs().size() != 1)
{
-throw ngraph_error("Max pool image batch argument must have exactly one output");
+throw ngraph_error("Max pool data batch argument must have exactly one output");
}
auto& arg_shape = arg->get_outputs().at(0).get_shape();
@@ -141,8 +146,8 @@ static Strides default_strides(const std::shared_ptr<Node>& arg)
{
// For consistency we should throw the same error message here that we throw in the constructor.
throw ngraph_error(
-"Max pool image batch input must have rank of at least 3 (one batch axis, one "
-"channel axis, at least one image dimension).");
+"Max pool data batch input must have rank of at least 3 (one batch axis, one "
+"channel axis, at least one spatial dimension).");
}
return Strides(arg_shape.size() - 2, 1);
}
@@ -160,11 +165,6 @@ bool op::MaxPool::is_functionally_identical(const Node& other) const
const MaxPool& rhs = dynamic_cast<const MaxPool&>(other);
rc &= m_window_shape == rhs.m_window_shape;
rc &= m_window_movement_strides == rhs.m_window_movement_strides;
-rc &= m_channel_count == rhs.m_channel_count;
-rc &= m_input_image_shape == rhs.m_input_image_shape;
-rc &= m_output_image_shape == rhs.m_output_image_shape;
-rc &= m_batch_size == rhs.m_batch_size;
-rc &= m_image_dimension_count == rhs.m_image_dimension_count;
}
else
{
@@ -22,15 +22,16 @@ namespace ngraph
{
/// \brief Batched max pooling operation, with optional window stride.
///
-/// Max pooling takes as its input an image batch tensor of shape \f$(N,C,d_1,\dots,d_n)\f$ where \f$n > 0\f$, every \f$d_i > 0\f$, and where \f$N\f$ is
-/// the batch size, and \f$C > 0\f$ is the number of channels (sometimes called features). It also takes two parameters:
+/// Max pooling takes as its input a data batch tensor of shape \f$(N,C,d_1,\dots,d_n)\f$ where \f$n > 0\f$, every \f$d_i > 0\f$, and where \f$N\f$ is
+/// the batch size, and \f$C > 0\f$ is the number of channels (sometimes called features). The dimensions \f$(d_1,\dots,d_n)\f$ correspond to the shape of
+/// an \f$n\f$-dimensional data item in a batch. For example, where \f$n=2\f$, the data may represent a two-dimensional image. It also takes two parameters:
///
/// 1. <i>(the window shape)</i> a size vector \f$(w_1,\dots,w_n)\f$ where every \f$w_i \le d_i\f$; and
/// 2. <i>(the window movement strides, optional)</i> a vector of positive integers \f$(s_1,\dots,s_n)\f$.
///
/// The output has the shape \f$(N,C,d'_1,\dots,d'_n)\f$, where \f$d'_i = \lceil \frac{d_i - w_i + 1}{s_i} \rceil\f$.
///
-/// Given an input image batch tensor \f$T_\textit{in}\f$, the output tensor is defined by the equation
+/// Given an input data batch tensor \f$T_\textit{in}\f$, the output tensor is defined by the equation
///
/// \f[
/// T_\textit{out}[a,c,i_1,\dots,i_n] = \max_{j_1 = s_1 i_1, \dots, j_n = s_n i_n}^{j_1 = s_1 i_1 + w_1 - 1, \dots, j_n = s_n i_n + w_n - 1} (T_\textit{in}[a,c,j_1,\dots,j_n])
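A minimal usage sketch of the op documented above (assumes the ngraph headers and the `op::Parameter`/`element::f32` API of this era; illustrative, not part of this diff):

```cpp
// 2x2 max pool with stride 2 over an NCHW tensor.
auto data = std::make_shared<op::Parameter>(element::f32, Shape{1, 1, 4, 4});
auto pool = std::make_shared<op::MaxPool>(data,
                                          Shape{2, 2},    // window shape
                                          Strides{2, 2}); // window movement strides
// Output shape: (1, 1, ceil((4 - 2 + 1) / 2), ceil((4 - 2 + 1) / 2)) = (1, 1, 2, 2).
```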
@@ -41,7 +42,7 @@ namespace ngraph
public:
/// \brief Constructs a batched max pooling operation.
///
-/// \param arg The node producing the input image batch tensor.
+/// \param arg The node producing the input data batch tensor.
/// \param window_shape The window shape.
/// \param window_movement_strides The window movement strides.
MaxPool(const std::shared_ptr<Node>& arg,
@@ -50,7 +51,7 @@ namespace ngraph
/// \brief Constructs an unstrided batched max pooling operation (i.e., all window movement strides are 1).
///
-/// \param arg The node producing the input image batch tensor.
+/// \param arg The node producing the input data batch tensor.
/// \param window_shape The window shape.
MaxPool(const std::shared_ptr<Node>& arg, const Shape& window_shape);
@@ -62,35 +63,18 @@ namespace ngraph
return std::make_shared<MaxPool>(
new_args.at(0), m_window_shape, m_window_movement_strides);
}
+bool is_functionally_identical(const Node&) const override;
/// \return The window shape.
const Shape& get_window_shape() const { return m_window_shape; }
/// \return The window movement strides.
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
-/// \return The number of image channels.
-size_t get_channel_count() const { return m_channel_count; }
-/// \return The input image shape.
-const Shape& get_input_image_shape() const { return m_input_image_shape; }
-/// \return The output image shape.
-const Shape& get_output_image_shape() const { return m_output_image_shape; }
-/// \return The batch size.
-size_t get_batch_size() const { return m_batch_size; }
-/// \return The number of image dimensions.
-size_t get_image_dimension_count() const { return m_image_dimension_count; }
-bool is_functionally_identical(const Node&) const override;
protected:
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta) override;
Shape m_window_shape;
Strides m_window_movement_strides;
-size_t m_channel_count;
-Shape m_input_image_shape;
-Shape m_output_image_shape;
-size_t m_batch_size;
-size_t m_image_dimension_count;
};
}
}
@@ -1847,17 +1847,17 @@ void runtime::cpu::CPU_Emitter::EmitConvolution(codegen::CodeWriter& writer,
filter_dilated = filter_dilated || (s != 1);
}
-bool images_dilated = false;
-for (size_t s : convolution->get_image_dilation_strides())
+bool data_dilated = false;
+for (size_t s : convolution->get_data_dilation_strides())
{
-images_dilated = images_dilated || (s != 1);
+data_dilated = data_dilated || (s != 1);
}
// TODO(jmenon): MKLDNN streams should be static so we need to either implement
// codegen for statics or move primitive and stream construction out
// of the generated function and only generate code to run/rerun the stream
-if (!filter_dilated && !images_dilated && arg0_rank == 4 && arg1_rank == 4 &&
+if (!filter_dilated && !data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
args[0].get_element_type() == element::f32)
{
const string& et = get_mkldnn_data_type(args[0].get_element_type().c_type_string());
@@ -1890,7 +1890,7 @@ void runtime::cpu::CPU_Emitter::EmitConvolution(codegen::CodeWriter& writer,
writer.indent--;
writer << "}\n";
}
-else if (filter_dilated && !images_dilated && arg0_rank == 4 && arg1_rank == 4 &&
+else if (filter_dilated && !data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
args[0].get_element_type() == element::f32)
{
// For dilation, MKLDNN wants to know how many elements to insert between, not how far
@@ -1948,11 +1948,75 @@ void runtime::cpu::CPU_Emitter::EmitConvolution(codegen::CodeWriter& writer,
<< "},\n";
writer << " {" << join(convolution->get_padding_below()) << "},\n";
writer << " {" << join(convolution->get_padding_above()) << "},\n";
-writer << " {" << join(convolution->get_image_dilation_strides())
-<< "});\n";
+writer << " {" << join(convolution->get_data_dilation_strides())
+<< "},\n";
+writer << " 0, 1, 1, 0, 0, 1, false);\n";
}
}
+void runtime::cpu::CPU_Emitter::EmitConvolutionBackpropFilters(
+codegen::CodeWriter& writer,
+const ngraph::Node* n,
+const vector<runtime::cpu::TensorViewWrapper>& args,
+const vector<runtime::cpu::TensorViewWrapper>& out)
+{
+auto convolution = static_cast<const op::ConvolutionBackpropFilters*>(n);
+auto arg0_shape = args[0].get_shape();
+auto arg1_shape = args[1].get_shape();
+auto result_shape = out[0].get_shape();
+writer << "kernel::convolution<" << out[0].get_type() << ">(" << args[0].get_name() << ",\n";
+writer << " " << args[1].get_name() << ",\n";
+writer << " " << out[0].get_name() << ",\n";
+writer << " {" << join(arg0_shape) << "},\n";
+writer << " {" << join(arg1_shape) << "},\n";
+writer << " {" << join(result_shape) << "},\n";
+writer << " {"
+<< join(convolution->get_window_movement_strides_backward()) << "},\n";
+writer << " {"
+<< join(convolution->get_window_dilation_strides_backward()) << "},\n";
+writer << " {" << join(convolution->get_padding_below_backward())
+<< "},\n";
+writer << " {" << join(convolution->get_padding_above_backward())
+<< "},\n";
+writer << " {"
+<< join(convolution->get_data_dilation_strides_backward()) << "},\n";
+writer << " 1, 0, 0, 1, 1, 0, false);\n";
+}
+void runtime::cpu::CPU_Emitter::EmitConvolutionBackpropData(
+codegen::CodeWriter& writer,
+const ngraph::Node* n,
+const vector<runtime::cpu::TensorViewWrapper>& args,
+const vector<runtime::cpu::TensorViewWrapper>& out)
+{
+auto convolution = static_cast<const op::ConvolutionBackpropData*>(n);
+auto arg0_shape = args[0].get_shape();
+auto arg1_shape = args[1].get_shape();
+auto result_shape = out[0].get_shape();
+// Note that args[1] and args[0] are switched here from the usual order.
+writer << "kernel::convolution<" << out[0].get_type() << ">(" << args[1].get_name() << ",\n";
+writer << " " << args[0].get_name() << ",\n";
+writer << " " << out[0].get_name() << ",\n";
+writer << " {" << join(arg1_shape) << "},\n";
+writer << " {" << join(arg0_shape) << "},\n";
+writer << " {" << join(result_shape) << "},\n";
+writer << " {"
+<< join(convolution->get_window_movement_strides_backward()) << "},\n";
+writer << " {"
+<< join(convolution->get_window_dilation_strides_backward()) << "},\n";
+writer << " {" << join(convolution->get_padding_below_backward())
+<< "},\n";
+writer << " {" << join(convolution->get_padding_above_backward())
+<< "},\n";
+writer << " {"
+<< join(convolution->get_data_dilation_strides_backward()) << "},\n";
+writer << " 0, 1, 0, 1, 0, 1, true);\n";
+}
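The forward and backprop emitters above all target the same `kernel::convolution`, differing only in which tensor axes they label as batch, input-channel, and output-channel, and in whether the filter is rotated. Collecting the trailing arguments of the three call sites in this diff (the struct name here is hypothetical):

```cpp
#include <cstddef>

struct ConvAxisRoles
{
    size_t batch_axis_data, input_channel_axis_data;
    size_t input_channel_axis_filters, output_channel_axis_filters;
    size_t batch_axis_result, output_channel_axis_result;
    bool rotate_filter;
};

constexpr ConvAxisRoles forward{0, 1, 1, 0, 0, 1, false};
constexpr ConvAxisRoles backprop_filters{1, 0, 0, 1, 1, 0, false};
constexpr ConvAxisRoles backprop_data{0, 1, 0, 1, 0, 1, true}; // also swaps args[0]/args[1]
```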
void runtime::cpu::CPU_Emitter::EmitNot(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::cpu::TensorViewWrapper>& args,
@@ -85,6 +85,8 @@ namespace ngraph
static void EMITTER_DECL(EmitCeiling);
static void EMITTER_DECL(EmitSqrt);
static void EMITTER_DECL(EmitConvolution);
+static void EMITTER_DECL(EmitConvolutionBackpropFilters);
+static void EMITTER_DECL(EmitConvolutionBackpropData);
static void EMITTER_DECL(EmitNot);
static void EMITTER_DECL(EmitMaxPool);
static void EMITTER_DECL(EmitReverse);
@@ -187,6 +187,10 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::Ceiling), &runtime::cpu::CPU_Emitter::EmitCeiling},
{TI(ngraph::op::Sqrt), &runtime::cpu::CPU_Emitter::EmitSqrt},
{TI(ngraph::op::Convolution), &runtime::cpu::CPU_Emitter::EmitConvolution},
+{TI(ngraph::op::ConvolutionBackpropFilters),
+&runtime::cpu::CPU_Emitter::EmitConvolutionBackpropFilters},
+{TI(ngraph::op::ConvolutionBackpropData),
+&runtime::cpu::CPU_Emitter::EmitConvolutionBackpropData},
{TI(ngraph::op::Not), &runtime::cpu::CPU_Emitter::EmitNot},
{TI(ngraph::op::MaxPool), &runtime::cpu::CPU_Emitter::EmitMaxPool},
{TI(ngraph::op::Reverse), &runtime::cpu::CPU_Emitter::EmitReverse},
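The dispatcher above maps each concrete op type to its emitter. A generic sketch of the pattern (assuming `TI(x)` expands to `std::type_index(typeid(x))`, which is not shown in this diff):

```cpp
#include <functional>
#include <typeindex>
#include <unordered_map>

struct Node { virtual ~Node() = default; };
struct Convolution : Node {};

using Handler = std::function<void(const Node&)>;
static const std::unordered_map<std::type_index, Handler> dispatcher{
    {std::type_index(typeid(Convolution)), [](const Node&) { /* emit code */ }},
};

void dispatch(const Node& n)
{
    // typeid on a polymorphic reference yields the dynamic type.
    auto it = dispatcher.find(std::type_index(typeid(n)));
    if (it != dispatcher.end())
        it->second(n);
}
```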
@@ -322,7 +322,59 @@ private:
c->get_window_dilation_strides(),
c->get_padding_below(),
c->get_padding_above(),
-c->get_image_dilation_strides());
+c->get_data_dilation_strides(),
+0,
+1,
+1,
+0,
+0,
+1,
+false);
}
+else if (node_op == "ConvolutionBackpropFilters")
+{
+auto c = static_cast<const op::ConvolutionBackpropFilters*>(&node);
+kernel::convolution<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
+reinterpret_cast<T*>(args[1]->get_data_ptr()),
+reinterpret_cast<T*>(out[0]->get_data_ptr()),
+args[0]->get_shape(),
+args[1]->get_shape(),
+out[0]->get_shape(),
+c->get_window_movement_strides_backward(),
+c->get_window_dilation_strides_backward(),
+c->get_padding_below_backward(),
+c->get_padding_above_backward(),
+c->get_data_dilation_strides_backward(),
+1,
+0,
+0,
+1,
+1,
+0,
+false);
+}
+else if (node_op == "ConvolutionBackpropData")
+{
+// Note that args[1] and args[0] are switched here from the usual order.
+auto c = static_cast<const op::ConvolutionBackpropData*>(&node);
+kernel::convolution<T>(reinterpret_cast<T*>(args[1]->get_data_ptr()),
+reinterpret_cast<T*>(args[0]->get_data_ptr()),
+reinterpret_cast<T*>(out[0]->get_data_ptr()),
+args[1]->get_shape(),
+args[0]->get_shape(),
+out[0]->get_shape(),
+c->get_window_movement_strides_backward(),
+c->get_window_dilation_strides_backward(),
+c->get_padding_below_backward(),
+c->get_padding_above_backward(),
+c->get_data_dilation_strides_backward(),
+0,
+1,
+0,
+1,
+0,
+1,
+true);
+}
else if (node_op == "Cos")
{
@@ -42,36 +42,36 @@ namespace ngraph
{
// Our output coordinate O will have the form:
//
-// (img,chan,i_1,...,i_n)
+// (N,chan,i_1,...,i_n)
-size_t img_index = out_coord[0];
+size_t batch_index = out_coord[0];
size_t channel = out_coord[1];
-// For the input images we need to iterate the coordinate:
+// For the input data we need to iterate the coordinate:
//
// I:
//
// over the range (noninclusive on the right):
//
-// (img,chan,s_1*i_1,s_2*i_2,...,s_n*i_n) ->
+// (N,chan,s_1*i_1,s_2*i_2,...,s_n*i_n) ->
//
-// (img+1,chan+1,s_1*i_1 + window_shape_1,...,s_n*i_n + window_shape_n)
+// (N+1,chan+1,s_1*i_1 + window_shape_1,...,s_n*i_n + window_shape_n)
//
// with unit stride.
//
-// We iterate this over the *padded* image, so below we will need to check for coordinates that fall in the padding area.
+// We iterate this over the *padded* data, so below we will need to check for coordinates that fall in the padding area.
-size_t n_image_dimensions = arg_shape.size() - 2;
+size_t n_spatial_dimensions = arg_shape.size() - 2;
-Coordinate input_batch_transform_start(2 + n_image_dimensions);
-Coordinate input_batch_transform_end(2 + n_image_dimensions);
-Strides input_batch_transform_source_strides(2 + n_image_dimensions, 1);
-AxisVector input_batch_transform_source_axis_order(2 + n_image_dimensions);
-CoordinateDiff input_batch_transform_padding_below(2 + n_image_dimensions);
-CoordinateDiff input_batch_transform_padding_above(2 + n_image_dimensions);
+Coordinate input_batch_transform_start(2 + n_spatial_dimensions);
+Coordinate input_batch_transform_end(2 + n_spatial_dimensions);
+Strides input_batch_transform_source_strides(2 + n_spatial_dimensions, 1);
+AxisVector input_batch_transform_source_axis_order(2 + n_spatial_dimensions);
+CoordinateDiff input_batch_transform_padding_below(2 + n_spatial_dimensions);
+CoordinateDiff input_batch_transform_padding_above(2 + n_spatial_dimensions);
-input_batch_transform_start[0] = img_index;
-input_batch_transform_end[0] = img_index + 1;
+input_batch_transform_start[0] = batch_index;
+input_batch_transform_end[0] = batch_index + 1;
input_batch_transform_start[1] = channel;
input_batch_transform_end[1] = channel + 1;
input_batch_transform_padding_below[0] = 0;
@@ -79,7 +79,7 @@ namespace ngraph
input_batch_transform_padding_above[0] = 0;
input_batch_transform_padding_above[1] = 0;
-for (size_t i = 2; i < n_image_dimensions + 2; i++)
+for (size_t i = 2; i < n_spatial_dimensions + 2; i++)
{
size_t window_shape_this_dim = window_shape[i - 2];
size_t movement_stride = window_movement_strides[i - 2];
@@ -37,8 +37,23 @@ namespace ngraph
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
-const Strides& image_dilation_strides)
+const Strides& data_dilation_strides,
+size_t batch_axis_data,
+size_t input_channel_axis_data,
+size_t input_channel_axis_filters,
+size_t output_channel_axis_filters,
+size_t batch_axis_result,
+size_t output_channel_axis_result,
+bool rotate_filter)
{
+// Comments throughout assume without loss of generality that:
+//
+// * batch axes for both input data and output data are 0
+// * input channel axes for both input data and filters are 1
+// * output channel axis for filters is 0
+// * output channel axis for output data is 1
+// * rotate_filter is false
// At the outermost level we will walk over every output coordinate O.
CoordinateTransform output_transform(out_shape);
@@ -46,50 +61,50 @@ namespace ngraph
{
// Our output coordinate O will have the form:
//
-// (img,chan_out,i_1,...,i_n)
+// (N,chan_out,i_1,...,i_n)
-size_t img_index = out_coord[0];
-size_t output_channel = out_coord[1];
+size_t batch_index = out_coord[batch_axis_result];
+size_t output_channel = out_coord[output_channel_axis_result];
-// For the input images we need to iterate the coordinate:
+// For the input data we need to iterate the coordinate:
//
// I:
//
// over the range (noninclusive on the right):
//
-// (img,0,s_1*i_1,s_2*i_2,...,s_n*i_n) ->
+// (N,0,s_1*i_1,s_2*i_2,...,s_n*i_n) ->
//
-// (img+1,chans_in_count,s_1*i_1 + l_1*filter_dims_1,...,s_n*i_n + l_n*filter_dims_n)
+// (N+1,chans_in_count,s_1*i_1 + l_1*filter_dims_1,...,s_n*i_n + l_n*filter_dims_n)
//
// with strides:
//
// (1,l_1,...,l_n).
//
-// Note that we are iterating within the *padded* and *dilated* image batch, so further
+// Note that we are iterating within the *padded* and *dilated* data batch, so further
// down we must check the current coordinate is in the padding or dilation gap.
-size_t n_image_dimensions = arg0_shape.size() - 2;
-size_t n_input_channels = arg0_shape[1];
+size_t n_spatial_dimensions = arg0_shape.size() - 2;
+size_t n_input_channels = arg0_shape[input_channel_axis_data];
-Coordinate input_batch_transform_start(2 + n_image_dimensions);
-Coordinate input_batch_transform_end(2 + n_image_dimensions);
-Strides input_batch_transform_movement_strides(2 + n_image_dimensions, 1);
-CoordinateDiff input_batch_transform_padding_below(2 + n_image_dimensions, 0);
-CoordinateDiff input_batch_transform_padding_above(2 + n_image_dimensions, 0);
-Strides input_batch_transform_dilation_strides(2 + n_image_dimensions, 1);
+Coordinate input_batch_transform_start(2 + n_spatial_dimensions);
+Coordinate input_batch_transform_end(2 + n_spatial_dimensions);
+Strides input_batch_transform_movement_strides(2 + n_spatial_dimensions, 1);
+CoordinateDiff input_batch_transform_padding_below(2 + n_spatial_dimensions, 0);
+CoordinateDiff input_batch_transform_padding_above(2 + n_spatial_dimensions, 0);
+Strides input_batch_transform_dilation_strides(2 + n_spatial_dimensions, 1);
-input_batch_transform_start[0] = img_index;
-input_batch_transform_end[0] = img_index + 1;
-input_batch_transform_start[1] = 0;
-input_batch_transform_end[1] = n_input_channels;
+input_batch_transform_start[batch_axis_data] = batch_index;
+input_batch_transform_end[batch_axis_data] = batch_index + 1;
+input_batch_transform_start[input_channel_axis_data] = 0;
+input_batch_transform_end[input_channel_axis_data] = n_input_channels;
-for (size_t i = 2; i < n_image_dimensions + 2; i++)
+for (size_t i = 2; i < n_spatial_dimensions + 2; i++)
{
size_t window_dilation_stride = window_dilation_strides[i - 2];
size_t window_movement_stride = window_movement_strides[i - 2];
std::ptrdiff_t below_pad = padding_below[i - 2];
std::ptrdiff_t above_pad = padding_above[i - 2];
-size_t image_dilation_stride = image_dilation_strides[i - 2];
+size_t data_dilation_stride = data_dilation_strides[i - 2];
input_batch_transform_start[i] = window_movement_stride * out_coord[i];
input_batch_transform_end[i] =
@@ -98,10 +113,10 @@ namespace ngraph
input_batch_transform_movement_strides[i] = window_dilation_stride;
input_batch_transform_padding_below[i] = below_pad;
input_batch_transform_padding_above[i] = above_pad;
-input_batch_transform_dilation_strides[i] = image_dilation_stride;
+input_batch_transform_dilation_strides[i] = data_dilation_stride;
}
-AxisVector input_batch_transform_axis_order(2 + n_image_dimensions);
+AxisVector input_batch_transform_axis_order(2 + n_spatial_dimensions);
size_t n = 0;
std::generate(input_batch_transform_axis_order.begin(),
input_batch_transform_axis_order.end(),
@@ -127,15 +142,15 @@ namespace ngraph
//
// with unit stride.
-Shape filter_transform_start(2 + n_image_dimensions);
-Shape filter_transform_end(2 + n_image_dimensions);
+Shape filter_transform_start(2 + n_spatial_dimensions);
+Shape filter_transform_end(2 + n_spatial_dimensions);
-filter_transform_start[0] = output_channel;
-filter_transform_end[0] = output_channel + 1;
-filter_transform_start[1] = 0;
-filter_transform_end[1] = n_input_channels;
+filter_transform_start[output_channel_axis_filters] = output_channel;
+filter_transform_end[output_channel_axis_filters] = output_channel + 1;
+filter_transform_start[input_channel_axis_filters] = 0;
+filter_transform_end[input_channel_axis_filters] = n_input_channels;
-for (size_t i = 2; i < n_image_dimensions + 2; i++)
+for (size_t i = 2; i < n_spatial_dimensions + 2; i++)
{
filter_transform_start[i] = 0;
filter_transform_end[i] = arg1_shape[i];
@@ -157,7 +172,19 @@ namespace ngraph
filter_it != filter_transform.end())
{
const Coordinate& input_batch_coord = *input_it;
-const Coordinate& filter_coord = *filter_it;
+Coordinate filter_coord = *filter_it;
+if (rotate_filter)
+{
+Shape target_shape = filter_transform.get_target_shape();
+// Note that we only reverse the spatial dimensions here (loop
+// starts at 2)
+for (size_t i = 2; i < filter_coord.size(); i++)
+{
+filter_coord[i] = target_shape[i] - filter_coord[i] - 1;
+}
+}
T v = input_batch_transform.has_source_coordinate(input_batch_coord)
? arg0[input_batch_transform.index(input_batch_coord)]
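The `rotate_filter` flip above encodes the standard identity behind `ConvolutionBackpropData`: for unit stride and dilation with one spatial dimension (a sketch, not taken from this source), the forward op \f$Y[a,k,i] = \sum_{c,j} X[a,c,i+j]\,F[k,c,j]\f$ has data gradient

\f[
\frac{\partial E}{\partial X}[a,c,p] = \sum_{k}\sum_{j=0}^{m-1} \frac{\partial E}{\partial Y}[a,k,p-j]\,F[k,c,j] = \sum_{k}\sum_{j'=0}^{m-1} \frac{\partial E}{\partial Y}[a,k,p-(m-1)+j']\,F[k,c,(m-1)-j'],
\f]

i.e. a convolution of the suitably padded output delta with the filter reversed along its spatial axes, which is exactly the coordinate flip \f$j \mapsto (m-1)-j\f$ performed by `filter_coord[i] = target_shape[i] - filter_coord[i] - 1`.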
@@ -40,34 +40,34 @@ namespace ngraph
{
// Our output coordinate O will have the form:
//
-// (img,chan,i_1,...,i_n)
+// (N,chan,i_1,...,i_n)
-size_t img_index = out_coord[0];
+size_t batch_index = out_coord[0];
size_t channel = out_coord[1];
-// For the input images we need to iterate the coordinate:
+// For the input data we need to iterate the coordinate:
//
// I:
//
// over the range (noninclusive on the right):
//
-// (img,chan,s_1*i_1,s_2*i_2,...,s_n*i_n) ->
+// (N,chan,s_1*i_1,s_2*i_2,...,s_n*i_n) ->
//
-// (img+1,chan+1,s_1*i_1 + window_shape_1,...,s_n*i_n + window_shape_n)
+// (N+1,chan+1,s_1*i_1 + window_shape_1,...,s_n*i_n + window_shape_n)
//
// with unit stride.
-size_t n_image_dimensions = arg_shape.size() - 2;
+size_t n_spatial_dimensions = arg_shape.size() - 2;
-Coordinate input_batch_transform_start(2 + n_image_dimensions);
-Coordinate input_batch_transform_end(2 + n_image_dimensions);
+Coordinate input_batch_transform_start(2 + n_spatial_dimensions);
+Coordinate input_batch_transform_end(2 + n_spatial_dimensions);
-input_batch_transform_start[0] = img_index;
-input_batch_transform_end[0] = img_index + 1;
+input_batch_transform_start[0] = batch_index;
+input_batch_transform_end[0] = batch_index + 1;
input_batch_transform_start[1] = channel;
input_batch_transform_end[1] = channel + 1;
-for (size_t i = 2; i < n_image_dimensions + 2; i++)
+for (size_t i = 2; i < n_spatial_dimensions + 2; i++)
{
size_t window_shape_this_dim = window_shape[i - 2];
size_t movement_stride = window_movement_strides[i - 2];
@@ -379,15 +379,79 @@ static shared_ptr<ngraph::Function>
node_js.at("window_dilation_strides").get<vector<size_t>>();
auto padding_below = node_js.at("padding_below").get<vector<std::ptrdiff_t>>();
auto padding_above = node_js.at("padding_above").get<vector<std::ptrdiff_t>>();
-auto image_dilation_strides =
-node_js.at("image_dilation_strides").get<vector<size_t>>();
-node = make_shared<op::Convolution>(args[0],
-args[1],
-window_movement_strides,
-window_dilation_strides,
-padding_below,
-padding_above,
-image_dilation_strides);
+// For backwards compatibility, we accept "image_dilation_strides" in place of
+// "data_dilation_strides", and we also allow it to be omitted altogether.
+auto data_dilation_strides_maybe = node_js["data_dilation_strides"];
+if (data_dilation_strides_maybe.empty())
+{
+data_dilation_strides_maybe = node_js["image_dilation_strides"];
+}
+if (data_dilation_strides_maybe.empty())
+{
+node = make_shared<op::Convolution>(args[0],
+args[1],
+window_movement_strides,
+window_dilation_strides,
+padding_below,
+padding_above);
+}
+else
+{
+node = make_shared<op::Convolution>(
+args[0],
+args[1],
+window_movement_strides,
+window_dilation_strides,
+padding_below,
+padding_above,
+data_dilation_strides_maybe.get<std::vector<size_t>>());
+}
}
+else if (node_op == "ConvolutionBackpropData")
+{
+auto data_batch_shape = node_js.at("data_batch_shape").get<vector<size_t>>();
+auto window_movement_strides_forward =
+node_js.at("window_movement_strides_forward").get<vector<size_t>>();
+auto window_dilation_strides_forward =
+node_js.at("window_dilation_strides_forward").get<vector<size_t>>();
+auto padding_below_forward =
+node_js.at("padding_below_forward").get<vector<std::ptrdiff_t>>();
+auto padding_above_forward =
+node_js.at("padding_above_forward").get<vector<std::ptrdiff_t>>();
+auto data_dilation_strides_forward =
+node_js.at("data_dilation_strides_forward").get<vector<size_t>>();
+node = make_shared<op::ConvolutionBackpropData>(data_batch_shape,
+args[0],
+args[1],
+window_movement_strides_forward,
+window_dilation_strides_forward,
+padding_below_forward,
+padding_above_forward,
+data_dilation_strides_forward);
+}
+else if (node_op == "ConvolutionBackpropFilters")
+{
+auto filters_shape = node_js.at("filters_shape").get<vector<size_t>>();
+auto window_movement_strides_forward =
+node_js.at("window_movement_strides_forward").get<vector<size_t>>();
+auto window_dilation_strides_forward =
+node_js.at("window_dilation_strides_forward").get<vector<size_t>>();
+auto padding_below_forward =
+node_js.at("padding_below_forward").get<vector<std::ptrdiff_t>>();
+auto padding_above_forward =
+node_js.at("padding_above_forward").get<vector<std::ptrdiff_t>>();
+auto data_dilation_strides_forward =
+node_js.at("data_dilation_strides_forward").get<vector<size_t>>();
+node = make_shared<op::ConvolutionBackpropFilters>(args[0],
+filters_shape,
+args[1],
+window_movement_strides_forward,
+window_dilation_strides_forward,
+padding_below_forward,
+padding_above_forward,
+data_dilation_strides_forward);
+}
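The backwards-compatibility fallback above can be exercised in isolation; a minimal sketch assuming the serializer's json type is nlohmann::json:

```cpp
#include <nlohmann/json.hpp>
#include <vector>

int main()
{
    // An old serialized Convolution node still using the pre-rename key.
    nlohmann::json node_js = nlohmann::json::parse(
        R"({"image_dilation_strides": [1, 1]})");

    // Prefer the new key; operator[] yields a null value (empty() == true)
    // when the key is absent, so fall back to the old key.
    auto data_dilation_strides_maybe = node_js["data_dilation_strides"];
    if (data_dilation_strides_maybe.empty())
    {
        data_dilation_strides_maybe = node_js["image_dilation_strides"];
    }
    auto strides = data_dilation_strides_maybe.get<std::vector<size_t>>(); // {1, 1}
    (void)strides;
}
```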
else if (node_op == "Cos")
{
@@ -718,7 +782,27 @@ static json write(const Node& n)
node["window_dilation_strides"] = tmp->get_window_dilation_strides();
node["padding_below"] = tmp->get_padding_below();
node["padding_above"] = tmp->get_padding_above();
-node["image_dilation_strides"] = tmp->get_image_dilation_strides();
+node["data_dilation_strides"] = tmp->get_data_dilation_strides();
}
+else if (node_op == "ConvolutionBackpropData")
+{
+auto tmp = dynamic_cast<const op::ConvolutionBackpropData*>(&n);
+node["data_batch_shape"] = tmp->get_data_batch_shape();
+node["window_movement_strides_forward"] = tmp->get_window_movement_strides_forward();
+node["window_dilation_strides_forward"] = tmp->get_window_dilation_strides_forward();
+node["padding_below_forward"] = tmp->get_padding_below_forward();
+node["padding_above_forward"] = tmp->get_padding_above_forward();
+node["data_dilation_strides_forward"] = tmp->get_data_dilation_strides_forward();
+}
+else if (node_op == "ConvolutionBackpropFilters")
+{
+auto tmp = dynamic_cast<const op::ConvolutionBackpropFilters*>(&n);
+node["filters_shape"] = tmp->get_filters_shape();
+node["window_movement_strides_forward"] = tmp->get_window_movement_strides_forward();
+node["window_dilation_strides_forward"] = tmp->get_window_dilation_strides_forward();
+node["padding_below_forward"] = tmp->get_padding_below_forward();
+node["padding_above_forward"] = tmp->get_padding_above_forward();
+node["data_dilation_strides_forward"] = tmp->get_data_dilation_strides_forward();
+}
else if (node_op == "Cos")
{