Forward prop for average pooling (#380)

* Average pool type checking and kernel; type checking tests * Fix and enable average-pool tests * Docstring fix * Extend AvgPool op type checking to support padding * Untested code for padded avg-pool * Unit tests for padded avg-pool * Add CPU implementation * Temp delete * Docstring fix * Docstring fix * Add tests mixing padding and stride * Temporary cut to ease merge * Restore temporary cut for merge * Empty commit to try to force CI to wake up

Forward prop for average pooling (#380)
* Average pool type checking and kernel; type checking tests * Fix and enable average-pool tests * Docstring fix * Extend AvgPool op type checking to support padding * Untested code for padded avg-pool * Unit tests for padded avg-pool * Add CPU implementation * Temp delete * Docstring fix * Docstring fix * Add tests mixing padding and stride * Temporary cut to ease merge * Restore temporary cut for merge * Empty commit to try to force CI to wake up
0931b83b · Adam Procter · Scott Cyphers · eb74486c · 0931b83b · 0931b83b
Commit 0931b83b authored Jan 19, 2018 by Adam Procter Committed by Scott Cyphers Jan 19, 2018
12 changed files
--- a/src/ngraph/CMakeLists.txt
+++ b/src/ngraph/CMakeLists.txt
@@ -32,6 +32,7 @@ set (SRC
    node.cpp
    ops/abs.cpp
    ops/add.cpp
+    ops/avg_pool.cpp
    ops/binary_elementwise_arithmetic.cpp
    ops/binary_elementwise_comparison.cpp
    ops/binary_elementwise.cpp

--- a/src/ngraph/ngraph.hpp
+++ b/src/ngraph/ngraph.hpp
@@ -68,6 +68,7 @@
 #include "ngraph/ops/add.hpp"
 #include "ngraph/ops/asin.hpp"
 #include "ngraph/ops/atan.hpp"
+#include "ngraph/ops/avg_pool.hpp"
 #include "ngraph/ops/broadcast.hpp"
 #include "ngraph/ops/ceiling.hpp"
 #include "ngraph/ops/concatenate.hpp"

--- a/src/ngraph/ops/avg_pool.cpp
+++ b/src/ngraph/ops/avg_pool.cpp
+// ----------------------------------------------------------------------------
+// Copyright 2017 Nervana Systems Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// ----------------------------------------------------------------------------
+#include "ngraph/ops/avg_pool.hpp"
+#include "ngraph/util.hpp"
+using namespace std;
+using namespace ngraph;
+// Constructs an average-pooling node over `arg` and validates every parameter.
+//
+// Checks run in a fixed order (argument rank, batch size, channel count, parameter ranks,
+// padded image dimensions, window shape, window fit, strides); the first violated condition
+// is reported by throwing ngraph_error. On success the node's value type is set to the input
+// element type with shape (N, C, Do...), where Do is the output image shape computed below.
+op::AvgPool::AvgPool(const std::shared_ptr<Node>& arg,
+                     const Shape& window_shape,
+                     const Strides& window_movement_strides,
+                     const Shape& padding_below,
+                     const Shape& padding_above)
+    : RequiresTensorViewArgs("AvgPool", {arg})
+    , m_window_shape(window_shape)
+    , m_window_movement_strides(window_movement_strides)
+    , m_padding_below(padding_below)
+    , m_padding_above(padding_above)
+{
+    auto& arg_shape = get_inputs().at(0).get_shape();
+    //
+    // Make sure arg: NCDi for some Di of rank>0, N != 0, C != 0.
+    //
+    if (arg_shape.size() < 3)
+    {
+        throw ngraph_error(
+            "Average-pool image batch input must have rank of at least 3 (one batch axis, one "
+            "channel axis, at least one image dimension).");
+    }
+    m_batch_size = arg_shape[0];
+    if (m_batch_size == 0)
+    {
+        throw ngraph_error("Average-pool image batch size is zero.");
+    }
+    m_channel_count = arg_shape[1];
+    if (m_channel_count == 0)
+    {
+        throw ngraph_error("Average-pool requires at least one image depth channel.");
+    }
+    // Everything after the batch and channel axes is an image dimension.
+    m_image_dimension_count = arg_shape.size() - 2;
+    //
+    // Make sure window shape, window movement strides, and both padding shapes have the same
+    // rank as Di.
+    //
+    if (m_window_shape.size() != m_image_dimension_count)
+    {
+        throw ngraph_error(
+            "Average-pool window shape rank does not match number of image dimensions.");
+    }
+    if (m_window_movement_strides.size() != m_image_dimension_count)
+    {
+        throw ngraph_error(
+            "Average-pool window movement stride rank does not match number of image dimensions.");
+    }
+    if (m_padding_below.size() != m_image_dimension_count)
+    {
+        throw ngraph_error(
+            "Average-pool below-padding rank does not match number of image dimensions.");
+    }
+    if (m_padding_above.size() != m_image_dimension_count)
+    {
+        throw ngraph_error(
+            "Average-pool above-padding rank does not match number of image dimensions.");
+    }
+    //
+    // Extract the physical input image shape Di, derive the virtual (padded) shape
+    // padding_below + Di + padding_above, and make sure every padded dimension is larger than 0.
+    //
+    for (size_t i = 0; i < m_image_dimension_count; i++)
+    {
+        size_t dim_size = arg_shape[1 + 1 + i];
+        m_input_image_physical_shape.push_back(dim_size);
+        m_input_image_virtual_shape.push_back(padding_below[i] + dim_size + padding_above[i]);
+        if (m_input_image_virtual_shape[i] == 0)
+        {
+            throw ngraph_error("Average-pool input image dimension is zero even after padding.");
+        }
+    }
+    //
+    // Make sure window shape dimensions are all larger than 0.
+    //
+    for (size_t i = 0; i < m_image_dimension_count; i++)
+    {
+        if (m_window_shape[i] == 0)
+        {
+            throw ngraph_error("Average-pool window shape has a zero-length axis.");
+        }
+    }
+    //
+    // Make sure the average-pooling window fits within the padded image dimensions.
+    //
+    for (size_t i = 0; i < m_image_dimension_count; i++)
+    {
+        if (m_window_shape[i] > m_input_image_virtual_shape[i])
+        {
+            throw ngraph_error(
+                "Average-pool window shape is larger than the image even after padding.");
+        }
+    }
+    //
+    // Compute image output shape Do, checking at the same time that all window movement strides are larger than 0.
+    //
+    for (size_t i = 0; i < m_image_dimension_count; i++)
+    {
+        if (m_window_movement_strides[i] == 0)
+        {
+            throw ngraph_error("Average-pool window axis movement stride is zero.");
+        }
+        // The subtraction cannot wrap around: the previous loop guarantees that the window is
+        // no larger than the padded image along every axis.
+        m_output_image_shape.push_back(ceil_div(
+            m_input_image_virtual_shape[i] - m_window_shape[i] + 1, m_window_movement_strides[i]));
+    }
+    //
+    // Construct result shape: NCDo.
+    //
+    Shape result_shape(1 + 1 + m_image_dimension_count);
+    result_shape[0] = m_batch_size;
+    result_shape[1] = m_channel_count;
+    std::copy(m_output_image_shape.begin(), m_output_image_shape.end(), result_shape.begin() + 2);
+    set_value_type_checked(get_input_element_type(0), result_shape);
+}
+// Builds the all-zero padding shape used by the unpadded constructors: one zero entry per
+// image dimension of `arg` (its rank minus the batch and channel axes).
+//
+// Throws ngraph_error if `arg` does not have exactly one output, or if that output has
+// rank below 3.
+static Shape default_padding(const std::shared_ptr<Node>& arg)
+{
+    auto& outputs = arg->get_outputs();
+    if (outputs.size() != 1)
+    {
+        throw ngraph_error("Average-pool image batch argument must have exactly one output");
+    }
+
+    const size_t input_rank = outputs.at(0).get_shape().size();
+    if (input_rank < 3)
+    {
+        // Same message as the main constructor, so callers see one consistent error.
+        throw ngraph_error(
+            "Average-pool image batch input must have rank of at least 3 (one batch axis, one "
+            "channel axis, at least one image dimension).");
+    }
+
+    const size_t n_image_dimensions = input_rank - 2;
+    return Shape(n_image_dimensions, 0);
+}
+// Unpadded constructor: delegates to the full constructor with all-zero below- and
+// above-padding. default_padding(arg) throws ngraph_error if `arg` does not have exactly one
+// output of rank at least 3, so that failure is raised before the delegate constructor runs.
+op::AvgPool::AvgPool(const std::shared_ptr<Node>& arg,
+                     const Shape& window_shape,
+                     const Strides& window_movement_strides)
+    : AvgPool(
+          arg, window_shape, window_movement_strides, default_padding(arg), default_padding(arg))
+{
+}
+// Builds the unit window-movement strides used by the unstrided constructor: one entry equal
+// to 1 per image dimension of `arg` (its rank minus the batch and channel axes).
+//
+// Throws ngraph_error if `arg` does not have exactly one output, or if that output has
+// rank below 3.
+static Strides default_strides(const std::shared_ptr<Node>& arg)
+{
+    auto& outputs = arg->get_outputs();
+    if (outputs.size() != 1)
+    {
+        throw ngraph_error("Average-pool image batch argument must have exactly one output");
+    }
+
+    const size_t input_rank = outputs.at(0).get_shape().size();
+    if (input_rank < 3)
+    {
+        // Same message as the main constructor, so callers see one consistent error.
+        throw ngraph_error(
+            "Average-pool image batch input must have rank of at least 3 (one batch axis, one "
+            "channel axis, at least one image dimension).");
+    }
+
+    const size_t n_image_dimensions = input_rank - 2;
+    return Strides(n_image_dimensions, 1);
+}
+// Unstrided, unpadded constructor: delegates to the full constructor with unit strides and
+// all-zero below- and above-padding. The default_* helpers throw ngraph_error if `arg` does
+// not have exactly one output of rank at least 3.
+op::AvgPool::AvgPool(const std::shared_ptr<Node>& arg, const Shape& window_shape)
+    : AvgPool(arg, window_shape, default_strides(arg), default_padding(arg), default_padding(arg))
+{
+}
+// Reports whether `other` is functionally the same average-pooling operation as this node.
+//
+// Returns true only if the base-class Node::is_functionally_identical check passes, `other`
+// is an AvgPool, and every pooling parameter and derived shape attribute compares equal.
+bool op::AvgPool::is_functionally_identical(const Node& other) const
+{
+    if (!Node::is_functionally_identical(other))
+    {
+        return false;
+    }
+
+    // Pointer-form cast: a node of some other type is reported as "not identical" instead of
+    // surfacing as a std::bad_cast from the reference-form cast.
+    const AvgPool* rhs = dynamic_cast<const AvgPool*>(&other);
+    if (rhs == nullptr)
+    {
+        return false;
+    }
+
+    return m_window_shape == rhs->m_window_shape &&
+           m_window_movement_strides == rhs->m_window_movement_strides &&
+           m_padding_below == rhs->m_padding_below &&
+           m_padding_above == rhs->m_padding_above &&
+           m_channel_count == rhs->m_channel_count &&
+           m_input_image_physical_shape == rhs->m_input_image_physical_shape &&
+           m_input_image_virtual_shape == rhs->m_input_image_virtual_shape &&
+           m_output_image_shape == rhs->m_output_image_shape &&
+           m_batch_size == rhs->m_batch_size &&
+           m_image_dimension_count == rhs->m_image_dimension_count;
+}
+/*
+void op::AvgPool::generate_adjoints(autodiff::Adjoints& adjoints, const std::shared_ptr<Node>& delta)
+{
+}
+*/
--- a/src/ngraph/ops/avg_pool.hpp
+++ b/src/ngraph/ops/avg_pool.hpp
+// ----------------------------------------------------------------------------
+// Copyright 2017 Nervana Systems Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// ----------------------------------------------------------------------------
+#pragma once
+#include "ngraph/ops/op.hpp"
+namespace ngraph
+{
+    namespace op
+    {
+        /// \brief Batched average pooling operation, with optional padding and window stride.
+        ///
+        /// Average pooling takes as its input an image batch tensor of shape \f$(N,C,d_1,\dots,d_n)\f$ where \f$n > 0\f$, every \f$d_i > 0\f$, and where \f$N\f$ is
+        /// the batch size, and \f$C > 0\f$ is the number of channels (sometimes called features). It also takes four parameters:
+        ///
+        /// 1. <i>(the window shape)</i> a size vector \f$(w_1,\dots,w_n)\f$ where every \f$0 < w_i \le p_i + d_i + q_i\f$ (with \f$p_i\f$ and \f$q_i\f$ the paddings described in items 3 and 4);
+        /// 2. <i>(the window movement strides, optional)</i> a vector of positive integers \f$(s_1,\dots,s_n)\f$.
+        /// 3. <i>(the padding below, optional)</i> a vector of non-negative integers \f$(p_1,\dots,p_n)\f$.
+        /// 4. <i>(the padding above, optional)</i> a vector of non-negative integers \f$(q_1,\dots,q_n)\f$.
+        ///
+        /// The output has the shape \f$(N,C,d'_1,\dots,d'_n)\f$, where \f$d'_i = \lceil \frac{p_i + d_i + q_i - w_i + 1}{s_i} \rceil\f$.
+        ///
+        /// *In the absence of padding*, given an input image batch tensor \f$T_\textit{in}\f$, the output tensor is defined by the equation
+        ///
+        /// \f[
+        ///      T_\textit{out}[a,c,i_1,\dots,i_n] = \frac{\sum_{j_1 = s_1 i_1, \dots, j_n = s_n i_n}^{j_1 = s_1 i_1 + w_1 - 1, \dots, j_n = s_n i_n + w_n - 1} T_\textit{in}[a,c,j_1,\dots,j_n]}{\prod_{k=1}^n{w_k}}
+        /// \f]
+        ///
+        /// *In the presence of padding*, we do not always want to divide by the number of elements in the window, since some of the output points are
+        /// determined by a window that is partly hanging beyond the edge of the tensor. In this case we can define the output via a few intermediate steps.
+        ///
+        /// First define the <i>sum tensor</i> \f$T_\textit{sum}\f$, with shape \f$(N,C,d'_1,\dots,d'_n)\f$, as follows.
+        ///
+        /// \f[
+        ///      T_\textit{sum}[a,c,i_1,\dots,i_n] = \sum_{j_1 = s_1 i_1, \dots, j_n = s_n i_n}^{j_1 = s_1 i_1 + w_1 - 1, \dots, j_n = s_n i_n + w_n - 1} \textit{val}[a,c,j_1,\dots,j_n]
+        /// \f]
+        ///
+        /// where \f$\textit{val}[a,c,j_1,\dots,j_n] = T_\textit{in}[a,c,j_1 - p_1,\dots,j_n - p_n]\f$ if for all \f$k\f$, \f$p_k \le j_k < p_k + d_k\f$; else \f$0\f$.
+        ///
+        /// Second, define the <i>divisor tensor</i> \f$T_\textit{div}\f$, with shape \f$(N,C,d'_1,\dots,d'_n)\f$, as follows.
+        ///
+        /// \f[
+        ///      T_\textit{div}[a,c,i_1,\dots,i_n] = \sum_{j_1 = s_1 i_1, \dots, j_n = s_n i_n}^{j_1 = s_1 i_1 + w_1 - 1, \dots, j_n = s_n i_n + w_n - 1} \textit{val}[a,c,j_1,\dots,j_n]
+        /// \f]
+        ///
+        /// where \f$\textit{val}[a,c,j_1,\dots,j_n] = 1\f$ if for all \f$k\f$, \f$p_k \le j_k < p_k + d_k\f$; else \f$0\f$.
+        ///
+        /// Finally, define \f$T_\textit{out}\f$ as the result of elementwise dividing \f$T_\textit{sum}\f$ by \f$T_\textit{div}\f$.
+        /// Note that at positions where \f$T_\textit{div}\f$ is zero, values may be infinity or nan. (This corresponds to a condition where the pooling window is completely
+        /// out of bounds, encompassing no valid values.)
+        class AvgPool : public RequiresTensorViewArgs
+        {
+        public:
+            /// \brief Constructs a batched average pooling operation.
+            ///
+            /// \param arg The node producing the input image batch tensor.
+            /// \param window_shape The window shape.
+            /// \param window_movement_strides The window movement strides.
+            /// \param padding_below The below-padding shape.
+            /// \param padding_above The above-padding shape.
+            AvgPool(const std::shared_ptr<Node>& arg,
+                    const Shape& window_shape,
+                    const Strides& window_movement_strides,
+                    const Shape& padding_below,
+                    const Shape& padding_above);
+            /// \brief Constructs a batched, unpadded average pooling operation (i.e., all padding shapes are set to 0).
+            ///
+            /// \param arg The node producing the input image batch tensor.
+            /// \param window_shape The window shape.
+            /// \param window_movement_strides The window movement strides.
+            AvgPool(const std::shared_ptr<Node>& arg,
+                    const Shape& window_shape,
+                    const Strides& window_movement_strides);
+            /// \brief Constructs an unstrided, unpadded batched average pooling operation (i.e., all window movement strides are 1 and all padding shapes are set to 0).
+            ///
+            /// \param arg The node producing the input image batch tensor.
+            /// \param window_shape The window shape.
+            AvgPool(const std::shared_ptr<Node>& arg, const Shape& window_shape);
+            /// \brief Creates a new AvgPool over a replacement argument, reusing this node's window shape, strides, and padding.
+            ///
+            /// \param new_args The replacement arguments; must contain exactly one node.
+            /// \return The newly constructed AvgPool node.
+            /// \throws ngraph_error if new_args does not contain exactly one element.
+            virtual std::shared_ptr<Node> copy_with_new_args(
+                const std::vector<std::shared_ptr<Node>>& new_args) const override
+            {
+                if (new_args.size() != 1)
+                    throw ngraph_error("Incorrect number of new arguments");
+                return std::make_shared<AvgPool>(new_args.at(0),
+                                                 m_window_shape,
+                                                 m_window_movement_strides,
+                                                 m_padding_below,
+                                                 m_padding_above);
+            }
+            /// \return The window shape.
+            const Shape& get_window_shape() const { return m_window_shape; }
+            /// \return The window movement strides.
+            const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
+            /// \return The below-padding shape.
+            const Shape& get_padding_below() const { return m_padding_below; }
+            /// \return The above-padding shape.
+            const Shape& get_padding_above() const { return m_padding_above; }
+            /// \return The number of image channels.
+            size_t get_channel_count() const { return m_channel_count; }
+            /// \return The input image physical shape, not including padding.
+            const Shape& get_input_image_physical_shape() const
+            {
+                return m_input_image_physical_shape;
+            }
+            /// \return The input image virtual shape, including padding.
+            const Shape& get_input_image_virtual_shape() const
+            {
+                return m_input_image_virtual_shape;
+            }
+            /// \return The output image shape.
+            const Shape& get_output_image_shape() const { return m_output_image_shape; }
+            /// \return The batch size.
+            size_t get_batch_size() const { return m_batch_size; }
+            /// \return The number of image dimensions.
+            size_t get_image_dimension_count() const { return m_image_dimension_count; }
+            /// \return true if the given node is functionally identical to this one (defined in avg_pool.cpp).
+            bool is_functionally_identical(const Node&) const override;
+        protected:
+            // Pooling parameters, copied from the constructor arguments.
+            Shape m_window_shape;
+            Strides m_window_movement_strides;
+            Shape m_padding_below;
+            Shape m_padding_above;
+            // Attributes derived from the argument shape by the five-argument constructor.
+            size_t m_channel_count;             // axis 1 of the input shape
+            Shape m_input_image_physical_shape; // input image dimensions, without padding
+            Shape m_input_image_virtual_shape;  // padding_below + physical + padding_above
+            Shape m_output_image_shape;         // image dimensions of the result
+            size_t m_batch_size;                // axis 0 of the input shape
+            size_t m_image_dimension_count;     // input rank minus the batch and channel axes
+        };
+    }
+}
--- a/src/ngraph/ops/max_pool.hpp
+++ b/src/ngraph/ops/max_pool.hpp
@@ -33,7 +33,7 @@ namespace ngraph
        /// Given an input image batch tensor \f$T_\textit{in}\f$, the output tensor is defined by the equation
        ///
        /// \f[
-        ///      T_\textit{out}[a,c,i_1,\dots,i_n] = \max_{j_1 = i_1, \dots, j_n = i_n}^{j_1 = i_1 + w_1 - 1, \dots, j_n = i_n + w_n - 1} (T_\textit{in}[a,c,j_1,\dots,j_n])
+        ///      T_\textit{out}[a,c,i_1,\dots,i_n] = \max_{j_1 = s_1 i_1, \dots, j_n = s_n i_n}^{j_1 = s_1 i_1 + w_1 - 1, \dots, j_n = s_n i_n + w_n - 1} (T_\textit{in}[a,c,j_1,\dots,j_n])
        /// \f]
        ///
        class MaxPool : public RequiresTensorViewArgs

--- a/src/ngraph/runtime/cpu/cpu_emitter.cpp
+++ b/src/ngraph/runtime/cpu/cpu_emitter.cpp
@@ -21,6 +21,7 @@
 #include <vector>
 #include "ngraph/node.hpp"
+#include "ngraph/ops/avg_pool.hpp"
 #include "ngraph/ops/broadcast.hpp"
 #include "ngraph/ops/concatenate.hpp"
 #include "ngraph/ops/constant.hpp"
@@ -1969,6 +1970,26 @@ void runtime::cpu::CPU_Emitter::EmitSelectAndScatter(
    writer << "}\n";
 }
+// Writes the generated-code call to kernel::avg_pool for the AvgPool node `n`.
+//
+// The emitted call passes, in order: the input buffer name, the output buffer name, the input
+// and output shapes, and the node's window shape, window movement strides, below-padding and
+// above-padding, each shape rendered as a brace-enclosed list.
+void runtime::cpu::CPU_Emitter::EmitAvgPool(codegen::CodeWriter& writer,
+                                            const ngraph::Node* n,
+                                            const vector<runtime::cpu::TensorViewWrapper>& args,
+                                            const vector<runtime::cpu::TensorViewWrapper>& out)
+{
+    auto pool_node = static_cast<const op::AvgPool*>(n);
+    auto input_shape = args[0].get_shape();
+    auto output_shape = out[0].get_shape();
+
+    // Buffers first: kernel::avg_pool<ET>(input, output, ...
+    writer << "kernel::avg_pool<" << out[0].get_type() << ">(" << args[0].get_name() << ",\n"
+           << "                 " << out[0].get_name() << ",\n";
+
+    // ... then the tensor shapes ...
+    writer << "                 {" << join(input_shape) << "},\n"
+           << "                 {" << join(output_shape) << "},\n";
+
+    // ... and finally the pooling parameters taken from the node.
+    writer << "                 {" << join(pool_node->get_window_shape()) << "},\n"
+           << "                 {" << join(pool_node->get_window_movement_strides()) << "},\n"
+           << "                 {" << join(pool_node->get_padding_below()) << "},\n"
+           << "                 {" << join(pool_node->get_padding_above()) << "});\n";
+}
 //------------------------------------------------------------------------------------------------
 // Utility methods
 //------------------------------------------------------------------------------------------------

--- a/src/ngraph/runtime/cpu/cpu_emitter.hpp
+++ b/src/ngraph/runtime/cpu/cpu_emitter.hpp
@@ -90,6 +90,7 @@ namespace ngraph
                static void EMITTER_DECL(EmitReverse);
                static void EMITTER_DECL(EmitReduceWindow);
                static void EMITTER_DECL(EmitSelectAndScatter);
+                static void EMITTER_DECL(EmitAvgPool);
            private:
                static std::string emit_vector(const TensorViewWrapper&,

--- a/src/ngraph/runtime/cpu/cpu_external_function.cpp
+++ b/src/ngraph/runtime/cpu/cpu_external_function.cpp
@@ -37,6 +37,7 @@
 #include "ngraph/ops/add.hpp"
 #include "ngraph/ops/asin.hpp"
 #include "ngraph/ops/atan.hpp"
+#include "ngraph/ops/avg_pool.hpp"
 #include "ngraph/ops/broadcast.hpp"
 #include "ngraph/ops/ceiling.hpp"
 #include "ngraph/ops/concatenate.hpp"
@@ -189,6 +190,7 @@ static const runtime::cpu::OpMap dispatcher{
    {TI(ngraph::op::Reverse), &runtime::cpu::CPU_Emitter::EmitReverse},
    {TI(ngraph::op::ReduceWindow), &runtime::cpu::CPU_Emitter::EmitReduceWindow},
    {TI(ngraph::op::SelectAndScatter), &runtime::cpu::CPU_Emitter::EmitSelectAndScatter},
+    {TI(ngraph::op::AvgPool), &runtime::cpu::CPU_Emitter::EmitAvgPool},
 };
 runtime::cpu::CPU_ExternalFunction::CPU_ExternalFunction(
@@ -231,6 +233,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
 #include "ngraph/runtime/aligned_buffer.hpp"
 #include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
 #include "ngraph/runtime/cpu/cpu_kernels.hpp"
+#include "ngraph/runtime/kernel/avg_pool.hpp"
 #include "ngraph/runtime/kernel/broadcast.hpp"
 #include "ngraph/runtime/kernel/concat.hpp"
 #include "ngraph/runtime/kernel/convolution.hpp"

--- a/src/ngraph/runtime/interpreter/int_call_frame.hpp
+++ b/src/ngraph/runtime/interpreter/int_call_frame.hpp
@@ -21,6 +21,7 @@
 #include "ngraph/function.hpp"
 #include "ngraph/graph_util.hpp"
 #include "ngraph/node.hpp"
+#include "ngraph/ops/avg_pool.hpp"
 #include "ngraph/ops/broadcast.hpp"
 #include "ngraph/ops/concatenate.hpp"
 #include "ngraph/ops/constant.hpp"
@@ -44,6 +45,7 @@
 #include "ngraph/runtime/kernel/add.hpp"
 #include "ngraph/runtime/kernel/asin.hpp"
 #include "ngraph/runtime/kernel/atan.hpp"
+#include "ngraph/runtime/kernel/avg_pool.hpp"
 #include "ngraph/runtime/kernel/broadcast.hpp"
 #include "ngraph/runtime/kernel/ceiling.hpp"
 #include "ngraph/runtime/kernel/concat.hpp"
@@ -246,6 +248,19 @@ private:
                            reinterpret_cast<T*>(out[0]->get_data_ptr()),
                            out[0]->get_element_count());
        }
+        else if (node_op == "AvgPool")
+        {
+            ngraph::op::AvgPool* avg_pool = dynamic_cast<ngraph::op::AvgPool*>(&node);
+            kernel::avg_pool<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
+                                reinterpret_cast<T*>(out[0]->get_data_ptr()),
+                                args[0]->get_shape(),
+                                out[0]->get_shape(),
+                                avg_pool->get_window_shape(),
+                                avg_pool->get_window_movement_strides(),
+                                avg_pool->get_padding_below(),
+                                avg_pool->get_padding_above());
+        }
        else if (node_op == "Broadcast")
        {
            ngraph::op::Broadcast* broadcast = dynamic_cast<ngraph::op::Broadcast*>(&node);

--- a/src/ngraph/runtime/kernel/avg_pool.hpp
+++ b/src/ngraph/runtime/kernel/avg_pool.hpp
+// ----------------------------------------------------------------------------
+// Copyright 2017 Nervana Systems Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// ----------------------------------------------------------------------------
+#pragma once
+#include <cmath>
+#include <type_traits>
+#include "ngraph/common.hpp"
+#include "ngraph/coordinate_transform.hpp"
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace kernel
+        {
+            /// \brief Divides a pooling-window sum by the number of in-bounds elements in the window.
+            ///
+            /// For floating-point T this is an ordinary floating-point division, so a window
+            /// with no in-bounds elements yields NaN or infinity. For integral T the division
+            /// is carried out in (unsigned) long long matching the signedness of T, because
+            /// dividing a signed T directly by a size_t would first convert a negative sum to
+            /// a huge unsigned value. An integral window with no in-bounds elements yields 0
+            /// instead of dividing by zero.
+            ///
+            /// \param sum The sum of the in-bounds elements of the window.
+            /// \param n_elements The number of in-bounds elements of the window.
+            /// \return The average value for the window.
+            template <typename T>
+            T avg_pool_mean(T sum, size_t n_elements)
+            {
+                if (std::is_integral<T>::value)
+                {
+                    if (n_elements == 0)
+                    {
+                        return 0;
+                    }
+                    if (std::is_signed<T>::value)
+                    {
+                        return static_cast<T>(static_cast<long long>(sum) /
+                                              static_cast<long long>(n_elements));
+                    }
+                    return static_cast<T>(static_cast<unsigned long long>(sum) /
+                                          static_cast<unsigned long long>(n_elements));
+                }
+                return sum / static_cast<T>(n_elements);
+            }
+
+            /// \brief Reference implementation of batched average pooling with padding and strides.
+            ///
+            /// Each output element is the sum of the input elements covered by its window
+            /// divided by the number of those elements that lie inside the (unpadded) input;
+            /// positions that fall in the padding area contribute neither to the sum nor to
+            /// the count.
+            ///
+            /// \param arg The input buffer, laid out according to arg_shape.
+            /// \param out The output buffer, laid out according to out_shape.
+            /// \param arg_shape The input shape (img, chan, d_1, ..., d_n).
+            /// \param out_shape The output shape (img, chan, d'_1, ..., d'_n).
+            /// \param window_shape The pooling window shape, one entry per image dimension.
+            /// \param window_movement_strides The window strides, one entry per image dimension.
+            /// \param padding_below The below-padding, one entry per image dimension.
+            /// \param padding_above The above-padding, one entry per image dimension.
+            template <typename T>
+            void avg_pool(T* arg,
+                          T* out,
+                          const Shape& arg_shape,
+                          const Shape& out_shape,
+                          const Shape& window_shape,
+                          const Strides& window_movement_strides,
+                          const Shape& padding_below,
+                          const Shape& padding_above)
+            {
+                const size_t rank = arg_shape.size();
+
+                // These parts of the input transform are the same for every output
+                // coordinate, so build them once: unit source strides, identity axis order,
+                // and padding that is zero on the batch and channel axes (the vectors are
+                // zero-initialized) and taken from the arguments on the image axes.
+                Shape input_batch_transform_source_strides(rank, 1);
+                Shape input_batch_transform_source_axis_order(rank);
+                Shape input_batch_transform_padding_below(rank);
+                Shape input_batch_transform_padding_above(rank);
+                for (size_t i = 0; i < rank; i++)
+                {
+                    input_batch_transform_source_axis_order[i] = i;
+                }
+                for (size_t i = 2; i < rank; i++)
+                {
+                    input_batch_transform_padding_below[i] = padding_below[i - 2];
+                    input_batch_transform_padding_above[i] = padding_above[i - 2];
+                }
+
+                // Window bounds; refilled for every output coordinate.
+                Shape input_batch_transform_start(rank);
+                Shape input_batch_transform_end(rank);
+
+                // At the outermost level we walk over every output coordinate O.
+                CoordinateTransform output_transform(out_shape);
+                for (const Coordinate& out_coord : output_transform)
+                {
+                    // Our output coordinate O has the form:
+                    //
+                    //   (img,chan,i_1,...,i_n)
+                    //
+                    // For the input images we iterate the coordinate I over the range
+                    // (noninclusive on the right):
+                    //
+                    //   (img,chan,s_1*i_1,s_2*i_2,...,s_n*i_n) ->
+                    //
+                    //     (img+1,chan+1,s_1*i_1 + window_shape_1,...,s_n*i_n + window_shape_n)
+                    //
+                    // with unit stride. This range is over the *padded* image, so below we
+                    // need to check for coordinates that fall in the padding area.
+                    const size_t img_index = out_coord[0];
+                    const size_t channel = out_coord[1];
+                    input_batch_transform_start[0] = img_index;
+                    input_batch_transform_end[0] = img_index + 1;
+                    input_batch_transform_start[1] = channel;
+                    input_batch_transform_end[1] = channel + 1;
+                    for (size_t i = 2; i < rank; i++)
+                    {
+                        input_batch_transform_start[i] =
+                            window_movement_strides[i - 2] * out_coord[i];
+                        input_batch_transform_end[i] =
+                            input_batch_transform_start[i] + window_shape[i - 2];
+                    }
+
+                    CoordinateTransform input_batch_transform(
+                        arg_shape,
+                        input_batch_transform_start,
+                        input_batch_transform_end,
+                        input_batch_transform_source_strides,
+                        input_batch_transform_source_axis_order,
+                        input_batch_transform_padding_below,
+                        input_batch_transform_padding_above);
+
+                    // Accumulate the sum and the count of the window positions that have a
+                    // source element; padding positions are skipped.
+                    T result = 0;
+                    size_t n_elements = 0;
+                    for (const Coordinate& input_batch_coord : input_batch_transform)
+                    {
+                        if (input_batch_transform.has_source_coordinate(input_batch_coord))
+                        {
+                            result += arg[input_batch_transform.index(input_batch_coord)];
+                            n_elements++;
+                        }
+                    }
+
+                    out[output_transform.index(out_coord)] =
+                        avg_pool_mean<T>(result, n_elements);
+                }
+            }
+        }
+    }
+}
--- a/test/backend_test.in.cpp
+++ b/test/backend_test.in.cpp
--- a/test/type_prop.cpp
+++ b/test/type_prop.cpp