Commit b408a08e authored by Nick Korovaiko, committed by Scott Cyphers

Back Propagation for Average Pooling (#407)

* bprop for avg pool

remove debug statements + formatting

* fix CPU test failures

* numeric tests

* use make_shared; unprotect c-tor
parent e87b4936
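
For context, average pooling computes the mean of each window, so its adjoint scatters each output-gradient element back over the window it came from, scaled by 1/(window size), or by 1/(number of physical elements) when padded positions are excluded, as this commit does. A minimal 1-D sketch of that rule, illustrative only (avg_pool_bprop_1d is a hypothetical helper, not part of this commit):

// 1-D average-pool backprop: window w, stride s, no padding.
// Each delta[j] contributes delta[j] / w to every input position its window covered.
#include <cstddef>
#include <vector>

std::vector<float> avg_pool_bprop_1d(const std::vector<float>& delta,
                                     std::size_t n_in,
                                     std::size_t w,
                                     std::size_t s)
{
    std::vector<float> din(n_in, 0.0f);
    for (std::size_t j = 0; j < delta.size(); ++j)
    {
        for (std::size_t k = 0; k < w; ++k)
        {
            din[j * s + k] += delta[j] / static_cast<float>(w);
        }
    }
    return din;
}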
@@ -217,8 +217,48 @@ bool op::AvgPool::is_functionally_identical(const Node& other) const
return rc;
}
op::AvgPoolBprop::AvgPoolBprop(const std::shared_ptr<Node>& arg,
const std::shared_ptr<Node>& delta,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above)
: RequiresTensorViewArgs("AvgPoolBprop", {arg, delta})
, m_window_shape(window_shape)
, m_window_movement_strides(window_movement_strides)
, m_padding_below(padding_below)
, m_padding_above(padding_above)
{
set_value_type_checked(get_input_element_type(0), arg->get_shape());
}
bool op::AvgPoolBprop::is_functionally_identical(const Node& other) const
{
bool rc = true;
if (Node::is_functionally_identical(other))
{
const AvgPoolBprop& rhs = dynamic_cast<const AvgPoolBprop&>(other);
rc &= m_window_shape == rhs.m_window_shape;
rc &= m_window_movement_strides == rhs.m_window_movement_strides;
rc &= m_padding_below == rhs.m_padding_below;
rc &= m_padding_above == rhs.m_padding_above;
}
else
{
rc = false;
}
return rc;
}
void op::AvgPool::generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta)
{
auto operand = get_input_op(0);
auto bprop = std::make_shared<op::AvgPoolBprop>(operand,
delta,
m_window_shape,
m_window_movement_strides,
m_padding_below,
m_padding_above);
adjoints.add_delta(operand, bprop);
}
@@ -97,6 +97,7 @@ namespace ngraph
{
if (new_args.size() != 1)
throw ngraph_error("Incorrect number of new arguments");
return std::make_shared<AvgPool>(new_args.at(0),
m_window_shape,
m_window_movement_strides,
@@ -105,6 +106,9 @@ namespace ngraph
}
bool is_functionally_identical(const Node&) const override;
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta) override;
/// \return The window shape.
const Shape& get_window_shape() const { return m_window_shape; }
/// \return The window movement strides.
@@ -119,5 +123,43 @@ namespace ngraph
Shape m_padding_below;
Shape m_padding_above;
};
class AvgPoolBprop : public RequiresTensorViewArgs
{
public:
AvgPoolBprop(const std::shared_ptr<Node>& arg,
const std::shared_ptr<Node>& delta,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above);
virtual std::shared_ptr<Node> copy_with_new_args(
const std::vector<std::shared_ptr<Node>>& new_args) const override
{
if (new_args.size() != 2)
throw ngraph_error("Incorrect number of new arguments");
return std::make_shared<AvgPoolBprop>(new_args.at(0),
new_args.at(1),
m_window_shape,
m_window_movement_strides,
m_padding_below,
m_padding_above);
}
const Shape& get_window_shape() const { return m_window_shape; }
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Shape& get_padding_below() const { return m_padding_below; }
const Shape& get_padding_above() const { return m_padding_above; }
bool is_functionally_identical(const Node&) const override;
protected:
Shape m_window_shape;
Strides m_window_movement_strides;
Shape m_padding_below;
Shape m_padding_above;
};
}
}
@@ -2341,6 +2341,29 @@ void runtime::cpu::CPU_Emitter::EmitPad(codegen::CodeWriter& writer,
writer << " {" << join(pad->get_padding_interior()) << "});\n";
}
void runtime::cpu::CPU_Emitter::EmitAvgPoolBprop(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::cpu::TensorViewWrapper>& args,
const vector<runtime::cpu::TensorViewWrapper>& out)
{
auto apb = static_cast<const op::AvgPoolBprop*>(n);
auto arg_shape = args[0].get_shape();
auto delta_shape = args[1].get_shape();
writer << "kernel::avg_pool_bprop<" << out[0].get_type() << ">(" << args[0].get_name() << ",\n";
writer << " " << args[1].get_name() << ",\n";
writer << " " << out[0].get_name() << ",\n";
writer << " {" << join(arg_shape) << "},\n";
writer << " {" << join(delta_shape) << "},\n";
writer << " {" << join(apb->get_window_shape()) << "},\n";
writer << " {" << join(apb->get_window_movement_strides()) << "},\n";
writer << " {" << join(apb->get_padding_below()) << "},\n";
writer << " {" << join(apb->get_padding_above()) << "},\n";
writer << " true);\n";
}
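
For a concrete picture of what this emitter generates: with float tensors, arg shape {1, 1, 4, 4}, delta shape {1, 1, 3, 3}, a 2x2 window, 1x1 strides, and no padding, the emitted source would look roughly as follows (arg0, arg1, and out0 stand in for the emitter's actual tensor names):

kernel::avg_pool_bprop<float>(arg0,
                              arg1,
                              out0,
                              {1, 1, 4, 4},
                              {1, 1, 3, 3},
                              {2, 2},
                              {1, 1},
                              {0, 0},
                              {0, 0},
                              true);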
//------------------------------------------------------------------------------------------------
// Utility methods
//------------------------------------------------------------------------------------------------
@@ -94,6 +94,7 @@ namespace ngraph
static void EMITTER_DECL(EmitReduceWindow);
static void EMITTER_DECL(EmitSelectAndScatter);
static void EMITTER_DECL(EmitAvgPool);
static void EMITTER_DECL(EmitAvgPoolBprop);
static void EMITTER_DECL(EmitPad);
static void EmitMKLDNNPreamble(codegen::CodeWriter& writer);
@@ -199,6 +199,7 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::ReduceWindow), &runtime::cpu::CPU_Emitter::EmitReduceWindow},
{TI(ngraph::op::SelectAndScatter), &runtime::cpu::CPU_Emitter::EmitSelectAndScatter},
{TI(ngraph::op::AvgPool), &runtime::cpu::CPU_Emitter::EmitAvgPool},
{TI(ngraph::op::AvgPoolBprop), &runtime::cpu::CPU_Emitter::EmitAvgPoolBprop},
{TI(ngraph::op::Pad), &runtime::cpu::CPU_Emitter::EmitPad},
};
@@ -262,6 +262,21 @@ private:
avg_pool->get_padding_below(),
avg_pool->get_padding_above());
}
else if (node_op == "AvgPoolBprop")
{
ngraph::op::AvgPoolBprop* apb = dynamic_cast<ngraph::op::AvgPoolBprop*>(&node);
kernel::avg_pool_bprop<T>(
reinterpret_cast<T*>(args[0]->get_data_ptr()),
reinterpret_cast<T*>(args[1]->get_data_ptr()),
reinterpret_cast<T*>(out[0]->get_data_ptr()),
args[0]->get_shape(),
args[1]->get_shape(), /*delta shape*/
apb->get_window_shape(),
apb->get_window_movement_strides(),
apb->get_padding_below(),
apb->get_padding_above(),
true /*divide by the number of physical elements in a window*/);
}
else if (node_op == "Broadcast")
{
ngraph::op::Broadcast* broadcast = dynamic_cast<ngraph::op::Broadcast*>(&node);
@@ -14,10 +14,13 @@
#pragma once
#include <algorithm>
#include <cmath>
#include <cstring>
#include <numeric>
#include <vector>
#include "ngraph/common.hpp"
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
@@ -25,6 +28,94 @@ namespace ngraph
{
namespace kernel
{
template <typename T>
void avg_pool_bprop(T* arg,
T* delta,
T* out, // out has the same shape as arg (arg_shape)
const Shape& arg_shape,
const Shape& delta_shape,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
bool count_only_physical)
{
memset(out, 0, sizeof(T) * shape_size(arg_shape));
size_t j = 0; // linear index into the delta (ep) buffer
size_t num_elements_in_window = shape_size(window_shape);
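// Each delta element corresponds to one pooling window in arg;
// rebuild that window and scatter the scaled delta into it.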
CoordinateTransform output_transform(delta_shape);
for (const Coordinate& out_coord : output_transform)
{
size_t img_index = out_coord[0];
size_t channel = out_coord[1];
size_t n_image_dimensions = arg_shape.size() - 2;
Coordinate input_batch_transform_start(2 + n_image_dimensions);
Coordinate input_batch_transform_end(2 + n_image_dimensions);
Strides input_batch_transform_source_strides(2 + n_image_dimensions, 1);
AxisVector input_batch_transform_source_axis_order(2 + n_image_dimensions);
CoordinateDiff input_batch_transform_padding_below(2 + n_image_dimensions);
CoordinateDiff input_batch_transform_padding_above(2 + n_image_dimensions);
input_batch_transform_start[0] = img_index;
input_batch_transform_end[0] = img_index + 1;
input_batch_transform_start[1] = channel;
input_batch_transform_end[1] = channel + 1;
input_batch_transform_padding_below[0] = 0;
input_batch_transform_padding_below[1] = 0;
input_batch_transform_padding_above[0] = 0;
input_batch_transform_padding_above[1] = 0;
for (size_t i = 2; i < n_image_dimensions + 2; i++)
{
size_t window_shape_this_dim = window_shape[i - 2];
size_t movement_stride = window_movement_strides[i - 2];
input_batch_transform_start[i] = movement_stride * out_coord[i];
input_batch_transform_end[i] =
input_batch_transform_start[i] + window_shape_this_dim;
input_batch_transform_padding_below[i] = padding_below[i - 2];
input_batch_transform_padding_above[i] = padding_above[i - 2];
}
std::iota(begin(input_batch_transform_source_axis_order),
end(input_batch_transform_source_axis_order),
0);
CoordinateTransform input_batch_transform(
arg_shape,
input_batch_transform_start,
input_batch_transform_end,
input_batch_transform_source_strides,
input_batch_transform_source_axis_order,
input_batch_transform_padding_below,
input_batch_transform_padding_above);
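// Padded windows along the border may hang over the edge of arg; when
// requested, divide only by the elements that physically exist in arg.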
if (count_only_physical)
{
num_elements_in_window = 0;
// Naive but correct: count only the physical (non-padded) elements in this window.
for (const Coordinate& input_batch_coord : input_batch_transform)
{
if (input_batch_transform.has_source_coordinate(input_batch_coord))
{
num_elements_in_window++;
}
}
}
for (const Coordinate& input_batch_coord : input_batch_transform)
{
if (input_batch_transform.has_source_coordinate(input_batch_coord))
{
size_t index = input_batch_transform.index(input_batch_coord);
out[index] += delta[j] / num_elements_in_window;
}
}
j++; // move to the next delta (ep) element
}
}
template <typename T>
void avg_pool(T* arg,
T* out,
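A minimal usage sketch of the kernel above (assuming it lives in ngraph::runtime::kernel, as the test include below suggests; the arg buffer contents are irrelevant since only the shapes drive the computation):

#include <vector>
#include "ngraph/runtime/kernel/avg_pool.hpp"
using namespace ngraph;

int main()
{
    Shape arg_shape{1, 1, 4, 4};
    Shape delta_shape{1, 1, 3, 3}; // 2x2 window, 1x1 strides, no padding
    std::vector<float> arg(shape_size(arg_shape), 0.0f);
    std::vector<float> delta(shape_size(delta_shape), 4.0f);
    std::vector<float> out(shape_size(arg_shape));
    runtime::kernel::avg_pool_bprop<float>(arg.data(), delta.data(), out.data(),
                                           arg_shape, delta_shape,
                                           Shape{2, 2}, Strides{1, 1},
                                           Shape{0, 0}, Shape{0, 0},
                                           true);
    // Every window scatters 4 / 4 = 1 to the four inputs it covers, so out is
    // {1, 2, 2, 1, 2, 4, 4, 2, 2, 4, 4, 2, 1, 2, 2, 1}, the hw4x4 test below.
}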
@@ -24,6 +24,8 @@
#include "util/autodiff/numeric_compare.hpp"
#include "util/random.hpp"
#include "ngraph/runtime/kernel/avg_pool.hpp"
using namespace std;
using namespace ngraph;
@@ -117,6 +119,263 @@ TEST(${BACKEND_NAME}, backwards_maxpool_n2_c1_hw5_3x3_str2_max)
ASSERT_TRUE(read_vector<int>(output) == expected);
}
TEST(${BACKEND_NAME}, backwards_avgpool_n1_c1_hw2x2)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
auto padding = Shape{1, 1};
auto shape_a = Shape{1, 1, 2, 2};
auto avgpool_shape = Shape{1, 1, 2, 2};
auto A = make_shared<op::Parameter>(element::i32, shape_a);
auto window_shape = Shape{2, 2};
auto window_movement_strides = Strides{2, 2};
auto avgpool =
make_shared<op::AvgPool>(A, window_shape, window_movement_strides, padding, padding);
auto f = make_shared<Function>(avgpool, op::Parameters{A});
shared_ptr<runtime::TensorView> ep =
backend->make_primary_tensor_view(element::i32, avgpool_shape);
vector<int> dataEp(shape_size(avgpool_shape), 4);
shared_ptr<runtime::TensorView> input =
backend->make_primary_tensor_view(element::i32, shape_a);
shared_ptr<runtime::TensorView> output =
backend->make_primary_tensor_view(element::i32, shape_a);
vector<int> dataInput{4, 8, 12, 16};
copy_data(ep, dataEp);
copy_data(input, dataInput);
auto C = make_shared<op::Parameter>(element::i32, avgpool_shape);
auto df = autodiff::backprop_function(f);
auto external = manager->compile(df);
auto cf = backend->make_call_frame(external);
cf->tensor_call({input, ep}, {output});
ASSERT_TRUE(read_vector<int>(output) == dataEp);
}
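Why this padded case expects the output to equal dataEp: with a 2x2 input, 2x2 window, 2x2 strides, and 1x1 padding, each of the four windows covers exactly one physical input element, so the count-only-physical divisor is 1 and every input receives its delta of 4 unchanged.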
TEST(${BACKEND_NAME}, backwards_avgpool_n1_c1_hw4x4)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
auto shape_a = Shape{1, 1, 4, 4};
auto avgpool_shape = Shape{1, 1, 3, 3};
auto A = make_shared<op::Parameter>(element::i32, shape_a);
auto window_shape = Shape{2, 2};
auto window_movement_strides = Strides{1, 1};
auto avgpool = make_shared<op::AvgPool>(A, window_shape, window_movement_strides);
auto f = make_shared<Function>(avgpool, op::Parameters{A});
shared_ptr<runtime::TensorView> ep =
backend->make_primary_tensor_view(element::i32, avgpool_shape);
vector<int> dataEp(shape_size(avgpool_shape), 4);
shared_ptr<runtime::TensorView> input =
backend->make_primary_tensor_view(element::i32, shape_a);
shared_ptr<runtime::TensorView> output =
backend->make_primary_tensor_view(element::i32, shape_a);
vector<int> dataInput{1, 3, 1, 3, 1, 3, 1, 3, 3, 5, 3, 5, 3, 5, 3, 5};
vector<int> expected{1, 2, 2, 1, 2, 4, 4, 2, 2, 4, 4, 2, 1, 2, 2, 1};
copy_data(ep, dataEp);
copy_data(input, dataInput);
auto C = make_shared<op::Parameter>(element::i32, avgpool_shape);
auto df = autodiff::backprop_function(f);
auto external = manager->compile(df);
auto cf = backend->make_call_frame(external);
cf->tensor_call({input, ep}, {output});
ASSERT_TRUE(read_vector<int>(output) == expected);
}
TEST(${BACKEND_NAME}, backwards_avgpool_n2_c2_hw4x4)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
auto shape_a = Shape{2, 2, 4, 4};
auto avgpool_shape = Shape{2, 2, 2, 2};
auto A = make_shared<op::Parameter>(element::i32, shape_a);
auto window_shape = Shape{2, 2};
auto window_movement_strides = Strides{2, 2};
auto avgpool = make_shared<op::AvgPool>(A, window_shape, window_movement_strides);
auto f = make_shared<Function>(avgpool, op::Parameters{A});
shared_ptr<runtime::TensorView> ep =
backend->make_primary_tensor_view(element::i32, avgpool_shape);
vector<int> dataEp(shape_size(avgpool_shape), 12);
shared_ptr<runtime::TensorView> input =
backend->make_primary_tensor_view(element::i32, shape_a);
shared_ptr<runtime::TensorView> output =
backend->make_primary_tensor_view(element::i32, shape_a);
vector<int> dataInput{// i1c1
                      1, 2, 6, 7,
                      3, 4, 4, 3,
                      19, 1, 2, 3,
                      18, 2, 3, 2,
                      // i1c2
                      4, 1, 5, 5,
                      1, 4, 5, 5,
                      12, 8, 2, 3,
                      15, 5, 3, 2,
                      // i2c1
                      2, 3, 7, 7,
                      3, 2, 3, 3,
                      13, 7, 1, 2,
                      7, 13, 3, 4,
                      // i2c2
                      1, 1, 2, 2,
                      7, 1, 2, 14,
                      6, 16, 4, 1,
                      14, 4, 4, 1};
vector<int> expected(shape_size(shape_a), 3);
copy_data(ep, dataEp);
copy_data(input, dataInput);
auto C = make_shared<op::Parameter>(element::i32, avgpool_shape);
auto df = autodiff::backprop_function(f);
auto external = manager->compile(df);
auto cf = backend->make_call_frame(external);
cf->tensor_call({input, ep}, {output});
ASSERT_TRUE(read_vector<int>(output) == expected);
}
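The uniform expectation of 3 follows from the non-overlapping 2x2 windows: each input element belongs to exactly one window and receives 12 / 4 = 3 regardless of the input values.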
TEST(${BACKEND_NAME}, backwards_avgpool_n2_c2_hw4x4_numeric)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
auto shape_a = Shape{2, 2, 4, 4};
test::Uniform<float> rng(1.0f, 10.0f);
auto make_graph = [shape_a]() {
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto window_shape = Shape{2, 2};
auto window_movement_strides = Strides{2, 2};
auto avgpool = make_shared<op::AvgPool>(A, window_shape, window_movement_strides);
return make_shared<Function>(avgpool, op::Parameters{A});
};
for (auto i = 0; i < 100; i++)
{
auto x = rng.initialize(backend->make_primary_tensor_view(element::f32, shape_a));
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph, {x}, .01f, .01f));
}
}
TEST(${BACKEND_NAME}, backwards_avgpool_n2_c2_hw4x4_win_2x2_str_1x1_numeric)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
auto shape_a = Shape{2, 2, 4, 4};
test::Uniform<float> rng(1.0f, 10.0f);
auto make_graph = [shape_a]() {
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto window_shape = Shape{2, 2};
auto window_movement_strides = Strides{1, 1};
auto avgpool = make_shared<op::AvgPool>(A, window_shape, window_movement_strides);
return make_shared<Function>(avgpool, op::Parameters{A});
};
for (auto i = 0; i < 100; i++)
{
auto x = rng.initialize(backend->make_primary_tensor_view(element::f32, shape_a));
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph, {x}, .01f, .01f));
}
}
TEST(${BACKEND_NAME}, backwards_avgpool_n2_c2_hw2x2_win_2x2_str_1x1_padding_numeric)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
auto shape_a = Shape{2, 2, 4, 4};
test::Uniform<float> rng(1.0f, 10.0f);
auto make_graph = [shape_a]() {
auto A = make_shared<op::Parameter>(element::f32, shape_a);
auto window_shape = Shape{2, 2};
auto padding = Shape{1, 1};
auto window_movement_strides = Strides{2, 2};
auto avgpool =
make_shared<op::AvgPool>(A, window_shape, window_movement_strides, padding, padding);
return make_shared<Function>(avgpool, op::Parameters{A});
};
for (auto i = 0; i < 100; i++)
{
auto x = rng.initialize(backend->make_primary_tensor_view(element::f32, shape_a));
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph, {x}, .01f, .01f));
}
}
TEST(${BACKEND_NAME}, backwards_abs)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");