Commit 72a2ce72 authored by adstraw, committed by Matthew Brookhart

convolution backprop (#404)

* fix convolution reference script

* convolution backprop

* cleanup

* fix build warnings

* Missing include

* fix build warning part 2

* move numeric_compare to its own header
code review feedback

* fix build warnings 3

* fix build warnings 4

* clang-format

* cast to avoid implicit cast warning
parent 2c048174
@@ -12,7 +12,11 @@
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include <numeric>
#include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/reverse.hpp"
#include "ngraph/util.hpp"
using namespace std;
@@ -328,8 +332,111 @@ bool op::Convolution::is_functionally_identical(const Node& other) const
return rc;
}
// Helper: swap the first two dimensions of a tensor, e.g. {Cout, Cin, df1,...,dfn}
// -> {Cin, Cout, df1,...,dfn}, by emitting a Reshape with axes 0 and 1 exchanged.
std::shared_ptr<op::Reshape> flipDim0and1(std::shared_ptr<Node> node, const Shape& shape)
{
AxisVector input_order(shape.size());
std::iota(input_order.begin(), input_order.end(), 0);
std::swap(input_order[0], input_order[1]);
auto output_shape = shape;
std::swap(output_shape[0], output_shape[1]);
return std::make_shared<op::Reshape>(node, input_order, output_shape);
}
void op::Convolution::generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta)
{
// input
// {N, Cin, d1,...,dn}
auto x = get_input_op(0);
const auto x_shape = x->get_shape();
// filters
// {Cout, Cin, df1,...,dfn}
auto f = get_input_op(1);
const auto f_shape = f->get_shape();
// {N, Cout, d'1,...,d'n}
const auto delta_shape = delta->get_shape();
AxisSet data_axes_2_to_n;
// adjust padding for x and f adjoints per:
// https://wiki.ith.intel.com/display/intelnervana/Autodiff
CoordinateDiff x_adjoint_padding_below;
CoordinateDiff x_adjoint_padding_above;
CoordinateDiff f_adjoint_padding_below;
CoordinateDiff f_adjoint_padding_above;
// loop over data axes
for (size_t i = 0; i < delta_shape.size() - 2; ++i)
{
data_axes_2_to_n.insert(i + 2);
// (Sw - 1) % Q = (Ax + (Sx - 1)Px + Bx - (Sf - 1)Pf) % Q
auto sw_mod_q = (m_padding_below[i] + (x_shape[i + 2] - 1) * m_image_dilation_strides[i] +
m_padding_above[i] - (f_shape[i + 2] - 1) * m_window_dilation_strides[i]) %
m_window_movement_strides[i];
// (Sf - 1)Pf + (Sw - 1)%Q - Bx
x_adjoint_padding_above.push_back((f_shape[i + 2] - 1) * m_window_dilation_strides[i] +
sw_mod_q - m_padding_above[i]);
// (Sf - 1)Pf - Ax
x_adjoint_padding_below.push_back((f_shape[i + 2] - 1) * m_window_dilation_strides[i] -
m_padding_below[i]);
// Bx - (Sw - 1)%Q
f_adjoint_padding_above.push_back(m_padding_above[i] - sw_mod_q);
// Ax
f_adjoint_padding_below.push_back(m_padding_below[i]);
}
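// Special case worth keeping in mind: with unit strides and dilations and zero
// padding, sw_mod_q == 0, so the x-adjoint padding reduces to (Sf - 1) on both
// sides -- the classic "full" convolution used for the data gradient -- and the
// f-adjoint padding reduces to zero.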
// to calculate adjoint of the input...
// 1) reshape filter (flip channel dimensions)
// {Cin, Cout, df1,...,dfn}
auto f_reshape = flipDim0and1(f, f_shape);
// 2) reverse filter data
auto f_reshape_reverse = std::make_shared<op::Reverse>(f_reshape, data_axes_2_to_n);
// 3) convolve delta with reshaped/reversed filter
// swap image_dilation_stride and window_movement_stride
// {N, Cin, d1,...,dn}
auto x_adjoint = std::make_shared<op::Convolution>(delta,
f_reshape_reverse,
m_image_dilation_strides,
m_window_dilation_strides,
x_adjoint_padding_below,
x_adjoint_padding_above,
m_window_movement_strides);
adjoints.add_delta(x, x_adjoint);
// to calculate adjoint of the filter...
// 1) reshape input
// {Cin, N, d1,...,dn}
auto x_reshape = flipDim0and1(x, x_shape);
// 2) reshape delta
// {Cout, N, d'1,...d'n}
auto delta_reshape = flipDim0and1(delta, delta_shape);
// 3) convolve reshaped input with reshaped delta
// swap window_movement_stride and window_dilation_stride
// {Cin, Cout, df1,...,dfn}
auto f_adjoint = std::make_shared<op::Convolution>(x_reshape,
delta_reshape,
m_window_dilation_strides,
m_window_movement_strides,
f_adjoint_padding_below,
f_adjoint_padding_above,
m_image_dilation_strides);
// 4) reshape result to match filter dimensions
// {Cout, Cin, df1,...,dfn}
adjoints.add_delta(f, flipDim0and1(f_adjoint, f_adjoint->get_shape()));
}
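As a quick sanity check on the padding formulas (not part of the commit; plain standalone C++ with made-up sizes), the stride-1, dilation-1, zero-padding case should degenerate to the textbook result:

#include <cassert>

int main()
{
    // One spatial axis: input size Sx = 5, filter size Sf = 3,
    // window movement stride Q = 1, dilations Px = Pf = 1, padding Ax = Bx = 0.
    long Sx = 5, Sf = 3, Q = 1, Px = 1, Pf = 1, Ax = 0, Bx = 0;

    // (Sw - 1) % Q, where Sw - 1 = Ax + (Sx - 1)Px + Bx - (Sf - 1)Pf
    long sw_mod_q = (Ax + (Sx - 1) * Px + Bx - (Sf - 1) * Pf) % Q;

    long x_adj_below = (Sf - 1) * Pf - Ax;            // (Sf - 1)Pf - Ax
    long x_adj_above = (Sf - 1) * Pf + sw_mod_q - Bx; // (Sf - 1)Pf + (Sw - 1)%Q - Bx
    long f_adj_below = Ax;                            // Ax
    long f_adj_above = Bx - sw_mod_q;                 // Bx - (Sw - 1)%Q

    // Unit stride/dilation, zero padding: the data gradient is a "full"
    // convolution of delta with the flipped filter, i.e. padding Sf - 1 = 2 on
    // both sides, and the filter gradient needs no extra padding.
    assert(x_adj_below == 2 && x_adj_above == 2);
    assert(f_adj_below == 0 && f_adj_above == 0);
    return 0;
}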
@@ -152,6 +152,8 @@ namespace ngraph
/// \return The number of image dimensions.
size_t get_image_dimension_count() const { return m_image_dimension_count; }
bool is_functionally_identical(const Node&) const override;
void generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta) override;
protected:
Strides m_window_movement_strides;
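The header now declares the override. For context, every differentiable op hooks into reverse-mode autodiff the same way: generate_adjoints receives the delta flowing into the op's output and registers a delta for each input via adjoints.add_delta, exactly as the convolution implementation above does. A minimal sketch of the pattern for a unary negate (illustrative only; this exact implementation is an assumption, not part of this commit):

// Sketch: for y = -x, dE/dx = -(dE/dy), so the input's delta is the negated delta.
void op::Negative::generate_adjoints(autodiff::Adjoints& adjoints,
                                     const std::shared_ptr<Node>& delta)
{
    auto x = get_input_op(0);
    adjoints.add_delta(x, std::make_shared<op::Negative>(delta));
}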
@@ -20,80 +20,13 @@
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "util/all_close.hpp"
#include "util/autodiff/backprop_derivative.hpp"
#include "util/autodiff/backprop_function.hpp"
#include "util/autodiff/numeric_derivative.hpp"
#include "util/autodiff/numeric_compare.hpp"
#include "util/random.hpp"
using namespace std;
using namespace ngraph;
template <typename T>
bool autodiff_numeric_compare(const std::shared_ptr<runtime::Manager>& manager,
const std::shared_ptr<runtime::Backend>& backend,
std::function<std::shared_ptr<Function>()> make_graph,
const std::vector<std::shared_ptr<runtime::TensorView>>& args,
T rtol,
T atol)
{
auto f = make_graph();
auto results_num =
autodiff::numeric_derivative<T>(manager, backend, f, args, .001f, f->get_parameters());
auto g = make_graph();
auto results_sym =
autodiff::backprop_derivative<T>(manager, backend, g, args, g->get_parameters());
return test::all_close(results_num, results_sym, rtol, atol);
}
template <typename T>
bool autodiff_numeric_compare_selective(
const std::shared_ptr<runtime::Manager>& manager,
const std::shared_ptr<runtime::Backend>& backend,
std::function<std::shared_ptr<Function>()> make_graph,
const std::vector<std::shared_ptr<runtime::TensorView>>& args,
T rtol,
T atol,
const std::vector<bool>& indep_param_mask)
{
std::vector<std::shared_ptr<op::Parameter>> f_indep_params;
auto f = make_graph();
size_t i = 0;
for (auto b : indep_param_mask)
{
if (b)
{
f_indep_params.push_back(f->get_parameters().at(i));
}
i++;
}
auto results_num =
autodiff::numeric_derivative<T>(manager, backend, f, args, .001f, f_indep_params);
std::vector<std::shared_ptr<op::Parameter>> g_indep_params;
auto g = make_graph();
i = 0;
for (auto b : indep_param_mask)
{
if (b)
{
g_indep_params.push_back(g->get_parameters().at(i));
}
i++;
}
auto results_sym = autodiff::backprop_derivative<T>(manager, backend, g, args, g_indep_params);
return test::all_close(results_num, results_sym, rtol, atol);
}
TEST(${BACKEND_NAME}, backwards_maxpool_n4_c1_hw4_2x2_max)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
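With the helpers moved into numeric_compare.hpp, a convolution backprop test reduces to building the same graph twice and comparing numeric and symbolic derivatives. A sketch of the pattern (shapes and tolerances are illustrative; the backend, tensor, Parameter, and Function construction follows the test idioms of this era and may need adjusting to the exact tree):

TEST(${BACKEND_NAME}, backwards_convolution_sketch)
{
    auto manager = runtime::Manager::get("${BACKEND_NAME}");
    auto backend = manager->allocate_backend();

    test::Uniform<float> rng(-1.0f, 1.0f);
    auto x_shape = Shape{1, 1, 5, 5}; // {N, Cin, d1, d2}
    auto f_shape = Shape{1, 1, 3, 3}; // {Cout, Cin, df1, df2}

    auto make_graph = [x_shape, f_shape]() {
        auto X = make_shared<op::Parameter>(element::Float32::element_type(), x_shape);
        auto F = make_shared<op::Parameter>(element::Float32::element_type(), f_shape);
        return make_shared<Function>(make_shared<op::Convolution>(X, F),
                                     op::Parameters{X, F});
    };

    auto x = rng.initialize(backend->make_primary_tensor_view<float>(x_shape));
    auto f = rng.initialize(backend->make_primary_tensor_view<float>(f_shape));

    EXPECT_TRUE(
        autodiff_numeric_compare<float>(manager, backend, make_graph, {x, f}, .01f, .01f));
}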
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include "util/all_close.hpp"
#include "util/autodiff/backprop_derivative.hpp"
#include "util/autodiff/numeric_derivative.hpp"
template <typename T>
bool autodiff_numeric_compare(const std::shared_ptr<ngraph::runtime::Manager>& manager,
const std::shared_ptr<ngraph::runtime::Backend>& backend,
std::function<std::shared_ptr<ngraph::Function>()> make_graph,
const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& args,
T rtol,
T atol)
{
T delta = static_cast<T>(0.001);
auto f = make_graph();
auto results_num = ngraph::autodiff::numeric_derivative<T>(
manager, backend, f, args, delta, f->get_parameters());
auto g = make_graph();
auto results_sym =
ngraph::autodiff::backprop_derivative<T>(manager, backend, g, args, g->get_parameters());
return ngraph::test::all_close(results_num, results_sym, rtol, atol);
}
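Note that make_graph is invoked twice so the numeric and symbolic passes each get a fresh graph, and delta is the finite-difference step size. The scheme numeric_derivative implements is assumed here to be the usual one-sided perturbation (the actual code lives in util/autodiff/numeric_derivative.hpp):

#include <functional>

// Assumed scheme: perturb one scalar input by delta and divide the change in
// the output by the step; numeric_derivative would apply this element by
// element across every independent parameter.
template <typename T>
T finite_difference(const std::function<T(T)>& f, T x, T delta)
{
    return (f(x + delta) - f(x)) / delta;
}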
template <typename T>
bool autodiff_numeric_compare_selective(
const std::shared_ptr<ngraph::runtime::Manager>& manager,
const std::shared_ptr<ngraph::runtime::Backend>& backend,
std::function<std::shared_ptr<ngraph::Function>()> make_graph,
const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& args,
T rtol,
T atol,
const std::vector<bool>& indep_param_mask)
{
std::vector<std::shared_ptr<ngraph::op::Parameter>> f_indep_params;
auto f = make_graph();
size_t i = 0;
for (auto b : indep_param_mask)
{
if (b)
{
f_indep_params.push_back(f->get_parameters().at(i));
}
i++;
}
    auto results_num = ngraph::autodiff::numeric_derivative<T>(
        manager, backend, f, args, static_cast<T>(0.001), f_indep_params);
std::vector<std::shared_ptr<ngraph::op::Parameter>> g_indep_params;
auto g = make_graph();
i = 0;
for (auto b : indep_param_mask)
{
if (b)
{
g_indep_params.push_back(g->get_parameters().at(i));
}
i++;
}
auto results_sym =
ngraph::autodiff::backprop_derivative<T>(manager, backend, g, args, g_indep_params);
return ngraph::test::all_close(results_num, results_sym, rtol, atol);
}
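The selective variant exists so a test can hold some parameters fixed: indep_param_mask lines up positionally with make_graph's parameters, and only masked-in parameters are differentiated. A hypothetical call (names are placeholders) that checks derivatives with respect to the first and third parameters only:

// Hypothetical: three-parameter graph; compare derivatives w.r.t. params 0 and 2,
// skipping param 1.
EXPECT_TRUE(autodiff_numeric_compare_selective<float>(
    manager, backend, make_graph, {x0, x1, x2}, .01f, .01f, {true, false, true}));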