Unverified commit a509de7b authored by Robert Kimball, committed by GitHub

Merge pull request #3282 from NervanaSystems/cyphers/adjointout

Remove support for adjoints of nodes
parents a2593991 67e71e57
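
The change is a mechanical API migration: the NodeVector-based constructor of autodiff::Adjoints is removed, Function gains an OutputVector constructor, and the new Function::output(i) accessor returns an Output<Node> handle. A minimal sketch of what callers do before and after, assuming the nGraph headers of this era (the graph built here is illustrative, not taken from the diff):

    // Minimal sketch, assuming nGraph as of this commit; the graph built
    // here is illustrative and not part of the diff.
    #include <ngraph/autodiff/adjoints.hpp>
    #include <ngraph/ngraph.hpp>

    using namespace ngraph;

    void adjoints_migration_example()
    {
        auto shape = Shape{2, 2};
        auto x = std::make_shared<op::Parameter>(element::f32, shape);
        auto y = std::make_shared<op::Abs>(x);
        auto c = std::make_shared<op::Parameter>(element::f32, shape);

        // Removed by this commit:
        //   autodiff::Adjoints adjoints(NodeVector{y}, NodeVector{c});
        // The remaining constructor takes Output<Node> handles; a
        // shared_ptr<Node> converts implicitly, so braced lists still work.
        autodiff::Adjoints adjoints(OutputVector{y}, OutputVector{c});
        auto dx = adjoints.backprop_node(x);
        (void)dx;
    }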
@@ -32,7 +32,7 @@ int main()
     auto t1 = std::make_shared<op::Multiply>(t0, c);

     // Make the function
-    auto f = std::make_shared<Function>(NodeVector{t1},
+    auto f = std::make_shared<Function>(OutputVector{t1},
                                         ParameterVector{a, b, c});

     // Create the backend
...
@@ -31,7 +31,7 @@ int main()
     auto t1 = (a + b) * c;

     // Make the function
-    auto f = std::make_shared<Function>(NodeVector{t1},
+    auto f = std::make_shared<Function>(OutputVector{t1},
                                         ParameterVector{a, b, c});

     // Get the backend
...
@@ -175,8 +175,8 @@ int main(int argc, char* argv[])
     auto delta = -learning_rate * loss;

     // Updates
-    ngraph::autodiff::Adjoints adjoints(NodeVector{loss},
-                                        NodeVector{delta});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{loss},
+                                        OutputVector{delta});
     auto grad_W0 = adjoints.backprop_node(W0);
     auto grad_b0 = adjoints.backprop_node(b0);
     auto grad_W1 = adjoints.backprop_node(W1);
@@ -231,7 +231,7 @@ int main(int argc, char* argv[])
     NodeMap train_node_map;
     auto train_function = clone_function(
         Function(
-            NodeVector{loss, softmax, W0_next, b0_next, W1_next, b1_next},
+            OutputVector{loss, softmax, W0_next, b0_next, W1_next, b1_next},
             ParameterVector{X, Y, N, learning_rate, W0, b0, W1, b1}),
         train_node_map);
     auto train_exec = backend->compile(train_function);
@@ -240,7 +240,7 @@ int main(int argc, char* argv[])

     // X, W0, b0, W1, b1 -> softmax
     NodeMap inference_node_map;
     auto inference_function = clone_function(
-        Function(NodeVector{softmax}, ParameterVector{X, W0, b0, W1, b1}),
+        Function(OutputVector{softmax}, ParameterVector{X, W0, b0, W1, b1}),
         inference_node_map);
     auto inference_exec = backend->compile(inference_function);
...
@@ -172,8 +172,8 @@ int main(int argc, const char* argv[])
     auto delta = -learning_rate * loss;

     // Updates
-    ngraph::autodiff::Adjoints adjoints(NodeVector{loss},
-                                        NodeVector{delta});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{loss},
+                                        OutputVector{delta});
     auto W0_next = W0 + adjoints.backprop_node(W0);
     auto b0_next = b0 + adjoints.backprop_node(b0);
     auto W1_next = W1 + adjoints.backprop_node(W1);
@@ -218,7 +218,7 @@ int main(int argc, const char* argv[])
     NodeMap train_node_map;
     auto train_function = clone_function(
         Function(
-            NodeVector{loss, softmax, W0_next, b0_next, W1_next, b1_next},
+            OutputVector{loss, softmax, W0_next, b0_next, W1_next, b1_next},
             ParameterVector{X, Y, N, learning_rate, W0, b0, W1, b1}),
         train_node_map);
     auto train_exec = backend->compile(train_function);
@@ -227,7 +227,7 @@ int main(int argc, const char* argv[])

     // X, W0, b0, W1, b1 -> softmax
     NodeMap inference_node_map;
     auto inference_function = clone_function(
-        Function(NodeVector{softmax}, ParameterVector{X, W0, b0, W1, b1}),
+        Function(OutputVector{softmax}, ParameterVector{X, W0, b0, W1, b1}),
         inference_node_map);
     auto inference_exe = backend->compile(inference_function);
...
@@ -99,8 +99,8 @@ Once the graph is built, we need to package it in a ``Function``:
    :lines: 35-36

 The first argument to the constructor specifies the nodes that the function will
-return; in this case, the product. A ``NodeVector`` is a vector of shared
-pointers of ``op::Node``. The second argument specifies the parameters of the
+return; in this case, the product. An ``OutputVector`` is a vector of references to
+outputs of ``op::Node``. The second argument specifies the parameters of the
 function, in the order they are to be passed to the compiled function. A
 ``ParameterVector`` is a vector of shared pointers to ``op::Parameter``.
...
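
Put together, packaging a small graph under the new signature looks like this; a minimal sketch mirroring the tutorial code above (shapes and element type are illustrative assumptions):

    // Minimal sketch mirroring the tutorial: build (a + b) * c and wrap it
    // in a Function; shapes and element type are illustrative.
    #include <ngraph/ngraph.hpp>

    using namespace ngraph;

    int main()
    {
        auto shape = Shape{2, 3};
        auto a = std::make_shared<op::Parameter>(element::f32, shape);
        auto b = std::make_shared<op::Parameter>(element::f32, shape);
        auto c = std::make_shared<op::Parameter>(element::f32, shape);
        auto t0 = std::make_shared<op::Add>(a, b);
        auto t1 = std::make_shared<op::Multiply>(t0, c);

        // First argument: the outputs the function returns (an OutputVector);
        // second argument: the parameters, in the order callers pass them.
        auto f = std::make_shared<Function>(OutputVector{t1},
                                            ParameterVector{a, b, c});
        return 0;
    }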
@@ -51,11 +51,6 @@ OutputVector make_zeros(std::shared_ptr<Node> x)
     return zeros;
 }

-autodiff::Adjoints::Adjoints(const NodeVector& ys, const NodeVector& cs)
-    : Adjoints(OutputVector(ys.begin(), ys.end()), OutputVector(cs.begin(), cs.end()))
-{
-}
-
 autodiff::Adjoints::Adjoints(const OutputVector& ys, const OutputVector& cs)
 {
     if (ys.size() != cs.size())
...
@@ -46,8 +46,6 @@ namespace ngraph
            /// \param c An expression for where to evaluate the derivatives
            Adjoints(const OutputVector& y, const OutputVector& c);

-           Adjoints(const NodeVector& y, const NodeVector& c);
-
            Adjoints(const Adjoints& adjoints) = default;
            Adjoints& operator=(const Adjoints& adjoints) = default;
            Adjoints() = default;
...
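
With the NodeVector overload gone, call sites that still hold node pointers perform the conversion themselves; this is exactly what the deleted delegating constructor did internally. A hedged sketch (the helper is illustrative, not from this diff):

    // Sketch of the caller-side conversion that replaces the removed
    // NodeVector constructor; make_adjoints is an illustrative helper.
    #include <ngraph/autodiff/adjoints.hpp>
    #include <ngraph/ngraph.hpp>

    using namespace ngraph;

    autodiff::Adjoints make_adjoints(const NodeVector& ys, const NodeVector& cs)
    {
        // shared_ptr<Node> converts implicitly to Output<Node>, so an
        // iterator-range construction is all the old overload was doing.
        return autodiff::Adjoints(OutputVector(ys.begin(), ys.end()),
                                  OutputVector(cs.begin(), cs.end()));
    }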
@@ -41,6 +41,30 @@ Function::Function(const ResultVector& results,
     init();
 }

+Function::Function(const OutputVector& results,
+                   const ParameterVector& parameters,
+                   const std::string& name)
+    : m_results(results.size())
+    , m_parameters(parameters)
+    , m_temporary_pool_size(0)
+    , m_instance_id(m_next_instance_id.fetch_add(1))
+    , m_name(name)
+    , m_unique_name("Function_" + to_string(m_instance_id))
+{
+    if (std::any_of(results.cbegin(), results.cend(), [](Output<Node> n) {
+            return std::dynamic_pointer_cast<op::Result>(n.get_node_shared_ptr());
+        }))
+    {
+        throw ngraph_error(
+            " Results already contain op::Results. Use a c-tor that takes a ResultVector");
+    }
+    std::transform(results.begin(), results.end(), m_results.begin(), [](Output<Node> n) {
+        return std::make_shared<op::Result>(n);
+    });
+    init();
+}
+
 Function::Function(const NodeVector& results,
                    const ParameterVector& parameters,
                    const std::string& name)
@@ -208,6 +232,11 @@ shared_ptr<Node> Function::get_output_op(size_t i) const
     return m_results.at(i);
 }

+Output<Node> Function::output(size_t i) const
+{
+    return m_results.at(i);
+}
+
 shared_ptr<Node> Function::get_result() const
 {
     if (m_results.size() != 1)
...
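
The new accessor returns an Output<Node> handle rather than a shared_ptr to the op::Result node, which is why call sites below switch from '->' to '.' when querying element type and shape. A small sketch of both styles (the function name is illustrative):

    // Sketch: inspecting a function's first output before and after this
    // commit; inspect_first_output is an illustrative name.
    #include <ngraph/ngraph.hpp>

    using namespace ngraph;

    void inspect_first_output(const std::shared_ptr<Function>& f)
    {
        // Old style: shared_ptr<Node> to the Result op, hence '->'.
        std::shared_ptr<Node> result_op = f->get_output_op(0);
        const element::Type& et_old = result_op->get_element_type();

        // New style: Output<Node> handle, hence '.'.
        Output<Node> y = f->output(0);
        const element::Type& et_new = y.get_element_type();
        Shape s = y.get_shape();
        (void)et_old; (void)et_new; (void)s;
    }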
@@ -37,6 +37,10 @@ namespace ngraph
                 const ParameterVector& parameters,
                 const std::string& name = "");

+       Function(const OutputVector& results,
+                const ParameterVector& parameters,
+                const std::string& name = "");
+
        Function(const std::shared_ptr<Node>& result,
                 const ParameterVector& parameters,
                 const std::string& name = "");
@@ -55,6 +59,8 @@ namespace ngraph
        /// Return the op that generates output i
        std::shared_ptr<Node> get_output_op(size_t i) const;

+       Output<Node> output(size_t i) const;
+
        /// Return the element type of output i
        const element::Type& get_output_element_type(size_t i) const;
...
@@ -733,8 +733,8 @@ NGRAPH_TEST(${BACKEND_NAME}, batch_norm_bprop_n4c3h2w2)
     auto C = std::make_shared<op::Parameter>(element::f32, shape_r);
     auto zero = ngraph::make_zero(bn_dgamma->get_element_type(), bn_dgamma->get_shape());

-    ngraph::autodiff::Adjoints adjoints(NodeVector{bn_dx, bn_dgamma, bn_dbeta},
-                                        NodeVector{C, zero, zero});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{bn_dx, bn_dgamma, bn_dbeta},
+                                        OutputVector{C, zero, zero});
     auto dinput = adjoints.backprop_node(input);
     auto dgamma = adjoints.backprop_node(gamma);
...
@@ -257,10 +257,10 @@ NGRAPH_TEST(${BACKEND_NAME}, divide_adjoint_stability)
     auto B = make_shared<op::Parameter>(element::f32, shape);
     auto f = make_shared<Function>(make_shared<op::Divide>(A, B), ParameterVector{A, B});

-    auto Y_out = f->get_output_op(0);
+    auto Y_out = f->output(0);
     auto Xs = f->get_parameters();
-    auto C = std::make_shared<op::Parameter>(Y_out->get_element_type(), Y_out->get_shape());
-    ngraph::autodiff::Adjoints adjoints(NodeVector{Y_out}, NodeVector{C});
+    auto C = std::make_shared<op::Parameter>(Y_out.get_element_type(), Y_out.get_shape());
+    ngraph::autodiff::Adjoints adjoints(OutputVector{Y_out}, OutputVector{C});
     std::vector<std::shared_ptr<Node>> dYdXs(Xs.size());
     transform(
         Xs.begin(), Xs.end(), dYdXs.begin(), [C, &adjoints](const std::shared_ptr<Node>& X) {
...
@@ -507,7 +507,8 @@ TEST(cpu_fusion, conv_bias_bprop_n1c1h3w3)
     auto f = make_shared<Function>(
         convolution_bias, ParameterVector{conv_test.data, conv_test.weights, conv_test.bias});

-    ngraph::autodiff::Adjoints adjoints(NodeVector{convolution_bias}, NodeVector{conv_test.delta});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{convolution_bias},
+                                        OutputVector{conv_test.delta});
     auto d_data = adjoints.backprop_node(conv_test.data);
     auto d_weights = adjoints.backprop_node(conv_test.weights);
@@ -546,7 +547,7 @@ TEST(cpu_fusion, conv_bias_bprop)
     pass_manager.register_pass<pass::VisualizeTree>("conv_bias_bprop_fusion.png");
     auto f = make_shared<Function>(conv_bias, ParameterVector{data_batch, filters, bias});

-    ngraph::autodiff::Adjoints adjoints(NodeVector{conv_bias}, NodeVector{delta});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{conv_bias}, OutputVector{delta});
     auto d_data = adjoints.backprop_node(data_batch);
     auto d_weights = adjoints.backprop_node(filters);
@@ -1452,7 +1453,7 @@ TEST(cpu_fusion, max_pool_with_indices)
     auto max_pool = std::make_shared<op::MaxPool>(input, window_shape);
     auto C = std::make_shared<op::Parameter>(element::f32, max_pool->get_shape());

-    ngraph::autodiff::Adjoints adjoints(NodeVector{max_pool}, NodeVector{C});
+    ngraph::autodiff::Adjoints adjoints(ngraph::OutputVector{max_pool}, ngraph::OutputVector{C});
     auto dinput = adjoints.backprop_node(input);
@@ -1789,14 +1790,14 @@ static std::shared_ptr<ngraph::Function> make_forward_function()
     return std::make_shared<Function>(NodeVector{max_pool, neg, absn}, ParameterVector{input});
 }

-static std::pair<std::shared_ptr<ngraph::Function>, std::vector<std::shared_ptr<ngraph::Node>>>
+static std::pair<std::shared_ptr<ngraph::Function>, OutputVector>
     make_backward_function(std::shared_ptr<ngraph::Function> f)
 {
     // get parameters
     std::vector<std::shared_ptr<ngraph::op::Parameter>> back_parameters = f->get_parameters();

-    ngraph::NodeVector adjoints;
-    ngraph::NodeVector outputs;
+    ngraph::OutputVector adjoints;
+    ngraph::OutputVector outputs;
     for (auto Y : f->get_results())
     {
         // Get the output
@@ -1809,7 +1810,7 @@ static std::pair<std::shared_ptr<ngraph::Function>, std::vector<std::shared_ptr<
     ngraph::autodiff::Adjoints adjoint{outputs, adjoints};

     // Perform autodiff
-    std::vector<std::shared_ptr<Node>> dYdXs(back_parameters.size());
+    OutputVector dYdXs(back_parameters.size());
     transform(back_parameters.begin(),
               back_parameters.end(),
               dYdXs.begin(),
@@ -1818,7 +1819,8 @@ static std::pair<std::shared_ptr<ngraph::Function>, std::vector<std::shared_ptr<
     // create the backward function
     std::vector<std::shared_ptr<ngraph::op::Parameter>> param_adjoints;
     for (auto n : adjoints)
-        param_adjoints.push_back(std::dynamic_pointer_cast<ngraph::op::Parameter>(n));
+        param_adjoints.push_back(
+            std::dynamic_pointer_cast<ngraph::op::Parameter>(n.get_node_shared_ptr()));
     back_parameters.insert(back_parameters.begin(), param_adjoints.begin(), param_adjoints.end());

     return {std::make_shared<ngraph::Function>(dYdXs, back_parameters), adjoints};
@@ -2703,7 +2705,7 @@ void sigmoid_multiply_fusion_backward_compute(runtime::Backend* backend,
     auto sigmoid_mul =
         make_shared<op::SigmoidMultiply>(input_0_alt, input_1_alt, input_0_type, input_1_type);

-    ngraph::autodiff::Adjoints adjoints(NodeVector{sigmoid_mul}, NodeVector{delta_param});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{sigmoid_mul}, OutputVector{delta_param});
     auto d_input_0 = adjoints.backprop_node(input_0_adjoint);
     auto d_input_1 = adjoints.backprop_node(input_1_adjoint);
     auto df = make_shared<Function>(NodeVector{d_input_0, d_input_1}, back_params);
...
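
A recurring pattern in these test updates: an Output<Node> no longer is a node, so code that needs a downcast first recovers the producing node with get_node_shared_ptr(). A hedged sketch of that idiom (the helper name is illustrative):

    // Sketch: recovering the producing node from an Output<Node> before a
    // dynamic_pointer_cast, as the updated make_backward_function does.
    #include <ngraph/ngraph.hpp>

    using namespace ngraph;

    std::shared_ptr<op::Parameter> as_parameter(const Output<Node>& value)
    {
        // Returns nullptr when the producer is not an op::Parameter.
        return std::dynamic_pointer_cast<op::Parameter>(value.get_node_shared_ptr());
    }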
@@ -144,7 +144,7 @@ namespace ngraph
                // df/dX*
                std::vector<std::shared_ptr<Node>> df_output_params;

-               Adjoints adjoints(NodeVector{f->get_output_op(0)}, NodeVector{c_param});
+               Adjoints adjoints(OutputVector{f->output(0)}, OutputVector{c_param});

                // for each x "of interest"
                for (auto x : indep_params)
...
@@ -32,10 +32,10 @@ using namespace ngraph;

 std::shared_ptr<Function> autodiff::backprop_function(const std::shared_ptr<Function>& f)
 {
-    auto Y_out = f->get_output_op(0);
+    auto Y_out = f->output(0);
     auto Xs = f->get_parameters();
-    auto C = std::make_shared<op::Parameter>(Y_out->get_element_type(), Y_out->get_shape());
-    Adjoints adjoints(NodeVector{Y_out}, NodeVector{C});
+    auto C = std::make_shared<op::Parameter>(Y_out.get_element_type(), Y_out.get_shape());
+    Adjoints adjoints(OutputVector{Y_out}, OutputVector{C});
     std::vector<std::shared_ptr<Node>> dYdXs(Xs.size());
     transform(Xs.begin(), Xs.end(), dYdXs.begin(), [C, &adjoints](const std::shared_ptr<Node>& X) {
         return adjoints.backprop_node(X);
...