Unverified commit a509de7b authored by Robert Kimball, committed by GitHub

Merge pull request #3282 from NervanaSystems/cyphers/adjointout

Remove support for adjoints of nodes
parents a2593991 67e71e57
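
The change is a mechanical API migration: the NodeVector-based constructor of autodiff::Adjoints is removed, Function gains an OutputVector constructor, and the new Function::output(i) accessor returns an Output<Node> handle. A minimal sketch of what callers do before and after, assuming the nGraph headers of this era (the graph built here is illustrative, not taken from the diff):

    // Minimal sketch, assuming nGraph as of this commit; the graph built
    // here is illustrative and not part of the diff.
    #include <ngraph/autodiff/adjoints.hpp>
    #include <ngraph/ngraph.hpp>

    using namespace ngraph;

    void adjoints_migration_example()
    {
        auto shape = Shape{2, 2};
        auto x = std::make_shared<op::Parameter>(element::f32, shape);
        auto y = std::make_shared<op::Abs>(x);
        auto c = std::make_shared<op::Parameter>(element::f32, shape);

        // Removed by this commit:
        //   autodiff::Adjoints adjoints(NodeVector{y}, NodeVector{c});
        // The remaining constructor takes Output<Node> handles; a
        // shared_ptr<Node> converts implicitly, so braced lists still work.
        autodiff::Adjoints adjoints(OutputVector{y}, OutputVector{c});
        auto dx = adjoints.backprop_node(x);
        (void)dx;
    }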
@@ -32,7 +32,7 @@ int main()
     auto t1 = std::make_shared<op::Multiply>(t0, c);

     // Make the function
-    auto f = std::make_shared<Function>(NodeVector{t1},
+    auto f = std::make_shared<Function>(OutputVector{t1},
                                         ParameterVector{a, b, c});

     // Create the backend
...
@@ -31,7 +31,7 @@ int main()
     auto t1 = (a + b) * c;

     // Make the function
-    auto f = std::make_shared<Function>(NodeVector{t1},
+    auto f = std::make_shared<Function>(OutputVector{t1},
                                         ParameterVector{a, b, c});

     // Get the backend
...
@@ -175,8 +175,8 @@ int main(int argc, char* argv[])
     auto delta = -learning_rate * loss;

     // Updates
-    ngraph::autodiff::Adjoints adjoints(NodeVector{loss},
-                                        NodeVector{delta});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{loss},
+                                        OutputVector{delta});
     auto grad_W0 = adjoints.backprop_node(W0);
     auto grad_b0 = adjoints.backprop_node(b0);
     auto grad_W1 = adjoints.backprop_node(W1);
@@ -231,7 +231,7 @@ int main(int argc, char* argv[])
     NodeMap train_node_map;
     auto train_function = clone_function(
         Function(
-            NodeVector{loss, softmax, W0_next, b0_next, W1_next, b1_next},
+            OutputVector{loss, softmax, W0_next, b0_next, W1_next, b1_next},
             ParameterVector{X, Y, N, learning_rate, W0, b0, W1, b1}),
         train_node_map);
     auto train_exec = backend->compile(train_function);
@@ -240,7 +240,7 @@ int main(int argc, char* argv[])

     // X, W0, b0, W1, b1 -> softmax
     NodeMap inference_node_map;
     auto inference_function = clone_function(
-        Function(NodeVector{softmax}, ParameterVector{X, W0, b0, W1, b1}),
+        Function(OutputVector{softmax}, ParameterVector{X, W0, b0, W1, b1}),
         inference_node_map);
     auto inference_exec = backend->compile(inference_function);
...
@@ -172,8 +172,8 @@ int main(int argc, const char* argv[])
     auto delta = -learning_rate * loss;

     // Updates
-    ngraph::autodiff::Adjoints adjoints(NodeVector{loss},
-                                        NodeVector{delta});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{loss},
+                                        OutputVector{delta});
     auto W0_next = W0 + adjoints.backprop_node(W0);
     auto b0_next = b0 + adjoints.backprop_node(b0);
     auto W1_next = W1 + adjoints.backprop_node(W1);
@@ -218,7 +218,7 @@ int main(int argc, const char* argv[])
     NodeMap train_node_map;
     auto train_function = clone_function(
         Function(
-            NodeVector{loss, softmax, W0_next, b0_next, W1_next, b1_next},
+            OutputVector{loss, softmax, W0_next, b0_next, W1_next, b1_next},
             ParameterVector{X, Y, N, learning_rate, W0, b0, W1, b1}),
         train_node_map);
     auto train_exec = backend->compile(train_function);
@@ -227,7 +227,7 @@ int main(int argc, const char* argv[])

     // X, W0, b0, W1, b1 -> softmax
     NodeMap inference_node_map;
     auto inference_function = clone_function(
-        Function(NodeVector{softmax}, ParameterVector{X, W0, b0, W1, b1}),
+        Function(OutputVector{softmax}, ParameterVector{X, W0, b0, W1, b1}),
         inference_node_map);
     auto inference_exe = backend->compile(inference_function);
...
@@ -99,8 +99,8 @@ Once the graph is built, we need to package it in a ``Function``:
    :lines: 35-36

 The first argument to the constructor specifies the nodes that the function will
-return; in this case, the product. A ``NodeVector`` is a vector of shared
-pointers of ``op::Node``. The second argument specifies the parameters of the
+return; in this case, the product. An ``OutputVector`` is a vector of references to
+outputs of ``op::Node``. The second argument specifies the parameters of the
 function, in the order they are to be passed to the compiled function. A
 ``ParameterVector`` is a vector of shared pointers to ``op::Parameter``.
...
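
Put together, packaging a small graph under the new signature looks like this; a minimal sketch mirroring the tutorial code above (shapes and element type are illustrative assumptions):

    // Minimal sketch mirroring the tutorial: build (a + b) * c and wrap it
    // in a Function; shapes and element type are illustrative.
    #include <ngraph/ngraph.hpp>

    using namespace ngraph;

    int main()
    {
        auto shape = Shape{2, 3};
        auto a = std::make_shared<op::Parameter>(element::f32, shape);
        auto b = std::make_shared<op::Parameter>(element::f32, shape);
        auto c = std::make_shared<op::Parameter>(element::f32, shape);
        auto t0 = std::make_shared<op::Add>(a, b);
        auto t1 = std::make_shared<op::Multiply>(t0, c);

        // First argument: the outputs the function returns (an OutputVector);
        // second argument: the parameters, in the order callers pass them.
        auto f = std::make_shared<Function>(OutputVector{t1},
                                            ParameterVector{a, b, c});
        return 0;
    }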
@@ -51,11 +51,6 @@ OutputVector make_zeros(std::shared_ptr<Node> x)
     return zeros;
 }

-autodiff::Adjoints::Adjoints(const NodeVector& ys, const NodeVector& cs)
-    : Adjoints(OutputVector(ys.begin(), ys.end()), OutputVector(cs.begin(), cs.end()))
-{
-}
-
 autodiff::Adjoints::Adjoints(const OutputVector& ys, const OutputVector& cs)
 {
     if (ys.size() != cs.size())
...
@@ -46,8 +46,6 @@ namespace ngraph
            /// \param c An expression for where to evaluate the derivatives
            Adjoints(const OutputVector& y, const OutputVector& c);

-           Adjoints(const NodeVector& y, const NodeVector& c);
-
            Adjoints(const Adjoints& adjoints) = default;
            Adjoints& operator=(const Adjoints& adjoints) = default;
            Adjoints() = default;
...
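
With the NodeVector overload gone, call sites that still hold node pointers perform the conversion themselves; this is exactly what the deleted delegating constructor did internally. A hedged sketch (the helper is illustrative, not from this diff):

    // Sketch of the caller-side conversion that replaces the removed
    // NodeVector constructor; make_adjoints is an illustrative helper.
    #include <ngraph/autodiff/adjoints.hpp>
    #include <ngraph/ngraph.hpp>

    using namespace ngraph;

    autodiff::Adjoints make_adjoints(const NodeVector& ys, const NodeVector& cs)
    {
        // shared_ptr<Node> converts implicitly to Output<Node>, so an
        // iterator-range construction is all the old overload was doing.
        return autodiff::Adjoints(OutputVector(ys.begin(), ys.end()),
                                  OutputVector(cs.begin(), cs.end()));
    }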
@@ -41,6 +41,30 @@ Function::Function(const ResultVector& results,
     init();
 }

+Function::Function(const OutputVector& results,
+                   const ParameterVector& parameters,
+                   const std::string& name)
+    : m_results(results.size())
+    , m_parameters(parameters)
+    , m_temporary_pool_size(0)
+    , m_instance_id(m_next_instance_id.fetch_add(1))
+    , m_name(name)
+    , m_unique_name("Function_" + to_string(m_instance_id))
+{
+    if (std::any_of(results.cbegin(), results.cend(), [](Output<Node> n) {
+            return std::dynamic_pointer_cast<op::Result>(n.get_node_shared_ptr());
+        }))
+    {
+        throw ngraph_error(
+            " Results already contain op::Results. Use a c-tor that takes a ResultVector");
+    }
+    std::transform(results.begin(), results.end(), m_results.begin(), [](Output<Node> n) {
+        return std::make_shared<op::Result>(n);
+    });
+    init();
+}
+
 Function::Function(const NodeVector& results,
                    const ParameterVector& parameters,
                    const std::string& name)
@@ -208,6 +232,11 @@ shared_ptr<Node> Function::get_output_op(size_t i) const
     return m_results.at(i);
 }

+Output<Node> Function::output(size_t i) const
+{
+    return m_results.at(i);
+}
+
 shared_ptr<Node> Function::get_result() const
 {
     if (m_results.size() != 1)
...
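
The new accessor returns an Output<Node> handle rather than a shared_ptr to the op::Result node, which is why call sites below switch from '->' to '.' when querying element type and shape. A small sketch of both styles (the function name is illustrative):

    // Sketch: inspecting a function's first output before and after this
    // commit; inspect_first_output is an illustrative name.
    #include <ngraph/ngraph.hpp>

    using namespace ngraph;

    void inspect_first_output(const std::shared_ptr<Function>& f)
    {
        // Old style: shared_ptr<Node> to the Result op, hence '->'.
        std::shared_ptr<Node> result_op = f->get_output_op(0);
        const element::Type& et_old = result_op->get_element_type();

        // New style: Output<Node> handle, hence '.'.
        Output<Node> y = f->output(0);
        const element::Type& et_new = y.get_element_type();
        Shape s = y.get_shape();
        (void)et_old; (void)et_new; (void)s;
    }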
@@ -37,6 +37,10 @@ namespace ngraph
                 const ParameterVector& parameters,
                 const std::string& name = "");

+       Function(const OutputVector& results,
+                const ParameterVector& parameters,
+                const std::string& name = "");
+
        Function(const std::shared_ptr<Node>& result,
                 const ParameterVector& parameters,
                 const std::string& name = "");
@@ -55,6 +59,8 @@ namespace ngraph
        /// Return the op that generates output i
        std::shared_ptr<Node> get_output_op(size_t i) const;

+       Output<Node> output(size_t i) const;
+
        /// Return the element type of output i
        const element::Type& get_output_element_type(size_t i) const;
...
@@ -733,8 +733,8 @@ NGRAPH_TEST(${BACKEND_NAME}, batch_norm_bprop_n4c3h2w2)
     auto C = std::make_shared<op::Parameter>(element::f32, shape_r);
     auto zero = ngraph::make_zero(bn_dgamma->get_element_type(), bn_dgamma->get_shape());

-    ngraph::autodiff::Adjoints adjoints(NodeVector{bn_dx, bn_dgamma, bn_dbeta},
-                                        NodeVector{C, zero, zero});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{bn_dx, bn_dgamma, bn_dbeta},
+                                        OutputVector{C, zero, zero});
     auto dinput = adjoints.backprop_node(input);
     auto dgamma = adjoints.backprop_node(gamma);
...
@@ -257,10 +257,10 @@ NGRAPH_TEST(${BACKEND_NAME}, divide_adjoint_stability)
     auto B = make_shared<op::Parameter>(element::f32, shape);
     auto f = make_shared<Function>(make_shared<op::Divide>(A, B), ParameterVector{A, B});

-    auto Y_out = f->get_output_op(0);
+    auto Y_out = f->output(0);
     auto Xs = f->get_parameters();
-    auto C = std::make_shared<op::Parameter>(Y_out->get_element_type(), Y_out->get_shape());
-    ngraph::autodiff::Adjoints adjoints(NodeVector{Y_out}, NodeVector{C});
+    auto C = std::make_shared<op::Parameter>(Y_out.get_element_type(), Y_out.get_shape());
+    ngraph::autodiff::Adjoints adjoints(OutputVector{Y_out}, OutputVector{C});
     std::vector<std::shared_ptr<Node>> dYdXs(Xs.size());
     transform(
         Xs.begin(), Xs.end(), dYdXs.begin(), [C, &adjoints](const std::shared_ptr<Node>& X) {
...
@@ -507,7 +507,8 @@ TEST(cpu_fusion, conv_bias_bprop_n1c1h3w3)
     auto f = make_shared<Function>(
         convolution_bias, ParameterVector{conv_test.data, conv_test.weights, conv_test.bias});

-    ngraph::autodiff::Adjoints adjoints(NodeVector{convolution_bias}, NodeVector{conv_test.delta});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{convolution_bias},
+                                        OutputVector{conv_test.delta});
     auto d_data = adjoints.backprop_node(conv_test.data);
     auto d_weights = adjoints.backprop_node(conv_test.weights);
@@ -546,7 +547,7 @@ TEST(cpu_fusion, conv_bias_bprop)
     pass_manager.register_pass<pass::VisualizeTree>("conv_bias_bprop_fusion.png");
     auto f = make_shared<Function>(conv_bias, ParameterVector{data_batch, filters, bias});

-    ngraph::autodiff::Adjoints adjoints(NodeVector{conv_bias}, NodeVector{delta});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{conv_bias}, OutputVector{delta});
     auto d_data = adjoints.backprop_node(data_batch);
     auto d_weights = adjoints.backprop_node(filters);
@@ -1452,7 +1453,7 @@ TEST(cpu_fusion, max_pool_with_indices)
     auto max_pool = std::make_shared<op::MaxPool>(input, window_shape);
     auto C = std::make_shared<op::Parameter>(element::f32, max_pool->get_shape());

-    ngraph::autodiff::Adjoints adjoints(NodeVector{max_pool}, NodeVector{C});
+    ngraph::autodiff::Adjoints adjoints(ngraph::OutputVector{max_pool}, ngraph::OutputVector{C});
     auto dinput = adjoints.backprop_node(input);
@@ -1789,14 +1790,14 @@ static std::shared_ptr<ngraph::Function> make_forward_function()
     return std::make_shared<Function>(NodeVector{max_pool, neg, absn}, ParameterVector{input});
 }

-static std::pair<std::shared_ptr<ngraph::Function>, std::vector<std::shared_ptr<ngraph::Node>>>
+static std::pair<std::shared_ptr<ngraph::Function>, OutputVector>
     make_backward_function(std::shared_ptr<ngraph::Function> f)
 {
     // get parameters
     std::vector<std::shared_ptr<ngraph::op::Parameter>> back_parameters = f->get_parameters();

-    ngraph::NodeVector adjoints;
-    ngraph::NodeVector outputs;
+    ngraph::OutputVector adjoints;
+    ngraph::OutputVector outputs;
     for (auto Y : f->get_results())
     {
         // Get the output
@@ -1809,7 +1810,7 @@ static std::pair<std::shared_ptr<ngraph::Function>, std::vector<std::shared_ptr<
     ngraph::autodiff::Adjoints adjoint{outputs, adjoints};

     // Perform autodiff
-    std::vector<std::shared_ptr<Node>> dYdXs(back_parameters.size());
+    OutputVector dYdXs(back_parameters.size());
     transform(back_parameters.begin(),
               back_parameters.end(),
               dYdXs.begin(),
@@ -1818,7 +1819,8 @@ static std::pair<std::shared_ptr<ngraph::Function>, std::vector<std::shared_ptr<
     // create the backward function
     std::vector<std::shared_ptr<ngraph::op::Parameter>> param_adjoints;
     for (auto n : adjoints)
-        param_adjoints.push_back(std::dynamic_pointer_cast<ngraph::op::Parameter>(n));
+        param_adjoints.push_back(
+            std::dynamic_pointer_cast<ngraph::op::Parameter>(n.get_node_shared_ptr()));
     back_parameters.insert(back_parameters.begin(), param_adjoints.begin(), param_adjoints.end());

     return {std::make_shared<ngraph::Function>(dYdXs, back_parameters), adjoints};
@@ -2703,7 +2705,7 @@ void sigmoid_multiply_fusion_backward_compute(runtime::Backend* backend,
     auto sigmoid_mul =
         make_shared<op::SigmoidMultiply>(input_0_alt, input_1_alt, input_0_type, input_1_type);

-    ngraph::autodiff::Adjoints adjoints(NodeVector{sigmoid_mul}, NodeVector{delta_param});
+    ngraph::autodiff::Adjoints adjoints(OutputVector{sigmoid_mul}, OutputVector{delta_param});
     auto d_input_0 = adjoints.backprop_node(input_0_adjoint);
     auto d_input_1 = adjoints.backprop_node(input_1_adjoint);
     auto df = make_shared<Function>(NodeVector{d_input_0, d_input_1}, back_params);
...
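
A recurring pattern in these test updates: an Output<Node> no longer is a node, so code that needs a downcast first recovers the producing node with get_node_shared_ptr(). A hedged sketch of that idiom (the helper name is illustrative):

    // Sketch: recovering the producing node from an Output<Node> before a
    // dynamic_pointer_cast, as the updated make_backward_function does.
    #include <ngraph/ngraph.hpp>

    using namespace ngraph;

    std::shared_ptr<op::Parameter> as_parameter(const Output<Node>& value)
    {
        // Returns nullptr when the producer is not an op::Parameter.
        return std::dynamic_pointer_cast<op::Parameter>(value.get_node_shared_ptr());
    }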
@@ -144,7 +144,7 @@ namespace ngraph
                // df/dX*
                std::vector<std::shared_ptr<Node>> df_output_params;

-               Adjoints adjoints(NodeVector{f->get_output_op(0)}, NodeVector{c_param});
+               Adjoints adjoints(OutputVector{f->output(0)}, OutputVector{c_param});

                // for each x "of interest"
                for (auto x : indep_params)
...
@@ -32,10 +32,10 @@ using namespace ngraph;

 std::shared_ptr<Function> autodiff::backprop_function(const std::shared_ptr<Function>& f)
 {
-    auto Y_out = f->get_output_op(0);
+    auto Y_out = f->output(0);
     auto Xs = f->get_parameters();
-    auto C = std::make_shared<op::Parameter>(Y_out->get_element_type(), Y_out->get_shape());
-    Adjoints adjoints(NodeVector{Y_out}, NodeVector{C});
+    auto C = std::make_shared<op::Parameter>(Y_out.get_element_type(), Y_out.get_shape());
+    Adjoints adjoints(OutputVector{Y_out}, OutputVector{C});
     std::vector<std::shared_ptr<Node>> dYdXs(Xs.size());
     transform(Xs.begin(), Xs.end(), dYdXs.begin(), [C, &adjoints](const std::shared_ptr<Node>& X) {
         return adjoints.backprop_node(X);
...