Commit 27fee946 authored by adstraw, committed by Matthew Brookhart

check derivatives from bprop against derivatives from fprop cache bprop (#469)

* compare derivatives from bprop and bprop with fprop cache

* code format
parent e054366e
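
The new check runs backprop twice, once through the plain bprop function f'(c, X) and once through the fprop-cache bprop function f'(c, cached), then asserts that the two derivative sets agree. Below is a minimal stand-alone sketch of that comparison; all_close_sketch and check_derivatives are hypothetical names, standing in for ngraph::test::all_close over TensorViews.

// Sketch only: a stand-in for ngraph::test::all_close, checking
// |a[i] - b[i]| <= atol + rtol * |b[i]| for every element, with the
// numpy-style default tolerances used by this commit.
#include <cmath>
#include <cstddef>
#include <stdexcept>
#include <vector>

bool all_close_sketch(const std::vector<float>& a,
                      const std::vector<float>& b,
                      float rtol = 1e-5f,
                      float atol = 1e-8f)
{
    if (a.size() != b.size())
    {
        return false;
    }
    for (std::size_t i = 0; i < a.size(); ++i)
    {
        if (std::fabs(a[i] - b[i]) > atol + rtol * std::fabs(b[i]))
        {
            return false;
        }
    }
    return true;
}

// Mirrors the new check: derivatives from the plain bprop function (dfdx)
// must match derivatives from the fprop-cache bprop function (cache_dfdx).
void check_derivatives(const std::vector<float>& dfdx,
                       const std::vector<float>& cache_dfdx)
{
    if (!all_close_sketch(dfdx, cache_dfdx))
    {
        throw std::runtime_error(
            "Derivatives mismatch between cache and non-cache bprop functions");
    }
}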
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#pragma once
@@ -21,6 +21,7 @@
#include "ngraph/log.hpp"
#include "ngraph/types/element_type.hpp"
#include "ngraph/util.hpp"
+#include "util/all_close.hpp"
#include "util/test_tools.hpp"

namespace ngraph
@@ -32,93 +33,46 @@ namespace ngraph
{
    namespace runtime
    {
        class Backend;
        class Manager;
-    } // namespace runtime
+    }

    namespace autodiff
    {
        template <typename T>
        std::vector<std::shared_ptr<runtime::TensorView>>
-            backprop_derivative(const std::shared_ptr<runtime::Manager>& manager,
-                                const std::shared_ptr<runtime::Backend>& backend,
-                                const std::shared_ptr<Function>& f,
-                                const std::vector<std::shared_ptr<runtime::TensorView>>& args,
-                                const std::vector<std::shared_ptr<op::Parameter>>& indep_params)
+            get_autodiff(const std::shared_ptr<runtime::Manager>& manager,
+                         const std::shared_ptr<runtime::Backend>& backend,
+                         std::shared_ptr<Function>& df,
+                         const std::vector<std::shared_ptr<runtime::TensorView>>& df_input_args,
+                         const std::vector<std::shared_ptr<op::Parameter>>& indep_params)
        {
-            // y = f(X)
-            // using X (upper case) to denote all parameters of f
-            // using x (lower case) to denote an individual parameter of f a.k.a. Xj
-            // NOTE: using X* to denote all x "of interest" represented by indep_params
-            Shape y_shape = f->get_output_shape(0);
+            // df/dX* = f'(c, ...)
+            // using X* to denote all x "of interest" (represented by indep_params)
+
+            // return value for this function
+            std::vector<std::shared_ptr<runtime::TensorView>> results;

            // adjoint
-            auto c_param = std::make_shared<op::Parameter>(element::from<T>(), y_shape);
-            auto c_arg = backend->make_primary_tensor_view<T>(y_shape);
+            auto c_arg = df_input_args[0];
+            auto y_shape = c_arg->get_shape();

-            // df/dX*
-            // return value for f'(X, c)
-            std::vector<std::shared_ptr<Node>> df_output_params;
+            // df/dX* arguments
            std::vector<std::shared_ptr<runtime::TensorView>> df_output_args;

-            // return value for this function
-            std::vector<std::shared_ptr<runtime::TensorView>> results;
-
            // for each x "of interest"
            for (auto x : indep_params)
            {
+                // add df/dx to df/dX* arguments
                auto x_shape = x->get_shape();
+                df_output_args.push_back(backend->make_primary_tensor_view<T>(x_shape));

                // each element of y has a derivative with respect to each element of x
                // hence, create a y by x sized tensor for this result
                auto y_by_x_shape = y_shape;
                y_by_x_shape.insert(y_by_x_shape.end(), x_shape.begin(), x_shape.end());
                results.push_back(backend->make_primary_tensor_view<T>(y_by_x_shape));
-
-                // add df/dx to df/dX*
-                df_output_params.push_back(f->get_output_op(0)->backprop_node(x, c_param));
-                df_output_args.push_back(backend->make_primary_tensor_view<T>(x_shape));
            }
-            // (X, c)
-            // input to f'(X, c)
-            std::vector<std::shared_ptr<op::Parameter>> df_input_params = f->get_parameters();
-            df_input_params.push_back(c_param);
-
-            // df/dX* = f'(X, c)
-            auto df = std::make_shared<Function>(df_output_params, df_input_params);
-
-            // create fprop cache
-            // creates modified forward function -> (y, cached) = f(x)
-            // creates modified backward function -> df/dX* = f'(c, cached)
-            auto fprop_cache = cache_fprop(f, df, {c_param});
-
-            // modified f outputs
-            std::vector<std::shared_ptr<ngraph::runtime::TensorView>> f_output_args;
-            f_output_args.push_back(backend->make_primary_tensor_view<T>(y_shape));
-
-            // modified f' inputs
-            std::vector<std::shared_ptr<ngraph::runtime::TensorView>> df_input_args;
-            df_input_args.push_back(c_arg);
-
-            // add cached nodes to both modified f outputs and modified f' inputs
-            for (auto node : fprop_cache.fprop_output_nodes)
-            {
-                auto tv = backend->make_primary_tensor_view<T>(node->get_shape());
-                df_input_args.push_back(tv);
-                f_output_args.push_back(tv);
-            }
-
-            // compile and run modified (y, cached) = f(x)
-            auto cache_fwd = manager->compile(fprop_cache.fprop);
-            auto cache_fwd_cf = backend->make_call_frame(cache_fwd);
-            cache_fwd_cf->tensor_call(args, f_output_args);
-
-            // compile modified df/dX* = f'(c, cached)
-            auto external = manager->compile(fprop_cache.bprop);
-            auto cf = backend->make_call_frame(external);

            // create storage for results
-            // * outer vector size = number of x "of interest"
-            // * inner vector size = number of elements in y * number of elements in x
            std::vector<std::vector<T>> result_vect;
            std::vector<typename std::vector<T>::iterator> result_pos;
            for (auto result : results)
@@ -127,6 +81,10 @@
                result_pos.push_back(result_vect.back().begin());
            }

+            // compile f'
+            auto external = manager->compile(df);
+            auto cf = backend->make_call_frame(external);
+
            // get adjoint and force all elements to zero
            auto c_vec = read_vector<T>(c_arg);
            fill(c_vec.begin(), c_vec.end(), 0);
@@ -163,5 +121,90 @@
            }
            return results;
        }
+
+        template <typename T>
+        std::vector<std::shared_ptr<runtime::TensorView>> backprop_derivative(
+            const std::shared_ptr<runtime::Manager>& manager,
+            const std::shared_ptr<runtime::Backend>& backend,
+            const std::shared_ptr<Function>& f,
+            const std::vector<std::shared_ptr<runtime::TensorView>>& f_input_args,
+            const std::vector<std::shared_ptr<op::Parameter>>& indep_params)
+        {
+            // y = f(X)
+            // using X (upper case) to denote all parameters of f (represented by f_input_args)
+            // using x (lower case) to denote an individual parameter of f
+            // using X* to denote all x "of interest" (represented by indep_params)
+            Shape y_shape = f->get_output_shape(0);
+
+            // adjoint
+            auto c_param = std::make_shared<op::Parameter>(element::from<T>(), y_shape);
+            auto c_arg = backend->make_primary_tensor_view<T>(y_shape);
+
+            // df/dX*
+            std::vector<std::shared_ptr<Node>> df_output_params;
+
+            // for each x "of interest"
+            for (auto x : indep_params)
+            {
+                // add df/dx to df/dX*
+                auto x_shape = x->get_shape();
+                df_output_params.push_back(f->get_output_op(0)->backprop_node(x, c_param));
+            }
+
+            // (c, X)
+            std::vector<std::shared_ptr<op::Parameter>> df_input_params = f->get_parameters();
+            df_input_params.insert(df_input_params.begin(), c_param);
+
+            // df/dX* = f'(c, X)
+            auto df = std::make_shared<Function>(df_output_params, df_input_params);
+
+            // (c, X) arguments
+            std::vector<std::shared_ptr<runtime::TensorView>> df_input_args = f_input_args;
+            df_input_args.insert(df_input_args.begin(), c_arg);
+
+            // call f'(c, X) to get df/dX*
+            auto dfdx = get_autodiff<T>(manager, backend, df, df_input_args, indep_params);
+
+            // create fprop cache
+            // creates modified forward function -> (y, cached) = f(x)
+            // creates modified backward function -> df/dX* = f'(c, cached)
+            auto fprop_cache = cache_fprop(f, df, {c_param});
+
+            // (y, cached) arguments
+            std::vector<std::shared_ptr<runtime::TensorView>> mod_f_output_args;
+            mod_f_output_args.push_back(backend->make_primary_tensor_view<T>(y_shape));
+
+            // (c, cached) arguments
+            std::vector<std::shared_ptr<runtime::TensorView>> mod_df_input_args;
+            mod_df_input_args.push_back(c_arg);
+
+            // add cached nodes to both modified f output and modified f' input arguments
+            for (auto node : fprop_cache.fprop_output_nodes)
+            {
+                auto tv = backend->make_primary_tensor_view<T>(node->get_shape());
+                mod_f_output_args.push_back(tv);
+                mod_df_input_args.push_back(tv);
+            }
+
+            // compile and run modified (y, cached) = f(x)
+            auto cache_fwd = manager->compile(fprop_cache.fprop);
+            auto cache_fwd_cf = backend->make_call_frame(cache_fwd);
+            cache_fwd_cf->tensor_call(f_input_args, mod_f_output_args);
+
+            // call modified f'(c, cached) to get df/dX*
+            auto cache_dfdx = get_autodiff<T>(
+                manager, backend, fprop_cache.bprop, mod_df_input_args, indep_params);
+
+            // compare with the numpy default tolerances; note that all_close
+            // takes rtol before atol, so name the constants accordingly
+            const auto numpy_rtol = 1e-5f;
+            const auto numpy_atol = 1e-8f;
+            auto close = ngraph::test::all_close<T>(dfdx, cache_dfdx, numpy_rtol, numpy_atol);
+            if (!close)
+            {
+                throw ngraph_error(
+                    "Derivatives mismatch between cache and non-cache bprop functions");
+            }
+
+            return dfdx;
+        }
    }
}
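
For reference, each result tensor produced above is a full Jacobian: every element of y carries a derivative with respect to every element of x, so its shape is y_shape concatenated with x_shape. A small stand-alone illustration of that shape arithmetic, with a plain vector type standing in for ngraph::Shape:

#include <cstddef>
#include <iostream>
#include <vector>

using Shape = std::vector<std::size_t>; // stand-in for ngraph::Shape

int main()
{
    // If y has shape {2, 3} and x has shape {4}, dy/dx holds one value per
    // (element of y, element of x) pair, i.e. shape {2, 3, 4}, exactly how
    // y_by_x_shape is built in get_autodiff.
    Shape y_shape{2, 3};
    Shape x_shape{4};
    Shape y_by_x_shape = y_shape;
    y_by_x_shape.insert(y_by_x_shape.end(), x_shape.begin(), x_shape.end());

    std::size_t elements = 1;
    for (auto d : y_by_x_shape)
    {
        elements *= d;
    }
    std::cout << "Jacobian elements: " << elements << std::endl; // prints 24
    return 0;
}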