//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#pragma once

#include <algorithm>
#include <memory>
#include <unordered_map>

#include "ngraph/autodiff/adjoints.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/type/element_type.hpp"
#include "ngraph/util.hpp"
#include "util/all_close.hpp"
#include "util/test_tools.hpp"

namespace ngraph
{
    class Node;
    class Function;

    // Caches keyed by the original function f (note: file-scope statics in a
    // header, so each translation unit gets its own copy)
    static std::unordered_map<std::shared_ptr<Function>, std::shared_ptr<Function>> s_df_map;
    static std::unordered_map<std::shared_ptr<Function>, std::shared_ptr<Function>> s_clone_bwd_map;

    namespace runtime
    {
        class Backend;
        class Manager;
    }

    namespace autodiff
    {
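        // Computes the Jacobian df/dX* one adjoint element at a time: df takes
        // (c, ...) as inputs and produces df/dx for each x in indep_params; this
        // helper runs df with c set to each standard basis vector of y in turn
        // and concatenates the resulting rows into one y-by-x tensor per x.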
        template <typename T>
        std::vector<std::shared_ptr<runtime::Tensor>>
            get_autodiff(runtime::Backend* backend,
                         std::shared_ptr<Function>& df,
                         const std::vector<std::shared_ptr<runtime::Tensor>>& df_input_args,
                         const std::vector<std::shared_ptr<op::Parameter>>& indep_params)
        {
            // df/dX* = f'(c, ...)
            // using X* to denote all x "of interest" (represented by indep_params)

            // return value for this function
            std::vector<std::shared_ptr<runtime::Tensor>> results;

            // adjoint
            auto c_arg = df_input_args[0];
            auto y_shape = c_arg->get_shape();

            // df/dX* arguments
            std::vector<std::shared_ptr<runtime::Tensor>> df_output_args;

            // for each x "of interest"
            for (auto x : indep_params)
            {
                // add df/dx to df/dX* arguments
                auto x_shape = x->get_shape();
                df_output_args.push_back(backend->create_tensor<T>(x_shape));

                // each element of y has a derivative with respect to each element of x
                // hence, create a y by x sized tensor for this result
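                // e.g. for illustrative shapes y_shape = {2,3} and x_shape = {4},
                // the result tensor has shape {2,3,4}, where element (i,j,k)
                // holds dy[i,j]/dx[k]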
                auto y_by_x_shape = y_shape;
                y_by_x_shape.insert(y_by_x_shape.end(), x_shape.begin(), x_shape.end());
                results.push_back(backend->create_tensor<T>(y_by_x_shape));
            }

            // create storage for results
            std::vector<std::vector<T>> result_vect;
            std::vector<typename std::vector<T>::iterator> result_pos;
            for (auto result : results)
            {
                result_vect.push_back(read_vector<T>(result));
                result_pos.push_back(result_vect.back().begin());
            }

            // get the adjoint and force all elements to zero
            auto c_vec = read_vector<T>(c_arg);
            std::fill(c_vec.begin(), c_vec.end(), static_cast<T>(0));

            // compile df once and cache the executable for reuse
            static std::unordered_map<std::shared_ptr<Function>,
                                      std::shared_ptr<runtime::Executable>>
                s_compiled_functions;
            auto it = s_compiled_functions.find(df);
            std::shared_ptr<runtime::Executable> df_handle;
            if (it == s_compiled_functions.end())
            {
                df_handle = backend->compile(df);
                s_compiled_functions.insert({df, df_handle});
            }
            else
            {
                df_handle = it->second;
            }

            // for each element of the adjoint
            // same as saying for each element of y
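            // setting c to the one-hot vector e_i and running backprop computes
            // the vector-Jacobian product (e_i)^T J, i.e. row i of the Jacobian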
            for (size_t i = 0; i < c_vec.size(); i++)
            {
                // set a single adjoint element
                c_vec[i] = 1;
                write_vector(c_arg, c_vec);

                // call df/dX* = f'(c, ...) for this adjoint element
                df_handle->call_with_validate(df_output_args, df_input_args);

                // reset the adjoint element
                c_vec[i] = 0;
                write_vector(c_arg, c_vec);

                // for each result
                // same as saying for each x "of interest"
                for (size_t j = 0; j < results.size(); j++)
                {
                    // copy df/dx to storage for this element of y
                    auto dfdx = read_vector<T>(df_output_args[j]);
                    result_pos[j] = std::copy(dfdx.begin(), dfdx.end(), result_pos[j]);
                }
            }

            // copy storage to results and return
            for (size_t j = 0; j < results.size(); j++)
            {
                write_vector(results[j], result_vect[j]);
            }
            return results;
        }

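        // Computes df/dX* for y = f(X) two ways: directly via f'(c, X), and via
        // the fprop-cache pair (y, cached) = f(X), df/dX* = f'(c, cached);
        // throws if the two results disagree, otherwise returns the Jacobians.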
        template <typename T>
        std::vector<std::shared_ptr<runtime::Tensor>>
            backprop_derivative(runtime::Backend* backend,
                                const std::shared_ptr<Function>& f,
                                const std::vector<std::shared_ptr<runtime::Tensor>>& f_input_args,
                                const std::vector<std::shared_ptr<op::Parameter>>& indep_params)
        {
            // y = f(X)
            // using X (upper case) to denote all parameters of f (represented by f_input_args)
            // using x (lower case) to denote an individual parameter of f
            // using X* to denote all x "of interest" (represented by indep_params)
            Shape y_shape = f->get_output_shape(0);

            // adjoint
            auto c_param = std::make_shared<op::Parameter>(element::from<T>(), y_shape);
            auto c_arg = backend->create_tensor<T>(y_shape);

            // df/dX*
            std::vector<std::shared_ptr<Node>> df_output_params;

            Adjoints adjoints(NodeVector{f->get_output_op(0)}, NodeVector{c_param});

            // for each x "of interest"
            for (auto x : indep_params)
            {
                // add df/dx to df/dX*
                df_output_params.push_back(adjoints.backprop_node(x));
            }

            // (c, X)
            std::vector<std::shared_ptr<op::Parameter>> df_input_params = f->get_parameters();
            df_input_params.insert(df_input_params.begin(), c_param);

            // df/dX* = f'(c, X)
            if (!s_df_map[f])
            {
                s_df_map[f] = std::make_shared<Function>(df_output_params, df_input_params);
            }
            auto df = s_df_map[f];

            // (c, X) arguments
            std::vector<std::shared_ptr<runtime::Tensor>> df_input_args = f_input_args;
            df_input_args.insert(df_input_args.begin(), c_arg);

            // call f'(c,X) to get df/dX*
            auto dfdx = get_autodiff<T>(backend, df, df_input_args, indep_params);

            // create fprop cache
            // creates modified forward function -> (y, cached) = f(x)
            // creates modified backward function -> df/dX* = f'(c, cached)
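            // the split lets f' reuse forward intermediates instead of recomputing
            // them: the modified f emits them once, and f' consumes them as inputs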
            auto fprop_cache = cache_fprop(f, df);

            // (y, cached) arguments
            std::vector<std::shared_ptr<runtime::Tensor>> mod_f_output_args;
            mod_f_output_args.push_back(backend->create_tensor<T>(y_shape));

            // (c, cached) arguments
            std::vector<std::shared_ptr<runtime::Tensor>> mod_df_input_args = df_input_args;

            // add cached nodes to both modified f output and modified f' input arguments
            for (auto node : fprop_cache.fprop_output_nodes)
            {
                auto tv = backend->create_tensor(node->get_element_type(), node->get_shape());
                mod_f_output_args.push_back(tv);
                mod_df_input_args.push_back(tv);
            }
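            // each cached tensor is shared: written as an output of the modified
            // f, then read back as an input of the modified f'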

            // compile and run modified (y, cached) = f(x)
            static std::unordered_map<std::shared_ptr<Function>, std::shared_ptr<Function>>
                s_clone_fwd_map;
            if (!s_clone_fwd_map[f])
            {
                s_clone_fwd_map[f] = clone_function(*fprop_cache.fprop);
            }
            auto clone_fwd = s_clone_fwd_map[f];
            static std::unordered_map<std::shared_ptr<Function>,
                                      std::shared_ptr<runtime::Executable>>
                s_compiled_functions;
            auto it = s_compiled_functions.find(clone_fwd);
            std::shared_ptr<runtime::Executable> clone_fwd_handle;
            if (it == s_compiled_functions.end())
            {
                clone_fwd_handle = backend->compile(clone_fwd);
                s_compiled_functions.insert({clone_fwd, clone_fwd_handle});
            }
            else
            {
                clone_fwd_handle = it->second;
            }

            clone_fwd_handle->call_with_validate(mod_f_output_args, f_input_args);

            // call modified f'(c, cached) to get df/dX*
            if (!s_clone_bwd_map[f])
            {
                s_clone_bwd_map[f] = clone_function(*fprop_cache.bprop);
            }
            auto clone_bwd = s_clone_bwd_map[f];
            auto cache_dfdx = get_autodiff<T>(backend, clone_bwd, mod_df_input_args, indep_params);

            // compare with numpy.allclose defaults: rtol = 1e-5, atol = 1e-8
            const T numpy_rtol = static_cast<const T>(1e-5f);
            const T numpy_atol = static_cast<const T>(1e-8f);
            auto close = ngraph::test::all_close<T>(dfdx, cache_dfdx, numpy_rtol, numpy_atol);
            if (!close)
            {
                throw ngraph_error(
                    "Derivatives mismatch between cache and non-cache bprop functions");
            }

            return dfdx;
        }
    }
}
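
// Example usage (a sketch; the backend name and the function/tensors x0, x1
// are assumptions about the caller's test setup, not part of this header):
//
//     auto backend = runtime::Backend::create("INTERPRETER");
//     auto f = ...; // Function computing y = f(x0, x1)
//     auto jacobians = autodiff::backprop_derivative<float>(
//         backend.get(), f, {x0_tensor, x1_tensor}, f->get_parameters());
//     // jacobians[k] holds dy/dx_k with shape y_shape followed by x_k_shape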