// ref_lrn.cpp
/*******************************************************************************
* Copyright 2016-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include <assert.h>
#include <math.h>

#include "c_types_map.hpp"
#include "mkldnn_thread.hpp"
#include "type_helpers.hpp"

#include "ref_lrn.hpp"

namespace mkldnn {
namespace impl {
namespace cpu {

/*
 * Computes omega^(-beta), i.e. 1 / omega^beta.
 *
 * omega: base of the power.
 * beta:  exponent magnitude; the result uses its negation.
 *
 * The exponent 0.75 is special-cased so that the result is obtained from two
 * square roots and one division instead of a general powf() call.
 */
static inline float fast_negative_powf(float omega, float beta) {
    /*
     * omega^(-3/4)
     *   = 1.0f / sqrtf(omega) * sqrtf(1.0f / sqrtf(omega))
     *   = sqrtf(1.0f / sqrtf(omega)) / sqrtf(omega)
     *   = sqrtf(1.0f / sqrtf(omega) / omega)
     *   = sqrtf(1.0f / (sqrtf(omega) * omega))
     */
    // Exact comparison is intentional: only the literal 0.75 takes the
    // shortcut, every other exponent goes through powf().
    if (beta == 0.75f)
        return sqrtf(1.0f / (sqrtf(omega) * omega));

    return 1.0f / powf(omega, beta);
}

template <impl::data_type_t data_type>
template <mkldnn_memory_format_t fmt>
50
void ref_lrn_fwd_t<data_type>::execute_forward() const {
openvino-pushbot's avatar
openvino-pushbot committed
51 52 53 54 55 56 57
    using namespace alg_kind;
    using namespace memory_format;

    auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
    auto dst = reinterpret_cast<data_t*>(this->memory(0));
    auto ws = reinterpret_cast<data_t*>(this->memory(1));

58 59
    const memory_desc_wrapper data_d(pd()->src_pd());
    const memory_desc_wrapper ws_d(pd()->workspace_pd());
openvino-pushbot's avatar
openvino-pushbot committed
60 61
    MAYBE_UNUSED(ws_d);

62 63 64
    const int C = pd()->C();
    const int H = pd()->H();
    const int W = pd()->W();
openvino-pushbot's avatar
openvino-pushbot committed
65
    const size_t stride_mb = data_d.blocking_desc().strides[0][0];
66
    const bool across_channels = pd()->desc()->alg_kind == lrn_across_channels;
openvino-pushbot's avatar
openvino-pushbot committed
67 68 69 70 71 72 73 74 75 76 77 78 79 80
    constexpr int blksize = fmt == nChw16c ? 16 : 8;

    auto data_off = [&](int mb, int c, int h, int w) -> size_t {
        switch (fmt) {
        case nChw16c:
        case nChw8c: return mb * stride_mb + c / blksize * H * W * blksize
                     + h * W * blksize + w * blksize + c % blksize;
        case nchw: return mb * stride_mb + c * H * W + h * W + w;
        case nhwc: return mb * stride_mb + h * W * C + w * C + c;
        default: return data_d.off(mb, c, h, w);
        }
    };

    auto ker = [=](data_t *d, int mb, int oc, int oh, int ow) {
81 82 83
        const float alpha = static_cast<float>(pd()->desc()->lrn_alpha);
        const float beta = static_cast<float>(pd()->desc()->lrn_beta);
        const float k = static_cast<float>(pd()->desc()->lrn_k);
openvino-pushbot's avatar
openvino-pushbot committed
84

85
        const int size = pd()->desc()->local_size;
openvino-pushbot's avatar
openvino-pushbot committed
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
        const int half_size = (size - 1) / 2;

        float sum = 0;
        if (across_channels) {
            const int c_st = nstl::max(oc - half_size + 0, 0);
            const int c_en = nstl::min(oc + half_size + 1, C);

            for (int c = c_st; c < c_en; ++c) {
                const float s = src[data_off(mb, c, oh, ow)];
                sum += s * s;
            }
        } else {
            int h_st = nstl::max(oh - half_size + 0, 0);
            int h_en = nstl::min(oh + half_size + 1, H);
            int w_st = nstl::max(ow - half_size + 0, 0);
            int w_en = nstl::min(ow + half_size + 1, W);
            for (int h = h_st; h < h_en; ++h) {
                for (int w = w_st; w < w_en; ++w) {
                    const float s = src[data_off(mb, oc, h, w)];
                    sum += s * s;
                }
            }
        }
        const int summands = across_channels ? size : size * size;
        sum = k + alpha * sum / summands;
        size_t off = data_off(mb, oc, oh, ow);
        if (ws)
            ws[off] = static_cast<data_t>(sum);
        d[0] = static_cast<data_t>(src[off] * fast_negative_powf(sum, beta));
    };

117
    const int MB = pd()->MB();
openvino-pushbot's avatar
openvino-pushbot committed
118
    if (fmt == nChw16c || fmt == nChw8c) {
Alexey Suhov's avatar
Alexey Suhov committed
119 120 121
        parallel_nd(MB, utils::div_up(C, blksize), H, W,
            [&](int mb, int c_blk, int h, int w) {
            int c = c_blk * blksize;
openvino-pushbot's avatar
openvino-pushbot committed
122 123 124 125 126
            const size_t off = mb * stride_mb + c * H * W
                + (h * W + w) * blksize;
            PRAGMA_OMP_SIMD()
            for (int cc = 0; cc < nstl::min(blksize, C - c); ++cc)
                ker(&dst[off + cc], mb, c + cc, h, w);
Alexey Suhov's avatar
Alexey Suhov committed
127
        });
openvino-pushbot's avatar
openvino-pushbot committed
128
    } else if (fmt == nhwc) {
Alexey Suhov's avatar
Alexey Suhov committed
129 130
        parallel_nd(MB, H, W, C,
            [&](int mb, int h, int w, int c) {
openvino-pushbot's avatar
openvino-pushbot committed
131 132
            const size_t off = mb * stride_mb + h * W * C + w * C + c;
            ker(&dst[off], mb, c, h, w);
Alexey Suhov's avatar
Alexey Suhov committed
133
        });
openvino-pushbot's avatar
openvino-pushbot committed
134
    } else {
Alexey Suhov's avatar
Alexey Suhov committed
135 136
        parallel_nd(MB, C, H, W,
            [&](int mb, int c, int h, int w) {
openvino-pushbot's avatar
openvino-pushbot committed
137 138
            const size_t off = data_off(mb, c, h, w);
            ker(&dst[off], mb, c, h, w);
Alexey Suhov's avatar
Alexey Suhov committed
139
        });
openvino-pushbot's avatar
openvino-pushbot committed
140 141 142 143 144
    }
}

template <impl::data_type_t data_type>
template <mkldnn_memory_format_t fmt>
145
void ref_lrn_bwd_t<data_type>::execute_backward() const {
openvino-pushbot's avatar
openvino-pushbot committed
146 147 148 149 150 151 152
    using namespace alg_kind;
    using namespace memory_format;

    auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
    auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
    auto diff_src = reinterpret_cast<data_t*>(this->memory(0));

153 154
    const memory_desc_wrapper data_d(pd()->src_pd());
    const memory_desc_wrapper diff_data_d(pd()->diff_dst_pd());
openvino-pushbot's avatar
openvino-pushbot committed
155 156
    MAYBE_UNUSED(diff_data_d);

157 158 159 160
    const int MB = pd()->MB();
    const int C = pd()->C();
    const int H = pd()->H();
    const int W = pd()->W();
openvino-pushbot's avatar
openvino-pushbot committed
161 162 163
    const size_t stride_mb = data_d.blocking_desc().strides[0][0];
    constexpr int blksize = fmt == nChw16c ? 16 : 8;

164 165 166 167
    const float alpha = static_cast<float>(pd()->desc()->lrn_alpha);
    const float beta = static_cast<float>(pd()->desc()->lrn_beta);
    const float k = static_cast<float>(pd()->desc()->lrn_k);
    const int kernel_size = pd()->desc()->local_size;
openvino-pushbot's avatar
openvino-pushbot committed
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
    const int half_ksize = (kernel_size - 1) / 2;

    auto data_off = [&](int mb, int c, int h, int w) -> size_t {
        switch (fmt) {
        case nChw16c:
        case nChw8c: return mb * stride_mb + c/blksize * H * W * blksize
                     + h * W * blksize + w * blksize + c%blksize;
        case nchw: return mb * stride_mb + c * H * W + h * W + w;
        case nhwc: return mb * stride_mb + h * W * C + w * C + c;
        default: return data_d.off(mb, c, h, w);
        }
    };

    auto ker = [=](data_t *d, int mb, int oc, int oh, int ow) {
        const int c_st = nstl::max(oc - half_ksize + 0, 0);
        const int c_en = nstl::min(oc + half_ksize + 1, C);

        float A = 0, B = 0, omega_mid = 0;
        for (int c = c_st; c < c_en; c++) {
            float sum = 0.0;
            const int i_st = nstl::max(c - half_ksize, 0);
            const int i_en = nstl::min(c + kernel_size - half_ksize, C);

            for (int i = i_st; i < i_en; ++i) {
                const float value = src[data_off(mb, i, oh, ow)];
                sum += value * value;
            }
            const float omega = static_cast<float>(k + sum * alpha / kernel_size);
            if (c == oc) omega_mid = omega;
            float t = src[data_off(mb, c, oh, ow)]
                   * fast_negative_powf(omega, beta);
            B += 1.0f / omega * t * diff_dst[data_off(mb, c, oh, ow)];
        }

        const size_t off = data_off(mb, oc, oh, ow);
        A = fast_negative_powf(omega_mid, beta) * diff_dst[off];
        B *= src[off];
        B *= (2.0f * alpha * beta) / kernel_size;
        *d = static_cast<data_t>(A - B); // final cast down to data_t
    };

    if (fmt == nChw16c || fmt == nChw8c) {
Alexey Suhov's avatar
Alexey Suhov committed
210 211 212
        parallel_nd(MB, utils::div_up(C, blksize), H, W,
            [&](int mb, int c_blk, int h, int w) {
            int c = c_blk * blksize;
openvino-pushbot's avatar
openvino-pushbot committed
213 214 215 216 217
            const size_t off = mb * stride_mb + c * H * W +
                (h * W + w) * blksize;
            PRAGMA_OMP_SIMD()
            for (int cc = 0; cc < nstl::min(blksize, C - c); ++cc)
                ker(&diff_src[off + cc], mb, c + cc, h, w);
Alexey Suhov's avatar
Alexey Suhov committed
218
        });
openvino-pushbot's avatar
openvino-pushbot committed
219
    } else if (fmt == nhwc) {
Alexey Suhov's avatar
Alexey Suhov committed
220 221
        parallel_nd(MB, H, W, C,
            [&](int mb, int h, int w, int c) {
openvino-pushbot's avatar
openvino-pushbot committed
222 223
            const size_t off = mb * stride_mb + h * W * C + w * C + c;
            ker(&diff_src[off], mb, c, h, w);
Alexey Suhov's avatar
Alexey Suhov committed
224
        });
openvino-pushbot's avatar
openvino-pushbot committed
225
    } else {
Alexey Suhov's avatar
Alexey Suhov committed
226 227
        parallel_nd(MB, C, H, W,
            [&](int mb, int c, int h, int w) {
openvino-pushbot's avatar
openvino-pushbot committed
228 229
            const size_t off = data_off(mb, c, h, w);
            ker(&diff_src[off], mb, c, h, w);
Alexey Suhov's avatar
Alexey Suhov committed
230
        });
openvino-pushbot's avatar
openvino-pushbot committed
231 232 233
    }
}

// Explicit instantiations: f32 forward and backward kernels for each memory
// format that has a closed-form offset formula, plus `any`, which takes the
// generic data_d.off() path.
template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::nChw16c>() const;
template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::nChw8c>() const;
template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::nchw>() const;
template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::nhwc>() const;
template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::any>() const;
template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::nChw16c>() const;
template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::nChw8c>() const;
template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::nchw>() const;
template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::nhwc>() const;
template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::any>() const;

}
}
}

// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s