prior_box_layer.cpp 11.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "../precomp.hpp"
#include "layers_common.hpp"
#include <float.h>
#include <algorithm>
#include <cmath>

namespace cv
{
namespace dnn
{

53
class PriorBoxLayerImpl : public PriorBoxLayer
Anna Petrovicheva's avatar
Anna Petrovicheva committed
54
{
55 56 57 58
public:
    bool getParameterDict(const LayerParams &params,
                          const std::string &parameterName,
                          DictValue& result)
59
    {
60
        if (!params.has(parameterName))
61
        {
62
            return false;
63
        }
64

65 66 67
        result = params.get(parameterName);
        return true;
    }
68

69 70 71 72 73 74
    template<typename T>
    T getParameter(const LayerParams &params,
                   const std::string &parameterName,
                   const size_t &idx=0,
                   const bool required=true,
                   const T& defaultValue=T())
75
    {
76 77 78
        DictValue dictValue;
        bool success = getParameterDict(params, parameterName, dictValue);
        if(!success)
79
        {
80
            if(required)
81
            {
82 83 84 85 86
                std::string message = _layerName;
                message += " layer parameter does not contain ";
                message += parameterName;
                message += " parameter.";
                CV_Error(Error::StsBadArg, message);
87
            }
88
            else
89
            {
90
                return defaultValue;
91 92
            }
        }
93
        return dictValue.get<T>(idx);
94
    }
95

96
    void getAspectRatios(const LayerParams &params)
97
    {
98 99 100
        DictValue aspectRatioParameter;
        bool aspectRatioRetieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter);
        CV_Assert(aspectRatioRetieved);
101

102
        for (int i = 0; i < aspectRatioParameter.size(); ++i)
103
        {
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
            float aspectRatio = aspectRatioParameter.get<float>(i);
            bool alreadyExists = false;

            for (size_t j = 0; j < _aspectRatios.size(); ++j)
            {
                if (fabs(aspectRatio - _aspectRatios[j]) < 1e-6)
                {
                    alreadyExists = true;
                    break;
                }
            }
            if (!alreadyExists)
            {
                _aspectRatios.push_back(aspectRatio);
                if (_flip)
                {
                    _aspectRatios.push_back(1./aspectRatio);
                }
            }
123 124
        }
    }
125 126

    void getVariance(const LayerParams &params)
127
    {
128 129 130 131 132 133
        DictValue varianceParameter;
        bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);
        CV_Assert(varianceParameterRetrieved);

        int varianceSize = varianceParameter.size();
        if (varianceSize > 1)
134
        {
135 136 137 138 139 140 141 142 143
            // Must and only provide 4 variance.
            CV_Assert(varianceSize == 4);

            for (int i = 0; i < varianceSize; ++i)
            {
                float variance = varianceParameter.get<float>(i);
                CV_Assert(variance > 0);
                _variance.push_back(variance);
            }
144 145 146
        }
        else
        {
147 148 149 150 151 152 153 154 155 156 157
            if (varianceSize == 1)
            {
                float variance = varianceParameter.get<float>(0);
                CV_Assert(variance > 0);
                _variance.push_back(variance);
            }
            else
            {
                // Set default to 0.1.
                _variance.push_back(0.1f);
            }
158 159 160
        }
    }

161 162 163 164 165
    PriorBoxLayerImpl(const LayerParams &params)
    {
        setParamsFrom(params);
        _minSize = getParameter<unsigned>(params, "min_size");
        CV_Assert(_minSize > 0);
Anna Petrovicheva's avatar
Anna Petrovicheva committed
166

167 168
        _flip = getParameter<bool>(params, "flip");
        _clip = getParameter<bool>(params, "clip");
Anna Petrovicheva's avatar
Anna Petrovicheva committed
169

170 171
        _aspectRatios.clear();
        _aspectRatios.push_back(1.);
Anna Petrovicheva's avatar
Anna Petrovicheva committed
172

173 174
        getAspectRatios(params);
        getVariance(params);
Anna Petrovicheva's avatar
Anna Petrovicheva committed
175

176
        _numPriors = _aspectRatios.size();
Anna Petrovicheva's avatar
Anna Petrovicheva committed
177

178 179 180 181 182
        _maxSize = -1;
        if (params.has("max_size"))
        {
            _maxSize = params.get("max_size").get<float>(0);
            CV_Assert(_maxSize > _minSize);
Anna Petrovicheva's avatar
Anna Petrovicheva committed
183

184 185
            _numPriors += 1;
        }
Anna Petrovicheva's avatar
Anna Petrovicheva committed
186 187
    }

188 189 190
    // Derives the layer geometry from the two inputs and shapes the output.
    //
    // inputs[0] supplies the layer height/width (dims 2 and 3); inputs[1]
    // supplies the image height/width (dims 2 and 3). outputs[0] is created
    // as a 3-D CV_32F array of shape 1 x 2 x (height * width * _numPriors * 4).
    void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
    {
        CV_Assert(inputs.size() == 2);

        const Mat &layerInput = *inputs[0];
        const Mat &imageInput = *inputs[1];

        _layerHeight = layerInput.size[2];
        _layerWidth = layerInput.size[3];

        _imageHeight = imageInput.size[2];
        _imageWidth = imageInput.size[3];

        // Distance, in image pixels, between neighbouring layer cells.
        _stepY = static_cast<float>(_imageHeight) / _layerHeight;
        _stepX = static_cast<float>(_imageWidth) / _layerWidth;

        // Four coordinates per prior, _numPriors priors per layer cell.
        _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;

        // A single set of priors is produced (first dim = 1) with two
        // channels: prior coordinates first, their variances second.
        int shape[] = { 1, 2, (int)_outChannelSize };
        outputs[0].create(3, shape, CV_32F);
    }
212

213 214 215
    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
    {
        (void)inputs; // to suppress unused parameter warning
apetrovichev's avatar
apetrovichev committed
216

217
        float* outputPtr = outputs[0].ptr<float>();
218

219 220 221
        // first prior: aspect_ratio = 1, size = min_size
        int idx = 0;
        for (size_t h = 0; h < _layerHeight; ++h)
222
        {
223
            for (size_t w = 0; w < _layerWidth; ++w)
224
            {
225 226 227 228
                _boxWidth = _boxHeight = _minSize;

                float center_x = (w + 0.5) * _stepX;
                float center_y = (h + 0.5) * _stepY;
229 230 231 232 233 234 235 236 237
                // xmin
                outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
                // ymin
                outputPtr[idx++] = (center_y - _boxHeight / 2.) / _imageHeight;
                // xmax
                outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth;
                // ymax
                outputPtr[idx++] = (center_y + _boxHeight / 2.) / _imageHeight;

238
                if (_maxSize > 0)
239
                {
240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
                    // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
                    _boxWidth = _boxHeight = sqrt(_minSize * _maxSize);
                    // xmin
                    outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
                    // ymin
                    outputPtr[idx++] = (center_y - _boxHeight / 2.) / _imageHeight;
                    // xmax
                    outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth;
                    // ymax
                    outputPtr[idx++] = (center_y + _boxHeight / 2.) / _imageHeight;
                }

                // rest of priors
                for (size_t r = 0; r < _aspectRatios.size(); ++r)
                {
                    float ar = _aspectRatios[r];
                    if (fabs(ar - 1.) < 1e-6)
                    {
                        continue;
                    }
                    _boxWidth = _minSize * sqrt(ar);
                    _boxHeight = _minSize / sqrt(ar);
                    // xmin
                    outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
                    // ymin
                    outputPtr[idx++] = (center_y - _boxHeight / 2.) / _imageHeight;
                    // xmax
                    outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth;
                    // ymax
                    outputPtr[idx++] = (center_y + _boxHeight / 2.) / _imageHeight;
270 271 272
                }
            }
        }
273 274 275 276 277 278 279 280 281 282 283
        // clip the prior's coordidate such that it is within [0, 1]
        if (_clip)
        {
            for (size_t d = 0; d < _outChannelSize; ++d)
            {
                outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
            }
        }
        // set the variance.
        outputPtr = outputs[0].ptr<float>(0, 1);
        if(_variance.size() == 1)
284
        {
285 286
            Mat secondChannel(outputs[0].size[2], outputs[0].size[3], CV_32F, outputPtr);
            secondChannel.setTo(Scalar(_variance[0]));
287
        }
288
        else
289
        {
290 291
            int count = 0;
            for (size_t h = 0; h < _layerHeight; ++h)
292
            {
293
                for (size_t w = 0; w < _layerWidth; ++w)
294
                {
295
                    for (size_t i = 0; i < _numPriors; ++i)
296
                    {
297 298 299 300 301
                        for (int j = 0; j < 4; ++j)
                        {
                            outputPtr[count] = _variance[j];
                            ++count;
                        }
302 303 304 305 306
                    }
                }
            }
        }
    }
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341

    // Width/height of the first input (its dims 3 and 2), set in allocate().
    size_t _layerWidth;
    size_t _layerHeight;

    // Width/height of the second input (its dims 3 and 2), set in allocate().
    size_t _imageWidth;
    size_t _imageHeight;

    // Number of floats per output channel:
    // _layerHeight * _layerWidth * _numPriors * 4, set in allocate().
    size_t _outChannelSize;

    // Image size divided by layer size along each axis, set in allocate().
    float _stepX;
    float _stepY;

    // "min_size" parameter (asserted > 0) and "max_size" parameter
    // (-1 when the parameter is absent).
    float _minSize;
    float _maxSize;

    // Size of the box currently being written; overwritten inside forward().
    float _boxWidth;
    float _boxHeight;

    // Distinct aspect ratios (always starts with 1) and the configured
    // variance values (one or four entries), filled by the constructor.
    std::vector<float> _aspectRatios;
    std::vector<float> _variance;

    // "flip": also store the reciprocal of each aspect ratio.
    // "clip": clamp the output coordinates to [0, 1].
    bool _flip;
    bool _clip;

    // Number of prior boxes generated per layer cell.
    size_t _numPriors;

    // NOTE(review): _numAxes is not referenced anywhere in the visible code.
    static const size_t _numAxes = 4;
    // Layer name used as the prefix of the missing-parameter error message.
    static const std::string _layerName;
};

// Name prefix used in this layer's missing-parameter error message.
const std::string PriorBoxLayerImpl::_layerName("PriorBox");

// Factory: constructs the PriorBox layer implementation from params and
// returns it through the PriorBoxLayer interface pointer.
Ptr<PriorBoxLayer> PriorBoxLayer::create(const LayerParams &params)
{
    Ptr<PriorBoxLayer> layer(new PriorBoxLayerImpl(params));
    return layer;
}
343

344 345
}
}