/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "../precomp.hpp"
#include "layers_common.hpp"
#include "lrn_layer.hpp"
#include "opencl_kernels_dnn.hpp"
#include <opencv2/imgproc.hpp>
#include <opencv2/core/ocl.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <algorithm>

namespace cv
{
namespace dnn
{

LRNLayerImpl::LRNLayerImpl(int type_, int size_, double alpha_, double beta_, double bias_, bool normBySize_)
{
    type = type_;
    size = size_;
    alpha = alpha_;
    beta = beta_;
    bias = bias_;
    normBySize = normBySize_;
}

void LRNLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(inputs.size() == 1 && inputs[0]->dims() == 4);
    CV_Assert(type == CHANNEL_NRM || type == SPATIAL_NRM);
    useOpenCL = cv::ocl::useOpenCL();

    if (type == SPATIAL_NRM && !useOpenCL)
        buf.create(inputs[0]->shape().slice(2), inputs[0]->type(), Blob::ALLOC_MAT);
    if (type == CHANNEL_NRM && useOpenCL)
        buf.create(inputs[0]->shape().slice(2), inputs[0]->type(), Blob::ALLOC_UMAT);

    outputs.resize(1);
    outputs[0].create(inputs[0]->shape(), inputs[0]->type());
}

void LRNLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    Blob &src = *inputs[0];
    Blob &dst = outputs[0];

    switch (type)
    {
    case CHANNEL_NRM:
        channelNoramlization(src, dst);
        break;
    case SPATIAL_NRM:
        spatialNormalization(src, dst);
        break;
    default:
        CV_Error(Error::StsNotImplemented, "Unimplemented mode of LRN layer");
        break;
    }
}

template<typename XMat>
static XMat getPlane(XMat &m, int n, int cn)
{
    return reshaped(slice(m, n, cn), BlobShape::like(m).slice(2));
}

void LRNLayerImpl::channelNoramlization(Blob &src, Blob &dst)
{
    if (!useOpenCL)
        channelNormalization_<Mat>(src, dst);
    else
    {
        //channelNoramlization_ocl(src.getRefConst<UMat>(), dst.getRef<UMat>()); //consumes a lot of memory
        channelNormalization_<UMat>(src, dst);
    }
}

template<typename XMat>
void LRNLayerImpl::channelNormalization_(Blob &srcBlob, Blob &dstBlob)
{
    int num = srcBlob.num();
    int channels = srcBlob.channels();
    int ksize = (size - 1) / 2;
    int sizeNormFactor = normBySize ? size : 1;

    XMat srcMat = srcBlob.getRefConst<XMat>().clone();
    XMat dstMat = dstBlob.getRef<XMat>();

    for (int n = 0; n < num; n++)
    {
        XMat accum = getPlane(dstMat, n, channels-1); //trick for memory saving
        accum.setTo(0);

        for (int cn = 0; cn < std::min(ksize, channels); cn++)
            cv::accumulateSquare(getPlane(srcMat, n, cn), accum);

        for (int cn = 0; cn < channels; cn++)
        {
            if (cn + ksize < channels)
            {
                cv::accumulateSquare(getPlane(srcMat, n, cn + ksize), accum);
            }

            if (cn - ksize - 1 >= 0)
            {
                //subtractSquare
                XMat left = getPlane(srcMat, n, cn - ksize - 1);
                cv::pow(left, 2, left);
                cv::subtract(accum, left, accum);
            }

            XMat dst = getPlane(dstMat, n, cn);
            accum.convertTo(dst, dst.type(), alpha/sizeNormFactor, bias);
            cv::pow(dst, beta, dst);
            cv::divide(getPlane(srcMat, n, cn), dst, dst);
        }
    }
}

bool LRNLayerImpl::channelNormalization_ocl(const UMat &src, UMat &dst)
{
#ifdef HAVE_OPENCL
    if (src.offset != 0 || dst.offset != 0) //TODO: add offset
        return false;

    String buildOpts = String("-DT=") + ocl::typeToStr(src.type());

    ocl::Kernel kerScale("LRNFillScale", ocl::dnn::lrn_oclsrc, buildOpts);
    if (kerScale.empty())
        return false;

    ocl::Kernel kerOutput("LRNComputeOutput", ocl::dnn::lrn_oclsrc, buildOpts);
    if (kerOutput.empty())
        return false;

    Shape shape = Shape::like(src);
    int ksize = (size - 1) / 2;
    int sizeNormFactor = normBySize ? size : 1;
    // TODO: add bias
    size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
    UMat &scaleBuf = buf.umatRef();

    size_t nthreads = (size_t)(shape.total() / shape[1]);
    kerScale.args((int)nthreads,
                  ocl::KernelArg::PtrReadOnly(src), shape[0], shape[1], shape[2], shape[3],
                  size, (float)(alpha/sizeNormFactor), (float)ksize, ocl::KernelArg::PtrWriteOnly(scaleBuf));
    if (!kerScale.run(1, &nthreads, &wgSize, true))
        return false;

    nthreads = (size_t)shape.total();
    kerOutput.args((int)nthreads,
                   ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrReadOnly(scaleBuf),
                   -beta, ocl::KernelArg::PtrWriteOnly(dst) );
    if (!kerOutput.run(1, &nthreads, &wgSize, true))
        return false;

    return true;
#else
    (void)src;
    (void)dst;
    return false;
#endif
}

void LRNLayerImpl::spatialNormalization(Blob &src, Blob &dst)
{
    if (!useOpenCL)
        spatialNormalization_<Mat>(src, dst);
    else
        spatialNormalization_<UMat>(src, dst);
}

//TODO: fix cv::boxFilter with BORDER_ISOLATED flag in CPU mode
template<>
void LRNLayerImpl::sqrBoxFilter_<Mat>(const Mat &src, Mat &dst)
{
    Mat srcRawWrapper(src.rows, src.cols, src.type(), src.data, src.step[0]);
    cv::sqrBoxFilter(srcRawWrapper, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT);
}

template<>
void LRNLayerImpl::sqrBoxFilter_<UMat>(const UMat &src, UMat &dst)
{
    cv::sqrBoxFilter(src, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT | BORDER_ISOLATED);
}

template<typename XMat>
void LRNLayerImpl::spatialNormalization_(Blob &srcBlob, Blob &dstBlob)
{
    int num = srcBlob.num();
    int channels = srcBlob.channels();
    int sizeNormFactor = normBySize ? size*size : 1;

    XMat srcMat = srcBlob.getRefConst<XMat>();
    XMat dstMat = dstBlob.getRef<XMat>();

    for (int n = 0; n < num; n++)
    {
        for (int cn = 0; cn < channels; cn++)
        {
            XMat src = getPlane(srcMat, n, cn);
            XMat dst = getPlane(dstMat, n, cn);

            sqrBoxFilter_(src, dst);

            dst.convertTo(dst, dst.type(), alpha/sizeNormFactor, bias);
            cv::pow(dst, beta, dst);
            cv::divide(src, dst, dst);
        }
    }
}


Ptr<LRNLayer> LRNLayer::create(int type, int size, double alpha, double beta, double bias,
                               bool normBySize)
{
    return Ptr<LRNLayer>(new LRNLayerImpl(type, size, alpha, beta, bias, normBySize));
}

}
}