/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_DNN_LAYERS_ELEMENTWISE_LAYERS_HPP__
#define __OPENCV_DNN_LAYERS_ELEMENTWISE_LAYERS_HPP__
#include "../precomp.hpp"
#include "layers_common.hpp"
#include <cmath>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/core/ocl.hpp>
#ifdef HAVE_OPENCL
#include "modules/dnn/opencl_kernels_dnn.hpp"
#endif

namespace cv
{
namespace dnn
{

// Bring the <cmath> overload sets into cv::dnn so that the unqualified math calls made
// by the functors in this header resolve to the overload matching the argument type
// (float stays float) rather than to a C library function from the global namespace.
using std::abs;
using std::exp;
using std::log;  // used by BNLLFunctor
using std::tanh;
using std::pow;

/**
 * Layer that applies a scalar functor to every element of each input blob.
 *
 * Outputs are set up in allocate() to share storage with the corresponding inputs,
 * so forward() transforms the data in place.
 *
 * @tparam Func functor type. As used below it must provide:
 *   - a nested type `Func::Layer`, which becomes the base class of this layer;
 *   - a call operator usable as `func(value)` for float and double values;
 *   - when HAVE_OPENCL is defined, `initKernel(ocl::Kernel&, const UMat&)` returning
 *     a value convertible to bool (true on success).
 */
template<typename Func>
class ElementWiseLayer : public Func::Layer
{
    bool useOpenCL; // backend selected in allocate(); constructors start it at false
    Func func;      // per-element operation

    /**
     * Body for cv::parallel_for_: applies the functor in place to the elements
     * data[r.start] .. data[r.end - 1] of the flat array starting at mat.ptr().
     */
    template<typename Dtype>
    class PBody : public cv::ParallelLoopBody
    {
        Func &func;
        Dtype *data;
    public:

        PBody(Mat &mat, Func &func_) :
            func(func_), data(mat.ptr<Dtype>())
        {}

        void operator()(const Range &r) const
        {
            for (int i = r.start; i < r.end; i++)
                data[i] = func(data[i]);
        }
    };

public:

    // useOpenCL is initialised here so that forward() never reads an indeterminate
    // flag if it is reached before allocate().
    ElementWiseLayer() : useOpenCL(false) {}
    ElementWiseLayer(const Func &f) : useOpenCL(false), func(f) {}

    /**
     * Creates one output per input, each sharing the input's data, and records
     * whether the OpenCL path will be used (ocl::useOpenCL() at the time of the call).
     */
    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        useOpenCL = ocl::useOpenCL();

        outputs.resize(inputs.size());
        for (size_t i = 0; i < inputs.size(); i++)
        {
            outputs[i].shareFrom(*inputs[i]); //no data copy

            //hotfix: shareFrom does not switch between Mat and UMat properly,
            //so the header of the active backend is assigned explicitly
            if (useOpenCL)
                outputs[i].umatRef() = inputs[i]->umatRefConst();
            else
                outputs[i].matRef() = inputs[i]->matRefConst();
        }
    }

    /**
     * Runs the functor over all inputs, using the OpenCL path when it was selected
     * in allocate() and the build has HAVE_OPENCL, and the CPU path otherwise.
     */
    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        #ifdef HAVE_OPENCL
        if (useOpenCL)
            forwardOCL(inputs, outputs);
        else
        #endif
            forwardCPU(inputs, outputs);
    }

    #ifdef HAVE_OPENCL
    /**
     * OpenCL path. For each input a kernel is obtained from the functor and run over
     * src.total() work items. Kernel arguments set here: 0 = element count,
     * 1 = source buffer (read-only), 2 = destination buffer (write-only); the functor's
     * initKernel() may set further arguments starting at index 3.
     * Fails a CV_Assert if a buffer is non-continuous or has an offset, or if kernel
     * creation or execution fails.
     */
    void forwardOCL(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();

        for (size_t i = 0; i < inputs.size(); i++)
        {
            const UMat &src = inputs[i]->umatRefConst();
            UMat &dst = outputs[i].umatRef();
            CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);

            ocl::Kernel ker;
            CV_Assert(func.initKernel(ker, src));
            ker.set(0, (int)src.total());
            ker.set(1, ocl::KernelArg::PtrReadOnly(src));
            ker.set(2, ocl::KernelArg::PtrWriteOnly(dst));

            size_t gSize = src.total();
            // 1-D launch; the trailing `true` is Kernel::run's sync flag
            CV_Assert(ker.run(1, &gSize, &wgSize, true));
        }
    }
    #endif

    /**
     * CPU path. Requires that each output aliases its input (same data pointer) and
     * that the data is continuous, then applies the functor in place via
     * cv::parallel_for_. Only CV_32F and CV_64F element types are handled; any other
     * type raises Error::StsNotImplemented.
     */
    void forwardCPU(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        for (size_t i = 0; i < inputs.size(); i++)
        {
            const Mat &src = inputs[i]->matRefConst();
            Mat &dst = outputs[i].matRef();
            CV_Assert(src.ptr() == dst.ptr() && src.isContinuous());

            // Range holds ints; the narrowing from size_t is made explicit here,
            // mirroring the (int)src.total() cast on the OpenCL path.
            Range sizeRange = Range(0, (int)dst.total());
            if (dst.type() == CV_32F)
            {
                cv::parallel_for_(sizeRange, PBody<float>(dst, func));
            }
            else if (dst.type() == CV_64F)
            {
                cv::parallel_for_(sizeRange, PBody<double>(dst, func));
            }
            else
            {
                CV_Error(Error::StsNotImplemented, "Only CV_32F and CV_64F blobs are supported");
            }
        }
    }
};

#ifdef HAVE_OPENCL
/**
 * Builds the OpenCL build option that defines the macro T as the OpenCL type name
 * corresponding to the element type of @p m, i.e. "-DT=<type> " (note the trailing
 * space, which lets callers append further options directly).
 */
static String oclGetTMacro(const UMat &m)
{
    const char *typeName = ocl::typeToStr(m.type());
    String option("-DT=");
    option = option + typeName;
    return option + String(" ");
}
#endif

/**
 * Rectifier with a configurable negative slope: values satisfying x >= 0 pass through
 * unchanged, all other values are multiplied by `slope`.
 */
struct ReLUFunctor
{
    typedef ReLULayer Layer;

    double slope; // factor applied to inputs that fail the x >= 0 test

    ReLUFunctor(double slope_)
        : slope(slope_) {}

    /** Applies the rectifier to a single value. */
    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        if (x >= static_cast<TFloat>(0))
            return x;
        return static_cast<TFloat>(slope) * x;
    }

    #ifdef HAVE_OPENCL
    /**
     * Creates the "ReLUForward" kernel for the element type of @p src.
     * When slope is exactly 0 the program is built with -DRELU_NO_SLOPE and no extra
     * argument is set; otherwise the slope is passed as kernel argument 3 (as float).
     * @return false if the kernel could not be created, true otherwise.
     */
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        const bool hasSlope = (slope != 0);

        String buildopt = oclGetTMacro(src);
        if (!hasSlope)
            buildopt = buildopt + "-DRELU_NO_SLOPE";

        const bool created = ker.create("ReLUForward", ocl::dnn::activations_oclsrc, buildopt);
        if (created && hasSlope)
            ker.set(3, static_cast<float>(slope));

        return created;
    }
    #endif
};

/**
 * Hyperbolic tangent activation.
 */
struct TanHFunctor
{
    typedef TanHLayer Layer;

    /** Returns tanh of a single value. */
    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        const TFloat result = tanh(x);
        return result;
    }

    #ifdef HAVE_OPENCL
    /**
     * Creates the "TanHForward" kernel for the element type of @p src.
     * @return true if the kernel was created, false otherwise.
     */
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        const String buildopt = oclGetTMacro(src);
        return ker.create("TanHForward", ocl::dnn::activations_oclsrc, buildopt);
    }
    #endif
};

/**
 * Logistic sigmoid activation: 1 / (1 + exp(-x)).
 */
struct SigmoidFunctor
{
    typedef SigmoidLayer Layer;

    /** Evaluates the sigmoid of a single value. */
    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        const TFloat one = static_cast<TFloat>(1);
        const TFloat denominator = one + exp(-x);
        return one / denominator;
    }

    #ifdef HAVE_OPENCL
    /**
     * Creates the "SigmoidForward" kernel for the element type of @p src.
     * @return true if the kernel was created, false otherwise.
     */
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        const String buildopt = oclGetTMacro(src);
        return ker.create("SigmoidForward", ocl::dnn::activations_oclsrc, buildopt);
    }
    #endif
};

/**
 * Absolute value activation.
 */
struct AbsValFunctor
{
    typedef AbsLayer Layer;

    /** Returns the absolute value of a single element. */
    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        const TFloat magnitude = abs(x);
        return magnitude;
    }

    #ifdef HAVE_OPENCL
    /**
     * Creates the "AbsValForward" kernel for the element type of @p src.
     * @return true if the kernel was created, false otherwise.
     */
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        const String buildopt = oclGetTMacro(src);
        return ker.create("AbsValForward", ocl::dnn::activations_oclsrc, buildopt);
    }
    #endif
};

/**
 * BNLL (binomial normal log likelihood) activation: y = log(1 + exp(x)).
 *
 * Evaluated in the overflow-safe form
 *     y = x + log(1 + exp(-x))   for x > 0
 *     y =     log(1 + exp( x))   otherwise
 * Both branches share the term log(1 + exp(-|x|)).
 */
struct BNLLFunctor
{
    typedef BNLLLayer Layer;

    /** Evaluates BNLL for a single value. */
    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        // exp(-|x|) never exceeds 1, so this term cannot overflow.
        // The math calls are std::-qualified so the TFloat overloads are always used.
        const TFloat tail = std::log((TFloat)1 + std::exp(-std::abs(x)));

        // The linear term is required for positive inputs; returning only `tail`
        // would give log(1 + exp(-x)) instead of log(1 + exp(x)) there.
        return (x > (TFloat)0) ? x + tail : tail;
    }

    #ifdef HAVE_OPENCL
    /**
     * Creates the "BNLLForward" kernel for the element type of @p src.
     * @return true if the kernel was created, false otherwise.
     */
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        if (!ker.create("BNLLForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
            return false;
        return true;
    }
    #endif
};

/**
 * Power activation: y = (shift + scale * x) ^ power.
 */
struct PowerFunctor
{
    typedef PowerLayer Layer;

    // exponent, input multiplier and input offset of the formula above
    double power, scale, shift;

    PowerFunctor(double power_, double scale_ = 1, double shift_ = 0)
        : power(power_), scale(scale_), shift(shift_) {}

    /** Evaluates the power function for a single value. */
    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        const TFloat base = static_cast<TFloat>(shift) + static_cast<TFloat>(scale) * x;
        const TFloat exponent = static_cast<TFloat>(power);
        return pow(base, exponent);
    }

    #ifdef HAVE_OPENCL
    /**
     * Creates the "PowForward" kernel for the element type of @p src and, on success,
     * passes power, scale and shift (each converted to float) as kernel arguments
     * 3, 4 and 5.
     * @return true if the kernel was created, false otherwise.
     */
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        const bool created = ker.create("PowForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src));
        if (created)
        {
            ker.set(3, static_cast<float>(power));
            ker.set(4, static_cast<float>(scale));
            ker.set(5, static_cast<float>(shift));
        }
        return created;
    }
    #endif
};

}
}
#endif