Commit bf9e9b81 authored by Vitaliy Lyudvichenko

Adding of OCL and public interface for Softmax layer

parent d8507fef
......@@ -260,6 +260,13 @@ namespace dnn
static Ptr<PoolingLayer> create(int type = MAX, Size kernel = Size(2, 2), Size stride = Size(1, 1), Size pad = Size(0, 0));
};
class CV_EXPORTS_W SoftmaxLayer : public Layer
{
public:
static Ptr<SoftmaxLayer> create(int axis = 1);
};
//! @}
//! @}
......
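For orientation, a minimal usage sketch of the new public interface could look as follows. This is an illustrative example rather than part of the diff: it assumes a 4-integer BlobShape constructor and Blob::create(shape) as used elsewhere in the module, and it leaves filling the input data to the caller.

#include <opencv2/dnn.hpp>
#include <vector>
using namespace cv;
using namespace cv::dnn;

int main()
{
    Blob input;
    input.create(BlobShape(1, 3, 4, 4));                  // NCHW input, assumed filled by the caller
    Ptr<SoftmaxLayer> softmax = SoftmaxLayer::create(1);  // normalize along axis 1 (channels)

    std::vector<Blob*> inputs(1, &input);
    std::vector<Blob> outputs;
    softmax->allocate(inputs, outputs);                   // outputs[0] gets the input's shape
    softmax->forward(inputs, outputs);                    // each channel column of outputs[0] now sums to 1
    return 0;
}

With axis = 1 the layer treats every (image, spatial position) pair independently and normalizes across channels, which mirrors the default axis read by createSoftmaxLayerFromCaffe.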
......@@ -77,7 +77,7 @@ void initModule()
return;
REG_RUNTIME_LAYER_CLASS(Slice, SliceLayer)
REG_STATIC_LAYER_FUNC(Softmax, createSoftmaxLayerFromCaffe)
REG_RUNTIME_LAYER_CLASS(Split, SplitLayer)
REG_RUNTIME_LAYER_CLASS(Reshape, ReshapeLayer)
REG_STATIC_LAYER_FUNC(Flatten, createFlattenLayer)
......
......@@ -269,7 +269,7 @@ Ptr<PoolingLayer> PoolingLayer::create(int type, Size kernel, Size stride, Size
Ptr<Layer> createPoolingLayerFromCaffe(LayerParams &params)
{
int type;
Size kernel, stride, pad;
if (params.has("pool"))
{
......
......@@ -42,6 +42,8 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "softmax_layer.hpp"
#include <opencv2/core/ocl.hpp>
#include "modules/dnn/opencl_kernels_dnn.hpp"
#include <algorithm>
#include <stdlib.h>
using std::max;
......@@ -50,95 +52,179 @@ namespace cv
{
namespace dnn
{
SoftMaxLayerImpl::SoftMaxLayerImpl(int axis)
{
    axisRaw = axis;
}

void SoftMaxLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(inputs.size() == 1);
    axis = inputs[0]->canonicalAxis(axisRaw);

    useOpenCL = ocl::useOpenCL();

    BlobShape shape = inputs[0]->shape();
    outerSize = shape.total(0, axis);
    channels = shape[axis];
    innerSize = shape.total(axis + 1);

    int allocFlag = (useOpenCL) ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
    shape[axis] = 1;
    buf.create(shape, inputs[0]->type(), allocFlag);

    outputs.resize(1);
    outputs[0].create(inputs[0]->shape(), inputs[0]->type(), allocFlag);
}

void SoftMaxLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    Blob &src = *inputs[0];
    Blob &dst = outputs[0];

    if (!useOpenCL)
        forward_cpu(src, dst);
    else
    {
        CV_Assert(forward_ocl(src, dst));
    }
}

#ifdef HAVE_OPENCL
bool SoftMaxLayerImpl::forward_ocl(Blob &src, Blob &dst)
{
    const UMat &srcMat = src.umatRefConst();
    UMat &dstMat = dst.umatRef();
    srcMat.copyTo(dstMat);
    UMat &bufMat = buf.umatRef();
    CV_Assert(dstMat.offset == 0);

    String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
    ocl::Kernel kmax, ksub, ksum, kdiv;

    if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
        return false;

    if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
        return false;

    if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
        return false;

    if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
        return false;

    size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
    size_t bufSize = buf.total();
    size_t totalSize = src.total();

    kmax.args((int)outerSize, (int)channels, (int)innerSize,
              ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
    if (!kmax.run(1, &bufSize, &wgSize, true))
        return false;

    ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
              ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
    if (!ksub.run(1, &totalSize, &wgSize, true))
        return false;

    cv::exp(dstMat, dstMat);

    ksum.args((int)outerSize, (int)channels, (int)innerSize,
              ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
    if (!ksum.run(1, &bufSize, &wgSize, true))
        return false;

    kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
              ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
    if (!kdiv.run(1, &totalSize, &wgSize, true))
        return false;

    return true;
}
#else
bool SoftMaxLayerImpl::forward_ocl(Blob&, Blob&)
{
    return false;
}
#endif

void SoftMaxLayerImpl::forward_cpu(Blob &src, Blob &dst)
{
    CV_Assert(src.type() == CV_32F);

    float *srcPtr = src.ptrf();
    float *dstPtr = dst.ptrf();
    float *bufPtr = buf.ptrf();

    size_t outerStep = src.total(axis);
    size_t cnStep = src.total(axis + 1);

    //compute max along axis
    for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
    {
        size_t srcOffset = outerDim * outerStep;
        size_t bufOffset = outerDim * cnStep;

        memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));

        for (size_t cnDim = 1; cnDim < channels; cnDim++)
        {
            for (size_t i = 0; i < innerSize; i++)
                bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
        }
    }

    //subtract max
    for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
    {
        size_t srcOffset = outerDim * outerStep;
        size_t bufOffset = outerDim * cnStep;

        for (size_t cnDim = 0; cnDim < channels; cnDim++)
        {
            for (size_t i = 0; i < innerSize; i++)
                dstPtr[srcOffset + cnDim * cnStep + i] = srcPtr[srcOffset + cnDim * cnStep + i] - bufPtr[bufOffset + i];
        }
    }

    cv::exp(dst.matRef(), dst.matRef());

    for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
    {
        size_t srcOffset = outerDim * outerStep;
        size_t bufOffset = outerDim * cnStep;

        //sum exp along axis
        for (size_t i = 0; i < innerSize; i++)
            bufPtr[bufOffset + i] = 0.f;
        for (size_t cnDim = 0; cnDim < channels; cnDim++)
        {
            for (size_t i = 0; i < innerSize; i++)
                bufPtr[bufOffset + i] += dstPtr[srcOffset + cnDim * cnStep + i];
        }

        //divide by computed sum
        for (size_t cnDim = 0; cnDim < channels; cnDim++)
        {
            for (size_t i = 0; i < innerSize; i++)
                dstPtr[srcOffset + cnDim * cnStep + i] /= bufPtr[bufOffset + i];
        }
    }
}
Ptr<SoftmaxLayer> SoftmaxLayer::create(int axis)
{
return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(axis));
}
Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params)
{
int axis = params.get<int>("axis", 1);
return Ptr<Layer>(SoftmaxLayer::create(axis));
}
}
}
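As a reading aid (not part of the commit), both the CPU path and the OpenCL kernel chain compute the same numerically stable softmax over the canonical axis. Writing n for the outer index, s for the inner (spatial) index and c for the position along the softmax axis:

    y_{n,c,s} = \frac{\exp(x_{n,c,s} - \max_k x_{n,k,s})}{\sum_j \exp(x_{n,j,s} - \max_k x_{n,k,s})}

For an input of shape [2, 3, 4, 4] with axis = 1 (an illustrative shape, not taken from the diff), allocate() yields outerSize = 2, channels = 3 and innerSize = 16, and buf is created with shape [2, 1, 4, 4] so it can hold first the per-position maxima and later the per-position sums.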
......@@ -42,21 +42,33 @@
#ifndef __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class SoftMaxLayerImpl : public SoftmaxLayer
{
    int axis, axisRaw;
    Blob buf;
    bool useOpenCL;
    size_t outerSize, channels, innerSize;

    bool forward_ocl(Blob &src, Blob &dst);
    void forward_cpu(Blob &src, Blob &dst);

public:

    SoftMaxLayerImpl(int axis = 1);

    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params);
}
}
#endif
/*************************************************************************************
* Copyright (c) 2015, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
**************************************************************************************/
__kernel void kernel_channel_max(const int num, const int channels,
const int spatial_dim, __global const T* data, __global T* out) {
int index = get_global_id(0);
if(index < num * spatial_dim) {
int n = index / spatial_dim;
int s = index % spatial_dim;
T maxval = -FLT_MAX;
for (int c = 0; c < channels; ++c) {
maxval = max(data[(n * channels + c) * spatial_dim + s], maxval);
}
out[index] = maxval;
}
}
__kernel void kernel_channel_subtract(const int count,
const int num, const int channels,
const int spatial_dim, __global const T* channel_max, __global T* data) {
int index = get_global_id(0);
if(index < count) {
int n = index / channels / spatial_dim;
int s = index % spatial_dim;
data[index] -= channel_max[n * spatial_dim + s];
}
}
__kernel void kernel_channel_sum(const int num, const int channels,
const int spatial_dim, __global const T* data, __global T* channel_sum) {
int index = get_global_id(0);
if(index < num * spatial_dim) {
int n = index / spatial_dim;
int s = index % spatial_dim;
T sum = 0;
for (int c = 0; c < channels; ++c) {
sum += data[(n * channels + c) * spatial_dim + s];
}
channel_sum[index] = sum;
}
}
__kernel void kernel_channel_div(const int count,
const int num, const int channels,
const int spatial_dim, __global const T* channel_sum, __global T* data) {
int index = get_global_id(0);
if(index < count) {
int n = index / channels / spatial_dim;
int s = index % spatial_dim;
data[index] /= channel_sum[n * spatial_dim + s];
}
}
\ No newline at end of file
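A note on the kernel indexing (again illustrative, not part of the commit): kernel_channel_max and kernel_channel_sum launch one work-item per (n, s) pair (num * spatial_dim items), while kernel_channel_subtract and kernel_channel_div launch one work-item per element (count items). Reusing the shape from the example above (num = 2, channels = 3, spatial_dim = 16, count = 96), the work-item with index 50 in kernel_channel_subtract computes

    n = index / channels / spatial_dim = 50 / 3 / 16 = 1
    s = index % spatial_dim            = 50 % 16     = 2
    data[50] -= channel_max[1 * 16 + 2]

i.e. the element at image n = 1, channel c = 0, position s = 2 has the maximum over its 3 channels subtracted. The `if (index < ...)` guards are needed because the host side may round the global work size up to a multiple of the work-group size, so some work-items can fall outside the valid range.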
......@@ -87,7 +87,7 @@ void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool
TEST(Layer_Test_Softmax, Accuracy)
{
OCL_OFF(testLayerUsingCaffeModels("layer_softmax"));
}
OCL_TEST(Layer_Test_Softmax, Accuracy)
{
......